1 |
|
2 |
/***************************************************************************** |
3 |
* |
4 |
* Copyright (c) 2003-2013 by University of Queensland |
5 |
* http://www.uq.edu.au |
6 |
* |
7 |
* Primary Business: Queensland, Australia |
8 |
* Licensed under the Open Software License version 3.0 |
9 |
* http://www.opensource.org/licenses/osl-3.0.php |
10 |
* |
11 |
* Development until 2012 by Earth Systems Science Computational Center (ESSCC) |
12 |
* Development since 2012 by School of Earth Sciences |
13 |
* |
14 |
*****************************************************************************/ |
15 |
|
16 |
/************************************************************************************/ |
17 |
|
18 |
/* Some utility routines: */ |
19 |
|
20 |
/************************************************************************************/ |
21 |
#include "esysUtils/maths.h" |
22 |
#include "Util.h" |
23 |
|
24 |
#ifdef _OPENMP |
25 |
#include <omp.h> |
26 |
#endif |
27 |
|
28 |
#include "esysUtils/index.h" |
29 |
#include "esysUtils/mem.h" |
30 |
#include <limits.h> |
31 |
#include "string.h" /* for memcpy*/ |
32 |
|
33 |
/************************************************************************************/ |
34 |
|
35 |
/* returns true if any of the values in the short array values is not equalt to Zero */ |
36 |
|
37 |
bool_t Dudley_Util_anyNonZeroDouble(dim_t N, double *values) |
38 |
{ |
39 |
dim_t q; |
40 |
for (q = 0; q < N; ++q) |
41 |
if (ABS(values[q]) > 0) |
42 |
return TRUE; |
43 |
return FALSE; |
44 |
} |
45 |
|
46 |
/************************************************************************************/ |
47 |
|
48 |
/* gathers double values out from in by index: */ |
49 |
|
50 |
/* out(1:numData,1:len)=in(1:numData,index(1:len)) */ |
51 |
|
52 |
void Dudley_Util_Gather_double(dim_t len, index_t * index, dim_t numData, double *in, double *out) |
53 |
{ |
54 |
dim_t s, i; |
55 |
for (s = 0; s < len; s++) |
56 |
{ |
57 |
for (i = 0; i < numData; i++) |
58 |
{ |
59 |
out[INDEX2(i, s, numData)] = in[INDEX2(i, index[s], numData)]; |
60 |
} |
61 |
} |
62 |
} |
63 |
|
64 |
/************************************************************************************/ |
65 |
|
66 |
/* gathers maybelong values out from in by index: */ |
67 |
|
68 |
/* out(1:numData,1:len)=in(1:numData,index(1:len)) */ |
69 |
|
70 |
void Dudley_Util_Gather_int(dim_t len, index_t * index, dim_t numData, index_t * in, index_t * out) |
71 |
{ |
72 |
dim_t s, i; |
73 |
for (s = 0; s < len; s++) |
74 |
{ |
75 |
for (i = 0; i < numData; i++) |
76 |
{ |
77 |
out[INDEX2(i, s, numData)] = in[INDEX2(i, index[s], numData)]; |
78 |
} |
79 |
} |
80 |
} |
81 |
|
82 |
/************************************************************************************/ |
83 |
|
84 |
/* adds a vector in into out using and index. */ |
85 |
|
86 |
/* out(1:numData,index[p])+=in(1:numData,p) where p = {k=1...len , index[k]<upperBound}*/ |
87 |
|
88 |
void Dudley_Util_AddScatter(const dim_t len, const index_t * index, const dim_t numData, const double *in, double *out, const index_t upperBound) |
89 |
{ |
90 |
dim_t i, s; |
91 |
for (s = 0; s < len; s++) |
92 |
{ |
93 |
for (i = 0; i < numData; i++) |
94 |
{ |
95 |
if (index[s] < upperBound) |
96 |
{ |
97 |
out[INDEX2(i, index[s], numData)] += in[INDEX2(i, s, numData)]; |
98 |
} |
99 |
} |
100 |
} |
101 |
} |
102 |
|
103 |
/* multiplies two matrices */ |
104 |
|
105 |
/* A(1:A1,1:A2)=B(1:A1,1:B2)*C(1:B2,1:A2) */ |
106 |
|
107 |
void Dudley_Util_SmallMatMult(dim_t A1, dim_t A2, double *A, dim_t B2, const double *B, const double *C) |
108 |
{ |
109 |
dim_t i, j, s; |
110 |
register double rtmp; |
111 |
for (i = 0; i < A1; i++) |
112 |
{ |
113 |
for (j = 0; j < A2; j++) |
114 |
{ |
115 |
rtmp = 0; |
116 |
for (s = 0; s < B2; s++) |
117 |
{ |
118 |
rtmp += B[INDEX2(i, s, A1)] * C[INDEX2(s, j, B2)]; |
119 |
} |
120 |
A[INDEX2(i, j, A1)] = rtmp; |
121 |
} |
122 |
} |
123 |
} |
124 |
|
125 |
/* multiplies a two sets of matries: */ |
126 |
|
127 |
/* A(1:A1,1:A2,i)=B(1:A1,1:B2,i)*C(1:B2,1:A2,i) i=1,len */ |
128 |
|
129 |
void Dudley_Util_SmallMatSetMult(dim_t len, dim_t A1, dim_t A2, double *A, dim_t B2, const double *B, const double *C) |
130 |
{ |
131 |
dim_t q, i, j, s; |
132 |
register double rtmp; |
133 |
for (q = 0; q < len; q++) |
134 |
{ |
135 |
for (i = 0; i < A1; i++) |
136 |
{ |
137 |
for (j = 0; j < A2; j++) |
138 |
{ |
139 |
rtmp = 0; |
140 |
for (s = 0; s < B2; s++) |
141 |
rtmp += B[INDEX3(i, s, q, A1, B2)] * C[INDEX3(s, j, q, B2, A2)]; |
142 |
A[INDEX3(i, j, q, A1, A2)] = rtmp; |
143 |
} |
144 |
} |
145 |
} |
146 |
} |
147 |
|
148 |
/* multiplies a set of matries with a single matrix: */ |
149 |
|
150 |
/* A(1:A1,1:A2,i)=B(1:A1,1:B2,i)*C(1:B2,1:A2) i=1,len */ |
151 |
|
152 |
void Dudley_Util_SmallMatSetMult1(dim_t len, dim_t A1, dim_t A2, double *A, dim_t B2, const double *B, const double *C) |
153 |
{ |
154 |
dim_t q, i, j, s; |
155 |
register double rtmp; |
156 |
for (q = 0; q < len; q++) |
157 |
{ |
158 |
for (i = 0; i < A1; i++) |
159 |
{ |
160 |
for (j = 0; j < A2; j++) |
161 |
{ |
162 |
rtmp = 0; |
163 |
for (s = 0; s < B2; s++) |
164 |
rtmp += B[INDEX3(i, s, q, A1, B2)] * C[INDEX2(s, j, B2)]; |
165 |
A[INDEX3(i, j, q, A1, A2)] = rtmp; |
166 |
} |
167 |
} |
168 |
} |
169 |
} |
170 |
|
171 |
/* inverts the set of dim x dim matrices A(:,:,1:len) with dim=1,2,3 */ |
172 |
/* the determinante is returned. */ |
173 |
|
174 |
void Dudley_Util_InvertSmallMat(dim_t len, dim_t dim, double *A, double *invA, double *det) |
175 |
{ |
176 |
dim_t q; |
177 |
register double D, A11, A12, A13, A21, A22, A23, A31, A32, A33; |
178 |
|
179 |
switch (dim) |
180 |
{ |
181 |
case 1: |
182 |
for (q = 0; q < len; q++) |
183 |
{ |
184 |
D = A[q]; |
185 |
if (ABS(D) > 0) |
186 |
{ |
187 |
det[q] = D; |
188 |
D = 1. / D; |
189 |
invA[q] = D; |
190 |
} |
191 |
else |
192 |
{ |
193 |
Dudley_setError(ZERO_DIVISION_ERROR, __FILE__ ": Non-regular matrix"); |
194 |
return; |
195 |
} |
196 |
} |
197 |
break; |
198 |
|
199 |
case 2: |
200 |
for (q = 0; q < len; q++) |
201 |
{ |
202 |
A11 = A[INDEX3(0, 0, q, 2, 2)]; |
203 |
A12 = A[INDEX3(0, 1, q, 2, 2)]; |
204 |
A21 = A[INDEX3(1, 0, q, 2, 2)]; |
205 |
A22 = A[INDEX3(1, 1, q, 2, 2)]; |
206 |
|
207 |
D = A11 * A22 - A12 * A21; |
208 |
if (ABS(D) > 0) |
209 |
{ |
210 |
det[q] = D; |
211 |
D = 1. / D; |
212 |
invA[INDEX3(0, 0, q, 2, 2)] = A22 * D; |
213 |
invA[INDEX3(1, 0, q, 2, 2)] = -A21 * D; |
214 |
invA[INDEX3(0, 1, q, 2, 2)] = -A12 * D; |
215 |
invA[INDEX3(1, 1, q, 2, 2)] = A11 * D; |
216 |
} |
217 |
else |
218 |
{ |
219 |
Dudley_setError(ZERO_DIVISION_ERROR, __FILE__ ": Non-regular matrix"); |
220 |
return; |
221 |
} |
222 |
} |
223 |
break; |
224 |
|
225 |
case 3: |
226 |
for (q = 0; q < len; q++) |
227 |
{ |
228 |
A11 = A[INDEX3(0, 0, q, 3, 3)]; |
229 |
A21 = A[INDEX3(1, 0, q, 3, 3)]; |
230 |
A31 = A[INDEX3(2, 0, q, 3, 3)]; |
231 |
A12 = A[INDEX3(0, 1, q, 3, 3)]; |
232 |
A22 = A[INDEX3(1, 1, q, 3, 3)]; |
233 |
A32 = A[INDEX3(2, 1, q, 3, 3)]; |
234 |
A13 = A[INDEX3(0, 2, q, 3, 3)]; |
235 |
A23 = A[INDEX3(1, 2, q, 3, 3)]; |
236 |
A33 = A[INDEX3(2, 2, q, 3, 3)]; |
237 |
|
238 |
D = A11 * (A22 * A33 - A23 * A32) + A12 * (A31 * A23 - A21 * A33) + A13 * (A21 * A32 - A31 * A22); |
239 |
if (ABS(D) > 0) |
240 |
{ |
241 |
det[q] = D; |
242 |
D = 1. / D; |
243 |
invA[INDEX3(0, 0, q, 3, 3)] = (A22 * A33 - A23 * A32) * D; |
244 |
invA[INDEX3(1, 0, q, 3, 3)] = (A31 * A23 - A21 * A33) * D; |
245 |
invA[INDEX3(2, 0, q, 3, 3)] = (A21 * A32 - A31 * A22) * D; |
246 |
invA[INDEX3(0, 1, q, 3, 3)] = (A13 * A32 - A12 * A33) * D; |
247 |
invA[INDEX3(1, 1, q, 3, 3)] = (A11 * A33 - A31 * A13) * D; |
248 |
invA[INDEX3(2, 1, q, 3, 3)] = (A12 * A31 - A11 * A32) * D; |
249 |
invA[INDEX3(0, 2, q, 3, 3)] = (A12 * A23 - A13 * A22) * D; |
250 |
invA[INDEX3(1, 2, q, 3, 3)] = (A13 * A21 - A11 * A23) * D; |
251 |
invA[INDEX3(2, 2, q, 3, 3)] = (A11 * A22 - A12 * A21) * D; |
252 |
} |
253 |
else |
254 |
{ |
255 |
Dudley_setError(ZERO_DIVISION_ERROR, __FILE__ ": Non-regular matrix"); |
256 |
return; |
257 |
} |
258 |
} |
259 |
break; |
260 |
|
261 |
} |
262 |
return; |
263 |
} |
264 |
|
265 |
/* sets the derterminate of a set of dim x dim matrices A(:,:,1:len) with dim=1,2,3 */ |
266 |
|
267 |
void Dudley_Util_DetOfSmallMat(dim_t len, dim_t dim, double *A, double *det) |
268 |
{ |
269 |
dim_t q; |
270 |
register double A11, A12, A13, A21, A22, A23, A31, A32, A33; |
271 |
|
272 |
switch (dim) |
273 |
{ |
274 |
case 1: |
275 |
for (q = 0; q < len; q++) |
276 |
{ |
277 |
det[q] = A[q]; |
278 |
} |
279 |
break; |
280 |
|
281 |
case 2: |
282 |
for (q = 0; q < len; q++) |
283 |
{ |
284 |
A11 = A[INDEX3(0, 0, q, 2, 2)]; |
285 |
A12 = A[INDEX3(0, 1, q, 2, 2)]; |
286 |
A21 = A[INDEX3(1, 0, q, 2, 2)]; |
287 |
A22 = A[INDEX3(1, 1, q, 2, 2)]; |
288 |
|
289 |
det[q] = A11 * A22 - A12 * A21; |
290 |
} |
291 |
break; |
292 |
|
293 |
case 3: |
294 |
for (q = 0; q < len; q++) |
295 |
{ |
296 |
A11 = A[INDEX3(0, 0, q, 3, 3)]; |
297 |
A21 = A[INDEX3(1, 0, q, 3, 3)]; |
298 |
A31 = A[INDEX3(2, 0, q, 3, 3)]; |
299 |
A12 = A[INDEX3(0, 1, q, 3, 3)]; |
300 |
A22 = A[INDEX3(1, 1, q, 3, 3)]; |
301 |
A32 = A[INDEX3(2, 1, q, 3, 3)]; |
302 |
A13 = A[INDEX3(0, 2, q, 3, 3)]; |
303 |
A23 = A[INDEX3(1, 2, q, 3, 3)]; |
304 |
A33 = A[INDEX3(2, 2, q, 3, 3)]; |
305 |
|
306 |
det[q] = A11 * (A22 * A33 - A23 * A32) + A12 * (A31 * A23 - A21 * A33) + A13 * (A21 * A32 - A31 * A22); |
307 |
} |
308 |
break; |
309 |
|
310 |
} |
311 |
return; |
312 |
} |
313 |
|
314 |
/* returns the normalized vector Normal[dim,len] orthogonal to A(:,0,q) and A(:,1,q) in the case of dim=3 */ |
315 |
/* or the vector A(:,0,q) in the case of dim=2 */ |
316 |
|
317 |
void Dudley_NormalVector(dim_t len, dim_t dim, dim_t dim1, double *A, double *Normal) |
318 |
{ |
319 |
dim_t q; |
320 |
register double A11, A12, CO_A13, A21, A22, CO_A23, A31, A32, CO_A33, length, invlength; |
321 |
|
322 |
switch (dim) |
323 |
{ |
324 |
case 1: |
325 |
for (q = 0; q < len; q++) |
326 |
Normal[q] = 1; |
327 |
break; |
328 |
case 2: |
329 |
for (q = 0; q < len; q++) |
330 |
{ |
331 |
A11 = A[INDEX3(0, 0, q, 2, dim1)]; |
332 |
A21 = A[INDEX3(1, 0, q, 2, dim1)]; |
333 |
length = sqrt(A11 * A11 + A21 * A21); |
334 |
if (!length > 0) |
335 |
{ |
336 |
Dudley_setError(ZERO_DIVISION_ERROR, __FILE__ ": area equals zero."); |
337 |
return; |
338 |
} |
339 |
else |
340 |
{ |
341 |
invlength = 1. / length; |
342 |
Normal[INDEX2(0, q, 2)] = A21 * invlength; |
343 |
Normal[INDEX2(1, q, 2)] = -A11 * invlength; |
344 |
} |
345 |
} |
346 |
break; |
347 |
case 3: |
348 |
for (q = 0; q < len; q++) |
349 |
{ |
350 |
A11 = A[INDEX3(0, 0, q, 3, dim1)]; |
351 |
A21 = A[INDEX3(1, 0, q, 3, dim1)]; |
352 |
A31 = A[INDEX3(2, 0, q, 3, dim1)]; |
353 |
A12 = A[INDEX3(0, 1, q, 3, dim1)]; |
354 |
A22 = A[INDEX3(1, 1, q, 3, dim1)]; |
355 |
A32 = A[INDEX3(2, 1, q, 3, dim1)]; |
356 |
CO_A13 = A21 * A32 - A31 * A22; |
357 |
CO_A23 = A31 * A12 - A11 * A32; |
358 |
CO_A33 = A11 * A22 - A21 * A12; |
359 |
length = sqrt(CO_A13 * CO_A13 + CO_A23 * CO_A23 + CO_A33 * CO_A33); |
360 |
if (!length > 0) |
361 |
{ |
362 |
Dudley_setError(ZERO_DIVISION_ERROR, __FILE__ ": area equals zero."); |
363 |
return; |
364 |
} |
365 |
else |
366 |
{ |
367 |
invlength = 1. / length; |
368 |
Normal[INDEX2(0, q, 3)] = CO_A13 * invlength; |
369 |
Normal[INDEX2(1, q, 3)] = CO_A23 * invlength; |
370 |
Normal[INDEX2(2, q, 3)] = CO_A33 * invlength; |
371 |
} |
372 |
|
373 |
} |
374 |
break; |
375 |
|
376 |
} |
377 |
return; |
378 |
} |
379 |
|
380 |
/* return the length of the vector which is orthogonal to the vectors A(:,0,q) and A(:,1,q) in the case of dim=3 */ |
381 |
/* or the vector A(:,0,q) in the case of dim=2 */ |
382 |
|
383 |
void Dudley_LengthOfNormalVector(dim_t len, dim_t dim, dim_t dim1, double *A, double *length) |
384 |
{ |
385 |
dim_t q; |
386 |
double A11, A12, CO_A13, A21, A22, CO_A23, A31, A32, CO_A33; |
387 |
|
388 |
switch (dim) |
389 |
{ |
390 |
case 1: |
391 |
for (q = 0; q < len; q++) |
392 |
length[q] = 1; |
393 |
break; |
394 |
case 2: |
395 |
for (q = 0; q < len; q++) |
396 |
{ |
397 |
A11 = A[INDEX3(0, 0, q, 2, dim1)]; |
398 |
A21 = A[INDEX3(1, 0, q, 2, dim1)]; |
399 |
length[q] = sqrt(A11 * A11 + A21 * A21); |
400 |
} |
401 |
break; |
402 |
case 3: |
403 |
for (q = 0; q < len; q++) |
404 |
{ |
405 |
A11 = A[INDEX3(0, 0, q, 3, dim1)]; |
406 |
A21 = A[INDEX3(1, 0, q, 3, dim1)]; |
407 |
A31 = A[INDEX3(2, 0, q, 3, dim1)]; |
408 |
A12 = A[INDEX3(0, 1, q, 3, dim1)]; |
409 |
A22 = A[INDEX3(1, 1, q, 3, dim1)]; |
410 |
A32 = A[INDEX3(2, 1, q, 3, dim1)]; |
411 |
CO_A13 = A21 * A32 - A31 * A22; |
412 |
CO_A23 = A31 * A12 - A11 * A32; |
413 |
CO_A33 = A11 * A22 - A21 * A12; |
414 |
length[q] = sqrt(CO_A13 * CO_A13 + CO_A23 * CO_A23 + CO_A33 * CO_A33); |
415 |
} |
416 |
break; |
417 |
|
418 |
} |
419 |
return; |
420 |
} |
421 |
|
422 |
/* inverts the map map of length len */ |
423 |
/* there is no range checking! */ |
424 |
/* at output Map[invMap[i]]=i for i=0:lenInvMap */ |
425 |
|
426 |
void Dudley_Util_InvertMap(dim_t lenInvMap, index_t * invMap, dim_t lenMap, index_t * Map) |
427 |
{ |
428 |
dim_t i; |
429 |
for (i = 0; i < lenInvMap; i++) |
430 |
invMap[i] = 0; |
431 |
for (i = 0; i < lenMap; i++) |
432 |
{ |
433 |
if (Map[i] >= 0) |
434 |
invMap[Map[i]] = i; |
435 |
} |
436 |
} |
437 |
|
438 |
/* orders a Dudley_Util_ValueAndIndex array by value */ |
439 |
/* it is assumed that n is large */ |
440 |
|
441 |
int Dudley_Util_ValueAndIndex_compar(const void *arg1, const void *arg2) |
442 |
{ |
443 |
Dudley_Util_ValueAndIndex *e1, *e2; |
444 |
e1 = (Dudley_Util_ValueAndIndex *) arg1; |
445 |
e2 = (Dudley_Util_ValueAndIndex *) arg2; |
446 |
if (e1->value < e2->value) |
447 |
return -1; |
448 |
if (e1->value > e2->value) |
449 |
return 1; |
450 |
if (e1->index < e2->index) |
451 |
return -1; |
452 |
if (e1->index > e2->index) |
453 |
return 1; |
454 |
return 0; |
455 |
} |
456 |
|
457 |
void Dudley_Util_sortValueAndIndex(dim_t n, Dudley_Util_ValueAndIndex * array) |
458 |
{ |
459 |
/* OMP : needs parallelization ! */ |
460 |
qsort(array, n, sizeof(Dudley_Util_ValueAndIndex), Dudley_Util_ValueAndIndex_compar); |
461 |
} |
462 |
|
463 |
/************************************************************************************/ |
464 |
|
465 |
/* calculates the minimum value from a dim X N integer array */ |
466 |
|
467 |
index_t Dudley_Util_getMinInt(dim_t dim, dim_t N, index_t * values) |
468 |
{ |
469 |
dim_t i, j; |
470 |
index_t out, out_local; |
471 |
out = INDEX_T_MAX; |
472 |
if (values != NULL && dim * N > 0) |
473 |
{ |
474 |
out = values[0]; |
475 |
#pragma omp parallel private(out_local) |
476 |
{ |
477 |
out_local = out; |
478 |
#pragma omp for private(i,j) schedule(static) |
479 |
for (j = 0; j < N; j++) |
480 |
{ |
481 |
for (i = 0; i < dim; i++) |
482 |
out_local = MIN(out_local, values[INDEX2(i, j, dim)]); |
483 |
} |
484 |
#pragma omp critical |
485 |
out = MIN(out_local, out); |
486 |
} |
487 |
} |
488 |
return out; |
489 |
} |
490 |
|
491 |
/* calculates the maximum value from a dim X N integer array */ |
492 |
|
493 |
index_t Dudley_Util_getMaxInt(dim_t dim, dim_t N, index_t * values) |
494 |
{ |
495 |
dim_t i, j; |
496 |
index_t out, out_local; |
497 |
out = -INDEX_T_MAX; |
498 |
if (values != NULL && dim * N > 0) |
499 |
{ |
500 |
out = values[0]; |
501 |
#pragma omp parallel private(out_local) |
502 |
{ |
503 |
out_local = out; |
504 |
#pragma omp for private(i,j) schedule(static) |
505 |
for (j = 0; j < N; j++) |
506 |
{ |
507 |
for (i = 0; i < dim; i++) |
508 |
{ |
509 |
out_local = MAX(out_local, values[INDEX2(i, j, dim)]); |
510 |
|
511 |
} |
512 |
} |
513 |
#pragma omp critical |
514 |
out = MAX(out_local, out); |
515 |
} |
516 |
} |
517 |
return out; |
518 |
} |
519 |
|
520 |
/************************************************************************************/ |
521 |
|
522 |
/* calculates the minimum value from a dim X N integer array */ |
523 |
|
524 |
index_t Dudley_Util_getFlaggedMinInt(dim_t dim, dim_t N, index_t * values, index_t ignore) |
525 |
{ |
526 |
dim_t i, j; |
527 |
index_t out, out_local; |
528 |
out = INDEX_T_MAX; |
529 |
if (values != NULL && dim * N > 0) |
530 |
{ |
531 |
out = values[0]; |
532 |
#pragma omp parallel private(out_local) |
533 |
{ |
534 |
out_local = out; |
535 |
#pragma omp for private(i,j) schedule(static) |
536 |
for (j = 0; j < N; j++) |
537 |
{ |
538 |
for (i = 0; i < dim; i++) |
539 |
if (values[INDEX2(i, j, dim)] != ignore) |
540 |
out_local = MIN(out_local, values[INDEX2(i, j, dim)]); |
541 |
} |
542 |
#pragma omp critical |
543 |
out = MIN(out_local, out); |
544 |
} |
545 |
} |
546 |
return out; |
547 |
} |
548 |
|
549 |
/* calculates the maximum value from a dim X N integer array */ |
550 |
|
551 |
index_t Dudley_Util_getFlaggedMaxInt(dim_t dim, dim_t N, index_t * values, index_t ignore) |
552 |
{ |
553 |
dim_t i, j; |
554 |
index_t out, out_local; |
555 |
out = -INDEX_T_MAX; |
556 |
if (values != NULL && dim * N > 0) |
557 |
{ |
558 |
out = values[0]; |
559 |
#pragma omp parallel private(out_local) |
560 |
{ |
561 |
out_local = out; |
562 |
#pragma omp for private(i,j) schedule(static) |
563 |
for (j = 0; j < N; j++) |
564 |
{ |
565 |
for (i = 0; i < dim; i++) |
566 |
if (values[INDEX2(i, j, dim)] != ignore) |
567 |
out_local = MAX(out_local, values[INDEX2(i, j, dim)]); |
568 |
} |
569 |
#pragma omp critical |
570 |
out = MAX(out_local, out); |
571 |
} |
572 |
} |
573 |
return out; |
574 |
} |
575 |
|
576 |
/* set the index of the positive entries in mask. The length of index is returned. */ |
577 |
|
578 |
dim_t Dudley_Util_packMask(dim_t N, index_t * mask, index_t * index) |
579 |
{ |
580 |
dim_t out, k; |
581 |
out = 0; |
582 |
/*OMP */ |
583 |
for (k = 0; k < N; k++) |
584 |
{ |
585 |
if (mask[k] >= 0) |
586 |
{ |
587 |
index[out] = k; |
588 |
out++; |
589 |
} |
590 |
} |
591 |
return out; |
592 |
} |
593 |
|
594 |
/* returns true if array contains value */ |
595 |
bool_t Dudley_Util_isAny(dim_t N, index_t * array, index_t value) |
596 |
{ |
597 |
bool_t out = FALSE; |
598 |
dim_t i; |
599 |
#pragma omp parallel for private(i) schedule(static) reduction(||:out) |
600 |
for (i = 0; i < N; i++) |
601 |
out = out || (array[i] == value); |
602 |
return out; |
603 |
} |
604 |
|
605 |
/* calculates the cummultative sum in array and returns the total sum */ |
606 |
index_t Dudley_Util_cumsum(dim_t N, index_t * array) |
607 |
{ |
608 |
index_t out = 0, tmp; |
609 |
dim_t i; |
610 |
#ifdef _OPENMP |
611 |
index_t *partial_sums = NULL, sum; |
612 |
partial_sums = new index_t[omp_get_max_threads()]; |
613 |
#pragma omp parallel private(sum,i,tmp) |
614 |
{ |
615 |
sum = 0; |
616 |
#pragma omp for schedule(static) |
617 |
for (i = 0; i < N; ++i) |
618 |
sum += array[i]; |
619 |
partial_sums[omp_get_thread_num()] = sum; |
620 |
#pragma omp barrier |
621 |
#pragma omp master |
622 |
{ |
623 |
out = 0; |
624 |
for (i = 0; i < omp_get_max_threads(); ++i) |
625 |
{ |
626 |
tmp = out; |
627 |
out += partial_sums[i]; |
628 |
partial_sums[i] = tmp; |
629 |
} |
630 |
} |
631 |
#pragma omp barrier |
632 |
sum = partial_sums[omp_get_thread_num()]; |
633 |
#pragma omp for schedule(static) |
634 |
for (i = 0; i < N; ++i) |
635 |
{ |
636 |
tmp = sum; |
637 |
sum += array[i]; |
638 |
array[i] = tmp; |
639 |
} |
640 |
} |
641 |
delete[] partial_sums; |
642 |
#else |
643 |
for (i = 0; i < N; ++i) |
644 |
{ |
645 |
tmp = out; |
646 |
out += array[i]; |
647 |
array[i] = tmp; |
648 |
} |
649 |
#endif |
650 |
return out; |
651 |
} |
652 |
|
653 |
void Dudley_Util_setValuesInUse(const index_t * values, const dim_t numValues, dim_t * numValuesInUse, |
654 |
index_t ** valuesInUse, Esys_MPIInfo * mpiinfo) |
655 |
{ |
656 |
dim_t i; |
657 |
index_t lastFoundValue = INDEX_T_MIN, minFoundValue, local_minFoundValue, *newValuesInUse = NULL; |
658 |
register index_t itmp; |
659 |
bool_t allFound = FALSE; |
660 |
dim_t nv = 0; |
661 |
|
662 |
while (!allFound) |
663 |
{ |
664 |
/* |
665 |
* find smallest value bigger than lastFoundValue |
666 |
*/ |
667 |
minFoundValue = INDEX_T_MAX; |
668 |
#pragma omp parallel private(local_minFoundValue) |
669 |
{ |
670 |
local_minFoundValue = minFoundValue; |
671 |
#pragma omp for private(i,itmp) schedule(static) |
672 |
for (i = 0; i < numValues; i++) |
673 |
{ |
674 |
itmp = values[i]; |
675 |
if ((itmp > lastFoundValue) && (itmp < local_minFoundValue)) |
676 |
local_minFoundValue = itmp; |
677 |
} |
678 |
#pragma omp critical |
679 |
{ |
680 |
if (local_minFoundValue < minFoundValue) |
681 |
minFoundValue = local_minFoundValue; |
682 |
} |
683 |
|
684 |
} |
685 |
#ifdef ESYS_MPI |
686 |
local_minFoundValue = minFoundValue; |
687 |
MPI_Allreduce(&local_minFoundValue, &minFoundValue, 1, MPI_INT, MPI_MIN, mpiinfo->comm); |
688 |
#endif |
689 |
/* if we found a new tag we need to add this too the valuesInUseList */ |
690 |
|
691 |
if (minFoundValue < INDEX_T_MAX) |
692 |
{ |
693 |
newValuesInUse = MEMALLOC(nv + 1, index_t); |
694 |
if (*valuesInUse != NULL) |
695 |
{ |
696 |
memcpy(newValuesInUse, *valuesInUse, sizeof(index_t) * nv); |
697 |
MEMFREE(*valuesInUse); |
698 |
} |
699 |
newValuesInUse[nv] = minFoundValue; |
700 |
*valuesInUse = newValuesInUse; |
701 |
newValuesInUse = NULL; |
702 |
nv++; |
703 |
lastFoundValue = minFoundValue; |
704 |
} |
705 |
else |
706 |
{ |
707 |
allFound = TRUE; |
708 |
} |
709 |
} |
710 |
*numValuesInUse = nv; |
711 |
} |
712 |
|
713 |
#ifdef ESYS_MPI |
714 |
void Dudley_printDoubleArray(FILE * fid, dim_t n, double *array, char *name) |
715 |
{ |
716 |
index_t i; |
717 |
|
718 |
if (name) |
719 |
fprintf(fid, "%s [ ", name); |
720 |
else |
721 |
fprintf(fid, "[ "); |
722 |
for (i = 0; i < (n < 60 ? n : 60); i++) |
723 |
fprintf(fid, "%g ", array[i]); |
724 |
if (n >= 30) |
725 |
fprintf(fid, "... "); |
726 |
fprintf(fid, "]\n"); |
727 |
} |
728 |
|
729 |
void Dudley_printIntArray(FILE * fid, dim_t n, int *array, char *name) |
730 |
{ |
731 |
index_t i; |
732 |
|
733 |
if (name) |
734 |
fprintf(fid, "%s [ ", name); |
735 |
else |
736 |
fprintf(fid, "[ "); |
737 |
for (i = 0; i < (n < 60 ? n : 60); i++) |
738 |
fprintf(fid, "%d ", array[i]); |
739 |
if (n >= 30) |
740 |
fprintf(fid, "... "); |
741 |
fprintf(fid, "]\n"); |
742 |
} |
743 |
|
744 |
void Dudley_printMaskArray(FILE * fid, dim_t n, int *array, char *name) |
745 |
{ |
746 |
index_t i; |
747 |
|
748 |
if (name) |
749 |
fprintf(fid, "%s [ ", name); |
750 |
else |
751 |
fprintf(fid, "[ "); |
752 |
for (i = 0; i < (n < 60 ? n : 60); i++) |
753 |
if (array[i] != -1) |
754 |
fprintf(fid, "%3d ", array[i]); |
755 |
else |
756 |
fprintf(fid, " * "); |
757 |
if (n >= 30) |
758 |
fprintf(fid, "... "); |
759 |
fprintf(fid, "]\n"); |
760 |
} |
761 |
#endif |