1 |
|
2 |
/***************************************************************************** |
3 |
* |
4 |
* Copyright (c) 2003-2013 by University of Queensland |
5 |
* http://www.uq.edu.au |
6 |
* |
7 |
* Primary Business: Queensland, Australia |
8 |
* Licensed under the Open Software License version 3.0 |
9 |
* http://www.opensource.org/licenses/osl-3.0.php |
10 |
* |
11 |
* Development until 2012 by Earth Systems Science Computational Center (ESSCC) |
12 |
* Development since 2012 by School of Earth Sciences |
13 |
* |
14 |
*****************************************************************************/ |
15 |
|
16 |
|
17 |
/************************************************************************************/ |
18 |
|
19 |
/* Paso: Pattern_unrollBlocks */ |
20 |
|
21 |
/************************************************************************************/ |
22 |
|
23 |
/* Author: Lutz Gross, l.gross@uq.edu.au */ |
24 |
|
25 |
/************************************************************************************/ |
26 |
|
27 |
#include "Paso.h" |
28 |
#include "Pattern.h" |
29 |
|
30 |
/************************************************************************************/ |
31 |
|
32 |
/* creates Pattern */ |
33 |
|
34 |
Paso_Pattern* Paso_Pattern_unrollBlocks(Paso_Pattern* pattern, \ |
35 |
int type, dim_t output_block_size,dim_t input_block_size) { |
36 |
Paso_Pattern*out=NULL; |
37 |
index_t *ptr=NULL,*index=NULL,iPtr; |
38 |
dim_t i,j,k, block_size, new_len, new_numOutput, new_numInput; |
39 |
const index_t index_offset_in=(pattern->type & MATRIX_FORMAT_OFFSET1 ? 1:0); |
40 |
const index_t index_offset_out=(type & MATRIX_FORMAT_OFFSET1 ? 1:0); |
41 |
|
42 |
|
43 |
Esys_resetError(); |
44 |
if ( ( output_block_size == 1 ) && (input_block_size == 1) && ((pattern->type & MATRIX_FORMAT_OFFSET1) == (type & MATRIX_FORMAT_OFFSET1) ) ) { |
45 |
out = Paso_Pattern_getReference(pattern); |
46 |
} else { |
47 |
block_size=output_block_size*input_block_size; |
48 |
new_len=(pattern->len)*block_size; |
49 |
new_numOutput=(pattern->numOutput)*output_block_size; |
50 |
new_numInput=(pattern->numInput)*input_block_size; |
51 |
|
52 |
ptr=new index_t[new_numOutput+1]; |
53 |
index=new index_t[new_len]; |
54 |
if (! ( Esys_checkPtr(ptr) || Esys_checkPtr(index) ) ) { |
55 |
#pragma omp parallel |
56 |
{ |
57 |
#pragma omp for private(i) schedule(static) |
58 |
for (i=0;i<new_numOutput+1;++i) ptr[i]=index_offset_out; |
59 |
|
60 |
#pragma omp single |
61 |
ptr[new_numOutput]=new_len+index_offset_out; |
62 |
|
63 |
#pragma omp for private(i,k) schedule(static) |
64 |
for (i=0;i<pattern->numOutput;++i) { |
65 |
for (k=0;k<output_block_size;++k) { |
66 |
ptr[i*output_block_size+k]=(pattern->ptr[i]-index_offset_in)*block_size+ |
67 |
(pattern->ptr[i+1]-pattern->ptr[i])*input_block_size*k+index_offset_out; |
68 |
} |
69 |
} |
70 |
|
71 |
#pragma omp for private(i,iPtr) schedule(static) |
72 |
for (i=0;i<new_numOutput;++i) { |
73 |
#pragma ivdep |
74 |
for (iPtr=ptr[i]-index_offset_out;iPtr<ptr[i+1]-index_offset_out;++iPtr) index[iPtr]=index_offset_out; |
75 |
} |
76 |
|
77 |
#pragma omp for private(i,j,iPtr,k) schedule(static) |
78 |
for (i=0;i<pattern->numOutput;++i) { |
79 |
for (iPtr=pattern->ptr[i]-index_offset_in;iPtr<pattern->ptr[i+1]-index_offset_in;++iPtr) { |
80 |
for (k=0;k<output_block_size;++k) { |
81 |
#pragma ivdep |
82 |
for (j=0;j<input_block_size;++j) { |
83 |
index[ptr[i*output_block_size+k]-index_offset_out+(iPtr-(pattern->ptr[i]-index_offset_in))*input_block_size+j]=(pattern->index[iPtr]-index_offset_in)*input_block_size+j+index_offset_out; |
84 |
} |
85 |
} |
86 |
} |
87 |
} |
88 |
} |
89 |
out=Paso_Pattern_alloc(type,new_numOutput,new_numInput,ptr,index); |
90 |
} |
91 |
if (! Esys_noError()) { |
92 |
delete[] index; |
93 |
delete[] ptr; |
94 |
} |
95 |
} |
96 |
return out; |
97 |
} |