1 |
|
2 |
/******************************************************* |
3 |
* |
4 |
* Copyright (c) 2003-2010 by University of Queensland |
5 |
* Earth Systems Science Computational Center (ESSCC) |
6 |
* http://www.uq.edu.au/esscc |
7 |
* |
8 |
* Primary Business: Queensland, Australia |
9 |
* Licensed under the Open Software License version 3.0 |
10 |
* http://www.opensource.org/licenses/osl-3.0.php |
11 |
* |
12 |
*******************************************************/ |
13 |
|
14 |
|
15 |
/**************************************************************/ |
16 |
|
17 |
/* Paso: Pattern_unrollBlocks */ |
18 |
|
19 |
/**************************************************************/ |
20 |
|
21 |
/* Author: Lutz Gross, l.gross@uq.edu.au */ |
22 |
|
23 |
/**************************************************************/ |
24 |
|
25 |
#include "Paso.h" |
26 |
#include "Pattern.h" |
27 |
|
28 |
/**************************************************************/ |
29 |
|
30 |
/* creates Pattern */ |
31 |
|
32 |
Paso_Pattern* Paso_Pattern_unrollBlocks(Paso_Pattern* pattern, \ |
33 |
int type, dim_t output_block_size,dim_t input_block_size) { |
34 |
Paso_Pattern*out=NULL; |
35 |
index_t *ptr=NULL,*index=NULL,iPtr; |
36 |
dim_t i,j,k, block_size, new_len, new_numOutput, new_numInput; |
37 |
const index_t index_offset_in=(pattern->type & MATRIX_FORMAT_OFFSET1 ? 1:0); |
38 |
const index_t index_offset_out=(type & MATRIX_FORMAT_OFFSET1 ? 1:0); |
39 |
|
40 |
|
41 |
Esys_resetError(); |
42 |
if ( ( output_block_size == 1 ) && (input_block_size == 1) && ((pattern->type & MATRIX_FORMAT_OFFSET1) == (type & MATRIX_FORMAT_OFFSET1) ) ) { |
43 |
out = Paso_Pattern_getReference(pattern); |
44 |
} else { |
45 |
block_size=output_block_size*input_block_size; |
46 |
new_len=(pattern->len)*block_size; |
47 |
new_numOutput=(pattern->numOutput)*output_block_size; |
48 |
new_numInput=(pattern->numInput)*input_block_size; |
49 |
|
50 |
ptr=MEMALLOC(new_numOutput+1,index_t); |
51 |
index=MEMALLOC(new_len,index_t); |
52 |
if (! ( Esys_checkPtr(ptr) || Esys_checkPtr(index) ) ) { |
53 |
#pragma omp parallel |
54 |
{ |
55 |
#pragma omp for private(i) schedule(static) |
56 |
for (i=0;i<new_numOutput+1;++i) ptr[i]=index_offset_out; |
57 |
|
58 |
#pragma omp single |
59 |
ptr[new_numOutput]=new_len+index_offset_out; |
60 |
|
61 |
#pragma omp for private(i,k) schedule(static) |
62 |
for (i=0;i<pattern->numOutput;++i) { |
63 |
for (k=0;k<output_block_size;++k) { |
64 |
ptr[i*output_block_size+k]=(pattern->ptr[i]-index_offset_in)*block_size+ |
65 |
(pattern->ptr[i+1]-pattern->ptr[i])*input_block_size*k+index_offset_out; |
66 |
} |
67 |
} |
68 |
|
69 |
#pragma omp for private(i,iPtr) schedule(static) |
70 |
for (i=0;i<new_numOutput;++i) { |
71 |
#pragma ivdep |
72 |
for (iPtr=ptr[i]-index_offset_out;iPtr<ptr[i+1]-index_offset_out;++iPtr) index[iPtr]=index_offset_out; |
73 |
} |
74 |
|
75 |
#pragma omp for private(i,j,iPtr,k) schedule(static) |
76 |
for (i=0;i<pattern->numOutput;++i) { |
77 |
for (iPtr=pattern->ptr[i]-index_offset_in;iPtr<pattern->ptr[i+1]-index_offset_in;++iPtr) { |
78 |
for (k=0;k<output_block_size;++k) { |
79 |
#pragma ivdep |
80 |
for (j=0;j<input_block_size;++j) { |
81 |
index[ptr[i*output_block_size+k]-index_offset_out+(iPtr-(pattern->ptr[i]-index_offset_in))*input_block_size+j]=(pattern->index[iPtr]-index_offset_in)*input_block_size+j+index_offset_out; |
82 |
} |
83 |
} |
84 |
} |
85 |
} |
86 |
} |
87 |
out=Paso_Pattern_alloc(type,new_numOutput,new_numInput,ptr,index); |
88 |
} |
89 |
if (! Esys_noError()) { |
90 |
MEMFREE(index); |
91 |
MEMFREE(ptr); |
92 |
} |
93 |
} |
94 |
return out; |
95 |
} |