1 |
ksteube |
1313 |
|
2 |
|
|
/******************************************************* |
3 |
ksteube |
1811 |
* |
4 |
|
|
* Copyright (c) 2003-2008 by University of Queensland |
5 |
|
|
* Earth Systems Science Computational Center (ESSCC) |
6 |
|
|
* http://www.uq.edu.au/esscc |
7 |
|
|
* |
8 |
|
|
* Primary Business: Queensland, Australia |
9 |
|
|
* Licensed under the Open Software License version 3.0 |
10 |
|
|
* http://www.opensource.org/licenses/osl-3.0.php |
11 |
|
|
* |
12 |
|
|
*******************************************************/ |
13 |
ksteube |
1313 |
|
14 |
ksteube |
1811 |
|
15 |
ksteube |
1313 |
/**************************************************************/ |
16 |
|
|
|
17 |
|
|
/* Paso: Pattern */ |
18 |
|
|
|
19 |
|
|
/**************************************************************/ |
20 |
|
|
|
21 |
|
|
/* Author: gross@access.edu.au */ |
22 |
|
|
|
23 |
|
|
/**************************************************************/ |
24 |
|
|
|
25 |
|
|
#include "Paso.h" |
26 |
|
|
#include "Pattern.h" |
27 |
|
|
|
28 |
|
|
/**************************************************************/ |
29 |
|
|
|
30 |
|
|
/* allocates a Pattern */ |
31 |
|
|
|
32 |
gross |
1736 |
Paso_Pattern* Paso_Pattern_alloc(int type, dim_t input_block_size, dim_t output_block_size, dim_t numOutput, dim_t numInput, index_t* ptr, index_t* index) { |
33 |
ksteube |
1313 |
Paso_Pattern*out=NULL; |
34 |
|
|
index_t index_offset=(type & PATTERN_FORMAT_OFFSET1 ? 1:0); |
35 |
|
|
index_t loc_min_index,loc_max_index,min_index=index_offset,max_index=index_offset-1; |
36 |
|
|
dim_t i, sum=0; |
37 |
|
|
Paso_resetError(); |
38 |
|
|
|
39 |
|
|
if (type & PATTERN_FORMAT_SYM) { |
40 |
|
|
Paso_setError(TYPE_ERROR,"Paso_Pattern_alloc: symmetric matrix pattern is not supported yet"); |
41 |
|
|
return NULL; |
42 |
|
|
} |
43 |
|
|
if (ptr!=NULL && index != NULL) { |
44 |
|
|
#pragma omp parallel private(loc_min_index,loc_max_index,i) |
45 |
|
|
{ |
46 |
|
|
loc_min_index=index_offset; |
47 |
|
|
loc_max_index=index_offset-1; |
48 |
|
|
if (type & PATTERN_FORMAT_OFFSET1) { |
49 |
|
|
#pragma omp for schedule(static) |
50 |
|
|
for (i=0;i<numOutput;++i) { |
51 |
|
|
if (ptr[i]<ptr[i+1]) { |
52 |
|
|
#ifdef USE_QSORTG |
53 |
|
|
qsortG(&(index[ptr[i]-1]),(size_t)(ptr[i+1]-ptr[i]),sizeof(index_t),Paso_comparIndex); |
54 |
|
|
#else |
55 |
|
|
qsort(&(index[ptr[i]-1]),(size_t)(ptr[i+1]-ptr[i]),sizeof(index_t),Paso_comparIndex); |
56 |
|
|
#endif |
57 |
|
|
loc_min_index=MIN(loc_min_index,index[ptr[i]-1]); |
58 |
|
|
loc_max_index=MAX(loc_max_index,index[ptr[i+1]-2]); |
59 |
|
|
} |
60 |
|
|
} |
61 |
|
|
} else { |
62 |
|
|
#pragma omp for schedule(static) |
63 |
|
|
for (i=0;i<numOutput;++i) { |
64 |
|
|
if (ptr[i]<ptr[i+1]) { |
65 |
|
|
#ifdef USE_QSORTG |
66 |
|
|
qsortG(&(index[ptr[i]]),(size_t)(ptr[i+1]-ptr[i]),sizeof(index_t),Paso_comparIndex); |
67 |
|
|
#else |
68 |
|
|
qsort(&(index[ptr[i]]),(size_t)(ptr[i+1]-ptr[i]),sizeof(index_t),Paso_comparIndex); |
69 |
|
|
#endif |
70 |
|
|
loc_min_index=MIN(loc_min_index,index[ptr[i]]); |
71 |
|
|
loc_max_index=MAX(loc_max_index,index[ptr[i+1]-1]); |
72 |
|
|
} |
73 |
|
|
} |
74 |
|
|
} |
75 |
|
|
#pragma omp critical |
76 |
|
|
{ |
77 |
|
|
min_index=MIN(loc_min_index,min_index); |
78 |
|
|
max_index=MAX(loc_max_index,max_index); |
79 |
|
|
} |
80 |
|
|
} |
81 |
gross |
1736 |
if ( (min_index<index_offset) || (max_index>=numInput+index_offset) ) { |
82 |
|
|
Paso_setError(TYPE_ERROR,"Paso_Pattern_alloc: Pattern index out of range."); |
83 |
ksteube |
1313 |
return NULL; |
84 |
|
|
} |
85 |
|
|
} |
86 |
|
|
out=MEMALLOC(1,Paso_Pattern); |
87 |
|
|
if (! Paso_checkPtr(out)) { |
88 |
|
|
out->type=type; |
89 |
|
|
out->reference_counter=1; |
90 |
|
|
out->numOutput=numOutput; |
91 |
gross |
1736 |
out->numInput=numInput; |
92 |
ksteube |
1313 |
out->ptr=ptr; |
93 |
|
|
out->index=index; |
94 |
|
|
out->input_block_size=input_block_size; |
95 |
|
|
out->output_block_size=output_block_size; |
96 |
|
|
out->block_size=out->input_block_size * out->output_block_size; |
97 |
|
|
if (out->ptr == NULL) { |
98 |
|
|
out->len=0; |
99 |
|
|
} else { |
100 |
|
|
out->len=out->ptr[out->numOutput] - index_offset; |
101 |
|
|
} |
102 |
|
|
} |
103 |
|
|
#ifdef Paso_TRACE |
104 |
|
|
printf("Paso_Pattern_alloc: system matrix pattern as been allocated.\n"); |
105 |
|
|
#endif |
106 |
|
|
return out; |
107 |
|
|
} |
108 |
|
|
|
109 |
|
|
/* returns a reference to in */ |
110 |
|
|
|
111 |
|
|
Paso_Pattern* Paso_Pattern_getReference(Paso_Pattern* in) { |
112 |
|
|
if (in!=NULL) { |
113 |
|
|
++(in->reference_counter); |
114 |
|
|
} |
115 |
|
|
return in; |
116 |
|
|
} |
117 |
|
|
|
118 |
|
|
/* deallocates a Pattern: */ |
119 |
|
|
|
120 |
|
|
void Paso_Pattern_free(Paso_Pattern* in) { |
121 |
|
|
if (in!=NULL) { |
122 |
|
|
in->reference_counter--; |
123 |
|
|
if (in->reference_counter<=0) { |
124 |
|
|
MEMFREE(in->ptr); |
125 |
|
|
MEMFREE(in->index); |
126 |
|
|
MEMFREE(in); |
127 |
|
|
#ifdef Paso_TRACE |
128 |
|
|
printf("Paso_Pattern_free: pattern as been deallocated.\n"); |
129 |
|
|
#endif |
130 |
|
|
} |
131 |
|
|
} |
132 |
|
|
} |
133 |
|
|
/* *************************************************************/ |
134 |
|
|
|
135 |
|
|
/* some routines which help to get the matrix pattern from elements: */ |
136 |
|
|
|
137 |
|
|
/* this routine is used by qsort called in Paso_Pattern_alloc */ |
138 |
|
|
|
139 |
|
|
int Paso_comparIndex(const void *index1,const void *index2){ |
140 |
|
|
index_t Iindex1,Iindex2; |
141 |
|
|
Iindex1=*(index_t*)index1; |
142 |
|
|
Iindex2=*(index_t*)index2; |
143 |
|
|
if (Iindex1<Iindex2) { |
144 |
|
|
return -1; |
145 |
|
|
} else { |
146 |
|
|
if (Iindex1>Iindex2) { |
147 |
|
|
return 1; |
148 |
|
|
} else { |
149 |
|
|
return 0; |
150 |
|
|
} |
151 |
|
|
} |
152 |
|
|
} |
153 |
|
|
|
154 |
|
|
bool_t Paso_Pattern_isEmpty(Paso_Pattern* in) { |
155 |
|
|
if (in != NULL) { |
156 |
|
|
if ((in->ptr != NULL) && (in->index != NULL)) return FALSE; |
157 |
|
|
} |
158 |
|
|
return TRUE; |
159 |
|
|
} |
160 |
artak |
1881 |
|
161 |
|
|
|
162 |
|
|
/* computes the pattern coming from matrix-matrix multiplication |
163 |
|
|
* |
164 |
|
|
**/ |
165 |
|
|
|
166 |
|
|
Paso_Pattern* Paso_Pattern_multiply(int type, Paso_Pattern* A, Paso_Pattern* B) { |
167 |
|
|
Paso_Pattern*out=NULL; |
168 |
|
|
index_t index_offset=(type & PATTERN_FORMAT_OFFSET1 ? 1:0); |
169 |
|
|
index_t iptrA,iptrB; |
170 |
|
|
dim_t i,j,k; |
171 |
|
|
Paso_IndexList* index_list=NULL; |
172 |
|
|
|
173 |
|
|
index_list=TMPMEMALLOC(A->numOutput,Paso_IndexList); |
174 |
|
|
if (! Paso_checkPtr(index_list)) { |
175 |
|
|
|
176 |
|
|
#pragma omp parallel private(i) |
177 |
|
|
{ |
178 |
|
|
#pragma omp for schedule(static) |
179 |
|
|
for(i=0;i<A->numOutput;++i) { |
180 |
|
|
index_list[i].extension=NULL; |
181 |
|
|
index_list[i].n=0; |
182 |
|
|
} |
183 |
|
|
} |
184 |
|
|
} |
185 |
|
|
|
186 |
|
|
for(i = 0; i < A->numOutput; i++) { |
187 |
|
|
for(iptrA = A->ptr[i]; iptrA < A->ptr[i+1]; ++iptrA) { |
188 |
|
|
j = A->index[iptrA]; |
189 |
|
|
for(iptrB = B->ptr[j]; iptrB < B->ptr[j+1]; ++iptrB) { |
190 |
|
|
k = B->index[iptrB]; |
191 |
|
|
if(i==k) { |
192 |
|
|
Finley_IndexList_insertIndex(&(index_list[i]),k); |
193 |
|
|
break; |
194 |
|
|
} |
195 |
|
|
} |
196 |
|
|
} |
197 |
|
|
} |
198 |
|
|
|
199 |
|
|
out=Paso_IndexList_createPattern(0, A->numOutput,index_list,0,INDEXLIST_LENGTH,0); |
200 |
|
|
|
201 |
|
|
#ifdef Paso_TRACE |
202 |
|
|
printf("Paso_Pattern_multipy: new pattern has been allocated.\n"); |
203 |
|
|
#endif |
204 |
|
|
|
205 |
|
|
/* clean up */ |
206 |
|
|
if (index_list!=NULL) { |
207 |
|
|
#pragma omp parallel for private(i) |
208 |
|
|
for(i=0;i<A->numOutput;++i) Paso_IndexList_free(index_list[i].extension); |
209 |
|
|
} |
210 |
|
|
TMPMEMFREE(index_list); |
211 |
|
|
|
212 |
|
|
return out; |
213 |
|
|
} |
214 |
|
|
|
215 |
|
|
|
216 |
|
|
|
217 |
|
|
/* |
218 |
|
|
* Computes the pattern of C = A binary operation B for CSR matrices A,B |
219 |
|
|
* |
220 |
|
|
* Note: we do not check whether A_ij(op)B_ij=0 |
221 |
|
|
* |
222 |
|
|
*/ |
223 |
|
|
Paso_Pattern* Paso_Pattern_binop(int type, Paso_Pattern* A, Paso_Pattern* B) { |
224 |
|
|
Paso_Pattern*out=NULL; |
225 |
|
|
index_t index_offset=(type & PATTERN_FORMAT_OFFSET1 ? 1:0); |
226 |
|
|
index_t iptrA,iptrB,*A_row=NULL,*B_row=NULL; |
227 |
|
|
dim_t i,j,k,temp,istart,length; |
228 |
|
|
|
229 |
|
|
Paso_IndexList* index_list=NULL; |
230 |
|
|
|
231 |
|
|
index_list=TMPMEMALLOC(A->numOutput,Paso_IndexList); |
232 |
|
|
if (! Paso_checkPtr(index_list)) { |
233 |
|
|
|
234 |
|
|
#pragma omp parallel private(i) |
235 |
|
|
{ |
236 |
|
|
#pragma omp for schedule(static) |
237 |
|
|
for(i=0;i<A->numOutput;++i) { |
238 |
|
|
index_list[i].extension=NULL; |
239 |
|
|
index_list[i].n=0; |
240 |
|
|
} |
241 |
|
|
} |
242 |
|
|
} |
243 |
|
|
|
244 |
|
|
for(i = 0; i < A->numOutput; i++){ |
245 |
|
|
iptrA = A->ptr[i], |
246 |
|
|
iptrB = B->ptr[i]; |
247 |
|
|
while (iptrA < A->ptr[i+1] && iptrB < B->ptr[i+1]) { |
248 |
|
|
j = A->index[iptrA]; |
249 |
|
|
k = B->index[iptrB]; |
250 |
|
|
if (j<k) { |
251 |
|
|
Finley_IndexList_insertIndex(&(index_list[i]),j); |
252 |
|
|
iptrA++; |
253 |
|
|
} else if (j>k) { |
254 |
|
|
Finley_IndexList_insertIndex(&(index_list[i]),k); |
255 |
|
|
iptrB++; |
256 |
|
|
} else if (j==k) { |
257 |
|
|
Finley_IndexList_insertIndex(&(index_list[i]),j); |
258 |
|
|
iptrB++; |
259 |
|
|
iptrA++; |
260 |
|
|
} |
261 |
|
|
} |
262 |
|
|
while(iptrA < A->ptr[i+1]) { |
263 |
|
|
j = A->index[iptrA]; |
264 |
|
|
Finley_IndexList_insertIndex(&(index_list[i]),j); |
265 |
|
|
iptrA++; |
266 |
|
|
} |
267 |
|
|
while(iptrB < B->ptr[i+1]) { |
268 |
|
|
k = B->index[iptrB]; |
269 |
|
|
Finley_IndexList_insertIndex(&(index_list[i]),k); |
270 |
|
|
iptrB++; |
271 |
|
|
} |
272 |
|
|
} |
273 |
|
|
|
274 |
|
|
out=Paso_IndexList_createPattern(0, A->numOutput,index_list,0,INDEXLIST_LENGTH,0); |
275 |
|
|
|
276 |
|
|
#ifdef Paso_TRACE |
277 |
|
|
printf("Paso_Pattern_multipy: new pattern has been allocated.\n"); |
278 |
|
|
#endif |
279 |
|
|
|
280 |
|
|
/* clean up */ |
281 |
|
|
if (index_list!=NULL) { |
282 |
|
|
#pragma omp parallel for private(i) |
283 |
|
|
for(i=0;i<A->numOutput;++i) Paso_IndexList_free(index_list[i].extension); |
284 |
|
|
} |
285 |
|
|
TMPMEMFREE(index_list); |
286 |
|
|
|
287 |
|
|
return out; |
288 |
|
|
} |
289 |
|
|
|
290 |
|
|
/* inserts row index row into the Paso_IndexList in if it does not exist */ |
291 |
|
|
|
292 |
|
|
void Paso_IndexList_insertIndex(Paso_IndexList* in, index_t index) { |
293 |
|
|
dim_t i; |
294 |
|
|
/* is index in in? */ |
295 |
|
|
for (i=0;i<in->n;i++) { |
296 |
|
|
if (in->index[i]==index) return; |
297 |
|
|
} |
298 |
|
|
/* index could not be found */ |
299 |
|
|
if (in->n==INDEXLIST_LENGTH) { |
300 |
|
|
/* if in->index is full check the extension */ |
301 |
|
|
if (in->extension==NULL) { |
302 |
|
|
in->extension=TMPMEMALLOC(1,Paso_IndexList); |
303 |
|
|
if (Paso_checkPtr(in->extension)) return; |
304 |
|
|
in->extension->n=0; |
305 |
|
|
in->extension->extension=NULL; |
306 |
|
|
} |
307 |
|
|
Paso_IndexList_insertIndex(in->extension,index); |
308 |
|
|
} else { |
309 |
|
|
/* insert index into in->index*/ |
310 |
|
|
in->index[in->n]=index; |
311 |
|
|
in->n++; |
312 |
|
|
} |
313 |
|
|
} |
314 |
|
|
|
315 |
|
|
/* counts the number of row indices in the Paso_IndexList in */ |
316 |
|
|
|
317 |
|
|
dim_t Paso_IndexList_count(Paso_IndexList* in, index_t range_min,index_t range_max) { |
318 |
|
|
dim_t i; |
319 |
|
|
dim_t out=0; |
320 |
|
|
register index_t itmp; |
321 |
|
|
if (in==NULL) { |
322 |
|
|
return 0; |
323 |
|
|
} else { |
324 |
|
|
for (i=0;i<in->n;i++) { |
325 |
|
|
itmp=in->index[i]; |
326 |
|
|
if ((itmp>=range_min) && (range_max>itmp)) ++out; |
327 |
|
|
} |
328 |
|
|
return out+Paso_IndexList_count(in->extension, range_min,range_max); |
329 |
|
|
} |
330 |
|
|
} |
331 |
|
|
|
332 |
|
|
/* count the number of row indices in the Paso_IndexList in */ |
333 |
|
|
|
334 |
|
|
void Paso_IndexList_toArray(Paso_IndexList* in, index_t* array, index_t range_min,index_t range_max, index_t index_offset) { |
335 |
|
|
dim_t i, ptr; |
336 |
|
|
register index_t itmp; |
337 |
|
|
if (in!=NULL) { |
338 |
|
|
ptr=0; |
339 |
|
|
for (i=0;i<in->n;i++) { |
340 |
|
|
itmp=in->index[i]; |
341 |
|
|
if ((itmp>=range_min) && (range_max>itmp)) { |
342 |
|
|
array[ptr]=itmp+index_offset; |
343 |
|
|
ptr++; |
344 |
|
|
} |
345 |
|
|
|
346 |
|
|
} |
347 |
|
|
Paso_IndexList_toArray(in->extension,&(array[ptr]), range_min, range_max, index_offset); |
348 |
|
|
} |
349 |
|
|
} |
350 |
|
|
|
351 |
|
|
/* deallocates the Paso_IndexList in by recursive calls */ |
352 |
|
|
|
353 |
|
|
void Paso_IndexList_free(Paso_IndexList* in) { |
354 |
|
|
if (in!=NULL) { |
355 |
|
|
Paso_IndexList_free(in->extension); |
356 |
|
|
TMPMEMFREE(in); |
357 |
|
|
} |
358 |
|
|
} |
359 |
|
|
|
360 |
|
|
/* creates a Paso_pattern from a range of indices */ |
361 |
|
|
Paso_Pattern* Paso_IndexList_createPattern(dim_t n0, dim_t n,Paso_IndexList* index_list,index_t range_min,index_t range_max,index_t index_offset) |
362 |
|
|
{ |
363 |
|
|
dim_t *ptr=NULL; |
364 |
|
|
register dim_t s,i,itmp; |
365 |
|
|
index_t *index=NULL; |
366 |
|
|
Paso_Pattern* out=NULL; |
367 |
|
|
|
368 |
|
|
ptr=MEMALLOC(n+1-n0,index_t); |
369 |
|
|
if (! Paso_checkPtr(ptr) ) { |
370 |
|
|
/* get the number of connections per row */ |
371 |
|
|
#pragma omp parallel for schedule(static) private(i) |
372 |
|
|
for(i=n0;i<n;++i) { |
373 |
|
|
ptr[i-n0]=Paso_IndexList_count(&index_list[i],range_min,range_max); |
374 |
|
|
} |
375 |
|
|
/* accumulate ptr */ |
376 |
|
|
s=0; |
377 |
|
|
for(i=n0;i<n;++i) { |
378 |
|
|
itmp=ptr[i-n0]; |
379 |
|
|
ptr[i-n0]=s; |
380 |
|
|
s+=itmp; |
381 |
|
|
} |
382 |
|
|
ptr[n-n0]=s; |
383 |
|
|
/* fill index */ |
384 |
|
|
index=MEMALLOC(ptr[n-n0],index_t); |
385 |
|
|
if (! Paso_checkPtr(index)) { |
386 |
|
|
#pragma omp parallel for schedule(static) |
387 |
|
|
for(i=n0;i<n;++i) { |
388 |
|
|
Paso_IndexList_toArray(&index_list[i],&index[ptr[i-n0]],range_min,range_max,index_offset); |
389 |
|
|
} |
390 |
|
|
out=Paso_Pattern_alloc(PATTERN_FORMAT_DEFAULT,1,1,n-n0,range_max+index_offset,ptr,index); |
391 |
|
|
} |
392 |
|
|
} |
393 |
|
|
if (! Paso_noError()) { |
394 |
|
|
MEMFREE(ptr); |
395 |
|
|
MEMFREE(index); |
396 |
|
|
Paso_Pattern_free(out); |
397 |
|
|
} |
398 |
|
|
return out; |
399 |
|
|
} |