1 |
|
2 |
/******************************************************* |
3 |
* |
4 |
* Copyright (c) 2003-2008 by University of Queensland |
5 |
* Earth Systems Science Computational Center (ESSCC) |
6 |
* http://www.uq.edu.au/esscc |
7 |
* |
8 |
* Primary Business: Queensland, Australia |
9 |
* Licensed under the Open Software License version 3.0 |
10 |
* http://www.opensource.org/licenses/osl-3.0.php |
11 |
* |
12 |
*******************************************************/ |
13 |
|
14 |
|
15 |
|
16 |
/**************************************************************/ |
17 |
|
18 |
/* Paso: Pattern */ |
19 |
|
20 |
/**************************************************************/ |
21 |
|
22 |
/* Author: gross@access.edu.au */ |
23 |
|
24 |
/**************************************************************/ |
25 |
|
26 |
#include "Paso.h" |
27 |
#include "Pattern.h" |
28 |
|
29 |
/**************************************************************/ |
30 |
|
31 |
/* allocates a Pattern */ |
32 |
|
33 |
Paso_Pattern* Paso_Pattern_alloc(int type, dim_t input_block_size, dim_t output_block_size, dim_t numOutput, dim_t numInput, index_t* ptr, index_t* index) { |
34 |
Paso_Pattern*out=NULL; |
35 |
index_t index_offset=(type & PATTERN_FORMAT_OFFSET1 ? 1:0); |
36 |
index_t loc_min_index,loc_max_index,min_index=index_offset,max_index=index_offset-1; |
37 |
dim_t i, sum=0; |
38 |
Paso_resetError(); |
39 |
|
40 |
if (type & PATTERN_FORMAT_SYM) { |
41 |
Paso_setError(TYPE_ERROR,"Paso_Pattern_alloc: symmetric matrix pattern is not supported yet"); |
42 |
return NULL; |
43 |
} |
44 |
if (ptr!=NULL && index != NULL) { |
45 |
#pragma omp parallel private(loc_min_index,loc_max_index,i) |
46 |
{ |
47 |
loc_min_index=index_offset; |
48 |
loc_max_index=index_offset-1; |
49 |
if (type & PATTERN_FORMAT_OFFSET1) { |
50 |
#pragma omp for schedule(static) |
51 |
for (i=0;i<numOutput;++i) { |
52 |
if (ptr[i]<ptr[i+1]) { |
53 |
#ifdef USE_QSORTG |
54 |
qsortG(&(index[ptr[i]-1]),(size_t)(ptr[i+1]-ptr[i]),sizeof(index_t),Paso_comparIndex); |
55 |
#else |
56 |
qsort(&(index[ptr[i]-1]),(size_t)(ptr[i+1]-ptr[i]),sizeof(index_t),Paso_comparIndex); |
57 |
#endif |
58 |
loc_min_index=MIN(loc_min_index,index[ptr[i]-1]); |
59 |
loc_max_index=MAX(loc_max_index,index[ptr[i+1]-2]); |
60 |
} |
61 |
} |
62 |
} else { |
63 |
#pragma omp for schedule(static) |
64 |
for (i=0;i<numOutput;++i) { |
65 |
if (ptr[i]<ptr[i+1]) { |
66 |
#ifdef USE_QSORTG |
67 |
qsortG(&(index[ptr[i]]),(size_t)(ptr[i+1]-ptr[i]),sizeof(index_t),Paso_comparIndex); |
68 |
#else |
69 |
qsort(&(index[ptr[i]]),(size_t)(ptr[i+1]-ptr[i]),sizeof(index_t),Paso_comparIndex); |
70 |
#endif |
71 |
loc_min_index=MIN(loc_min_index,index[ptr[i]]); |
72 |
loc_max_index=MAX(loc_max_index,index[ptr[i+1]-1]); |
73 |
} |
74 |
} |
75 |
} |
76 |
#pragma omp critical |
77 |
{ |
78 |
min_index=MIN(loc_min_index,min_index); |
79 |
max_index=MAX(loc_max_index,max_index); |
80 |
} |
81 |
} |
82 |
if ( (min_index<index_offset) || (max_index>=numInput+index_offset) ) { |
83 |
Paso_setError(TYPE_ERROR,"Paso_Pattern_alloc: Pattern index out of range."); |
84 |
return NULL; |
85 |
} |
86 |
} |
87 |
out=MEMALLOC(1,Paso_Pattern); |
88 |
if (! Paso_checkPtr(out)) { |
89 |
out->type=type; |
90 |
out->reference_counter=1; |
91 |
out->numOutput=numOutput; |
92 |
out->numInput=numInput; |
93 |
out->ptr=ptr; |
94 |
out->index=index; |
95 |
out->input_block_size=input_block_size; |
96 |
out->output_block_size=output_block_size; |
97 |
out->block_size=out->input_block_size * out->output_block_size; |
98 |
if (out->ptr == NULL) { |
99 |
out->len=0; |
100 |
} else { |
101 |
out->len=out->ptr[out->numOutput] - index_offset; |
102 |
} |
103 |
} |
104 |
#ifdef Paso_TRACE |
105 |
printf("Paso_Pattern_alloc: system matrix pattern as been allocated.\n"); |
106 |
#endif |
107 |
return out; |
108 |
} |
109 |
|
110 |
/* returns a reference to in */ |
111 |
|
112 |
Paso_Pattern* Paso_Pattern_getReference(Paso_Pattern* in) { |
113 |
if (in!=NULL) { |
114 |
++(in->reference_counter); |
115 |
} |
116 |
return in; |
117 |
} |
118 |
|
119 |
/* deallocates a Pattern: */ |
120 |
|
121 |
void Paso_Pattern_free(Paso_Pattern* in) { |
122 |
if (in!=NULL) { |
123 |
in->reference_counter--; |
124 |
if (in->reference_counter<=0) { |
125 |
MEMFREE(in->ptr); |
126 |
MEMFREE(in->index); |
127 |
MEMFREE(in); |
128 |
#ifdef Paso_TRACE |
129 |
printf("Paso_Pattern_free: pattern as been deallocated.\n"); |
130 |
#endif |
131 |
} |
132 |
} |
133 |
} |
134 |
/* *************************************************************/ |
135 |
|
136 |
/* some routines which help to get the matrix pattern from elements: */ |
137 |
|
138 |
/* this routine is used by qsort called in Paso_Pattern_alloc */ |
139 |
|
140 |
int Paso_comparIndex(const void *index1,const void *index2){ |
141 |
index_t Iindex1,Iindex2; |
142 |
Iindex1=*(index_t*)index1; |
143 |
Iindex2=*(index_t*)index2; |
144 |
if (Iindex1<Iindex2) { |
145 |
return -1; |
146 |
} else { |
147 |
if (Iindex1>Iindex2) { |
148 |
return 1; |
149 |
} else { |
150 |
return 0; |
151 |
} |
152 |
} |
153 |
} |
154 |
|
155 |
bool_t Paso_Pattern_isEmpty(Paso_Pattern* in) { |
156 |
if (in != NULL) { |
157 |
if ((in->ptr != NULL) && (in->index != NULL)) return FALSE; |
158 |
} |
159 |
return TRUE; |
160 |
} |
161 |
|
162 |
|
163 |
/* computes the pattern coming from matrix-matrix multiplication |
164 |
* |
165 |
**/ |
166 |
|
167 |
Paso_Pattern* Paso_Pattern_multiply(int type, Paso_Pattern* A, Paso_Pattern* B) { |
168 |
Paso_Pattern*out=NULL; |
169 |
index_t index_offset=(type & PATTERN_FORMAT_OFFSET1 ? 1:0); |
170 |
index_t iptrA,iptrB; |
171 |
dim_t i,j,k; |
172 |
Paso_IndexList* index_list=NULL; |
173 |
|
174 |
index_list=TMPMEMALLOC(A->numOutput,Paso_IndexList); |
175 |
if (! Paso_checkPtr(index_list)) { |
176 |
|
177 |
#pragma omp parallel private(i) |
178 |
{ |
179 |
#pragma omp for schedule(static) |
180 |
for(i=0;i<A->numOutput;++i) { |
181 |
index_list[i].extension=NULL; |
182 |
index_list[i].n=0; |
183 |
} |
184 |
} |
185 |
} |
186 |
|
187 |
for(i = 0; i < A->numOutput; i++) { |
188 |
for(iptrA = A->ptr[i]; iptrA < A->ptr[i+1]; ++iptrA) { |
189 |
j = A->index[iptrA]; |
190 |
for(iptrB = B->ptr[j]; iptrB < B->ptr[j+1]; ++iptrB) { |
191 |
k = B->index[iptrB]; |
192 |
Finley_IndexList_insertIndex(&(index_list[i]),k); |
193 |
} |
194 |
} |
195 |
} |
196 |
|
197 |
out=Paso_IndexList_createPattern(0, A->numOutput,index_list,0,INDEXLIST_LENGTH,0); |
198 |
|
199 |
#ifdef Paso_TRACE |
200 |
printf("Paso_Pattern_multipy: new pattern has been allocated.\n"); |
201 |
#endif |
202 |
|
203 |
/* clean up */ |
204 |
if (index_list!=NULL) { |
205 |
#pragma omp parallel for private(i) |
206 |
for(i=0;i<A->numOutput;++i) Paso_IndexList_free(index_list[i].extension); |
207 |
} |
208 |
TMPMEMFREE(index_list); |
209 |
|
210 |
return out; |
211 |
} |
212 |
|
213 |
|
214 |
|
215 |
/* |
216 |
* Computes the pattern of C = A binary operation B for CSR matrices A,B |
217 |
* |
218 |
* Note: we do not check whether A_ij(op)B_ij=0 |
219 |
* |
220 |
*/ |
221 |
Paso_Pattern* Paso_Pattern_binop(int type, Paso_Pattern* A, Paso_Pattern* B) { |
222 |
Paso_Pattern*out=NULL; |
223 |
index_t index_offset=(type & PATTERN_FORMAT_OFFSET1 ? 1:0); |
224 |
index_t iptrA,iptrB,*A_row=NULL,*B_row=NULL; |
225 |
dim_t i,j,k; |
226 |
|
227 |
Paso_IndexList* index_list=NULL; |
228 |
|
229 |
index_list=TMPMEMALLOC(A->numOutput,Paso_IndexList); |
230 |
if (! Paso_checkPtr(index_list)) { |
231 |
|
232 |
#pragma omp parallel private(i) |
233 |
{ |
234 |
#pragma omp for schedule(static) |
235 |
for(i=0;i<A->numOutput;++i) { |
236 |
index_list[i].extension=NULL; |
237 |
index_list[i].n=0; |
238 |
} |
239 |
} |
240 |
} |
241 |
for(i = 0; i < B->numOutput; i++){ |
242 |
iptrA = A->ptr[i], |
243 |
iptrB = B->ptr[i]; |
244 |
|
245 |
while (iptrA < A->ptr[i+1] && iptrB < B->ptr[i+1]) { |
246 |
j = A->index[iptrA]; |
247 |
k = B->index[iptrB]; |
248 |
if (j<k) { |
249 |
Finley_IndexList_insertIndex(&(index_list[i]),j); |
250 |
iptrA++; |
251 |
} else if (j>k) { |
252 |
Finley_IndexList_insertIndex(&(index_list[i]),k); |
253 |
iptrB++; |
254 |
} else if (j==k) { |
255 |
Finley_IndexList_insertIndex(&(index_list[i]),j); |
256 |
iptrB++; |
257 |
iptrA++; |
258 |
} |
259 |
} |
260 |
while(iptrA < A->ptr[i+1]) { |
261 |
j = A->index[iptrA]; |
262 |
Finley_IndexList_insertIndex(&(index_list[i]),j); |
263 |
iptrA++; |
264 |
} |
265 |
while(iptrB < B->ptr[i+1]) { |
266 |
k = B->index[iptrB]; |
267 |
Finley_IndexList_insertIndex(&(index_list[i]),k); |
268 |
iptrB++; |
269 |
} |
270 |
} |
271 |
|
272 |
out=Paso_IndexList_createPattern(0, A->numOutput,index_list,0,INDEXLIST_LENGTH,0); |
273 |
|
274 |
#ifdef Paso_TRACE |
275 |
printf("Paso_Pattern_binop: new pattern has been allocated.\n"); |
276 |
#endif |
277 |
|
278 |
/* clean up */ |
279 |
if (index_list!=NULL) { |
280 |
#pragma omp parallel for private(i) |
281 |
for(i=0;i<A->numOutput;++i) Paso_IndexList_free(index_list[i].extension); |
282 |
} |
283 |
TMPMEMFREE(index_list); |
284 |
|
285 |
return out; |
286 |
} |
287 |
|
288 |
/* inserts row index row into the Paso_IndexList in if it does not exist */ |
289 |
|
290 |
void Paso_IndexList_insertIndex(Paso_IndexList* in, index_t index) { |
291 |
dim_t i; |
292 |
/* is index in in? */ |
293 |
for (i=0;i<in->n;i++) { |
294 |
if (in->index[i]==index) return; |
295 |
} |
296 |
/* index could not be found */ |
297 |
if (in->n==INDEXLIST_LENGTH) { |
298 |
/* if in->index is full check the extension */ |
299 |
if (in->extension==NULL) { |
300 |
in->extension=TMPMEMALLOC(1,Paso_IndexList); |
301 |
if (Paso_checkPtr(in->extension)) return; |
302 |
in->extension->n=0; |
303 |
in->extension->extension=NULL; |
304 |
} |
305 |
Paso_IndexList_insertIndex(in->extension,index); |
306 |
} else { |
307 |
/* insert index into in->index*/ |
308 |
in->index[in->n]=index; |
309 |
in->n++; |
310 |
} |
311 |
} |
312 |
|
313 |
/* counts the number of row indices in the Paso_IndexList in */ |
314 |
|
315 |
dim_t Paso_IndexList_count(Paso_IndexList* in, index_t range_min,index_t range_max) { |
316 |
dim_t i; |
317 |
dim_t out=0; |
318 |
register index_t itmp; |
319 |
if (in==NULL) { |
320 |
return 0; |
321 |
} else { |
322 |
for (i=0;i<in->n;i++) { |
323 |
itmp=in->index[i]; |
324 |
if ((itmp>=range_min) && (range_max>itmp)) ++out; |
325 |
} |
326 |
return out+Paso_IndexList_count(in->extension, range_min,range_max); |
327 |
} |
328 |
} |
329 |
|
330 |
/* count the number of row indices in the Paso_IndexList in */ |
331 |
|
332 |
void Paso_IndexList_toArray(Paso_IndexList* in, index_t* array, index_t range_min,index_t range_max, index_t index_offset) { |
333 |
dim_t i, ptr; |
334 |
register index_t itmp; |
335 |
if (in!=NULL) { |
336 |
ptr=0; |
337 |
for (i=0;i<in->n;i++) { |
338 |
itmp=in->index[i]; |
339 |
if ((itmp>=range_min) && (range_max>itmp)) { |
340 |
array[ptr]=itmp+index_offset; |
341 |
ptr++; |
342 |
} |
343 |
|
344 |
} |
345 |
Paso_IndexList_toArray(in->extension,&(array[ptr]), range_min, range_max, index_offset); |
346 |
} |
347 |
} |
348 |
|
349 |
/* deallocates the Paso_IndexList in by recursive calls */ |
350 |
|
351 |
void Paso_IndexList_free(Paso_IndexList* in) { |
352 |
if (in!=NULL) { |
353 |
Paso_IndexList_free(in->extension); |
354 |
TMPMEMFREE(in); |
355 |
} |
356 |
} |
357 |
|
358 |
/* creates a Paso_pattern from a range of indices */ |
359 |
Paso_Pattern* Paso_IndexList_createPattern(dim_t n0, dim_t n,Paso_IndexList* index_list,index_t range_min,index_t range_max,index_t index_offset) |
360 |
{ |
361 |
dim_t *ptr=NULL; |
362 |
register dim_t s,i,itmp; |
363 |
index_t *index=NULL; |
364 |
Paso_Pattern* out=NULL; |
365 |
|
366 |
ptr=MEMALLOC(n+1-n0,index_t); |
367 |
if (! Paso_checkPtr(ptr) ) { |
368 |
/* get the number of connections per row */ |
369 |
#pragma omp parallel for schedule(static) private(i) |
370 |
for(i=n0;i<n;++i) { |
371 |
ptr[i-n0]=Paso_IndexList_count(&index_list[i],range_min,range_max); |
372 |
} |
373 |
/* accumulate ptr */ |
374 |
s=0; |
375 |
for(i=n0;i<n;++i) { |
376 |
itmp=ptr[i-n0]; |
377 |
ptr[i-n0]=s; |
378 |
s+=itmp; |
379 |
} |
380 |
ptr[n-n0]=s; |
381 |
/* fill index */ |
382 |
index=MEMALLOC(ptr[n-n0],index_t); |
383 |
if (! Paso_checkPtr(index)) { |
384 |
#pragma omp parallel for schedule(static) |
385 |
for(i=n0;i<n;++i) { |
386 |
Paso_IndexList_toArray(&index_list[i],&index[ptr[i-n0]],range_min,range_max,index_offset); |
387 |
} |
388 |
out=Paso_Pattern_alloc(PATTERN_FORMAT_DEFAULT,1,1,n-n0,range_max+index_offset,ptr,index); |
389 |
} |
390 |
} |
391 |
if (! Paso_noError()) { |
392 |
MEMFREE(ptr); |
393 |
MEMFREE(index); |
394 |
Paso_Pattern_free(out); |
395 |
} |
396 |
return out; |
397 |
} |