1 |
|
2 |
/******************************************************* |
3 |
* |
4 |
* Copyright (c) 2003-2008 by University of Queensland |
5 |
* Earth Systems Science Computational Center (ESSCC) |
6 |
* http://www.uq.edu.au/esscc |
7 |
* |
8 |
* Primary Business: Queensland, Australia |
9 |
* Licensed under the Open Software License version 3.0 |
10 |
* http://www.opensource.org/licenses/osl-3.0.php |
11 |
* |
12 |
*******************************************************/ |
13 |
|
14 |
|
15 |
/**************************************************************/ |
16 |
|
17 |
/* Paso: Pattern */ |
18 |
|
19 |
/**************************************************************/ |
20 |
|
21 |
/* Author: gross@access.edu.au */ |
22 |
|
23 |
/**************************************************************/ |
24 |
|
25 |
#include "Paso.h" |
26 |
#include "Pattern.h" |
27 |
|
28 |
/**************************************************************/ |
29 |
|
30 |
/* allocates a Pattern */ |
31 |
|
32 |
Paso_Pattern* Paso_Pattern_alloc(int type, dim_t input_block_size, dim_t output_block_size, dim_t numOutput, dim_t numInput, index_t* ptr, index_t* index) { |
33 |
Paso_Pattern*out=NULL; |
34 |
index_t index_offset=(type & PATTERN_FORMAT_OFFSET1 ? 1:0); |
35 |
index_t loc_min_index,loc_max_index,min_index=index_offset,max_index=index_offset-1; |
36 |
dim_t i, sum=0; |
37 |
Paso_resetError(); |
38 |
|
39 |
if (type & PATTERN_FORMAT_SYM) { |
40 |
Paso_setError(TYPE_ERROR,"Paso_Pattern_alloc: symmetric matrix pattern is not supported yet"); |
41 |
return NULL; |
42 |
} |
43 |
if (ptr!=NULL && index != NULL) { |
44 |
#pragma omp parallel private(loc_min_index,loc_max_index,i) |
45 |
{ |
46 |
loc_min_index=index_offset; |
47 |
loc_max_index=index_offset-1; |
48 |
if (type & PATTERN_FORMAT_OFFSET1) { |
49 |
#pragma omp for schedule(static) |
50 |
for (i=0;i<numOutput;++i) { |
51 |
if (ptr[i]<ptr[i+1]) { |
52 |
#ifdef USE_QSORTG |
53 |
qsortG(&(index[ptr[i]-1]),(size_t)(ptr[i+1]-ptr[i]),sizeof(index_t),Paso_comparIndex); |
54 |
#else |
55 |
qsort(&(index[ptr[i]-1]),(size_t)(ptr[i+1]-ptr[i]),sizeof(index_t),Paso_comparIndex); |
56 |
#endif |
57 |
loc_min_index=MIN(loc_min_index,index[ptr[i]-1]); |
58 |
loc_max_index=MAX(loc_max_index,index[ptr[i+1]-2]); |
59 |
} |
60 |
} |
61 |
} else { |
62 |
#pragma omp for schedule(static) |
63 |
for (i=0;i<numOutput;++i) { |
64 |
if (ptr[i]<ptr[i+1]) { |
65 |
#ifdef USE_QSORTG |
66 |
qsortG(&(index[ptr[i]]),(size_t)(ptr[i+1]-ptr[i]),sizeof(index_t),Paso_comparIndex); |
67 |
#else |
68 |
qsort(&(index[ptr[i]]),(size_t)(ptr[i+1]-ptr[i]),sizeof(index_t),Paso_comparIndex); |
69 |
#endif |
70 |
loc_min_index=MIN(loc_min_index,index[ptr[i]]); |
71 |
loc_max_index=MAX(loc_max_index,index[ptr[i+1]-1]); |
72 |
} |
73 |
} |
74 |
} |
75 |
#pragma omp critical |
76 |
{ |
77 |
min_index=MIN(loc_min_index,min_index); |
78 |
max_index=MAX(loc_max_index,max_index); |
79 |
} |
80 |
} |
81 |
if ( (min_index<index_offset) || (max_index>=numInput+index_offset) ) { |
82 |
Paso_setError(TYPE_ERROR,"Paso_Pattern_alloc: Pattern index out of range."); |
83 |
return NULL; |
84 |
} |
85 |
} |
86 |
out=MEMALLOC(1,Paso_Pattern); |
87 |
if (! Paso_checkPtr(out)) { |
88 |
out->type=type; |
89 |
out->reference_counter=1; |
90 |
out->numOutput=numOutput; |
91 |
out->numInput=numInput; |
92 |
out->ptr=ptr; |
93 |
out->index=index; |
94 |
out->input_block_size=input_block_size; |
95 |
out->output_block_size=output_block_size; |
96 |
out->block_size=out->input_block_size * out->output_block_size; |
97 |
if (out->ptr == NULL) { |
98 |
out->len=0; |
99 |
} else { |
100 |
out->len=out->ptr[out->numOutput] - index_offset; |
101 |
} |
102 |
} |
103 |
#ifdef Paso_TRACE |
104 |
printf("Paso_Pattern_alloc: system matrix pattern as been allocated.\n"); |
105 |
#endif |
106 |
return out; |
107 |
} |
108 |
|
109 |
/* returns a reference to in */ |
110 |
|
111 |
Paso_Pattern* Paso_Pattern_getReference(Paso_Pattern* in) { |
112 |
if (in!=NULL) { |
113 |
++(in->reference_counter); |
114 |
} |
115 |
return in; |
116 |
} |
117 |
|
118 |
/* deallocates a Pattern: */ |
119 |
|
120 |
void Paso_Pattern_free(Paso_Pattern* in) { |
121 |
if (in!=NULL) { |
122 |
in->reference_counter--; |
123 |
if (in->reference_counter<=0) { |
124 |
MEMFREE(in->ptr); |
125 |
MEMFREE(in->index); |
126 |
MEMFREE(in); |
127 |
#ifdef Paso_TRACE |
128 |
printf("Paso_Pattern_free: pattern as been deallocated.\n"); |
129 |
#endif |
130 |
} |
131 |
} |
132 |
} |
133 |
/* *************************************************************/ |
134 |
|
135 |
/* some routines which help to get the matrix pattern from elements: */ |
136 |
|
137 |
/* this routine is used by qsort called in Paso_Pattern_alloc */ |
138 |
|
139 |
int Paso_comparIndex(const void *index1,const void *index2){ |
140 |
index_t Iindex1,Iindex2; |
141 |
Iindex1=*(index_t*)index1; |
142 |
Iindex2=*(index_t*)index2; |
143 |
if (Iindex1<Iindex2) { |
144 |
return -1; |
145 |
} else { |
146 |
if (Iindex1>Iindex2) { |
147 |
return 1; |
148 |
} else { |
149 |
return 0; |
150 |
} |
151 |
} |
152 |
} |
153 |
|
154 |
bool_t Paso_Pattern_isEmpty(Paso_Pattern* in) { |
155 |
if (in != NULL) { |
156 |
if ((in->ptr != NULL) && (in->index != NULL)) return FALSE; |
157 |
} |
158 |
return TRUE; |
159 |
} |
160 |
|
161 |
|
162 |
/* computes the pattern coming from matrix-matrix multiplication |
163 |
* |
164 |
**/ |
165 |
|
166 |
Paso_Pattern* Paso_Pattern_multiply(int type, Paso_Pattern* A, Paso_Pattern* B) { |
167 |
Paso_Pattern*out=NULL; |
168 |
index_t index_offset=(type & PATTERN_FORMAT_OFFSET1 ? 1:0); |
169 |
index_t iptrA,iptrB; |
170 |
dim_t i,j,k; |
171 |
Paso_IndexList* index_list=NULL; |
172 |
|
173 |
index_list=TMPMEMALLOC(A->numOutput,Paso_IndexList); |
174 |
if (! Paso_checkPtr(index_list)) { |
175 |
|
176 |
#pragma omp parallel private(i) |
177 |
{ |
178 |
#pragma omp for schedule(static) |
179 |
for(i=0;i<A->numOutput;++i) { |
180 |
index_list[i].extension=NULL; |
181 |
index_list[i].n=0; |
182 |
} |
183 |
} |
184 |
} |
185 |
|
186 |
for(i = 0; i < A->numOutput; i++) { |
187 |
for(iptrA = A->ptr[i]; iptrA < A->ptr[i+1]; ++iptrA) { |
188 |
j = A->index[iptrA]; |
189 |
for(iptrB = B->ptr[j]; iptrB < B->ptr[j+1]; ++iptrB) { |
190 |
k = B->index[iptrB]; |
191 |
if(i==k) { |
192 |
Finley_IndexList_insertIndex(&(index_list[i]),k); |
193 |
break; |
194 |
} |
195 |
} |
196 |
} |
197 |
} |
198 |
|
199 |
out=Paso_IndexList_createPattern(0, A->numOutput,index_list,0,INDEXLIST_LENGTH,0); |
200 |
|
201 |
#ifdef Paso_TRACE |
202 |
printf("Paso_Pattern_multipy: new pattern has been allocated.\n"); |
203 |
#endif |
204 |
|
205 |
/* clean up */ |
206 |
if (index_list!=NULL) { |
207 |
#pragma omp parallel for private(i) |
208 |
for(i=0;i<A->numOutput;++i) Paso_IndexList_free(index_list[i].extension); |
209 |
} |
210 |
TMPMEMFREE(index_list); |
211 |
|
212 |
return out; |
213 |
} |
214 |
|
215 |
|
216 |
|
217 |
/* |
218 |
* Computes the pattern of C = A binary operation B for CSR matrices A,B |
219 |
* |
220 |
* Note: we do not check whether A_ij(op)B_ij=0 |
221 |
* |
222 |
*/ |
223 |
Paso_Pattern* Paso_Pattern_binop(int type, Paso_Pattern* A, Paso_Pattern* B) { |
224 |
Paso_Pattern*out=NULL; |
225 |
index_t index_offset=(type & PATTERN_FORMAT_OFFSET1 ? 1:0); |
226 |
index_t iptrA,iptrB,*A_row=NULL,*B_row=NULL; |
227 |
dim_t i,j,k; |
228 |
|
229 |
Paso_IndexList* index_list=NULL; |
230 |
|
231 |
index_list=TMPMEMALLOC(A->numOutput,Paso_IndexList); |
232 |
if (! Paso_checkPtr(index_list)) { |
233 |
|
234 |
#pragma omp parallel private(i) |
235 |
{ |
236 |
#pragma omp for schedule(static) |
237 |
for(i=0;i<A->numOutput;++i) { |
238 |
index_list[i].extension=NULL; |
239 |
index_list[i].n=0; |
240 |
} |
241 |
} |
242 |
} |
243 |
for(i = 0; i < A->numOutput; i++){ |
244 |
iptrA = A->ptr[i], |
245 |
iptrB = B->ptr[i]; |
246 |
|
247 |
while (iptrA < A->ptr[i+1] && iptrB < B->ptr[i+1]) { |
248 |
j = A->index[iptrA]; |
249 |
k = B->index[iptrB]; |
250 |
if (j<k) { |
251 |
Finley_IndexList_insertIndex(&(index_list[i]),j); |
252 |
iptrA++; |
253 |
} else if (j>k) { |
254 |
Finley_IndexList_insertIndex(&(index_list[i]),k); |
255 |
iptrB++; |
256 |
} else if (j==k) { |
257 |
Finley_IndexList_insertIndex(&(index_list[i]),j); |
258 |
iptrB++; |
259 |
iptrA++; |
260 |
} |
261 |
} |
262 |
while(iptrA < A->ptr[i+1]) { |
263 |
j = A->index[iptrA]; |
264 |
Finley_IndexList_insertIndex(&(index_list[i]),j); |
265 |
iptrA++; |
266 |
} |
267 |
while(iptrB < B->ptr[i+1]) { |
268 |
k = B->index[iptrB]; |
269 |
Finley_IndexList_insertIndex(&(index_list[i]),k); |
270 |
iptrB++; |
271 |
} |
272 |
} |
273 |
|
274 |
out=Paso_IndexList_createPattern(0, A->numOutput,index_list,0,INDEXLIST_LENGTH,0); |
275 |
|
276 |
#ifdef Paso_TRACE |
277 |
printf("Paso_Pattern_binop: new pattern has been allocated.\n"); |
278 |
#endif |
279 |
|
280 |
/* clean up */ |
281 |
if (index_list!=NULL) { |
282 |
#pragma omp parallel for private(i) |
283 |
for(i=0;i<A->numOutput;++i) Paso_IndexList_free(index_list[i].extension); |
284 |
} |
285 |
TMPMEMFREE(index_list); |
286 |
|
287 |
return out; |
288 |
} |
289 |
|
290 |
/* inserts row index row into the Paso_IndexList in if it does not exist */ |
291 |
|
292 |
void Paso_IndexList_insertIndex(Paso_IndexList* in, index_t index) { |
293 |
dim_t i; |
294 |
/* is index in in? */ |
295 |
for (i=0;i<in->n;i++) { |
296 |
if (in->index[i]==index) return; |
297 |
} |
298 |
/* index could not be found */ |
299 |
if (in->n==INDEXLIST_LENGTH) { |
300 |
/* if in->index is full check the extension */ |
301 |
if (in->extension==NULL) { |
302 |
in->extension=TMPMEMALLOC(1,Paso_IndexList); |
303 |
if (Paso_checkPtr(in->extension)) return; |
304 |
in->extension->n=0; |
305 |
in->extension->extension=NULL; |
306 |
} |
307 |
Paso_IndexList_insertIndex(in->extension,index); |
308 |
} else { |
309 |
/* insert index into in->index*/ |
310 |
in->index[in->n]=index; |
311 |
in->n++; |
312 |
} |
313 |
} |
314 |
|
315 |
/* counts the number of row indices in the Paso_IndexList in */ |
316 |
|
317 |
dim_t Paso_IndexList_count(Paso_IndexList* in, index_t range_min,index_t range_max) { |
318 |
dim_t i; |
319 |
dim_t out=0; |
320 |
register index_t itmp; |
321 |
if (in==NULL) { |
322 |
return 0; |
323 |
} else { |
324 |
for (i=0;i<in->n;i++) { |
325 |
itmp=in->index[i]; |
326 |
if ((itmp>=range_min) && (range_max>itmp)) ++out; |
327 |
} |
328 |
return out+Paso_IndexList_count(in->extension, range_min,range_max); |
329 |
} |
330 |
} |
331 |
|
332 |
/* count the number of row indices in the Paso_IndexList in */ |
333 |
|
334 |
void Paso_IndexList_toArray(Paso_IndexList* in, index_t* array, index_t range_min,index_t range_max, index_t index_offset) { |
335 |
dim_t i, ptr; |
336 |
register index_t itmp; |
337 |
if (in!=NULL) { |
338 |
ptr=0; |
339 |
for (i=0;i<in->n;i++) { |
340 |
itmp=in->index[i]; |
341 |
if ((itmp>=range_min) && (range_max>itmp)) { |
342 |
array[ptr]=itmp+index_offset; |
343 |
ptr++; |
344 |
} |
345 |
|
346 |
} |
347 |
Paso_IndexList_toArray(in->extension,&(array[ptr]), range_min, range_max, index_offset); |
348 |
} |
349 |
} |
350 |
|
351 |
/* deallocates the Paso_IndexList in by recursive calls */ |
352 |
|
353 |
void Paso_IndexList_free(Paso_IndexList* in) { |
354 |
if (in!=NULL) { |
355 |
Paso_IndexList_free(in->extension); |
356 |
TMPMEMFREE(in); |
357 |
} |
358 |
} |
359 |
|
360 |
/* creates a Paso_pattern from a range of indices */ |
361 |
Paso_Pattern* Paso_IndexList_createPattern(dim_t n0, dim_t n,Paso_IndexList* index_list,index_t range_min,index_t range_max,index_t index_offset) |
362 |
{ |
363 |
dim_t *ptr=NULL; |
364 |
register dim_t s,i,itmp; |
365 |
index_t *index=NULL; |
366 |
Paso_Pattern* out=NULL; |
367 |
|
368 |
ptr=MEMALLOC(n+1-n0,index_t); |
369 |
if (! Paso_checkPtr(ptr) ) { |
370 |
/* get the number of connections per row */ |
371 |
#pragma omp parallel for schedule(static) private(i) |
372 |
for(i=n0;i<n;++i) { |
373 |
ptr[i-n0]=Paso_IndexList_count(&index_list[i],range_min,range_max); |
374 |
} |
375 |
/* accumulate ptr */ |
376 |
s=0; |
377 |
for(i=n0;i<n;++i) { |
378 |
itmp=ptr[i-n0]; |
379 |
ptr[i-n0]=s; |
380 |
s+=itmp; |
381 |
} |
382 |
ptr[n-n0]=s; |
383 |
/* fill index */ |
384 |
index=MEMALLOC(ptr[n-n0],index_t); |
385 |
if (! Paso_checkPtr(index)) { |
386 |
#pragma omp parallel for schedule(static) |
387 |
for(i=n0;i<n;++i) { |
388 |
Paso_IndexList_toArray(&index_list[i],&index[ptr[i-n0]],range_min,range_max,index_offset); |
389 |
} |
390 |
out=Paso_Pattern_alloc(PATTERN_FORMAT_DEFAULT,1,1,n-n0,range_max+index_offset,ptr,index); |
391 |
} |
392 |
} |
393 |
if (! Paso_noError()) { |
394 |
MEMFREE(ptr); |
395 |
MEMFREE(index); |
396 |
Paso_Pattern_free(out); |
397 |
} |
398 |
return out; |
399 |
} |