1 |
|
2 |
/******************************************************* |
3 |
* |
4 |
* Copyright (c) 2003-2010 by University of Queensland |
5 |
* Earth Systems Science Computational Center (ESSCC) |
6 |
* http://www.uq.edu.au/esscc |
7 |
* |
8 |
* Primary Business: Queensland, Australia |
9 |
* Licensed under the Open Software License version 3.0 |
10 |
* http://www.opensource.org/licenses/osl-3.0.php |
11 |
* |
12 |
*******************************************************/ |
13 |
|
14 |
|
15 |
|
16 |
/**************************************************************/ |
17 |
|
18 |
/* Paso: Pattern */ |
19 |
|
20 |
/**************************************************************/ |
21 |
|
22 |
/* Author: Lutz Gross, l.gross@uq.edu.au */ |
23 |
|
24 |
/**************************************************************/ |
25 |
|
26 |
#include "Paso.h" |
27 |
#include "Pattern.h" |
28 |
|
29 |
/**************************************************************/ |
30 |
|
31 |
/* allocates a Pattern */ |
32 |
|
33 |
Paso_Pattern* Paso_Pattern_alloc(int type, dim_t numOutput, dim_t numInput, index_t* ptr, index_t* index) { |
34 |
Paso_Pattern *out=NULL; |
35 |
index_t index_offset=(type & PATTERN_FORMAT_OFFSET1 ? 1:0); |
36 |
index_t loc_min_index,loc_max_index,min_index=index_offset,max_index=index_offset-1; |
37 |
dim_t i; |
38 |
Paso_resetError(); |
39 |
|
40 |
if (type & PATTERN_FORMAT_SYM) { |
41 |
Paso_setError(TYPE_ERROR,"Paso_Pattern_alloc: symmetric matrix pattern is not supported yet"); |
42 |
return NULL; |
43 |
} |
44 |
if (ptr!=NULL && index != NULL) { |
45 |
#pragma omp parallel private(loc_min_index,loc_max_index,i) |
46 |
{ |
47 |
loc_min_index=index_offset; |
48 |
loc_max_index=index_offset-1; |
49 |
if (type & PATTERN_FORMAT_OFFSET1) { |
50 |
#pragma omp for schedule(static) |
51 |
for (i=0;i<numOutput;++i) { |
52 |
if (ptr[i]<ptr[i+1]) { |
53 |
#ifdef USE_QSORTG |
54 |
qsortG(&(index[ptr[i]-1]),(size_t)(ptr[i+1]-ptr[i]),sizeof(index_t),Paso_comparIndex); |
55 |
#else |
56 |
qsort(&(index[ptr[i]-1]),(size_t)(ptr[i+1]-ptr[i]),sizeof(index_t),Paso_comparIndex); |
57 |
#endif |
58 |
loc_min_index=MIN(loc_min_index,index[ptr[i]-1]); |
59 |
loc_max_index=MAX(loc_max_index,index[ptr[i+1]-2]); |
60 |
} |
61 |
} |
62 |
} else { |
63 |
#pragma omp for schedule(static) |
64 |
for (i=0;i<numOutput;++i) { |
65 |
if (ptr[i]<ptr[i+1]) { |
66 |
#ifdef USE_QSORTG |
67 |
qsortG(&(index[ptr[i]]),(size_t)(ptr[i+1]-ptr[i]),sizeof(index_t),Paso_comparIndex); |
68 |
#else |
69 |
qsort(&(index[ptr[i]]),(size_t)(ptr[i+1]-ptr[i]),sizeof(index_t),Paso_comparIndex); |
70 |
#endif |
71 |
loc_min_index=MIN(loc_min_index,index[ptr[i]]); |
72 |
loc_max_index=MAX(loc_max_index,index[ptr[i+1]-1]); |
73 |
} |
74 |
} |
75 |
} |
76 |
#pragma omp critical |
77 |
{ |
78 |
min_index=MIN(loc_min_index,min_index); |
79 |
max_index=MAX(loc_max_index,max_index); |
80 |
} |
81 |
} |
82 |
if ( (min_index<index_offset) || (max_index>=numInput+index_offset) ) { |
83 |
Paso_setError(TYPE_ERROR,"Paso_Pattern_alloc: Pattern index out of range."); |
84 |
return NULL; |
85 |
} |
86 |
} |
87 |
out=MEMALLOC(1,Paso_Pattern); |
88 |
if (! Paso_checkPtr(out)) { |
89 |
out->type=type; |
90 |
out->reference_counter=1; |
91 |
out->numOutput=numOutput; |
92 |
out->numInput=numInput; |
93 |
out->ptr=ptr; |
94 |
out->index=index; |
95 |
out->main_iptr = NULL; |
96 |
|
97 |
if (out->ptr == NULL) { |
98 |
out->len=0; |
99 |
} else { |
100 |
out->len=out->ptr[out->numOutput] - index_offset; |
101 |
} |
102 |
} |
103 |
return out; |
104 |
} |
105 |
|
106 |
/* returns a reference to in */ |
107 |
|
108 |
Paso_Pattern* Paso_Pattern_getReference(Paso_Pattern* in) { |
109 |
if (in!=NULL) { |
110 |
++(in->reference_counter); |
111 |
} |
112 |
return in; |
113 |
} |
114 |
|
115 |
/* deallocates a Pattern: */ |
116 |
|
117 |
void Paso_Pattern_free(Paso_Pattern* in) { |
118 |
if (in!=NULL) { |
119 |
in->reference_counter--; |
120 |
if (in->reference_counter<=0) { |
121 |
MEMFREE(in->ptr); |
122 |
MEMFREE(in->index); |
123 |
MEMFREE(in->main_iptr); |
124 |
MEMFREE(in); |
125 |
} |
126 |
} |
127 |
} |
128 |
/* *************************************************************/ |
129 |
|
130 |
/* some routines which help to get the matrix pattern from elements: */ |
131 |
|
132 |
/* this routine is used by qsort called in Paso_Pattern_alloc */ |
133 |
|
134 |
int Paso_comparIndex(const void *index1,const void *index2){ |
135 |
index_t Iindex1,Iindex2; |
136 |
Iindex1=*(index_t*)index1; |
137 |
Iindex2=*(index_t*)index2; |
138 |
if (Iindex1<Iindex2) { |
139 |
return -1; |
140 |
} else { |
141 |
if (Iindex1>Iindex2) { |
142 |
return 1; |
143 |
} else { |
144 |
return 0; |
145 |
} |
146 |
} |
147 |
} |
148 |
|
149 |
bool_t Paso_Pattern_isEmpty(Paso_Pattern* in) { |
150 |
if (in != NULL) { |
151 |
if ((in->ptr != NULL) && (in->index != NULL)) return FALSE; |
152 |
} |
153 |
return TRUE; |
154 |
} |
155 |
|
156 |
|
157 |
/* computes the pattern coming from matrix-matrix multiplication |
158 |
* |
159 |
**/ |
160 |
|
161 |
Paso_Pattern* Paso_Pattern_multiply(int type, Paso_Pattern* A, Paso_Pattern* B) { |
162 |
Paso_Pattern*out=NULL; |
163 |
index_t iptrA,iptrB; |
164 |
dim_t i,j,k; |
165 |
Paso_IndexList* index_list=NULL; |
166 |
|
167 |
index_list=TMPMEMALLOC(A->numOutput,Paso_IndexList); |
168 |
if (! Paso_checkPtr(index_list)) { |
169 |
#pragma omp parallel for private(i) schedule(static) |
170 |
for(i=0;i<A->numOutput;++i) { |
171 |
index_list[i].extension=NULL; |
172 |
index_list[i].n=0; |
173 |
} |
174 |
} |
175 |
|
176 |
#pragma omp parallel for private(i,iptrA,j,iptrB,k) schedule(static) |
177 |
for(i = 0; i < A->numOutput; i++) { |
178 |
for(iptrA = A->ptr[i]; iptrA < A->ptr[i+1]; ++iptrA) { |
179 |
j = A->index[iptrA]; |
180 |
for(iptrB = B->ptr[j]; iptrB < B->ptr[j+1]; ++iptrB) { |
181 |
k = B->index[iptrB]; |
182 |
Paso_IndexList_insertIndex(&(index_list[i]),k); |
183 |
} |
184 |
} |
185 |
} |
186 |
|
187 |
out=Paso_IndexList_createPattern(0, A->numOutput,index_list,0,B->numInput,0); |
188 |
|
189 |
/* clean up */ |
190 |
if (index_list!=NULL) { |
191 |
#pragma omp parallel for private(i) schedule(static) |
192 |
for(i=0;i<A->numOutput;++i) Paso_IndexList_free(index_list[i].extension); |
193 |
} |
194 |
TMPMEMFREE(index_list); |
195 |
|
196 |
return out; |
197 |
} |
198 |
|
199 |
|
200 |
|
201 |
/* |
202 |
* Computes the pattern of C = A binary operation B for CSR matrices A,B |
203 |
* |
204 |
* Note: we do not check whether A_ij(op)B_ij=0 |
205 |
* |
206 |
*/ |
207 |
Paso_Pattern* Paso_Pattern_binop(int type, Paso_Pattern* A, Paso_Pattern* B) { |
208 |
Paso_Pattern *out=NULL; |
209 |
index_t iptrA,iptrB; |
210 |
dim_t i,j,k; |
211 |
|
212 |
Paso_IndexList* index_list=NULL; |
213 |
|
214 |
index_list=TMPMEMALLOC(A->numOutput,Paso_IndexList); |
215 |
if (! Paso_checkPtr(index_list)) { |
216 |
#pragma omp parallel for private(i) schedule(static) |
217 |
for(i=0;i<A->numOutput;++i) { |
218 |
index_list[i].extension=NULL; |
219 |
index_list[i].n=0; |
220 |
} |
221 |
} |
222 |
|
223 |
#pragma omp parallel for private(i,iptrA,j,iptrB,k) schedule(static) |
224 |
for(i = 0; i < B->numOutput; i++){ |
225 |
iptrA = A->ptr[i], |
226 |
iptrB = B->ptr[i]; |
227 |
|
228 |
while (iptrA < A->ptr[i+1] && iptrB < B->ptr[i+1]) { |
229 |
j = A->index[iptrA]; |
230 |
k = B->index[iptrB]; |
231 |
if (j<k) { |
232 |
Paso_IndexList_insertIndex(&(index_list[i]),j); |
233 |
iptrA++; |
234 |
} else if (j>k) { |
235 |
Paso_IndexList_insertIndex(&(index_list[i]),k); |
236 |
iptrB++; |
237 |
} else if (j==k) { |
238 |
Paso_IndexList_insertIndex(&(index_list[i]),j); |
239 |
iptrB++; |
240 |
iptrA++; |
241 |
} |
242 |
} |
243 |
while(iptrA < A->ptr[i+1]) { |
244 |
j = A->index[iptrA]; |
245 |
Paso_IndexList_insertIndex(&(index_list[i]),j); |
246 |
iptrA++; |
247 |
} |
248 |
while(iptrB < B->ptr[i+1]) { |
249 |
k = B->index[iptrB]; |
250 |
Paso_IndexList_insertIndex(&(index_list[i]),k); |
251 |
iptrB++; |
252 |
} |
253 |
} |
254 |
|
255 |
out=Paso_IndexList_createPattern(0, A->numOutput,index_list,0,A->numInput,0); |
256 |
|
257 |
|
258 |
/* clean up */ |
259 |
if (index_list!=NULL) { |
260 |
#pragma omp parallel for private(i) schedule(static) |
261 |
for(i=0;i<A->numOutput;++i) Paso_IndexList_free(index_list[i].extension); |
262 |
} |
263 |
TMPMEMFREE(index_list); |
264 |
|
265 |
return out; |
266 |
} |
267 |
|
268 |
/* inserts row index row into the Paso_IndexList in if it does not exist */ |
269 |
|
270 |
void Paso_IndexList_insertIndex(Paso_IndexList* in, index_t index) { |
271 |
dim_t i; |
272 |
/* is index in in? */ |
273 |
for (i=0;i<in->n;i++) { |
274 |
if (in->index[i]==index) return; |
275 |
} |
276 |
/* index could not be found */ |
277 |
if (in->n==INDEXLIST_LENGTH) { |
278 |
/* if in->index is full check the extension */ |
279 |
if (in->extension==NULL) { |
280 |
in->extension=TMPMEMALLOC(1,Paso_IndexList); |
281 |
if (Paso_checkPtr(in->extension)) return; |
282 |
in->extension->n=0; |
283 |
in->extension->extension=NULL; |
284 |
} |
285 |
Paso_IndexList_insertIndex(in->extension,index); |
286 |
} else { |
287 |
/* insert index into in->index*/ |
288 |
in->index[in->n]=index; |
289 |
in->n++; |
290 |
} |
291 |
} |
292 |
|
293 |
/* counts the number of row indices in the Paso_IndexList in */ |
294 |
|
295 |
dim_t Paso_IndexList_count(Paso_IndexList* in, index_t range_min,index_t range_max) { |
296 |
dim_t i; |
297 |
dim_t out=0; |
298 |
register index_t itmp; |
299 |
if (in==NULL) { |
300 |
return 0; |
301 |
} else { |
302 |
for (i=0;i<in->n;i++) { |
303 |
itmp=in->index[i]; |
304 |
if ((itmp>=range_min) && (range_max>itmp)) ++out; |
305 |
} |
306 |
return out+Paso_IndexList_count(in->extension, range_min,range_max); |
307 |
} |
308 |
} |
309 |
|
310 |
/* count the number of row indices in the Paso_IndexList in */ |
311 |
|
312 |
void Paso_IndexList_toArray(Paso_IndexList* in, index_t* array, index_t range_min,index_t range_max, index_t index_offset) { |
313 |
dim_t i, ptr; |
314 |
register index_t itmp; |
315 |
if (in!=NULL) { |
316 |
ptr=0; |
317 |
for (i=0;i<in->n;i++) { |
318 |
itmp=in->index[i]; |
319 |
if ((itmp>=range_min) && (range_max>itmp)) { |
320 |
array[ptr]=itmp+index_offset; |
321 |
ptr++; |
322 |
} |
323 |
|
324 |
} |
325 |
Paso_IndexList_toArray(in->extension,&(array[ptr]), range_min, range_max, index_offset); |
326 |
} |
327 |
} |
328 |
|
329 |
/* deallocates the Paso_IndexList in by recursive calls */ |
330 |
|
331 |
void Paso_IndexList_free(Paso_IndexList* in) { |
332 |
if (in!=NULL) { |
333 |
Paso_IndexList_free(in->extension); |
334 |
TMPMEMFREE(in); |
335 |
} |
336 |
} |
337 |
|
338 |
/* creates a Paso_pattern from a range of indices */ |
339 |
Paso_Pattern* Paso_IndexList_createPattern(dim_t n0, dim_t n,Paso_IndexList* index_list,index_t range_min,index_t range_max,index_t index_offset) |
340 |
{ |
341 |
dim_t *ptr=NULL; |
342 |
register dim_t s,i,itmp; |
343 |
index_t *index=NULL; |
344 |
Paso_Pattern* out=NULL; |
345 |
|
346 |
ptr=MEMALLOC(n+1-n0,index_t); |
347 |
if (! Paso_checkPtr(ptr) ) { |
348 |
/* get the number of connections per row */ |
349 |
#pragma omp parallel for private(i) schedule(static) |
350 |
for(i=n0;i<n;++i) { |
351 |
ptr[i-n0]=Paso_IndexList_count(&index_list[i],range_min,range_max); |
352 |
} |
353 |
/* accumulate ptr */ |
354 |
s=0; |
355 |
for(i=n0;i<n;++i) { |
356 |
itmp=ptr[i-n0]; |
357 |
ptr[i-n0]=s; |
358 |
s+=itmp; |
359 |
} |
360 |
ptr[n-n0]=s; |
361 |
/* fill index */ |
362 |
index=MEMALLOC(ptr[n-n0],index_t); |
363 |
if (! Paso_checkPtr(index)) { |
364 |
#pragma omp parallel for private(i) schedule(static) |
365 |
for(i=n0;i<n;++i) { |
366 |
Paso_IndexList_toArray(&index_list[i],&index[ptr[i-n0]],range_min,range_max,index_offset); |
367 |
} |
368 |
out=Paso_Pattern_alloc(PATTERN_FORMAT_DEFAULT,n-n0,range_max+index_offset,ptr,index); |
369 |
} |
370 |
} |
371 |
if (! Paso_noError()) { |
372 |
MEMFREE(ptr); |
373 |
MEMFREE(index); |
374 |
Paso_Pattern_free(out); |
375 |
} |
376 |
return out; |
377 |
} |
378 |
|
379 |
index_t* Paso_Pattern_borrowMainDiagonalPointer(Paso_Pattern* A) |
380 |
{ |
381 |
const dim_t n=A->numOutput; |
382 |
int fail=0; |
383 |
index_t i,iptr,iptr_main; |
384 |
|
385 |
if (A->main_iptr == NULL) { |
386 |
A->main_iptr=MEMALLOC(n,index_t); |
387 |
if (! Paso_checkPtr(A->main_iptr) ) { |
388 |
#pragma omp parallel |
389 |
{ |
390 |
/* identify the main diagonals */ |
391 |
#pragma omp for schedule(static) private(i,iptr,iptr_main) |
392 |
for (i = 0; i < n; ++i) { |
393 |
iptr_main=A->ptr[0]-1; |
394 |
for (iptr=A->ptr[i];iptr<A->ptr[i+1]; iptr++) { |
395 |
if (A->index[iptr]==i) { |
396 |
iptr_main=iptr; |
397 |
break; |
398 |
} |
399 |
} |
400 |
A->main_iptr[i]=iptr_main; |
401 |
if (iptr_main==A->ptr[0]-1) fail=1; |
402 |
} |
403 |
|
404 |
} |
405 |
if (fail > 0) { |
406 |
MEMFREE(A->main_iptr); |
407 |
A->main_iptr=NULL; |
408 |
} |
409 |
|
410 |
} |
411 |
} |
412 |
return A->main_iptr; |
413 |
} |