/[escript]/trunk/paso/src/Pattern.c
ViewVC logotype

Annotation of /trunk/paso/src/Pattern.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2551 - (hide annotations)
Thu Jul 23 09:19:15 2009 UTC (10 years, 3 months ago) by gross
File MIME type: text/plain
File size: 11236 byte(s)
a problem with the sparse matrix unrolling fixed.
1 ksteube 1313
2     /*******************************************************
3 ksteube 1811 *
4 jfenwick 2548 * Copyright (c) 2003-2009 by University of Queensland
5 ksteube 1811 * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13 ksteube 1313
14 ksteube 1811
15 phornby 1916
16 ksteube 1313 /**************************************************************/
17    
18     /* Paso: Pattern */
19    
20     /**************************************************************/
21    
22     /* Author: gross@access.edu.au */
23    
24     /**************************************************************/
25    
26     #include "Paso.h"
27     #include "Pattern.h"
28    
29     /**************************************************************/
30    
31     /* allocates a Pattern */
32    
33 gross 2551 Paso_Pattern* Paso_Pattern_alloc(int type, dim_t numOutput, dim_t numInput, index_t* ptr, index_t* index) {
34 artak 1976 Paso_Pattern *out=NULL;
35 ksteube 1313 index_t index_offset=(type & PATTERN_FORMAT_OFFSET1 ? 1:0);
36     index_t loc_min_index,loc_max_index,min_index=index_offset,max_index=index_offset-1;
37 artak 1976 dim_t i;
38 ksteube 1313 Paso_resetError();
39    
40     if (type & PATTERN_FORMAT_SYM) {
41     Paso_setError(TYPE_ERROR,"Paso_Pattern_alloc: symmetric matrix pattern is not supported yet");
42     return NULL;
43     }
44     if (ptr!=NULL && index != NULL) {
45     #pragma omp parallel private(loc_min_index,loc_max_index,i)
46     {
47     loc_min_index=index_offset;
48     loc_max_index=index_offset-1;
49     if (type & PATTERN_FORMAT_OFFSET1) {
50     #pragma omp for schedule(static)
51     for (i=0;i<numOutput;++i) {
52     if (ptr[i]<ptr[i+1]) {
53     #ifdef USE_QSORTG
54     qsortG(&(index[ptr[i]-1]),(size_t)(ptr[i+1]-ptr[i]),sizeof(index_t),Paso_comparIndex);
55     #else
56     qsort(&(index[ptr[i]-1]),(size_t)(ptr[i+1]-ptr[i]),sizeof(index_t),Paso_comparIndex);
57     #endif
58     loc_min_index=MIN(loc_min_index,index[ptr[i]-1]);
59     loc_max_index=MAX(loc_max_index,index[ptr[i+1]-2]);
60     }
61     }
62     } else {
63     #pragma omp for schedule(static)
64     for (i=0;i<numOutput;++i) {
65     if (ptr[i]<ptr[i+1]) {
66     #ifdef USE_QSORTG
67     qsortG(&(index[ptr[i]]),(size_t)(ptr[i+1]-ptr[i]),sizeof(index_t),Paso_comparIndex);
68     #else
69     qsort(&(index[ptr[i]]),(size_t)(ptr[i+1]-ptr[i]),sizeof(index_t),Paso_comparIndex);
70     #endif
71     loc_min_index=MIN(loc_min_index,index[ptr[i]]);
72     loc_max_index=MAX(loc_max_index,index[ptr[i+1]-1]);
73     }
74     }
75     }
76     #pragma omp critical
77     {
78     min_index=MIN(loc_min_index,min_index);
79     max_index=MAX(loc_max_index,max_index);
80     }
81     }
82 gross 1736 if ( (min_index<index_offset) || (max_index>=numInput+index_offset) ) {
83     Paso_setError(TYPE_ERROR,"Paso_Pattern_alloc: Pattern index out of range.");
84 ksteube 1313 return NULL;
85     }
86     }
87     out=MEMALLOC(1,Paso_Pattern);
88     if (! Paso_checkPtr(out)) {
89     out->type=type;
90     out->reference_counter=1;
91     out->numOutput=numOutput;
92 gross 1736 out->numInput=numInput;
93 ksteube 1313 out->ptr=ptr;
94     out->index=index;
95 gross 2551
96 ksteube 1313 if (out->ptr == NULL) {
97     out->len=0;
98     } else {
99     out->len=out->ptr[out->numOutput] - index_offset;
100     }
101     }
102     #ifdef Paso_TRACE
103 artak 2380 printf("Paso_Pattern_alloc: system matrix pattern has been allocated.\n");
104 ksteube 1313 #endif
105     return out;
106     }
107    
108     /* returns a reference to in */
109    
110     Paso_Pattern* Paso_Pattern_getReference(Paso_Pattern* in) {
111     if (in!=NULL) {
112     ++(in->reference_counter);
113     }
114     return in;
115     }
116    
117     /* deallocates a Pattern: */
118    
119     void Paso_Pattern_free(Paso_Pattern* in) {
120     if (in!=NULL) {
121     in->reference_counter--;
122     if (in->reference_counter<=0) {
123     MEMFREE(in->ptr);
124     MEMFREE(in->index);
125     MEMFREE(in);
126     #ifdef Paso_TRACE
127     printf("Paso_Pattern_free: pattern as been deallocated.\n");
128     #endif
129     }
130     }
131     }
132     /* *************************************************************/
133    
134     /* some routines which help to get the matrix pattern from elements: */
135    
136     /* this routine is used by qsort called in Paso_Pattern_alloc */
137    
138     int Paso_comparIndex(const void *index1,const void *index2){
139     index_t Iindex1,Iindex2;
140     Iindex1=*(index_t*)index1;
141     Iindex2=*(index_t*)index2;
142     if (Iindex1<Iindex2) {
143     return -1;
144     } else {
145     if (Iindex1>Iindex2) {
146     return 1;
147     } else {
148     return 0;
149     }
150     }
151     }
152    
153     bool_t Paso_Pattern_isEmpty(Paso_Pattern* in) {
154     if (in != NULL) {
155     if ((in->ptr != NULL) && (in->index != NULL)) return FALSE;
156     }
157     return TRUE;
158     }
159 artak 1881
160    
161     /* computes the pattern coming from matrix-matrix multiplication
162     *
163     **/
164    
165     Paso_Pattern* Paso_Pattern_multiply(int type, Paso_Pattern* A, Paso_Pattern* B) {
166     Paso_Pattern*out=NULL;
167     index_t iptrA,iptrB;
168     dim_t i,j,k;
169     Paso_IndexList* index_list=NULL;
170    
171     index_list=TMPMEMALLOC(A->numOutput,Paso_IndexList);
172     if (! Paso_checkPtr(index_list)) {
173 artak 2107 #pragma omp parallel for private(i) schedule(static)
174 artak 1881 for(i=0;i<A->numOutput;++i) {
175     index_list[i].extension=NULL;
176     index_list[i].n=0;
177     }
178 artak 2107 }
179 artak 1881
180 artak 2107 #pragma omp parallel for private(i,iptrA,j,iptrB,k) schedule(static)
181 artak 1881 for(i = 0; i < A->numOutput; i++) {
182     for(iptrA = A->ptr[i]; iptrA < A->ptr[i+1]; ++iptrA) {
183     j = A->index[iptrA];
184     for(iptrB = B->ptr[j]; iptrB < B->ptr[j+1]; ++iptrB) {
185     k = B->index[iptrB];
186 phornby 1923 Paso_IndexList_insertIndex(&(index_list[i]),k);
187 artak 1881 }
188     }
189     }
190    
191 artak 1931 out=Paso_IndexList_createPattern(0, A->numOutput,index_list,0,B->numInput,0);
192 artak 1881
193     #ifdef Paso_TRACE
194     printf("Paso_Pattern_multipy: new pattern has been allocated.\n");
195     #endif
196    
197     /* clean up */
198     if (index_list!=NULL) {
199 artak 2107 #pragma omp parallel for private(i) schedule(static)
200 artak 1881 for(i=0;i<A->numOutput;++i) Paso_IndexList_free(index_list[i].extension);
201     }
202     TMPMEMFREE(index_list);
203    
204     return out;
205     }
206    
207    
208    
209     /*
210     * Computes the pattern of C = A binary operation B for CSR matrices A,B
211     *
212     * Note: we do not check whether A_ij(op)B_ij=0
213     *
214     */
215     Paso_Pattern* Paso_Pattern_binop(int type, Paso_Pattern* A, Paso_Pattern* B) {
216 artak 1976 Paso_Pattern *out=NULL;
217     index_t iptrA,iptrB;
218 artak 1890 dim_t i,j,k;
219 artak 1881
220     Paso_IndexList* index_list=NULL;
221    
222     index_list=TMPMEMALLOC(A->numOutput,Paso_IndexList);
223     if (! Paso_checkPtr(index_list)) {
224 artak 2107 #pragma omp parallel for private(i) schedule(static)
225 artak 1881 for(i=0;i<A->numOutput;++i) {
226     index_list[i].extension=NULL;
227     index_list[i].n=0;
228     }
229 artak 2107 }
230    
231     #pragma omp parallel for private(i,iptrA,j,iptrB,k) schedule(static)
232 artak 1902 for(i = 0; i < B->numOutput; i++){
233 artak 1881 iptrA = A->ptr[i],
234     iptrB = B->ptr[i];
235 artak 1890
236 artak 1881 while (iptrA < A->ptr[i+1] && iptrB < B->ptr[i+1]) {
237     j = A->index[iptrA];
238     k = B->index[iptrB];
239     if (j<k) {
240 phornby 1923 Paso_IndexList_insertIndex(&(index_list[i]),j);
241 artak 1881 iptrA++;
242     } else if (j>k) {
243 phornby 1923 Paso_IndexList_insertIndex(&(index_list[i]),k);
244 artak 1881 iptrB++;
245     } else if (j==k) {
246 phornby 1923 Paso_IndexList_insertIndex(&(index_list[i]),j);
247 artak 1881 iptrB++;
248     iptrA++;
249     }
250     }
251     while(iptrA < A->ptr[i+1]) {
252     j = A->index[iptrA];
253 phornby 1923 Paso_IndexList_insertIndex(&(index_list[i]),j);
254 artak 1881 iptrA++;
255     }
256     while(iptrB < B->ptr[i+1]) {
257     k = B->index[iptrB];
258 phornby 1923 Paso_IndexList_insertIndex(&(index_list[i]),k);
259 artak 1881 iptrB++;
260     }
261     }
262    
263 artak 1931 out=Paso_IndexList_createPattern(0, A->numOutput,index_list,0,A->numInput,0);
264 artak 1881
265     #ifdef Paso_TRACE
266 artak 1892 printf("Paso_Pattern_binop: new pattern has been allocated.\n");
267 artak 1881 #endif
268    
269     /* clean up */
270     if (index_list!=NULL) {
271 artak 2107 #pragma omp parallel for private(i) schedule(static)
272 artak 1881 for(i=0;i<A->numOutput;++i) Paso_IndexList_free(index_list[i].extension);
273     }
274     TMPMEMFREE(index_list);
275    
276     return out;
277     }
278    
279     /* inserts row index row into the Paso_IndexList in if it does not exist */
280    
281     void Paso_IndexList_insertIndex(Paso_IndexList* in, index_t index) {
282     dim_t i;
283     /* is index in in? */
284     for (i=0;i<in->n;i++) {
285     if (in->index[i]==index) return;
286     }
287     /* index could not be found */
288     if (in->n==INDEXLIST_LENGTH) {
289     /* if in->index is full check the extension */
290     if (in->extension==NULL) {
291     in->extension=TMPMEMALLOC(1,Paso_IndexList);
292     if (Paso_checkPtr(in->extension)) return;
293     in->extension->n=0;
294     in->extension->extension=NULL;
295     }
296     Paso_IndexList_insertIndex(in->extension,index);
297     } else {
298     /* insert index into in->index*/
299     in->index[in->n]=index;
300     in->n++;
301     }
302     }
303    
304     /* counts the number of row indices in the Paso_IndexList in */
305    
306     dim_t Paso_IndexList_count(Paso_IndexList* in, index_t range_min,index_t range_max) {
307     dim_t i;
308     dim_t out=0;
309     register index_t itmp;
310     if (in==NULL) {
311     return 0;
312     } else {
313     for (i=0;i<in->n;i++) {
314     itmp=in->index[i];
315     if ((itmp>=range_min) && (range_max>itmp)) ++out;
316     }
317     return out+Paso_IndexList_count(in->extension, range_min,range_max);
318     }
319     }
320    
321     /* count the number of row indices in the Paso_IndexList in */
322    
323     void Paso_IndexList_toArray(Paso_IndexList* in, index_t* array, index_t range_min,index_t range_max, index_t index_offset) {
324     dim_t i, ptr;
325     register index_t itmp;
326     if (in!=NULL) {
327     ptr=0;
328     for (i=0;i<in->n;i++) {
329     itmp=in->index[i];
330     if ((itmp>=range_min) && (range_max>itmp)) {
331     array[ptr]=itmp+index_offset;
332     ptr++;
333     }
334    
335     }
336     Paso_IndexList_toArray(in->extension,&(array[ptr]), range_min, range_max, index_offset);
337     }
338     }
339    
340     /* deallocates the Paso_IndexList in by recursive calls */
341    
342     void Paso_IndexList_free(Paso_IndexList* in) {
343     if (in!=NULL) {
344     Paso_IndexList_free(in->extension);
345     TMPMEMFREE(in);
346     }
347     }
348    
349     /* creates a Paso_pattern from a range of indices */
350     Paso_Pattern* Paso_IndexList_createPattern(dim_t n0, dim_t n,Paso_IndexList* index_list,index_t range_min,index_t range_max,index_t index_offset)
351     {
352     dim_t *ptr=NULL;
353     register dim_t s,i,itmp;
354     index_t *index=NULL;
355     Paso_Pattern* out=NULL;
356    
357     ptr=MEMALLOC(n+1-n0,index_t);
358     if (! Paso_checkPtr(ptr) ) {
359     /* get the number of connections per row */
360 artak 2107 #pragma omp parallel for private(i) schedule(static)
361 artak 1881 for(i=n0;i<n;++i) {
362     ptr[i-n0]=Paso_IndexList_count(&index_list[i],range_min,range_max);
363     }
364     /* accumulate ptr */
365     s=0;
366     for(i=n0;i<n;++i) {
367     itmp=ptr[i-n0];
368     ptr[i-n0]=s;
369     s+=itmp;
370     }
371     ptr[n-n0]=s;
372     /* fill index */
373     index=MEMALLOC(ptr[n-n0],index_t);
374     if (! Paso_checkPtr(index)) {
375 artak 2107 #pragma omp parallel for private(i) schedule(static)
376 artak 1881 for(i=n0;i<n;++i) {
377     Paso_IndexList_toArray(&index_list[i],&index[ptr[i-n0]],range_min,range_max,index_offset);
378     }
379 gross 2551 out=Paso_Pattern_alloc(PATTERN_FORMAT_DEFAULT,n-n0,range_max+index_offset,ptr,index);
380 artak 1881 }
381     }
382     if (! Paso_noError()) {
383     MEMFREE(ptr);
384     MEMFREE(index);
385     Paso_Pattern_free(out);
386     }
387     return out;
388     }

  ViewVC Help
Powered by ViewVC 1.1.26