/[escript]/trunk/paso/src/SparseMatrix.c
ViewVC logotype

Contents of /trunk/paso/src/SparseMatrix.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3259 - (show annotations)
Mon Oct 11 01:48:14 2010 UTC (8 years, 11 months ago) by jfenwick
File MIME type: text/plain
File size: 15115 byte(s)
Merging dudley and scons updates from branches

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2010 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /**************************************************************/
16
17 /* Paso: SparseMatrix */
18
19 /**************************************************************/
20
21 /* Author: Lutz Gross, l.gross@uq.edu.au */
22
23 /**************************************************************/
24
25 #include "Paso.h"
26 #include "SparseMatrix.h"
27 #include "TRILINOS.h"
28 #include "mmio.h"
29
30 /**************************************************************/
31 static void swap( index_t*, index_t*, double*, int, int );
32 static void q_sort( index_t*, index_t*, double*, int, int );
33 /*static void print_entries( index_t*, index_t*, double* );*/
34
35 static int M, N, nz;
36
37
38 /* debug: print the entries */
39 /*
40 void print_entries( index_t *r, index_t *c, double *v )
41 {
42 int i;
43
44 for( i=0; i<nz; i++ )
45 {
46 printf( "(%ld, %ld) == %e\n", (long)r[i], (long)c[i], v[i] );
47 }
48 }
49 */
50
51 /* swap function */
52 void swap( index_t *r, index_t *c, double *v, int left, int right )
53 {
54 double v_temp;
55 index_t temp;
56
57 temp = r[left];
58 r[left] = r[right];
59 r[right] = temp;
60
61 temp = c[left];
62 c[left] = c[right];
63 c[right] = temp;
64
65 v_temp = v[left];
66 v[left] = v[right];
67 v[right] = v_temp;
68 }
69
70 void q_sort( index_t *row, index_t *col, double *val, int begin, int end )
71 {
72 int l, r;
73 index_t pivot, lval;
74
75 if( end > begin )
76 {
77 pivot = N * row[begin] + col[begin];
78 l = begin + 1;
79 r = end;
80
81 while( l < r )
82 {
83 lval = N * row[l] + col[l];
84 if( lval < pivot )
85 l++;
86 else
87 {
88 r--;
89 swap( row, col, val, l, r );
90 }
91 }
92 l--;
93 swap( row, col, val, begin, l );
94 q_sort( row, col, val, begin, l );
95 q_sort( row, col, val, r, end );
96 }
97 }
98
99
100 /* allocates a SparseMatrix of type type using the given matrix pattern
101 Values are initialized by zero.
102 if patternIsUnrolled and type & MATRIX_FORMAT_BLK1, it is assumed that the pattern is allready unrolled to match the requested block size
103 and offsets otherwise unrolling and offset adjustment will be performed.
104 */
105 Paso_SparseMatrix* Paso_SparseMatrix_alloc(Paso_SparseMatrixType type,Paso_Pattern *pattern, int row_block_size, int col_block_size, const bool_t patternIsUnrolled) {
106
107 Paso_SparseMatrix*out=NULL;
108 Paso_SparseMatrixType pattern_format_out;
109 bool_t unroll=FALSE;
110
111 if (type & MATRIX_FORMAT_SYM) {
112 Esys_setError(TYPE_ERROR,"Paso_SparseMatrix_alloc: symmetric matrix pattern are not supported.");
113 return NULL;
114 }
115
116 if (patternIsUnrolled) {
117 if (! XNOR(type & MATRIX_FORMAT_OFFSET1, pattern->type & PATTERN_FORMAT_OFFSET1) ) {
118 Esys_setError(TYPE_ERROR,"Paso_SparseMatrix_alloc: requested offset and pattern offset does not match.");
119 return NULL;
120 }
121 }
122 /* do we need to apply unrolling ? */
123 unroll
124 /* we don't like non-square blocks */
125 = (row_block_size!=col_block_size)
126 /* or any block size bigger than 3 */
127 || (col_block_size>3)
128 /* or if lock size one requested and the block size is not 1 */
129 || ((type & MATRIX_FORMAT_BLK1) && (col_block_size>1) )
130 /* offsets don't match */
131 || ( (type & MATRIX_FORMAT_OFFSET1) != ( pattern->type & PATTERN_FORMAT_OFFSET1) ) ;
132
133 pattern_format_out= (type & MATRIX_FORMAT_OFFSET1)? PATTERN_FORMAT_OFFSET1: PATTERN_FORMAT_DEFAULT;
134
135 Esys_resetError();
136 out=MEMALLOC(1,Paso_SparseMatrix);
137 if (! Esys_checkPtr(out)) {
138 out->pattern=NULL;
139 out->val=NULL;
140 out->reference_counter=1;
141 out->type=type;
142 out->solver=NULL;
143
144 /* ====== compressed sparse columns === */
145 if (type & MATRIX_FORMAT_CSC) {
146 if (unroll) {
147 if (patternIsUnrolled) {
148 out->pattern=Paso_Pattern_getReference(pattern);
149 } else {
150 out->pattern=Paso_Pattern_unrollBlocks(pattern,pattern_format_out,col_block_size,row_block_size);
151 }
152 out->row_block_size=1;
153 out->col_block_size=1;
154 } else {
155 out->pattern=Paso_Pattern_unrollBlocks(pattern,pattern_format_out,1,1);
156 out->row_block_size=row_block_size;
157 out->col_block_size=col_block_size;
158 }
159 if (Esys_noError()) {
160 out->numRows = out->pattern->numInput;
161 out->numCols = out->pattern->numOutput;
162 }
163 } else {
164 /* ====== compressed sparse row === */
165 if (unroll) {
166 if (patternIsUnrolled) {
167 out->pattern=Paso_Pattern_getReference(pattern);
168 } else {
169 out->pattern=Paso_Pattern_unrollBlocks(pattern,pattern_format_out,row_block_size,col_block_size);
170 }
171 out->row_block_size=1;
172 out->col_block_size=1;
173 } else {
174 out->pattern=Paso_Pattern_unrollBlocks(pattern,pattern_format_out,1,1);
175 out->row_block_size=row_block_size;
176 out->col_block_size=col_block_size;
177 }
178 if (Esys_noError()) {
179 out->numRows = out->pattern->numOutput;
180 out->numCols = out->pattern->numInput;
181 }
182 }
183 if (Esys_noError()) {
184 out->block_size=out->row_block_size*out->col_block_size;
185 out->len=(size_t)(out->pattern->len)*(size_t)(out->block_size);
186
187 out->val=MEMALLOC(out->len,double);
188 if (! Esys_checkPtr(out->val)) Paso_SparseMatrix_setValues(out,DBLE(0));
189 }
190 }
191 /* all done: */
192 if (Esys_noError()) {
193 return out;
194 } else {
195 Paso_SparseMatrix_free(out);
196 return NULL;
197 }
198 }
199
200 /* returns a reference to Paso_SparseMatrix in */
201
202 Paso_SparseMatrix* Paso_SparseMatrix_getReference(Paso_SparseMatrix* in) {
203 if (in!=NULL) ++(in->reference_counter);
204 return in;
205 }
206
207 /* deallocates a SparseMatrix: */
208
209 void Paso_SparseMatrix_free(Paso_SparseMatrix* in) {
210 if (in!=NULL) {
211 in->reference_counter--;
212 if (in->reference_counter<=0) {
213 Paso_Pattern_free(in->pattern);
214 MEMFREE(in->val);
215 if(in->solver!=NULL) {
216 MEMFREE(in->solver);
217 }
218 MEMFREE(in);
219 }
220 }
221 }
222
223 Paso_SparseMatrix* Paso_SparseMatrix_loadMM_toCSR( char *fileName_p )
224 {
225 index_t *col_ind = NULL;
226 index_t *row_ind = NULL;
227 index_t *row_ptr = NULL;
228 double *val = NULL;
229 FILE *fileHandle_p = NULL;
230 Paso_Pattern* mainPattern=NULL;
231 Paso_SparseMatrix *out = NULL;
232 int i, curr_row, scan_ret;
233 MM_typecode matrixCode;
234 Esys_resetError();
235
236 /* open the file */
237 fileHandle_p = fopen( fileName_p, "r" );
238 if( fileHandle_p == NULL )
239 {
240 Esys_setError(IO_ERROR, "Paso_SparseMatrix_loadMM_toCSR: Cannot read file for reading.");
241 return NULL;
242 }
243
244 /* process banner */
245 if( mm_read_banner(fileHandle_p, &matrixCode) != 0 )
246 {
247 Esys_setError(IO_ERROR, "Paso_SparseMatrix_loadMM_toCSR: Error processing MM banner.");
248 fclose( fileHandle_p );
249 return NULL;
250 }
251 if( !(mm_is_real(matrixCode) && mm_is_sparse(matrixCode) && mm_is_general(matrixCode)) )
252 {
253
254 Esys_setError(TYPE_ERROR,"Paso_SparseMatrix_loadMM_toCSR: found Matrix Market type is not supported.");
255 fclose( fileHandle_p );
256 return NULL;
257 }
258
259 /* get matrix size */
260 if( mm_read_mtx_crd_size(fileHandle_p, &M, &N, &nz) != 0 )
261 {
262 Esys_setError(IO_ERROR, "Paso_SparseMatrix_loadMM_toCSR: Could not parse matrix size");
263 fclose( fileHandle_p );
264 return NULL;
265 }
266
267 /* prepare storage */
268 col_ind = MEMALLOC( nz, index_t );
269 row_ind = MEMALLOC( nz, index_t );
270 val = MEMALLOC( nz, double );
271
272 row_ptr = MEMALLOC( (M+1), index_t );
273
274 if( col_ind == NULL || row_ind == NULL || val == NULL || row_ptr == NULL )
275 {
276 Esys_setError(MEMORY_ERROR, "Paso_SparseMatrix_loadMM_toCSR: Could not allocate memory" );
277 fclose( fileHandle_p );
278 return NULL;
279 }
280
281 /* perform actual read of elements */
282 for( i=0; i<nz; i++ )
283 {
284 scan_ret = fscanf( fileHandle_p, "%d %d %le\n", &row_ind[i], &col_ind[i], &val[i] );
285 if (scan_ret!=3)
286 {
287 MEMFREE( val );
288 MEMFREE( row_ind );
289 MEMFREE( col_ind );
290 MEMFREE( row_ptr );
291 fclose(fileHandle_p);
292 return NULL;
293 }
294 row_ind[i]--;
295 col_ind[i]--;
296 }
297 fclose( fileHandle_p );
298
299 /* sort the entries */
300 q_sort( row_ind, col_ind, val, 0, nz );
301
302 /* setup row_ptr */
303 curr_row = 0;
304 for( i=0; (i<nz && curr_row<M); curr_row++ )
305 {
306 while( row_ind[i] != curr_row )
307 i++;
308 row_ptr[curr_row] = i;
309 }
310 row_ptr[M] = nz;
311
312 mainPattern=Paso_Pattern_alloc(PATTERN_FORMAT_DEFAULT,M,N,row_ptr,col_ind);
313 out = Paso_SparseMatrix_alloc(MATRIX_FORMAT_DEFAULT, mainPattern, 1, 1, TRUE);
314 /* copy values and cleanup temps */
315 for( i=0; i<nz; i++ ) out->val[i] = val[i];
316
317 Paso_Pattern_free(mainPattern);
318 MEMFREE( val );
319 MEMFREE( row_ind );
320 return out;
321 }
322
323
324 void Paso_SparseMatrix_saveMM(Paso_SparseMatrix * A_p, char * fileName_p) {
325 FILE * fileHandle_p = NULL;
326 dim_t N,M,i, iptr_ij;
327 MM_typecode matcode;
328
329 if (A_p->col_block_size !=A_p->row_block_size) {
330 Esys_setError(TYPE_ERROR, "Paso_SparseMatrix_saveMM: currently only square blocks are supported.");
331 return;
332 }
333 if (A_p->row_block_size>3) {
334 Esys_setError(TYPE_ERROR,"Paso_SparseMatrix_saveMM: currently only block size 3 is supported.\n");
335 return;
336 }
337
338 if (A_p->type & MATRIX_FORMAT_SYM) {
339 Esys_setError(TYPE_ERROR,"Paso_SparseMatrix_saveMM does not support symmetric storage scheme");
340 return;
341 }
342
343 /* open the file */
344 fileHandle_p = fopen(fileName_p, "w");
345 if (fileHandle_p==NULL) {
346 Esys_setError(IO_ERROR,"file could not be opened for writing");
347 return;
348 }
349
350 if (A_p->type & MATRIX_FORMAT_CSC) {
351 Esys_setError(TYPE_ERROR,"Paso_SparseMatrix_saveMM does not support CSC yet.");
352 } else {
353 mm_initialize_typecode(&matcode);
354 mm_set_matrix(&matcode);
355 mm_set_coordinate(&matcode);
356 mm_set_real(&matcode);
357
358 N= A_p->numRows;
359 M= A_p->numCols;
360 mm_write_banner(fileHandle_p, matcode);
361 mm_write_mtx_crd_size(fileHandle_p, N, M, A_p->pattern->ptr[N]);
362 if(A_p->row_block_size==1)
363 for (i=0; i<N; i++)
364 for (iptr_ij=A_p->pattern->ptr[i];iptr_ij<A_p->pattern->ptr[i+1]; ++iptr_ij) {
365 fprintf(fileHandle_p, "%d %d %25.15e\n", i+1, A_p->pattern->index[iptr_ij]+1, A_p->val[iptr_ij]);
366 }
367
368 if(A_p->row_block_size==2)
369 for (i=0; i<N; i++) {
370 for (iptr_ij=A_p->pattern->ptr[i];iptr_ij<A_p->pattern->ptr[i+1]; ++iptr_ij) {
371 fprintf(fileHandle_p, "%d %d %25.15e\n", i*2+1, 2*(A_p->pattern->index[iptr_ij])+1, A_p->val[iptr_ij*4]);
372 fprintf(fileHandle_p, "%d %d %25.15e\n", i*2+1, 2*(A_p->pattern->index[iptr_ij])+1+1, A_p->val[iptr_ij*4+2]);
373 fprintf(fileHandle_p, "%d %d %25.15e\n", i*2+1+1, 2*(A_p->pattern->index[iptr_ij])+1, A_p->val[iptr_ij*4+1]);
374 fprintf(fileHandle_p, "%d %d %25.15e\n", i*2+1+1, 2*(A_p->pattern->index[iptr_ij])+1+1, A_p->val[iptr_ij*4+3]);
375 }
376 }
377
378 if(A_p->row_block_size==3)
379 for (i=0; i<N; i++) {
380 for (iptr_ij=A_p->pattern->ptr[i];iptr_ij<A_p->pattern->ptr[i+1]; ++iptr_ij) {
381 fprintf(fileHandle_p, "%d %d %25.15e\n", i*3+1, 3*(A_p->pattern->index[iptr_ij])+1, A_p->val[iptr_ij*9]);
382 fprintf(fileHandle_p, "%d %d %25.15e\n", i*3+1, 3*(A_p->pattern->index[iptr_ij])+1+1, A_p->val[iptr_ij*9+3]);
383 fprintf(fileHandle_p, "%d %d %25.15e\n", i*3+1, 3*(A_p->pattern->index[iptr_ij])+2+1, A_p->val[iptr_ij*9+6]);
384 fprintf(fileHandle_p, "%d %d %25.15e\n", i*3+1+1, 3*(A_p->pattern->index[iptr_ij])+1, A_p->val[iptr_ij*9+1]);
385 fprintf(fileHandle_p, "%d %d %25.15e\n", i*3+1+1, 3*(A_p->pattern->index[iptr_ij])+1+1, A_p->val[iptr_ij*9+4]);
386 fprintf(fileHandle_p, "%d %d %25.15e\n", i*3+1+1, 3*(A_p->pattern->index[iptr_ij])+2+1, A_p->val[iptr_ij*9+7]);
387 fprintf(fileHandle_p, "%d %d %25.15e\n", i*3+2+1, 3*(A_p->pattern->index[iptr_ij])+1, A_p->val[iptr_ij*9+2]);
388 fprintf(fileHandle_p, "%d %d %25.15e\n", i*3+2+1, 3*(A_p->pattern->index[iptr_ij])+1+1, A_p->val[iptr_ij*9+5]);
389 fprintf(fileHandle_p, "%d %d %25.15e\n", i*3+2+1, 3*(A_p->pattern->index[iptr_ij])+2+1, A_p->val[iptr_ij*9+8]);
390 }
391 }
392
393 }
394
395 /* close the file */
396 fclose(fileHandle_p);
397
398 return;
399 }
400
401 index_t* Paso_SparseMatrix_borrowMainDiagonalPointer(Paso_SparseMatrix * A_p)
402 {
403 return Paso_Pattern_borrowMainDiagonalPointer(A_p->pattern);
404 }
405
406 dim_t Paso_SparseMatrix_getNumColors(Paso_SparseMatrix* A_p)
407 {
408 return Paso_Pattern_getNumColors(A_p->pattern);
409 }
410 index_t* Paso_SparseMatrix_borrowColoringPointer(Paso_SparseMatrix* A_p)
411 {
412 return Paso_Pattern_borrowColoringPointer(A_p->pattern);
413 }
414
415 Paso_SparseMatrix* Paso_SparseMatrix_unroll(Paso_SparseMatrix* A) {
416 Paso_SparseMatrix *out = NULL;
417 /*Paso_Pattern* mainPattern=NULL;*/
418 int i;
419 dim_t block_size=A->row_block_size;
420 index_t iptr, optr1=0, optr2=0, optr3=0;
421
422 /*mainPattern= Paso_Pattern_unrollBlocks(A->pattern, PATTERN_FORMAT_DEFAULT, A->row_block_size,A->col_block_size);*/
423 out = Paso_SparseMatrix_alloc(MATRIX_FORMAT_BLK1, A->pattern, A->row_block_size,A->col_block_size, FALSE);
424
425 if (block_size==1) {
426 #pragma omp parallel for private(i,iptr) schedule(static)
427 for (i=0;i<A->numRows;++i) {
428 for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
429 out->val[iptr]=A->val[iptr];
430 }
431 }
432 } else if (block_size==2) {
433 #pragma omp parallel for private(i,iptr,optr1,optr2) schedule(static)
434 for (i=0;i<A->numRows;++i) {
435 optr1=out->pattern->ptr[i*block_size];
436 optr2=out->pattern->ptr[i*block_size+1];
437 for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
438 out->val[optr1]=A->val[iptr*block_size*block_size];
439 optr1++;
440 out->val[optr1]=A->val[iptr*block_size*block_size+2];
441 optr1++;
442 out->val[optr2]=A->val[iptr*block_size*block_size+1];
443 optr2++;
444 out->val[optr2]=A->val[iptr*block_size*block_size+3];
445 optr2++;
446 }
447 }
448
449 } else if (block_size==3) {
450 #pragma omp parallel for private(i,iptr,optr1,optr2,optr3) schedule(static)
451 for (i=0;i<A->numRows;++i) {
452 optr1=out->pattern->ptr[i*block_size];
453 optr2=out->pattern->ptr[i*block_size+1];
454 optr3=out->pattern->ptr[i*block_size+2];
455 for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
456 out->val[optr1]=A->val[iptr*block_size*block_size];
457 optr1++;
458 out->val[optr1]=A->val[iptr*block_size*block_size+3];
459 optr1++;
460 out->val[optr1]=A->val[iptr*block_size*block_size+6];
461 optr1++;
462 out->val[optr2]=A->val[iptr*block_size*block_size+1];
463 optr2++;
464 out->val[optr2]=A->val[iptr*block_size*block_size+4];
465 optr2++;
466 out->val[optr2]=A->val[iptr*block_size*block_size+7];
467 optr2++;
468 out->val[optr3]=A->val[iptr*block_size*block_size+2];
469 optr3++;
470 out->val[optr3]=A->val[iptr*block_size*block_size+5];
471 optr3++;
472 out->val[optr3]=A->val[iptr*block_size*block_size+8];
473 optr3++;
474 }
475 }
476
477
478 }
479
480 return out;
481 }

  ViewVC Help
Powered by ViewVC 1.1.26