/[escript]/branches/doubleplusgood/paso/src/SystemMatrix_mergeMainAndCouple.cpp
ViewVC logotype

Contents of /branches/doubleplusgood/paso/src/SystemMatrix_mergeMainAndCouple.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4275 - (show annotations)
Tue Mar 5 09:32:52 2013 UTC (6 years, 1 month ago) by jfenwick
File size: 11279 byte(s)
some experiments
1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2013 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development since 2012 by School of Earth Sciences
13 *
14 *****************************************************************************/
15
16
17 /**********************************************************************************/
18 /* Paso: SystemMatrix */
19 /* */
20 /* Merge the MainBlock and CoupleBlock in the matrix */
21 /* Input: SystemMatrix A */
22 /* Output: */
23 /* p_ptr: the pointer to a vector of locations that */
24 /* start a row. */
25 /* p_idx: the pointer to the column indices for each */
26 /* of the rows, ordered by rows. */
27 /* p_val: the pointer to the data corresponding */
28 /* directly to the column entries in p_idx. */
29 /**********************************************************************************/
30
31 /* Copyrights by ACcESS Australia 2003 */
32 /* Author: Lin Gao, l.gao@uq.edu.au */
33
34 /**********************************************************************************/
35
36 #include "Paso.h"
37 #include "SystemMatrix.h"
38
39 #ifdef _OPENMP
40 #include <omp.h>
41 #endif
42
43
44 void Paso_SystemMatrix_mergeMainAndCouple(Paso_SystemMatrix* A, index_t** p_ptr, index_t** p_idx, double** p_val){
45 if (A->type & MATRIX_FORMAT_DEFAULT) {
46 Paso_SystemMatrix_mergeMainAndCouple_CSR_OFFSET0(A, p_ptr, p_idx, p_val);
47 } else if (A->type & MATRIX_FORMAT_CSC) {
48 /* CSC part is for PASTIX */
49 if (A->type & (MATRIX_FORMAT_OFFSET1 + MATRIX_FORMAT_BLK1)) {
50 Paso_SystemMatrix_mergeMainAndCouple_CSC_OFFSET1(A, p_ptr, p_idx, p_val);
51 } else {
52 Esys_setError(SYSTEM_ERROR,"Paso_SystemMatrix_mergeMainAndCouple: CSC with index 0 or block size larger than 1 is not supported.");
53 }
54 } else if (A->type & MATRIX_FORMAT_TRILINOS_CRS) {
55 Esys_setError(SYSTEM_ERROR,"Paso_SystemMatrix_mergeMainAndCouple: TRILINOS is not supported.");
56 } else {
57 Esys_setError(SYSTEM_ERROR,"Paso_SystemMatrix_mergeMainAndCouple: CRS is not supported.");
58 }
59 }
60
61
62 void Paso_SystemMatrix_mergeMainAndCouple_CSR_OFFSET0(Paso_SystemMatrix* A, index_t** p_ptr, index_t** p_idx, double** p_val) {
63
64 index_t i, j, i_ub, j_lb, j_ub, row, num_vals, main_num_vals;
65 index_t couple_num_vals, rank, row_offset, ij_ptr=0, idx=0, idx2=0;
66 index_t main_num_rows, couple_num_rows, col_offset, num_cols;
67 index_t *main_ptr, *main_idx, *couple_ptr, *couple_idx, *global_id;
68 double *main_val, *couple_val, *rows=NULL;
69 Paso_Coupler* coupler=NULL;
70
71 if (A->mainBlock->col_block_size!=1 ||
72 A->mainBlock->row_block_size!=1 ||
73 A->col_coupleBlock->col_block_size!=1 ||
74 A->col_coupleBlock->row_block_size!=1) {
75 Paso_SystemMatrix_mergeMainAndCouple_CSR_OFFSET0_Block(A, p_ptr, p_idx, p_val);
76 return;
77 }
78
79 if (A->mpi_info->size == 1) {
80 /* initialization */
81 main_num_rows=A->mainBlock->numRows;
82 main_ptr=A->mainBlock->pattern->ptr;
83 main_idx=A->mainBlock->pattern->index;
84 main_val=A->mainBlock->val;
85
86 /* allocate arrays "ptr", "index" and "val" */
87 num_vals = main_ptr[main_num_rows]-1;
88 *p_ptr = new index_t[main_num_rows+1];
89 *p_idx = new index_t[num_vals];
90 *p_val = new double[num_vals];
91
92 #pragma omp for schedule(static) private(i,ij_ptr,j_lb,j_ub)
93 for (i=0; i<main_num_rows; i++) {
94 j_lb = main_ptr[i];
95 j_ub = main_ptr[i+1];
96 (*p_ptr)[i] = j_lb;
97 for (ij_ptr=j_lb; ij_ptr<j_ub; ++ij_ptr) {
98 (*p_idx)[ij_ptr] = main_idx[ij_ptr];
99 (*p_val)[ij_ptr] = main_val[ij_ptr];
100 }
101 }
102 (*p_ptr)[main_num_rows] = main_ptr[main_num_rows];
103 return;
104 }
105
106 main_num_rows=A->mainBlock->numRows;
107 couple_num_rows=A->col_coupleBlock->numRows;
108 rank = A->mpi_info->rank;
109
110 if (main_num_rows != couple_num_rows) {
111 Esys_setError(TYPE_ERROR,"Paso_SystemMatrix_mergeMainAndCouple_CSR_OFFSET0: number of rows do not match.");
112 return;
113 }
114
115 if (A->global_id == NULL) {
116 /* prepare for global coordinates in colCoupleBlock, the results are
117 in coupler->recv_buffer */
118 rows=new double[main_num_rows];
119 row_offset = A->row_distribution->first_component[rank];
120 #pragma omp parallel for private(i) schedule(static)
121 for (i=0; i<main_num_rows; ++i) rows[i]=row_offset+i;
122 coupler= Paso_Coupler_alloc(A->col_coupler->connector, 1);
123 Paso_Coupler_startCollect(coupler, rows);
124 }
125
126 /* initialization, including allocate arrays "ptr", "index" and "val" */
127 main_ptr=A->mainBlock->pattern->ptr;
128 main_idx=A->mainBlock->pattern->index;
129 main_val=A->mainBlock->val;
130 couple_ptr=A->col_coupleBlock->pattern->ptr;
131 couple_idx=A->col_coupleBlock->pattern->index;
132 couple_val=A->col_coupleBlock->val;
133 col_offset = A->col_distribution->first_component[rank];
134 main_num_vals = main_ptr[main_num_rows]-main_ptr[0];
135 couple_num_vals = couple_ptr[couple_num_rows]-couple_ptr[0];
136 num_vals = main_num_vals + couple_num_vals;
137 *p_ptr = new index_t[main_num_rows+1];
138 *p_idx = new index_t[num_vals];
139 *p_val = new double[num_vals];
140 (*p_ptr)[0] = 0;
141 i = 0;
142 j = 0;
143
144 if (A->global_id == NULL) {
145 Paso_Coupler_finishCollect(coupler);
146 delete[] rows;
147 num_cols = A->col_coupleBlock->numCols;
148 global_id = new index_t[num_cols];
149 #pragma omp parallel for private(i) schedule(static)
150 for (i=0; i<num_cols; ++i)
151 global_id[i] = coupler->recv_buffer[i];
152 A->global_id = global_id;
153 Paso_Coupler_free(coupler);
154 }
155 global_id = A->global_id;
156
157 /* merge mainBlock and col_coupleBlock */
158 for (row=1; row<=main_num_rows; row++) {
159 i_ub = main_ptr[row];
160 j_ub = couple_ptr[row];
161 while (i < i_ub || j < j_ub) {
162 ij_ptr = i + j;
163 if (j < j_ub) {
164 idx = global_id[couple_idx[j]];
165 }
166 if (i < i_ub) {
167 idx2 = main_idx[i] + col_offset;
168 }
169 if (j == j_ub || (i < i_ub && idx2 < idx)){
170 (*p_idx)[ij_ptr] = idx2;
171 (*p_val)[ij_ptr] = main_val[i];
172 i++;
173 } else {
174 (*p_idx)[ij_ptr] = idx;
175 (*p_val)[ij_ptr] = couple_val[j];
176 j++;
177 }
178 }
179 (*p_ptr)[row] = ij_ptr+1;
180 }
181
182 }
183
184
185 void Paso_SystemMatrix_mergeMainAndCouple_CSR_OFFSET0_Block(Paso_SystemMatrix* A, index_t** p_ptr, index_t** p_idx, double** p_val) {
186
187 index_t i, j, i_ub, j_lb, j_ub, row, num_vals, main_num_vals, ib;
188 index_t couple_num_vals, rank, row_offset, ij_ptr=0, idx=0, idx2=0;
189 index_t main_num_rows, couple_num_rows, col_offset, num_cols, block_size;
190 index_t *main_ptr, *main_idx, *couple_ptr, *couple_idx, *global_id;
191 double *main_val, *couple_val, *rows=NULL;
192 Paso_Coupler* coupler=NULL;
193
194 block_size = A->block_size;
195 if (A->mpi_info->size == 1) {
196 /* initialization */
197 main_num_rows=A->mainBlock->numRows;
198 main_ptr=A->mainBlock->pattern->ptr;
199 main_idx=A->mainBlock->pattern->index;
200 main_val=A->mainBlock->val;
201
202 /* allocate arrays "ptr", "index" and "val" */
203 num_vals = main_ptr[main_num_rows]-1;
204 *p_ptr = new index_t[main_num_rows+1];
205 *p_idx = new index_t[num_vals];
206 *p_val = new double[num_vals * block_size];
207
208 #pragma omp for schedule(static) private(i,ij_ptr,j_lb,j_ub, ib)
209 for (i=0; i<main_num_rows; i++) {
210 j_lb = main_ptr[i];
211 j_ub = main_ptr[i+1];
212 (*p_ptr)[i] = j_lb;
213 for (ij_ptr=j_lb; ij_ptr<j_ub; ij_ptr++) {
214 (*p_idx)[ij_ptr] = main_idx[ij_ptr];
215 for (ib=0; ib<block_size; ib++)
216 (*p_val)[ij_ptr*block_size+ib] = main_val[ij_ptr*block_size+ib];
217 }
218 }
219 (*p_ptr)[main_num_rows] = main_ptr[main_num_rows];
220 return;
221 }
222
223 main_num_rows=A->mainBlock->numRows;
224 couple_num_rows=A->col_coupleBlock->numRows;
225 rank = A->mpi_info->rank;
226
227 if (main_num_rows != couple_num_rows) {
228 Esys_setError(TYPE_ERROR,"Paso_SystemMatrix_mergeMainAndCouple_CSR_OFFSET0: number of rows do not match.");
229 return;
230 }
231
232 if (A->global_id == NULL) {
233 /* prepare for global coordinates in colCoupleBlock, the results are
234 in coupler->recv_buffer */
235 rows=new double[main_num_rows];
236 row_offset = A->row_distribution->first_component[rank];
237 #pragma omp parallel for private(i) schedule(static)
238 for (i=0; i<main_num_rows; ++i) rows[i]=row_offset+i;
239 coupler= Paso_Coupler_alloc(A->col_coupler->connector, 1);
240 Paso_Coupler_startCollect(coupler, rows);
241 }
242
243 /* initalization, including allocate arrays "ptr", "index" and "val" */
244 main_ptr=A->mainBlock->pattern->ptr;
245 main_idx=A->mainBlock->pattern->index;
246 main_val=A->mainBlock->val;
247 couple_ptr=A->col_coupleBlock->pattern->ptr;
248 couple_idx=A->col_coupleBlock->pattern->index;
249 couple_val=A->col_coupleBlock->val;
250 col_offset = A->col_distribution->first_component[rank];
251 main_num_vals = main_ptr[main_num_rows]-main_ptr[0];
252 couple_num_vals = couple_ptr[couple_num_rows]-couple_ptr[0];
253 num_vals = main_num_vals + couple_num_vals;
254 *p_ptr = new index_t[main_num_rows+1];
255 *p_idx = new index_t[num_vals];
256 *p_val = new double[num_vals*block_size];
257 (*p_ptr)[0] = 0;
258 i = 0;
259 j = 0;
260
261 if (A->global_id == NULL) {
262 Paso_Coupler_finishCollect(coupler);
263 delete[] rows;
264 num_cols = A->col_coupleBlock->numCols;
265 global_id = new index_t[num_cols];
266 #pragma omp parallel for private(i) schedule(static)
267 for (i=0; i<num_cols; ++i)
268 global_id[i] = coupler->recv_buffer[i];
269 A->global_id = global_id;
270 Paso_Coupler_free(coupler);
271 }
272 global_id = A->global_id;
273
274 /* merge mainBlock and col_coupleBlock */
275 for (row=1; row<=main_num_rows; row++) {
276 i_ub = main_ptr[row];
277 j_ub = couple_ptr[row];
278 while (i < i_ub || j < j_ub) {
279 ij_ptr = i + j;
280 if (j < j_ub) {
281 idx = global_id[couple_idx[j]];
282 }
283 if (i < i_ub) {
284 idx2 = main_idx[i] + col_offset;
285 }
286 if (j == j_ub || (i < i_ub && idx2 < idx)){
287 (*p_idx)[ij_ptr] = idx2;
288 for (ib=0; ib<block_size; ib++)
289 (*p_val)[ij_ptr*block_size+ib] = main_val[i*block_size+ib];
290 i++;
291 } else {
292 (*p_idx)[ij_ptr] = idx;
293 for (ib=0; ib<block_size; ib++)
294 (*p_val)[ij_ptr*block_size+ib] = couple_val[j*block_size+ib];
295 j++;
296 }
297 }
298 (*p_ptr)[row] = ij_ptr+1;
299 }
300
301 }
302
303
304 void Paso_SystemMatrix_mergeMainAndCouple_CSC_OFFSET1(Paso_SystemMatrix* A, index_t** p_ptr, index_t** p_idx, double** p_val) {
305 /*
306 index_t i, j, i_ub, j_ub, col, num_vals, main_num_vals;
307 index_t couple_num_vals, idx, rank, main_offset;
308 index_t main_num_cols=A->mainBlock->pattern->numOutput;
309 index_t couple_num_cols=A->col_coupleBlock->pattern->numOutput;
310 index_t *main_ptr=A->mainBlock->pattern->ptr;
311 index_t *main_idx=A->mainBlock->pattern->index;
312 double *main_val=A->mainBlock->val;
313 index_t *couple_ptr=A->col_coupleBlock->pattern->ptr;
314 index_t *couple_idx=A->col_coupleBlock->pattern->index;
315 double *couple_val=A->col_coupleBlock->val;
316 index_t *couple_global=NULL;
317 Paso_Coupler* coupler=A->col_coupler;
318 */
319 }
320
321

  ViewVC Help
Powered by ViewVC 1.1.26