/trunk/paso/src/SystemMatrix_extendedRows.cpp

Revision 4869 - Mon Apr 14 10:39:22 2014 UTC by caltinay
File size: 9320 byte(s)
all of paso now lives in its own namespace.


/*****************************************************************************
*
* Copyright (c) 2003-2014 by University of Queensland
* http://www.uq.edu.au
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
* Development 2012-2013 by School of Earth Sciences
* Development from 2014 by Centre for Geoscience Computing (GeoComp)
*
*****************************************************************************/

/****************************************************************************/
/* Paso: SystemMatrix                                                       */
/*                                                                          */
/* Extend the ST sets of rows in row_coupleBlock                            */
/* Input:  SystemMatrix A and ST info                                       */
/* Output:                                                                  */
/*   degree_ST: number of entries in ST for each row, extended to cover    */
/*              the overlapped rows received from neighbouring ranks       */
/*   offset_ST: offset of each row's first entry in ST                     */
/*   ST:        concatenated ST sets, with the extended rows appended      */
/****************************************************************************/

/* Copyrights by ACcESS Australia 2003 */
/* Author: Lin Gao, l.gao@uq.edu.au */

/****************************************************************************/

#include "Paso.h"
#include "SystemMatrix.h"

#include <cstring> // memcpy

namespace paso {

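// ST is stored in a CSR-like layout: the members of row i's set are
// ST[offset_ST[i]] .. ST[offset_ST[i]+degree_ST[i]-1]. This routine fetches
// the ST sets of the overlapped rows (rows of row_coupleBlock owned by
// neighbouring ranks), filters them against the columns known locally, and
// appends the result to degree_ST/offset_ST/ST behind the my_n local rows.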
void SystemMatrix::extendedRowsForST(dim_t* degree_ST, index_t* offset_ST,
                                     index_t* ST)
{
    if (mpi_info->size == 1) return;

    // sending/receiving unknown's global ID
    index_t num_main_cols = mainBlock->numCols;
    double* cols = new double[num_main_cols];
    const index_t rank = mpi_info->rank;
    const index_t offset = col_distribution->first_component[rank];
    index_t i, j, k, p, z, z0, z1, size, len;

#pragma omp parallel for private(i) schedule(static)
    for (i=0; i<num_main_cols; ++i)
        cols[i] = offset + i;

    Coupler_ptr coupler;
    if (global_id == NULL) {
        coupler.reset(new Coupler(col_coupler->connector, 1));
        coupler->startCollect(cols);
    }

    const index_t my_n = mainBlock->numRows;
    double* rows = new double[my_n];
#pragma omp parallel for private(i) schedule(static)
    for (i=0; i<my_n; i++)
        rows[i] = degree_ST[i];

    index_t num_couple_cols = col_coupleBlock->numCols;
    size = num_main_cols + num_couple_cols;
    index_t overlapped_n = row_coupleBlock->numRows;
    index_t* recv_offset_ST = new index_t[overlapped_n+1];
    dim_t* recv_degree_ST = new dim_t[overlapped_n];
    index_t* send_ST = new index_t[offset_ST[my_n]];
    len = row_coupler->connector->send->offsetInShared[row_coupler->connector->send->numNeighbors] * size;
    index_t* send_buf = new index_t[len];
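    // send_buf is sized generously: room for every row shared with a
    // neighbour to list all num_main_cols + num_couple_cols locally known
    // columns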

    // waiting for receiving unknown's global ID
    if (global_id == NULL) {
        coupler->finishCollect();
        global_id = new index_t[num_couple_cols];
#pragma omp parallel for private(i) schedule(static)
        for (i=0; i<num_couple_cols; ++i)
            global_id[i] = coupler->recv_buffer[i];
    }

    // sending/receiving the degree_ST
    coupler.reset(new Coupler(row_coupler->connector, 1));
    coupler->startCollect(rows);

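    // the degree_ST exchange proceeds in the background; the send_ST
    // assembly below overlaps with it, and finishCollect() is only called
    // once the local work is done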
    // prepare ST with global IDs
    index_t* B = new index_t[size*2];
    // find the index z into the sorted array global_id such that
    // global_id[i] < offset for all i < z, and
    // global_id[i] >= offset + my_n for all i >= z
    for (i=0; i<num_couple_cols; i++)
        if (global_id[i] > offset) break;
    z = i;
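    // translate each local row of ST into global column IDs in send_ST,
    // keeping every row sorted by global ID (assuming each row of ST is
    // sorted by local index): coupling columns below this rank's range come
    // first, then the buffered local columns, then coupling columns above
    // the range; 'flag' is true once no more reordering is needed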
    for (i=0; i<num_main_cols; i++) {
        p = offset_ST[i+1];
        z0 = 0;
        z1 = offset_ST[i];
        bool flag = (degree_ST[i] > 0 && (ST[p-1] < my_n || z == 0));
        for (j=offset_ST[i]; j<p; j++) {
            k = ST[j];
            if (!flag && k < my_n) {
                send_buf[z0] = k + offset;
                z0++;
            } else if (!flag && k - my_n >= z) {
                if (z0 > 0)
                    memcpy(&(send_ST[z1]), &(send_buf[0]), z0*sizeof(index_t));
                z1 += z0;
                flag = true;
                send_ST[z1] = global_id[k - my_n];
                z1++;
            } else if (!flag && j == p-1) {
                send_ST[z1] = global_id[k - my_n];
                z1++;
                if (z0 > 0)
                    memcpy(&(send_ST[z1]), &(send_buf[0]), z0*sizeof(index_t));
                flag = true;
                z1 += z0;
            } else if (k < my_n) {
                send_ST[z1] = k + offset;
                z1++;
            } else {
                send_ST[z1] = global_id[k - my_n];
                z1++;
            }
        }
    }

    // waiting to receive the degree_ST
    coupler->finishCollect();
    delete[] rows;

    // preparing degree_ST and offset_ST for the to-be-received extended rows
#pragma omp parallel for private(i) schedule(static)
    for (i=0; i<overlapped_n; i++) recv_degree_ST[i] = coupler->recv_buffer[i];
    recv_offset_ST[0] = 0;
    for (i=0; i<overlapped_n; i++) {
        recv_offset_ST[i+1] = recv_offset_ST[i] + coupler->recv_buffer[i];
    }
    index_t* recv_ST = new index_t[recv_offset_ST[overlapped_n]];
    coupler.reset();

    /* receiving ST for the extended rows */
    z = 0;
    for (p=0; p<row_coupler->connector->recv->numNeighbors; p++) {
        const index_t j_min = row_coupler->connector->recv->offsetInShared[p];
        const index_t j_max = row_coupler->connector->recv->offsetInShared[p+1];
        j = recv_offset_ST[j_max] - recv_offset_ST[j_min];
#ifdef ESYS_MPI
        MPI_Irecv(&recv_ST[z], j, MPI_INT,
                  row_coupler->connector->recv->neighbor[p],
                  mpi_info->msg_tag_counter+row_coupler->connector->recv->neighbor[p],
                  mpi_info->comm, &row_coupler->mpi_requests[p]);
#endif
        z += j;
    }
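    // tags match up across ranks: each receive above uses
    // msg_tag_counter + <sender's rank>, which is exactly the
    // msg_tag_counter + mpi_info->rank tag used by the sends below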

    /* sending ST for the extended rows */
    z0 = 0;
    for (p=0; p<row_coupler->connector->send->numNeighbors; p++) {
        const index_t j_min = row_coupler->connector->send->offsetInShared[p];
        const index_t j_max = row_coupler->connector->send->offsetInShared[p+1];
        z = z0;
        for (j=j_min; j<j_max; j++) {
            const index_t row = row_coupler->connector->send->shared[j];
            if (degree_ST[row] > 0) {
                memcpy(&(send_buf[z]), &(send_ST[offset_ST[row]]), degree_ST[row] * sizeof(index_t));
                z += degree_ST[row];
            }
        }
#ifdef ESYS_MPI
        MPI_Issend(&send_buf[z0], z-z0, MPI_INT,
                   row_coupler->connector->send->neighbor[p],
                   mpi_info->msg_tag_counter+mpi_info->rank,
                   mpi_info->comm,
                   &row_coupler->mpi_requests[p+row_coupler->connector->recv->numNeighbors]);
#endif
        z0 = z;
    }

    // first merge "cols" and "global_id" into array "B"
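    // B stores (global column ID, local index) pairs, sorted by global ID;
    // coupling columns are tagged with local index j + num_main_cols so they
    // can be told apart from mainBlock columns later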
    i = 0;
    j = 0;
    z = 0;
    while (i+j < size) {
        if (i < num_main_cols && j < num_couple_cols) {
            if (cols[i] < global_id[j]) {
                B[2*z] = cols[i];
                B[2*z+1] = i;
                i++;
            } else {
                B[2*z] = global_id[j];
                B[2*z+1] = j + num_main_cols;
                j++;
            }
            z++;
        } else if (i >= num_main_cols) {
            for (; j<num_couple_cols; j++, z++) {
                B[2*z] = global_id[j];
                B[2*z+1] = j + num_main_cols;
            }
        } else { /* j >= num_couple_cols */
            for (; i<num_main_cols; i++, z++) {
                B[2*z] = cols[i];
                B[2*z+1] = i;
            }
        }
    }

    // wait until everything is done
#ifdef ESYS_MPI
    MPI_Waitall(row_coupler->connector->send->numNeighbors +
                row_coupler->connector->recv->numNeighbors,
                row_coupler->mpi_requests, row_coupler->mpi_stati);
#endif
    ESYS_MPI_INC_COUNTER(*mpi_info, mpi_info->size)

    // filter the received ST (for extended rows) against cols in mainBlock
    // as well as cols in col_coupleBlock; their global IDs are listed in "B"
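    // each received row and B are both sorted by global ID, so one linear
    // sweep through B per row finds all matches; a match stores the local
    // index B[z+1] into ST, and the extended row is then re-sorted by
    // local index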
    len = offset_ST[my_n];
    size = 2 * size;
    for (i=0; i<overlapped_n; i++) {
        p = recv_offset_ST[i+1];
        z = 0;
        for (j=recv_offset_ST[i]; j<p; j++) {
            if (recv_ST[j] == B[z]) {
                ST[len] = B[z+1];
                len++;
                z += 2;
                if (z >= size) break;
            } else if (recv_ST[j] > B[z]) {
                // check the bound before reading B[z]
                while (z < size && recv_ST[j] > B[z]) z += 2;
                if (z >= size) break;
                if (recv_ST[j] == B[z]) {
                    ST[len] = B[z+1];
                    len++;
                    z += 2;
                    if (z >= size) break;
                }
            }
        }
        j = my_n + i;
        degree_ST[j] = len - offset_ST[j];
        offset_ST[j+1] = len;
        qsort(&ST[offset_ST[j]], (size_t) degree_ST[j], sizeof(index_t), util::comparIndex);
    }

    // release memory
    delete[] cols;
    delete[] B;
    delete[] send_buf;
    delete[] send_ST;
    delete[] recv_ST;
    delete[] recv_offset_ST;
    delete[] recv_degree_ST;
}

} // namespace paso

