/[escript]/trunk/paso/src/Pattern.cpp
ViewVC logotype

Contents of /trunk/paso/src/Pattern.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 5148 - (show annotations)
Mon Sep 15 01:25:23 2014 UTC (4 years, 11 months ago) by caltinay
File size: 12834 byte(s)
Merging ripley diagonal storage + CUDA support into trunk.
Options file version has been incremented due to new options
'cuda' and 'nvccflags'.

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2014 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17
18 /****************************************************************************/
19
20 /* Paso: Pattern */
21
22 /****************************************************************************/
23
24 /* Author: Lutz Gross, l.gross@uq.edu.au */
25
26 /****************************************************************************/
27
28 #include "Pattern.h"
29 #include "PasoUtil.h"
30 #include <boost/scoped_array.hpp>
31
32 using esysUtils::IndexList;
33
34 namespace paso {
35
36 Pattern::Pattern(int ntype, dim_t numOut, dim_t numIn, index_t* inPtr,
37 index_t* idx) :
38 type(ntype),
39 numOutput(numOut),
40 numInput(numIn),
41 len(0),
42 ptr(inPtr),
43 index(idx),
44 main_iptr(NULL),
45 numColors(-1),
46 coloring(NULL)
47 {
48 const index_t index_offset = (ntype & MATRIX_FORMAT_OFFSET1 ? 1:0);
49 index_t min_index = index_offset, max_index = index_offset-1;
50 Esys_resetError();
51
52 if (inPtr!=NULL && idx != NULL) {
53 #pragma omp parallel
54 {
55 index_t loc_min_index=index_offset;
56 index_t loc_max_index=index_offset-1;
57 if (ntype & MATRIX_FORMAT_OFFSET1) {
58 #pragma omp for schedule(static)
59 for (dim_t i=0; i < numOut; ++i) {
60 if (inPtr[i] < inPtr[i+1]) {
61 qsort(&(idx[inPtr[i]-1]),(size_t)(inPtr[i+1]-inPtr[i]),sizeof(index_t), util::comparIndex);
62 loc_min_index = std::min(loc_min_index, idx[inPtr[i]-1]);
63 loc_max_index = std::max(loc_max_index, idx[inPtr[i+1]-2]);
64 }
65 }
66 } else {
67 #pragma omp for schedule(static)
68 for (dim_t i=0; i < numOut; ++i) {
69 if (inPtr[i] < inPtr[i+1]) {
70 qsort(&(idx[inPtr[i]]),(size_t)(inPtr[i+1]-inPtr[i]),sizeof(index_t), util::comparIndex);
71 loc_min_index = std::min(loc_min_index, idx[inPtr[i]]);
72 loc_max_index = std::max(loc_max_index, idx[inPtr[i+1]-1]);
73 }
74 }
75 }
76 #pragma omp critical
77 {
78 min_index=std::min(loc_min_index, min_index);
79 max_index=std::max(loc_max_index, max_index);
80 }
81 } // parallel section
82
83 if (min_index < index_offset || max_index >= numIn+index_offset) {
84 Esys_setError(TYPE_ERROR, "Pattern: Pattern index out of range.");
85 }
86 len = ptr[numOutput] - index_offset;
87 }
88 }
89
90 /* deallocates a Pattern */
91 Pattern::~Pattern()
92 {
93 delete[] ptr;
94 delete[] index;
95 delete[] main_iptr;
96 delete[] coloring;
97 }
98
99 /* creates a pattern from a range of indices */
100 Pattern_ptr Pattern::fromIndexListArray(dim_t n0, dim_t n,
101 const IndexList* index_list_array,
102 index_t range_min, index_t range_max,
103 index_t index_offset)
104 {
105 dim_t* ptr = new index_t[n+1-n0];
106
107 // get the number of connections per row
108 #pragma omp parallel for schedule(static)
109 for (dim_t i=n0; i < n; ++i) {
110 ptr[i-n0]=index_list_array[i].count(range_min, range_max);
111 }
112 // accumulate ptr
113 dim_t s=0;
114 for (dim_t i=n0; i < n; ++i) {
115 const dim_t itmp=ptr[i-n0];
116 ptr[i-n0]=s;
117 s+=itmp;
118 }
119 ptr[n-n0]=s;
120
121 // fill index
122 index_t* index = new index_t[ptr[n-n0]];
123 #pragma omp parallel for schedule(static)
124 for (dim_t i=n0; i < n; ++i) {
125 index_list_array[i].toArray(&index[ptr[i-n0]], range_min, range_max,
126 index_offset);
127 }
128 Pattern_ptr out(new Pattern(MATRIX_FORMAT_DEFAULT, n-n0,
129 range_max+index_offset, ptr, index));
130
131 if (!Esys_noError()) {
132 delete[] ptr;
133 delete[] index;
134 out.reset();
135 }
136 return out;
137 }
138
139 index_t* Pattern::borrowMainDiagonalPointer()
140 {
141 if (main_iptr == NULL) {
142 const dim_t n = numOutput;
143 main_iptr=new index_t[n];
144 bool fail = false;
145 // identify the main diagonals
146 #pragma omp parallel for schedule(static)
147 for (index_t i = 0; i < n; ++i) {
148 index_t* idx = &index[ptr[i]];
149 index_t* where_p=reinterpret_cast<index_t*>(bsearch(&i, idx,
150 (size_t)(ptr[i+1] - ptr[i]),
151 sizeof(index_t), util::comparIndex));
152 if (where_p == NULL) {
153 fail = true;
154 } else {
155 main_iptr[i] = ptr[i]+(index_t)(where_p-idx);
156 }
157 }
158 if (fail) {
159 delete[] main_iptr;
160 main_iptr = NULL;
161 }
162 }
163 return main_iptr;
164 }
165
166 index_t* Pattern::borrowColoringPointer()
167 {
168 // is coloring available?
169 if (coloring == NULL) {
170 coloring = new index_t[numInput];
171 index_t out = 0;
172 const dim_t n = numOutput;
173 index_t* mis_marker = new index_t[n];
174 // get coloring
175 #pragma omp parallel for schedule(static)
176 for (index_t i = 0; i < n; i++) {
177 coloring[i] = -1;
178 mis_marker[i] = -1;
179 }
180
181 while (util::isAny(n, coloring, -1) && Esys_noError()) {
182 mis(mis_marker);
183
184 #pragma omp parallel for schedule(static)
185 for (index_t i = 0; i < n; ++i) {
186 if (mis_marker[i])
187 coloring[i] = out;
188 mis_marker[i] = coloring[i];
189 }
190 ++out;
191 }
192 delete[] mis_marker;
193 numColors = out;
194 }
195 return coloring;
196 }
197
198 // creates a subpattern
199 Pattern_ptr Pattern::getSubpattern(int newNumRows, int newNumCols,
200 const index_t* row_list,
201 const index_t* new_col_index) const
202 {
203 const index_t index_offset=(type & MATRIX_FORMAT_OFFSET1 ? 1:0);
204 Esys_resetError();
205
206 index_t* newPtr = new index_t[newNumRows+1];
207 #pragma omp parallel
208 {
209 #pragma omp for schedule(static)
210 for (dim_t i=0; i < newNumRows+1; ++i) newPtr[i]=0;
211
212 // find the number of column entries in each row
213 #pragma omp for schedule(static)
214 for (dim_t i=0; i < newNumRows; ++i) {
215 index_t j=0;
216 const index_t subpattern_row=row_list[i];
217 #pragma ivdep
218 for (index_t k=ptr[subpattern_row]-index_offset;
219 k < ptr[subpattern_row+1]-index_offset; ++k) {
220 if (new_col_index[index[k]-index_offset] > -1)
221 j++;
222 }
223 newPtr[i]=j;
224 }
225 } // parallel section
226
227 // accumulate ptr
228 newPtr[newNumRows]=util::cumsum(newNumRows, newPtr);
229 index_t* newIndex = new index_t[newPtr[newNumRows]];
230
231 // find the number of column entries in each row
232 #pragma omp parallel for schedule(static)
233 for (dim_t i=0; i < newNumRows; ++i) {
234 index_t j=newPtr[i];
235 const index_t subpattern_row=row_list[i];
236 #pragma ivdep
237 for (index_t k=ptr[subpattern_row]-index_offset; k < ptr[subpattern_row+1]-index_offset; ++k) {
238 const index_t tmp=new_col_index[index[k]-index_offset];
239 if (tmp > -1) {
240 newIndex[j]=tmp;
241 ++j;
242 }
243 }
244 }
245 // create return value
246 Pattern_ptr out(new Pattern(type, newNumRows, newNumCols, newPtr, newIndex));
247 if (!Esys_noError()) {
248 delete[] newIndex;
249 delete[] newPtr;
250 }
251 return out;
252 }
253
254 Pattern_ptr Pattern::unrollBlocks(int newType, dim_t output_block_size,
255 dim_t input_block_size)
256 {
257 Pattern_ptr out;
258 const index_t index_offset_in=(type & MATRIX_FORMAT_OFFSET1 ? 1:0);
259 const index_t index_offset_out=(newType & MATRIX_FORMAT_OFFSET1 ? 1:0);
260
261 Esys_resetError();
262 if (output_block_size == 1 && input_block_size == 1 &&
263 (type & MATRIX_FORMAT_OFFSET1) == (newType & MATRIX_FORMAT_OFFSET1)) {
264 out = shared_from_this();
265 } else {
266 const dim_t block_size = output_block_size*input_block_size;
267 const dim_t new_len = len * block_size;
268 const dim_t new_numOutput = numOutput * output_block_size;
269 const dim_t new_numInput = numInput * input_block_size;
270
271 index_t* newPtr = new index_t[new_numOutput+1];
272 index_t* newIndex = new index_t[new_len];
273 #pragma omp parallel
274 {
275 #pragma omp for schedule(static)
276 for (dim_t i=0; i < new_numOutput+1; ++i)
277 newPtr[i]=index_offset_out;
278
279 #pragma omp single
280 newPtr[new_numOutput]=new_len+index_offset_out;
281
282 #pragma omp for schedule(static)
283 for (dim_t i=0; i < numOutput; ++i) {
284 for (dim_t k=0; k < output_block_size; ++k) {
285 newPtr[i*output_block_size+k]=(ptr[i]-index_offset_in)*block_size+
286 (ptr[i+1]-ptr[i])*input_block_size*k+index_offset_out;
287 }
288 }
289
290 #pragma omp for schedule(static)
291 for (dim_t i=0; i < new_numOutput; ++i) {
292 #pragma ivdep
293 for (index_t iPtr=newPtr[i]-index_offset_out; iPtr<newPtr[i+1]-index_offset_out; ++iPtr) {
294 newIndex[iPtr]=index_offset_out;
295 }
296 }
297
298 #pragma omp for schedule(static)
299 for (dim_t i=0; i < numOutput; ++i) {
300 for (index_t iPtr=ptr[i]-index_offset_in; iPtr < ptr[i+1]-index_offset_in; ++iPtr) {
301 for (dim_t k=0; k < output_block_size; ++k) {
302 #pragma ivdep
303 for (dim_t j=0; j < input_block_size; ++j) {
304 newIndex[newPtr[i*output_block_size+k]-index_offset_out+(iPtr-(ptr[i]-index_offset_in))*input_block_size+j] =
305 (index[iPtr]-index_offset_in)*input_block_size+j+index_offset_out;
306 }
307 }
308 }
309 }
310 } // parallel section
311 out.reset(new Pattern(newType, new_numOutput, new_numInput, newPtr, newIndex));
312 if (!Esys_noError()) {
313 delete[] newIndex;
314 delete[] newPtr;
315 }
316 }
317 return out;
318 }
319
320 // computes the pattern coming from a matrix-matrix multiplication
321 Pattern_ptr Pattern::multiply(int type, const_Pattern_ptr B) const
322 {
323 boost::scoped_array<IndexList> index_list(new IndexList[numOutput]);
324
325 #pragma omp parallel for schedule(static)
326 for (dim_t i = 0; i < numOutput; i++) {
327 for (index_t iptrA = ptr[i]; iptrA < ptr[i+1]; ++iptrA) {
328 const dim_t j = index[iptrA];
329 for (index_t iptrB = B->ptr[j]; iptrB < B->ptr[j+1]; ++iptrB) {
330 const dim_t k = B->index[iptrB];
331 index_list[i].insertIndex(k);
332 }
333 }
334 }
335 return Pattern::fromIndexListArray(0, numOutput, index_list.get(), 0,
336 B->numInput, 0);
337 }
338
339 /*
340 * Computes the pattern of C = A binary operation B for CSR matrices A,B
341 * Note: we do not check whether A_ij(op)B_ij=0
342 */
343 Pattern_ptr Pattern::binop(int type, const_Pattern_ptr B) const
344 {
345 boost::scoped_array<IndexList> index_list(new IndexList[numOutput]);
346 const dim_t nRowsB = B->numOutput;
347
348 #pragma omp parallel for schedule(static)
349 for (dim_t i = 0; i < nRowsB; i++) {
350 index_t iptrA = ptr[i];
351 index_t iptrB = B->ptr[i];
352
353 while (iptrA < ptr[i+1] && iptrB < B->ptr[i+1]) {
354 const dim_t j = index[iptrA];
355 const dim_t k = B->index[iptrB];
356 if (j < k) {
357 index_list[i].insertIndex(j);
358 iptrA++;
359 } else if (j > k) {
360 index_list[i].insertIndex(k);
361 iptrB++;
362 } else { // (j == k)
363 index_list[i].insertIndex(j);
364 iptrB++;
365 iptrA++;
366 }
367 }
368 while(iptrA < ptr[i+1]) {
369 const dim_t j = index[iptrA];
370 index_list[i].insertIndex(j);
371 iptrA++;
372 }
373 while(iptrB < B->ptr[i+1]) {
374 const dim_t k = B->index[iptrB];
375 index_list[i].insertIndex(k);
376 iptrB++;
377 }
378 }
379
380 return Pattern::fromIndexListArray(0, numOutput, index_list.get(), 0,
381 numInput, 0);
382 }
383
384 } // namespace paso
385

Properties

Name Value
svn:mergeinfo /branches/amg_from_3530/paso/src/Pattern.cpp:3531-3826 /branches/diaplayground/paso/src/Pattern.cpp:4940-5147 /branches/lapack2681/paso/src/Pattern.cpp:2682-2741 /branches/pasowrap/paso/src/Pattern.cpp:3661-3674 /branches/py3_attempt2/paso/src/Pattern.cpp:3871-3891 /branches/restext/paso/src/Pattern.cpp:2610-2624 /branches/ripleygmg_from_3668/paso/src/Pattern.cpp:3669-3791 /branches/stage3.0/paso/src/Pattern.cpp:2569-2590 /branches/symbolic_from_3470/paso/src/Pattern.cpp:3471-3974 /branches/symbolic_from_3470/ripley/test/python/paso/src/Pattern.cpp:3517-3974 /release/3.0/paso/src/Pattern.cpp:2591-2601 /trunk/paso/src/Pattern.cpp:4257-4344 /trunk/ripley/test/python/paso/src/Pattern.cpp:3480-3515

  ViewVC Help
Powered by ViewVC 1.1.26