36 |
[I_CC] |
[I_CC] |
37 |
*/ |
*/ |
38 |
|
|
39 |
Paso_SparseMatrix* Paso_SparseMatrix_getProlongation(Paso_SparseMatrix* W, index_t* mis_marker){ |
Paso_SparseMatrix* Paso_SparseMatrix_getProlongation(Paso_SparseMatrix* W, index_t* marker_F){ |
40 |
|
|
41 |
Paso_Pattern *outpattern=NULL; |
Paso_Pattern *outpattern=NULL; |
42 |
Paso_SparseMatrix *out=NULL; |
Paso_SparseMatrix *out=NULL; |
43 |
index_t iptr,wptr; |
index_t iptr,wptr; |
44 |
|
|
45 |
Paso_IndexList* index_list=NULL; |
const dim_t n=W->numRows+W->numCols; |
|
dim_t n=W->numRows+W->numCols; |
|
46 |
dim_t i,j,k=0; |
dim_t i,j,k=0; |
47 |
dim_t block_size=W->row_block_size; |
dim_t block_size=W->row_block_size; |
48 |
|
Paso_IndexListArray* index_list = Paso_IndexListArray_alloc(n); |
49 |
index_list=TMPMEMALLOC(n,Paso_IndexList); |
|
|
if (! Esys_checkPtr(index_list)) { |
|
|
#pragma omp parallel for private(i) schedule(static) |
|
|
for(i=0;i<n;++i) { |
|
|
index_list[i].extension=NULL; |
|
|
index_list[i].n=0; |
|
|
} |
|
|
} |
|
|
|
|
50 |
|
|
51 |
for (i=0;i<n;++i) { |
for (i=0;i<n;++i) { |
52 |
if (mis_marker[i]) { |
if (marker_F[i]) { |
53 |
for (iptr=W->pattern->ptr[k];iptr<W->pattern->ptr[k+1]; ++iptr) { |
for (iptr=W->pattern->ptr[k];iptr<W->pattern->ptr[k+1]; ++iptr) { |
54 |
j=W->pattern->index[iptr]; |
j=W->pattern->index[iptr]; |
55 |
Paso_IndexList_insertIndex(&(index_list[i]),j); |
Paso_IndexListArray_insertIndex(index_list,i,j); |
56 |
} |
} |
57 |
k++; |
k++; |
58 |
} |
} |
59 |
else { |
else { |
60 |
Paso_IndexList_insertIndex(&(index_list[i]),i-k); |
Paso_IndexListArray_insertIndex(index_list,i,i-k); |
61 |
} |
} |
62 |
} |
} |
63 |
|
|
64 |
outpattern=Paso_IndexList_createPattern(0, n,index_list,0,W->numCols,0); |
outpattern=Paso_Pattern_fromIndexListArray(0,index_list,0,W->numCols,0); |
65 |
out=Paso_SparseMatrix_alloc(W->type,outpattern,block_size,block_size,FALSE); |
out=Paso_SparseMatrix_alloc(W->type,outpattern,block_size,block_size,FALSE); |
66 |
|
|
67 |
k=0; |
k=0; |
68 |
|
|
69 |
if (block_size==1) { |
if (block_size==1) { |
70 |
for (i=0;i<n;++i) { |
for (i=0;i<n;++i) { |
71 |
if (mis_marker[i]) { |
if (marker_F[i]) { |
72 |
wptr=W->pattern->ptr[k]; |
wptr=W->pattern->ptr[k]; |
73 |
for (iptr=out->pattern->ptr[i];iptr<out->pattern->ptr[i+1]; ++iptr) { |
for (iptr=out->pattern->ptr[i];iptr<out->pattern->ptr[i+1]; ++iptr) { |
74 |
out->val[iptr*block_size*block_size]=W->val[wptr*block_size*block_size]; |
out->val[iptr*block_size*block_size]=W->val[wptr*block_size*block_size]; |
83 |
} |
} |
84 |
} else if (block_size==2) { |
} else if (block_size==2) { |
85 |
for (i=0;i<n;++i) { |
for (i=0;i<n;++i) { |
86 |
if (mis_marker[i]) { |
if (marker_F[i]) { |
87 |
wptr=W->pattern->ptr[k]; |
wptr=W->pattern->ptr[k]; |
88 |
for (iptr=out->pattern->ptr[i];iptr<out->pattern->ptr[i+1]; ++iptr) { |
for (iptr=out->pattern->ptr[i];iptr<out->pattern->ptr[i+1]; ++iptr) { |
89 |
out->val[iptr*block_size*block_size]=W->val[wptr*block_size*block_size]; |
out->val[iptr*block_size*block_size]=W->val[wptr*block_size*block_size]; |
104 |
} |
} |
105 |
} else if (block_size==3) { |
} else if (block_size==3) { |
106 |
for (i=0;i<n;++i) { |
for (i=0;i<n;++i) { |
107 |
if (mis_marker[i]) { |
if (marker_F[i]) { |
108 |
wptr=W->pattern->ptr[k]; |
wptr=W->pattern->ptr[k]; |
109 |
for (iptr=out->pattern->ptr[i];iptr<out->pattern->ptr[i+1]; ++iptr) { |
for (iptr=out->pattern->ptr[i];iptr<out->pattern->ptr[i+1]; ++iptr) { |
110 |
out->val[iptr*block_size*block_size]=W->val[wptr*block_size*block_size]; |
out->val[iptr*block_size*block_size]=W->val[wptr*block_size*block_size]; |
135 |
} |
} |
136 |
} |
} |
137 |
|
|
138 |
/* clean up */ |
/* clean up */ |
139 |
if (index_list!=NULL) { |
Paso_IndexListArray_free(index_list); |
|
#pragma omp parallel for private(i) schedule(static) |
|
|
for(i=0;i<n;++i) Paso_IndexList_free(index_list[i].extension); |
|
|
} |
|
|
TMPMEMFREE(index_list); |
|
140 |
Paso_Pattern_free(outpattern); |
Paso_Pattern_free(outpattern); |
141 |
return out; |
return out; |
142 |
} |
} |
|
|
|
|
|
|
143 |
/* Restriction matrix R=P^T */ |
/* Restriction matrix R=P^T */ |
144 |
|
|
|
Paso_SparseMatrix* Paso_SparseMatrix_getRestriction(Paso_SparseMatrix* P){ |
|
|
|
|
|
Paso_Pattern *outpattern=NULL; |
|
|
Paso_SparseMatrix *out=NULL; |
|
|
|
|
|
Paso_IndexList* index_list=NULL; |
|
|
dim_t C=P->numCols; |
|
|
dim_t F=P->numRows-C; |
|
|
dim_t n=C+F; |
|
|
dim_t block_size=P->row_block_size; |
|
|
dim_t i,j,k=0; |
|
|
index_t iptr,jptr; |
|
|
|
|
|
index_list=TMPMEMALLOC(C,Paso_IndexList); |
|
|
if (! Esys_checkPtr(index_list)) { |
|
|
#pragma omp parallel for private(i) schedule(static) |
|
|
for(i=0;i<C;++i) { |
|
|
index_list[i].extension=NULL; |
|
|
index_list[i].n=0; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
for (i=0;i<n;++i) { |
|
|
for (iptr=P->pattern->ptr[i];iptr<P->pattern->ptr[i+1]; ++iptr) { |
|
|
j=P->pattern->index[iptr]; |
|
|
Paso_IndexList_insertIndex(&(index_list[j]),i); |
|
|
} |
|
|
} |
|
|
|
|
|
outpattern=Paso_IndexList_createPattern(0, C,index_list,0,C+F,0); |
|
|
out=Paso_SparseMatrix_alloc(P->type,outpattern,block_size,block_size,FALSE); |
|
|
|
|
|
|
|
|
if (block_size==1) { |
|
|
for (i=0;i<out->numRows;++i) { |
|
|
for (iptr=out->pattern->ptr[i];iptr<out->pattern->ptr[i+1]; ++iptr) { |
|
|
j=out->pattern->index[iptr]; |
|
|
/*This can be replaced by bsearch!!*/ |
|
|
for (jptr=P->pattern->ptr[j];jptr<P->pattern->ptr[j+1]; ++jptr) { |
|
|
k=P->pattern->index[jptr]; |
|
|
if(k==i) { |
|
|
out->val[iptr]=P->val[jptr]; |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
} else if (block_size==2) { |
|
|
for (i=0;i<out->numRows;++i) { |
|
|
for (iptr=out->pattern->ptr[i];iptr<out->pattern->ptr[i+1]; ++iptr) { |
|
|
j=out->pattern->index[iptr]; |
|
|
/*This can be replaced by bsearch!!*/ |
|
|
for (jptr=P->pattern->ptr[j];jptr<P->pattern->ptr[j+1]; ++jptr) { |
|
|
k=P->pattern->index[jptr]; |
|
|
if(k==i) { |
|
|
out->val[iptr*block_size*block_size]=P->val[jptr*block_size*block_size]; |
|
|
out->val[iptr*block_size*block_size+1]=P->val[jptr*block_size*block_size+2]; |
|
|
out->val[iptr*block_size*block_size+2]=P->val[jptr*block_size*block_size+1]; |
|
|
out->val[iptr*block_size*block_size+3]=P->val[jptr*block_size*block_size+3]; |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
} else if (block_size==3) { |
|
|
for (i=0;i<out->numRows;++i) { |
|
|
for (iptr=out->pattern->ptr[i];iptr<out->pattern->ptr[i+1]; ++iptr) { |
|
|
j=out->pattern->index[iptr]; |
|
|
/*This can be replaced by bsearch!!*/ |
|
|
for (jptr=P->pattern->ptr[j];jptr<P->pattern->ptr[j+1]; ++jptr) { |
|
|
k=P->pattern->index[jptr]; |
|
|
if(k==i) { |
|
|
out->val[iptr*block_size*block_size]=P->val[jptr*block_size*block_size]; |
|
|
out->val[iptr*block_size*block_size+1]=P->val[jptr*block_size*block_size+3]; |
|
|
out->val[iptr*block_size*block_size+2]=P->val[jptr*block_size*block_size+6]; |
|
|
out->val[iptr*block_size*block_size+3]=P->val[jptr*block_size*block_size+1]; |
|
|
out->val[iptr*block_size*block_size+4]=P->val[jptr*block_size*block_size+4]; |
|
|
out->val[iptr*block_size*block_size+5]=P->val[jptr*block_size*block_size+7]; |
|
|
out->val[iptr*block_size*block_size+6]=P->val[jptr*block_size*block_size+2]; |
|
|
out->val[iptr*block_size*block_size+7]=P->val[jptr*block_size*block_size+5]; |
|
|
out->val[iptr*block_size*block_size+8]=P->val[jptr*block_size*block_size+8]; |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
/* clean up */ |
|
|
if (index_list!=NULL) { |
|
|
#pragma omp parallel for private(i) schedule(static) |
|
|
for(i=0;i<C;++i) Paso_IndexList_free(index_list[i].extension); |
|
|
} |
|
|
TMPMEMFREE(index_list); |
|
|
Paso_Pattern_free(outpattern); |
|
|
return out; |
|
|
} |
|
145 |
|
|
146 |
|
|
147 |
void Paso_SparseMatrix_updateWeights(Paso_SparseMatrix* A,Paso_SparseMatrix* W_FC, index_t* mis_marker){ |
void Paso_SparseMatrix_updateWeights(Paso_SparseMatrix* A,Paso_SparseMatrix* W_FC, index_t* marker_F){ |
148 |
|
|
149 |
double *alpha; |
double *alpha; |
150 |
double *beta; |
double *beta; |
171 |
if (block_size==1) { |
if (block_size==1) { |
172 |
k=0; |
k=0; |
173 |
for (i = 0; i < n; ++i) { |
for (i = 0; i < n; ++i) { |
174 |
if(mis_marker[i]) { |
if(marker_F[i]) { |
175 |
alpha[k]=0; |
alpha[k]=0; |
176 |
beta[k]=0; |
beta[k]=0; |
177 |
sum_all_neg=0; |
sum_all_neg=0; |
184 |
if(j!=i) { |
if(j!=i) { |
185 |
if(A->val[iPtr]<0) { |
if(A->val[iPtr]<0) { |
186 |
sum_all_neg+=A->val[iPtr]; |
sum_all_neg+=A->val[iPtr]; |
187 |
if(!mis_marker[j]) { |
if(!marker_F[j]) { |
188 |
sum_strong_neg+=A->val[iPtr]; |
sum_strong_neg+=A->val[iPtr]; |
189 |
} |
} |
190 |
} |
} |
191 |
else if(A->val[iPtr]>0) { |
else if(A->val[iPtr]>0) { |
192 |
sum_all_pos+=A->val[iPtr]; |
sum_all_pos+=A->val[iPtr]; |
193 |
if(!mis_marker[j]) { |
if(!marker_F[j]) { |
194 |
sum_strong_pos+=A->val[iPtr]; |
sum_strong_pos+=A->val[iPtr]; |
195 |
} |
} |
196 |
} |
} |
220 |
} else if (block_size==2) { |
} else if (block_size==2) { |
221 |
k=0; |
k=0; |
222 |
for (i = 0; i < n; ++i) { |
for (i = 0; i < n; ++i) { |
223 |
if(mis_marker[i]) { |
if(marker_F[i]) { |
224 |
alpha[k*block_size]=0; |
alpha[k*block_size]=0; |
225 |
alpha[k*block_size+1]=0; |
alpha[k*block_size+1]=0; |
226 |
beta[k*block_size]=0; |
beta[k*block_size]=0; |
239 |
if(j!=i) { |
if(j!=i) { |
240 |
if(A->val[iPtr*block_size*block_size]<0) { |
if(A->val[iPtr*block_size*block_size]<0) { |
241 |
sum_all_neg1+=A->val[iPtr*block_size*block_size]; |
sum_all_neg1+=A->val[iPtr*block_size*block_size]; |
242 |
if(!mis_marker[j]) { |
if(!marker_F[j]) { |
243 |
sum_strong_neg1+=A->val[iPtr*block_size*block_size]; |
sum_strong_neg1+=A->val[iPtr*block_size*block_size]; |
244 |
} |
} |
245 |
} |
} |
246 |
else if(A->val[iPtr*block_size*block_size]>0) { |
else if(A->val[iPtr*block_size*block_size]>0) { |
247 |
sum_all_pos1+=A->val[iPtr*block_size*block_size]; |
sum_all_pos1+=A->val[iPtr*block_size*block_size]; |
248 |
if(!mis_marker[j]) { |
if(!marker_F[j]) { |
249 |
sum_strong_pos1+=A->val[iPtr*block_size*block_size]; |
sum_strong_pos1+=A->val[iPtr*block_size*block_size]; |
250 |
} |
} |
251 |
} |
} |
252 |
if(A->val[iPtr*block_size*block_size+3]<0) { |
if(A->val[iPtr*block_size*block_size+3]<0) { |
253 |
sum_all_neg2+=A->val[iPtr*block_size*block_size+3]; |
sum_all_neg2+=A->val[iPtr*block_size*block_size+3]; |
254 |
if(!mis_marker[j]) { |
if(!marker_F[j]) { |
255 |
sum_strong_neg2+=A->val[iPtr*block_size*block_size+3]; |
sum_strong_neg2+=A->val[iPtr*block_size*block_size+3]; |
256 |
} |
} |
257 |
} else if(A->val[iPtr*block_size*block_size+3]>0) { |
} else if(A->val[iPtr*block_size*block_size+3]>0) { |
258 |
sum_all_pos2+=A->val[iPtr*block_size*block_size+3]; |
sum_all_pos2+=A->val[iPtr*block_size*block_size+3]; |
259 |
if(!mis_marker[j]) { |
if(!marker_F[j]) { |
260 |
sum_strong_pos2+=A->val[iPtr*block_size*block_size+3]; |
sum_strong_pos2+=A->val[iPtr*block_size*block_size+3]; |
261 |
} |
} |
262 |
} |
} |
302 |
} else if (block_size==3) { |
} else if (block_size==3) { |
303 |
k=0; |
k=0; |
304 |
for (i = 0; i < n; ++i) { |
for (i = 0; i < n; ++i) { |
305 |
if(mis_marker[i]) { |
if(marker_F[i]) { |
306 |
alpha[k*block_size]=0; |
alpha[k*block_size]=0; |
307 |
alpha[k*block_size+1]=0; |
alpha[k*block_size+1]=0; |
308 |
alpha[k*block_size+2]=0; |
alpha[k*block_size+2]=0; |
327 |
if(j!=i) { |
if(j!=i) { |
328 |
if(A->val[iPtr*block_size*block_size]<0) { |
if(A->val[iPtr*block_size*block_size]<0) { |
329 |
sum_all_neg1+=A->val[iPtr*block_size*block_size]; |
sum_all_neg1+=A->val[iPtr*block_size*block_size]; |
330 |
if(!mis_marker[j]) { |
if(!marker_F[j]) { |
331 |
sum_strong_neg1+=A->val[iPtr*block_size*block_size]; |
sum_strong_neg1+=A->val[iPtr*block_size*block_size]; |
332 |
} |
} |
333 |
} |
} |
334 |
else if(A->val[iPtr*block_size*block_size]>0) { |
else if(A->val[iPtr*block_size*block_size]>0) { |
335 |
sum_all_pos1+=A->val[iPtr*block_size*block_size]; |
sum_all_pos1+=A->val[iPtr*block_size*block_size]; |
336 |
if(!mis_marker[j]) { |
if(!marker_F[j]) { |
337 |
sum_strong_pos1+=A->val[iPtr*block_size*block_size]; |
sum_strong_pos1+=A->val[iPtr*block_size*block_size]; |
338 |
} |
} |
339 |
} |
} |
340 |
if(A->val[iPtr*block_size*block_size+4]<0) { |
if(A->val[iPtr*block_size*block_size+4]<0) { |
341 |
sum_all_neg2+=A->val[iPtr*block_size*block_size+4]; |
sum_all_neg2+=A->val[iPtr*block_size*block_size+4]; |
342 |
if(!mis_marker[j]) { |
if(!marker_F[j]) { |
343 |
sum_strong_neg2+=A->val[iPtr*block_size*block_size+4]; |
sum_strong_neg2+=A->val[iPtr*block_size*block_size+4]; |
344 |
} |
} |
345 |
} else if(A->val[iPtr*block_size*block_size+4]>0) { |
} else if(A->val[iPtr*block_size*block_size+4]>0) { |
346 |
sum_all_pos2+=A->val[iPtr*block_size*block_size+4]; |
sum_all_pos2+=A->val[iPtr*block_size*block_size+4]; |
347 |
if(!mis_marker[j]) { |
if(!marker_F[j]) { |
348 |
sum_strong_pos2+=A->val[iPtr*block_size*block_size+4]; |
sum_strong_pos2+=A->val[iPtr*block_size*block_size+4]; |
349 |
} |
} |
350 |
} |
} |
351 |
if(A->val[iPtr*block_size*block_size+8]<0) { |
if(A->val[iPtr*block_size*block_size+8]<0) { |
352 |
sum_all_neg3+=A->val[iPtr*block_size*block_size+8]; |
sum_all_neg3+=A->val[iPtr*block_size*block_size+8]; |
353 |
if(!mis_marker[j]) { |
if(!marker_F[j]) { |
354 |
sum_strong_neg3+=A->val[iPtr*block_size*block_size+8]; |
sum_strong_neg3+=A->val[iPtr*block_size*block_size+8]; |
355 |
} |
} |
356 |
} else if(A->val[iPtr*block_size*block_size+8]>0) { |
} else if(A->val[iPtr*block_size*block_size+8]>0) { |
357 |
sum_all_pos3+=A->val[iPtr*block_size*block_size+8]; |
sum_all_pos3+=A->val[iPtr*block_size*block_size+8]; |
358 |
if(!mis_marker[j]) { |
if(!marker_F[j]) { |
359 |
sum_strong_pos3+=A->val[iPtr*block_size*block_size+8]; |
sum_strong_pos3+=A->val[iPtr*block_size*block_size+8]; |
360 |
} |
} |
361 |
} |
} |