# Diff of /trunk/paso/src/AMG_Interpolation.cpp

revision 4814 by caltinay, Fri Mar 28 04:31:02 2014 UTC revision 4836 by caltinay, Mon Apr 7 05:51:55 2014 UTC
# Line 14  Line 14
14  *****************************************************************************/  *****************************************************************************/
15
16
17  /************************************************************************************/  /****************************************************************************/
18
19  /* Paso: defines AMG Interpolation                            */  /* Paso: defines AMG Interpolation                            */
20
21  /************************************************************************************/  /****************************************************************************/
22
23  /* Author: l.gao@uq.edu.au                                    */  /* Author: l.gao@uq.edu.au                                    */
24
25  /************************************************************************************/  /****************************************************************************/
26
27  #include "Paso.h"  #include "Paso.h"
28  #include "SparseMatrix.h"  #include "SparseMatrix.h"
29  #include "PasoUtil.h"  #include "PasoUtil.h"
30  #include "Preconditioner.h"  #include "Preconditioner.h"
31
32  /************************************************************************************  /****************************************************************************
33
34      Methods necessary for AMG preconditioner      Methods necessary for AMG preconditioner
35
# Line 38  Line 38
38
39      The coarsening operator A_C is defined by RAP where R=P^T.      The coarsening operator A_C is defined by RAP where R=P^T.
40
41  *************************************************************************************/  *****************************************************************************/
42
43  /* Extend system matrix B with extra two sparse matrices:  /* Extend system matrix B with extra two sparse matrices:
44      B_ext_main and B_ext_couple          B_ext_main and B_ext_couple
45     The combination of this two sparse matrices represents       The combination of this two sparse matrices represents
46     the portion of B that is stored on neighbour procs and     the portion of B that is stored on neighbour procs and
47     needed locally for triple matrix product (B^T) A B.     needed locally for triple matrix product (B^T) A B.
# Line 57  Line 57
57          A->col_coupler->connector->send->shared[rPtr...]          A->col_coupler->connector->send->shared[rPtr...]
58          rPtr=A->col_coupler->connector->send->OffsetInShared[k]          rPtr=A->col_coupler->connector->send->OffsetInShared[k]
59     on q. */     on q. */
60  void Paso_Preconditioner_AMG_extendB(Paso_SystemMatrix* A, Paso_SystemMatrix* B)  void Paso_Preconditioner_AMG_extendB(paso::SystemMatrix_ptr A,
61                                         paso::SystemMatrix_ptr B)
62  {  {
63      paso::Pattern *pattern_main=NULL, *pattern_couple=NULL;      paso::Pattern_ptr pattern_main, pattern_couple;
64    paso::Coupler *coupler=NULL;      paso::Coupler_ptr coupler;
65    Paso_SharedComponents *send=NULL, *recv=NULL;      paso::SharedComponents_ptr send, recv;
66    double *cols=NULL, *send_buf=NULL, *ptr_val=NULL, *send_m=NULL, *send_c=NULL;      double *cols=NULL, *send_buf=NULL, *ptr_val=NULL, *send_m=NULL, *send_c=NULL;
67    index_t *global_id=NULL, *cols_array=NULL, *ptr_ptr=NULL, *ptr_idx=NULL;      index_t *global_id=NULL, *cols_array=NULL, *ptr_ptr=NULL, *ptr_idx=NULL;
68    index_t *ptr_main=NULL, *ptr_couple=NULL, *idx_main=NULL, *idx_couple=NULL;      index_t *ptr_main=NULL, *ptr_couple=NULL, *idx_main=NULL, *idx_couple=NULL;
69    index_t *send_idx=NULL, *send_offset=NULL, *recv_buf=NULL, *recv_offset=NULL;      index_t *send_idx=NULL, *send_offset=NULL, *recv_buf=NULL, *recv_offset=NULL;
70    index_t *idx_m=NULL, *idx_c=NULL;      index_t *idx_m=NULL, *idx_c=NULL;
71    index_t i, j, k, m, p, q, j_ub, k_lb, k_ub, m_lb, m_ub, l_m, l_c, i0;      index_t i, j, k, m, p, q, j_ub, k_lb, k_ub, m_lb, m_ub, l_m, l_c, i0;
72    index_t offset, len, block_size, block_size_size, max_num_cols;      index_t offset, len, block_size, block_size_size, max_num_cols;
73    index_t num_main_cols, num_couple_cols, num_neighbors, row, neighbor;      index_t num_main_cols, num_couple_cols, num_neighbors, row, neighbor;
74    dim_t *recv_degree=NULL, *send_degree=NULL;      dim_t *recv_degree=NULL, *send_degree=NULL;
75    dim_t rank=A->mpi_info->rank, size=A->mpi_info->size;      dim_t rank=A->mpi_info->rank, size=A->mpi_info->size;
76
77    if (size == 1) return;      if (size == 1) return;
78
79    if (B->row_coupleBlock) {      B->row_coupleBlock.reset();
80        paso::SparseMatrix_free(B->row_coupleBlock);
81      B->row_coupleBlock = NULL;      if (B->remote_coupleBlock.get()) {
82    }          Esys_setError(VALUE_ERROR, "Paso_Preconditioner_AMG_extendB: the link to remote_coupleBlock has already been set.");
83            return;
84    if (B->row_coupleBlock || B->remote_coupleBlock) {      }
Esys_setError(VALUE_ERROR, "Paso_Preconditioner_AMG_extendB: the link to row_coupleBlock or to remote_coupleBlock has already been set.");
return;
}
85
86    /* sending/receiving unknown's global ID */      /* sending/receiving unknown's global ID */
87    num_main_cols = B->mainBlock->numCols;      num_main_cols = B->mainBlock->numCols;
88    cols = new double[num_main_cols];      cols = new double[num_main_cols];
89    offset = B->col_distribution->first_component[rank];      offset = B->col_distribution->first_component[rank];
90    #pragma omp parallel for private(i) schedule(static)      #pragma omp parallel for private(i) schedule(static)
91    for (i=0; i<num_main_cols; ++i) cols[i] = offset + i;      for (i=0; i<num_main_cols; ++i) cols[i] = offset + i;
92    if (B->global_id == NULL) {      if (B->global_id == NULL) {
93      coupler=paso::Coupler_alloc(B->col_coupler->connector, 1);          coupler.reset(new paso::Coupler(B->col_coupler->connector, 1));
94      paso::Coupler_startCollect(coupler, cols);          coupler->startCollect(cols);
95    }      }
96
97    recv_buf = new index_t[size];      recv_buf = new index_t[size];
98    recv_degree = new dim_t[size];      recv_degree = new dim_t[size];
99    recv_offset = new index_t[size+1];      recv_offset = new index_t[size+1];
100    #pragma omp parallel for private(i) schedule(static)    #pragma omp parallel for private(i) schedule(static)
101    for (i=0; i<size; i++){      for (i=0; i<size; i++){
102      recv_buf[i] = 0;          recv_buf[i] = 0;
103      recv_degree[i] = 1;          recv_degree[i] = 1;
104      recv_offset[i] = i;          recv_offset[i] = i;
105    }      }
106
107    block_size = B->block_size;    block_size = B->block_size;
108    block_size_size = block_size * sizeof(double);    block_size_size = block_size * sizeof(double);
# Line 126  void Paso_Preconditioner_AMG_extendB(Pas Line 124  void Paso_Preconditioner_AMG_extendB(Pas
124
125    /* waiting for receiving unknown's global ID */    /* waiting for receiving unknown's global ID */
126    if (B->global_id == NULL) {    if (B->global_id == NULL) {
127        paso::Coupler_finishCollect(coupler);        coupler->finishCollect();
128      global_id = new index_t[num_couple_cols];      global_id = new index_t[num_couple_cols];
129      #pragma omp parallel for private(i) schedule(static)      #pragma omp parallel for private(i) schedule(static)
130      for (i=0; i<num_couple_cols; ++i)      for (i=0; i<num_couple_cols; ++i)
131          global_id[i] = coupler->recv_buffer[i];          global_id[i] = coupler->recv_buffer[i];
132      B->global_id = global_id;      B->global_id = global_id;
paso::Coupler_free(coupler);
133    } else    } else
134      global_id = B->global_id;      global_id = B->global_id;
135
# Line 149  void Paso_Preconditioner_AMG_extendB(Pas Line 146  void Paso_Preconditioner_AMG_extendB(Pas
146      recv_offset[i] = len;      recv_offset[i] = len;
147      len += recv_buf[i];      len += recv_buf[i];
148      if (max_num_cols < recv_buf[i])      if (max_num_cols < recv_buf[i])
149      max_num_cols = recv_buf[i];          max_num_cols = recv_buf[i];
150    }    }
151    recv_offset[size] = len;    recv_offset[size] = len;
152    cols_array = new index_t[len];    cols_array = new index_t[len];
# Line 182  void Paso_Preconditioner_AMG_extendB(Pas Line 179  void Paso_Preconditioner_AMG_extendB(Pas
179      m_ub = B->col_distribution->first_component[neighbor + 1];      m_ub = B->col_distribution->first_component[neighbor + 1];
180      j_ub = send->offsetInShared[p + 1];      j_ub = send->offsetInShared[p + 1];
181      for (j=send->offsetInShared[p]; j<j_ub; j++) {      for (j=send->offsetInShared[p]; j<j_ub; j++) {
182      row = send->shared[j];          row = send->shared[j];
183      l_m = 0;          l_m = 0;
184      l_c = 0;          l_c = 0;
185      k_ub = B->col_coupleBlock->pattern->ptr[row + 1];          k_ub = B->col_coupleBlock->pattern->ptr[row + 1];
186      k_lb = B->col_coupleBlock->pattern->ptr[row];          k_lb = B->col_coupleBlock->pattern->ptr[row];
187
188      /* check part of col_coupleBlock for data to be passed @row */          /* check part of col_coupleBlock for data to be passed @row */
189      for (k=k_lb; k<k_ub; k++) {          for (k=k_lb; k<k_ub; k++) {
m = global_id[B->col_coupleBlock->pattern->index[k]];
if (m > offset) break;
if (m>= m_lb && m < m_ub) {
/* data to be passed to sparse matrix B_ext_main */
idx_m[l_m] = m - m_lb;
memcpy(&(send_m[l_m*block_size]), &(B->col_coupleBlock->val[block_size*k]), block_size_size);
l_m++;
} else {
/* data to be passed to sparse matrix B_ext_couple */
idx_c[l_c] = m;
memcpy(&(send_c[l_c*block_size]), &(B->col_coupleBlock->val[block_size*k]), block_size_size);
l_c++;
}
}
k_lb = k;

/* check mainBlock for data to be passed @row to sparse
matrix B_ext_couple */
k_ub = B->mainBlock->pattern->ptr[row + 1];
k = B->mainBlock->pattern->ptr[row];
memcpy(&(send_c[l_c*block_size]), &(B->mainBlock->val[block_size*k]), block_size_size * (k_ub-k));
for (; k<k_ub; k++) {
m = B->mainBlock->pattern->index[k] + offset;
idx_c[l_c] = m;
l_c++;
}

/* check the rest part of col_coupleBlock for data to
be passed @row to sparse matrix B_ext_couple */
k = k_lb;
k_ub = B->col_coupleBlock->pattern->ptr[row + 1];
for (k=k_lb; k<k_ub; k++) {
190            m = global_id[B->col_coupleBlock->pattern->index[k]];            m = global_id[B->col_coupleBlock->pattern->index[k]];
191        if (m>= m_lb && m < m_ub) {            if (m > offset) break;
192          /* data to be passed to sparse matrix B_ext_main */            if (m>= m_lb && m < m_ub) {
193          idx_m[l_m] = m - m_lb;              /* data to be passed to sparse matrix B_ext_main */
194          memcpy(&(send_m[l_m*block_size]), &(B->col_coupleBlock->val[block_size*k]), block_size_size);              idx_m[l_m] = m - m_lb;
195          l_m++;              memcpy(&(send_m[l_m*block_size]), &(B->col_coupleBlock->val[block_size*k]), block_size_size);
196        } else {              l_m++;
197          /* data to be passed to sparse matrix B_ext_couple */            } else {
198          idx_c[l_c] = m;              /* data to be passed to sparse matrix B_ext_couple */
199          memcpy(&(send_c[l_c*block_size]), &(B->col_coupleBlock->val[block_size*k]), block_size_size);              idx_c[l_c] = m;
200          l_c++;              memcpy(&(send_c[l_c*block_size]), &(B->col_coupleBlock->val[block_size*k]), block_size_size);
201        }              l_c++;
202      }            }
203            }
204            k_lb = k;
205
206      memcpy(&(send_buf[len*block_size]), send_m, block_size_size*l_m);          /* check mainBlock for data to be passed @row to sparse
207      memcpy(&(send_idx[len]), idx_m, l_m * sizeof(index_t));             matrix B_ext_couple */
208            k_ub = B->mainBlock->pattern->ptr[row + 1];
209            k = B->mainBlock->pattern->ptr[row];
210            memcpy(&(send_c[l_c*block_size]), &(B->mainBlock->val[block_size*k]), block_size_size * (k_ub-k));
211            for (; k<k_ub; k++) {
212              m = B->mainBlock->pattern->index[k] + offset;
213              idx_c[l_c] = m;
214              l_c++;
215            }
216
217            /* check the rest part of col_coupleBlock for data to
218               be passed @row to sparse matrix B_ext_couple */
219            k = k_lb;
220            k_ub = B->col_coupleBlock->pattern->ptr[row + 1];
221            for (k=k_lb; k<k_ub; k++) {
222              m = global_id[B->col_coupleBlock->pattern->index[k]];
223              if (m>= m_lb && m < m_ub) {
224                /* data to be passed to sparse matrix B_ext_main */
225                idx_m[l_m] = m - m_lb;
226                memcpy(&(send_m[l_m*block_size]), &(B->col_coupleBlock->val[block_size*k]), block_size_size);
227                l_m++;
228              } else {
229                /* data to be passed to sparse matrix B_ext_couple */
230                idx_c[l_c] = m;
231                memcpy(&(send_c[l_c*block_size]), &(B->col_coupleBlock->val[block_size*k]), block_size_size);
232                l_c++;
233              }
234            }
235
236            memcpy(&(send_buf[len*block_size]), send_m, block_size_size*l_m);
237            memcpy(&(send_idx[len]), idx_m, l_m * sizeof(index_t));
238          send_offset[2*i] = l_m;          send_offset[2*i] = l_m;
239      len += l_m;          len += l_m;
240      memcpy(&(send_buf[len*block_size]), send_c, block_size_size*l_c);          memcpy(&(send_buf[len*block_size]), send_c, block_size_size*l_c);
241      memcpy(&(send_idx[len]), idx_c, l_c * sizeof(index_t));          memcpy(&(send_idx[len]), idx_c, l_c * sizeof(index_t));
242      send_offset[2*i+1] = l_c;          send_offset[2*i+1] = l_c;
243      len += l_c;          len += l_c;
244      i++;          i++;
245      }      }
246
247      /* sending */      /* sending */
# Line 300  void Paso_Preconditioner_AMG_extendB(Pas Line 297  void Paso_Preconditioner_AMG_extendB(Pas
297      m = recv->offsetInShared[p+1];      m = recv->offsetInShared[p+1];
298      i = ptr_main[m] - ptr_main[k] + ptr_couple[m] - ptr_couple[k];      i = ptr_main[m] - ptr_main[k] + ptr_couple[m] - ptr_couple[k];
299      if (i > 0) {      if (i > 0) {
300      k_ub ++;          k_ub ++;
301      #ifdef ESYS_MPI          #ifdef ESYS_MPI
302          MPI_Irecv(&(ptr_idx[j]), i, MPI_INT, recv->neighbor[p],          MPI_Irecv(&(ptr_idx[j]), i, MPI_INT, recv->neighbor[p],
303                  A->mpi_info->msg_tag_counter+recv->neighbor[p],                  A->mpi_info->msg_tag_counter+recv->neighbor[p],
304                  A->mpi_info->comm,                  A->mpi_info->comm,
305                  &(A->col_coupler->mpi_requests[p]));                  &(A->col_coupler->mpi_requests[p]));
306      #endif          #endif
307      }      }
308      j += i;      j += i;
309    }    }
# Line 316  void Paso_Preconditioner_AMG_extendB(Pas Line 313  void Paso_Preconditioner_AMG_extendB(Pas
313    for (p=0; p<num_neighbors; p++) {    for (p=0; p<num_neighbors; p++) {
314      i = send_degree[p] - j;      i = send_degree[p] - j;
315      if (i > 0){      if (i > 0){
316      k_ub ++;          k_ub ++;
317      #ifdef ESYS_MPI          #ifdef ESYS_MPI
318          MPI_Issend(&(send_idx[j]), i, MPI_INT, send->neighbor[p],          MPI_Issend(&(send_idx[j]), i, MPI_INT, send->neighbor[p],
319                  A->mpi_info->msg_tag_counter+rank,                  A->mpi_info->msg_tag_counter+rank,
320                  A->mpi_info->comm,                  A->mpi_info->comm,
321                  &(A->col_coupler->mpi_requests[p+recv->numNeighbors]));                  &(A->col_coupler->mpi_requests[p+recv->numNeighbors]));
322      #endif          #endif
323      }      }
324      j = send_degree[p];      j = send_degree[p];
325    }    }
# Line 340  void Paso_Preconditioner_AMG_extendB(Pas Line 337  void Paso_Preconditioner_AMG_extendB(Pas
337      k = ptr_main[i+1];      k = ptr_main[i+1];
338      m = ptr_couple[i];      m = ptr_couple[i];
339      for (p=j; p<k; p++) {      for (p=j; p<k; p++) {
340      idx_main[p] = ptr_idx[m+p];          idx_main[p] = ptr_idx[m+p];
341      }      }
342      j = ptr_couple[i+1];      j = ptr_couple[i+1];
343      for (p=m; p<j; p++) {      for (p=m; p<j; p++) {
344      idx_couple[p] = ptr_idx[k+p];          idx_couple[p] = ptr_idx[k+p];
345      }      }
346    }    }
347    delete[] ptr_idx;    delete[] ptr_idx;
348
349    /* allocate pattern and sparsematrix for B_ext_main */    /* allocate pattern and sparsematrix for B_ext_main */
350    pattern_main = paso::Pattern_alloc(B->col_coupleBlock->pattern->type,    pattern_main.reset(new paso::Pattern(B->col_coupleBlock->pattern->type,
351                  len, num_main_cols, ptr_main, idx_main);                  len, num_main_cols, ptr_main, idx_main));
352    B->row_coupleBlock = paso::SparseMatrix_alloc(B->col_coupleBlock->type,    B->row_coupleBlock.reset(new paso::SparseMatrix(B->col_coupleBlock->type,
353          pattern_main, B->row_block_size, B->col_block_size,                  pattern_main, B->row_block_size, B->col_block_size,
354          FALSE);                  false));
paso::Pattern_free(pattern_main);
355
356    /* allocate pattern and sparsematrix for B_ext_couple */    /* allocate pattern and sparsematrix for B_ext_couple */
357    pattern_couple = paso::Pattern_alloc(B->col_coupleBlock->pattern->type,    pattern_couple.reset(new paso::Pattern(B->col_coupleBlock->pattern->type,
358                  len, B->col_distribution->first_component[size],                  len, B->col_distribution->first_component[size],
359          ptr_couple, idx_couple);                  ptr_couple, idx_couple));
360    B->remote_coupleBlock = paso::SparseMatrix_alloc(B->col_coupleBlock->type,    B->remote_coupleBlock.reset(new paso::SparseMatrix(B->col_coupleBlock->type,
361                  pattern_couple, B->row_block_size, B->col_block_size,                  pattern_couple, B->row_block_size, B->col_block_size,
362                  FALSE);                  false));
paso::Pattern_free(pattern_couple);
363
365    j=0;    j=0;
# Line 409  void Paso_Preconditioner_AMG_extendB(Pas Line 404  void Paso_Preconditioner_AMG_extendB(Pas
404      k = ptr_main[i+1];      k = ptr_main[i+1];
405      m = ptr_couple[i];      m = ptr_couple[i];
406      for (p=j; p<k; p++) {      for (p=j; p<k; p++) {
407      memcpy(&(B->row_coupleBlock->val[p*block_size]), &(ptr_val[(m+p)*block_size]), block_size_size);          memcpy(&(B->row_coupleBlock->val[p*block_size]), &(ptr_val[(m+p)*block_size]), block_size_size);
408      }      }
409      j = ptr_couple[i+1];      j = ptr_couple[i+1];
410      for (p=m; p<j; p++) {      for (p=m; p<j; p++) {
411      memcpy(&(B->remote_coupleBlock->val[p*block_size]), &(ptr_val[(k+p)*block_size]), block_size_size);          memcpy(&(B->remote_coupleBlock->val[p*block_size]), &(ptr_val[(k+p)*block_size]), block_size_size);
412      }      }
413    }    }
414    delete[] ptr_val;    delete[] ptr_val;
415
416
417    /* release all temp memory allocation */      /* release all temp memory allocation */
418    delete[] cols;      delete[] cols;
419    delete[] cols_array;      delete[] cols_array;
420    delete[] recv_offset;      delete[] recv_offset;
421    delete[] recv_degree;      delete[] recv_degree;
422    delete[] recv_buf;      delete[] recv_buf;
423    delete[] send_buf;      delete[] send_buf;
424    delete[] send_offset;      delete[] send_offset;
425    delete[] send_degree;      delete[] send_degree;
426    delete[] send_idx;      delete[] send_idx;
427  }  }
428
429  /* As defined, sparse matrix (let's called it T) defined by T(ptr, idx, val)  /* As defined, sparse matrix (let's called it T) defined by T(ptr, idx, val)
430     has the same number of rows as P->col_coupleBlock->numCols. Now, we need     has the same number of rows as P->col_coupleBlock->numCols. Now, we need
431     to copy block of data in T to neighbour processors, defined by     to copy block of data in T to neighbour processors, defined by
432      P->col_coupler->connector->recv->neighbor[k] where k is in          P->col_coupler->connector->recv->neighbor[k] where k is in
433      [0, P->col_coupler->connector->recv->numNeighbors).          [0, P->col_coupler->connector->recv->numNeighbors).
434     Rows to be copied to neighbor processor k is in the list defined by     Rows to be copied to neighbor processor k is in the list defined by
435      P->col_coupler->connector->recv->offsetInShared[k] ...          P->col_coupler->connector->recv->offsetInShared[k] ...
436      P->col_coupler->connector->recv->offsetInShared[k+1]  */          P->col_coupler->connector->recv->offsetInShared[k+1]  */
437  void Paso_Preconditioner_AMG_CopyRemoteData(Paso_SystemMatrix* P,  void Paso_Preconditioner_AMG_CopyRemoteData(paso::SystemMatrix_ptr P,
438      index_t **p_ptr, index_t **p_idx, double **p_val,          index_t **p_ptr, index_t **p_idx, double **p_val,
439      index_t *global_id, index_t block_size)          index_t *global_id, index_t block_size)
440  {  {
441    Paso_SharedComponents *send=NULL, *recv=NULL;      paso::SharedComponents_ptr send, recv;
442    index_t send_neighbors, recv_neighbors, send_rows, recv_rows;      index_t send_neighbors, recv_neighbors, send_rows, recv_rows;
443    index_t i, j, p, m, n, size;      index_t i, j, p, m, n, size;
444    index_t *send_degree=NULL, *recv_ptr=NULL, *recv_idx=NULL;      index_t *send_degree=NULL, *recv_ptr=NULL, *recv_idx=NULL;
445    index_t *ptr=*p_ptr, *idx=*p_idx;      index_t *ptr=*p_ptr, *idx=*p_idx;
446    double  *val=*p_val, *recv_val=NULL;      double  *val=*p_val, *recv_val=NULL;
447    #ifdef ESYS_MPI    #ifdef ESYS_MPI
448    index_t rank = P->mpi_info->rank;      index_t rank = P->mpi_info->rank;
449    #endif    #endif
450
451    size = P->mpi_info->size;      size = P->mpi_info->size;
452    send = P->col_coupler->connector->recv;      send = P->col_coupler->connector->recv;
453    recv = P->col_coupler->connector->send;      recv = P->col_coupler->connector->send;
454    send_neighbors = send->numNeighbors;      send_neighbors = send->numNeighbors;
455    recv_neighbors = recv->numNeighbors;      recv_neighbors = recv->numNeighbors;
456    send_rows = P->col_coupleBlock->numCols;      send_rows = P->col_coupleBlock->numCols;
457    recv_rows = recv->offsetInShared[recv_neighbors];      recv_rows = recv->offsetInShared[recv_neighbors];
458
459    send_degree = new index_t[send_rows];      send_degree = new index_t[send_rows];
460    recv_ptr = new index_t[recv_rows + 1];      recv_ptr = new index_t[recv_rows + 1];
461    #pragma omp for schedule(static) private(i)    #pragma omp for schedule(static) private(i)
462    for (i=0; i<send_rows; i++)      for (i=0; i<send_rows; i++)
463      send_degree[i] = ptr[i+1] - ptr[i];          send_degree[i] = ptr[i+1] - ptr[i];
464
465    /* First, send/receive the degree */    /* First, send/receive the degree */
466    for (p=0; p<recv_neighbors; p++) { /* Receiving */    for (p=0; p<recv_neighbors; p++) { /* Receiving */
# Line 473  void Paso_Preconditioner_AMG_CopyRemoteD Line 468  void Paso_Preconditioner_AMG_CopyRemoteD
468      n = recv->offsetInShared[p+1];      n = recv->offsetInShared[p+1];
469      #ifdef ESYS_MPI      #ifdef ESYS_MPI
470      MPI_Irecv(&(recv_ptr[m]), n-m, MPI_INT, recv->neighbor[p],      MPI_Irecv(&(recv_ptr[m]), n-m, MPI_INT, recv->neighbor[p],
471          P->mpi_info->msg_tag_counter + recv->neighbor[p],                  P->mpi_info->msg_tag_counter + recv->neighbor[p],
472          P->mpi_info->comm,                  P->mpi_info->comm,
473          &(P->col_coupler->mpi_requests[p]));                  &(P->col_coupler->mpi_requests[p]));
474      #endif      #endif
475    }    }
476    for (p=0; p<send_neighbors; p++) { /* Sending */    for (p=0; p<send_neighbors; p++) { /* Sending */
# Line 483  void Paso_Preconditioner_AMG_CopyRemoteD Line 478  void Paso_Preconditioner_AMG_CopyRemoteD
478      n = send->offsetInShared[p+1];      n = send->offsetInShared[p+1];
479      #ifdef ESYS_MPI      #ifdef ESYS_MPI
480      MPI_Issend(&(send_degree[m]), n-m, MPI_INT, send->neighbor[p],      MPI_Issend(&(send_degree[m]), n-m, MPI_INT, send->neighbor[p],
481          P->mpi_info->msg_tag_counter + rank,                  P->mpi_info->msg_tag_counter + rank,
482          P->mpi_info->comm,                  P->mpi_info->comm,
483          &(P->col_coupler->mpi_requests[p+recv_neighbors]));                  &(P->col_coupler->mpi_requests[p+recv_neighbors]));
484      #endif      #endif
485    }    }
486    #ifdef ESYS_MPI    #ifdef ESYS_MPI
487    MPI_Waitall(send_neighbors+recv_neighbors,    MPI_Waitall(send_neighbors+recv_neighbors,
488          P->col_coupler->mpi_requests,                  P->col_coupler->mpi_requests,
489          P->col_coupler->mpi_stati);                  P->col_coupler->mpi_stati);
490    #endif    #endif
491    ESYS_MPI_INC_COUNTER(*(P->mpi_info),size);    ESYS_MPI_INC_COUNTER(*(P->mpi_info),size);
492
# Line 510  void Paso_Preconditioner_AMG_CopyRemoteD Line 505  void Paso_Preconditioner_AMG_CopyRemoteD
505      if (i > 0) {      if (i > 0) {
506        #ifdef ESYS_MPI        #ifdef ESYS_MPI
507        MPI_Irecv(&(recv_idx[j]), i, MPI_INT, recv->neighbor[p],        MPI_Irecv(&(recv_idx[j]), i, MPI_INT, recv->neighbor[p],
508          P->mpi_info->msg_tag_counter + recv->neighbor[p],                  P->mpi_info->msg_tag_counter + recv->neighbor[p],
509          P->mpi_info->comm,                  P->mpi_info->comm,
510          &(P->col_coupler->mpi_requests[p]));                  &(P->col_coupler->mpi_requests[p]));
511        #endif        #endif
512      }      }
513      j += i;      j += i;
# Line 524  void Paso_Preconditioner_AMG_CopyRemoteD Line 519  void Paso_Preconditioner_AMG_CopyRemoteD
519      n = send->offsetInShared[p+1];      n = send->offsetInShared[p+1];
520      i = ptr[n] - ptr[m];      i = ptr[n] - ptr[m];
521      if (i >0) {      if (i >0) {
522      #ifdef ESYS_MPI          #ifdef ESYS_MPI
523      MPI_Issend(&(idx[j]), i, MPI_INT, send->neighbor[p],          MPI_Issend(&(idx[j]), i, MPI_INT, send->neighbor[p],
524          P->mpi_info->msg_tag_counter + rank,                  P->mpi_info->msg_tag_counter + rank,
525          P->mpi_info->comm,                  P->mpi_info->comm,
526          &(P->col_coupler->mpi_requests[p+recv_neighbors]));                  &(P->col_coupler->mpi_requests[p+recv_neighbors]));
527      #endif          #endif
528      j += i;          j += i;
529      }      }
530    }    }
531    #ifdef ESYS_MPI    #ifdef ESYS_MPI
532    MPI_Waitall(send_neighbors+recv_neighbors,    MPI_Waitall(send_neighbors+recv_neighbors,
533          P->col_coupler->mpi_requests,                  P->col_coupler->mpi_requests,
534          P->col_coupler->mpi_stati);                  P->col_coupler->mpi_stati);
535    #endif    #endif
536    ESYS_MPI_INC_COUNTER(*(P->mpi_info),size);    ESYS_MPI_INC_COUNTER(*(P->mpi_info),size);
537
# Line 549  void Paso_Preconditioner_AMG_CopyRemoteD Line 544  void Paso_Preconditioner_AMG_CopyRemoteD
544      #ifdef ESYS_MPI      #ifdef ESYS_MPI
545      if (i > 0)      if (i > 0)
546        MPI_Irecv(&(recv_val[j]), i*block_size, MPI_DOUBLE, recv->neighbor[p],        MPI_Irecv(&(recv_val[j]), i*block_size, MPI_DOUBLE, recv->neighbor[p],
547          P->mpi_info->msg_tag_counter + recv->neighbor[p],                  P->mpi_info->msg_tag_counter + recv->neighbor[p],
548          P->mpi_info->comm,                  P->mpi_info->comm,
549          &(P->col_coupler->mpi_requests[p]));                  &(P->col_coupler->mpi_requests[p]));
550      #endif      #endif
551      j += (i*block_size);      j += (i*block_size);
552    }    }
# Line 562  void Paso_Preconditioner_AMG_CopyRemoteD Line 557  void Paso_Preconditioner_AMG_CopyRemoteD
557      n = send->offsetInShared[p+1];      n = send->offsetInShared[p+1];
558      i = ptr[n] - ptr[m];      i = ptr[n] - ptr[m];
559      if (i >0) {      if (i >0) {
560      #ifdef ESYS_MPI          #ifdef ESYS_MPI
561      MPI_Issend(&(val[j]), i * block_size, MPI_DOUBLE, send->neighbor[p],          MPI_Issend(&(val[j]), i * block_size, MPI_DOUBLE, send->neighbor[p],
562          P->mpi_info->msg_tag_counter + rank,                  P->mpi_info->msg_tag_counter + rank,
563          P->mpi_info->comm,                  P->mpi_info->comm,
564          &(P->col_coupler->mpi_requests[p+recv_neighbors]));                  &(P->col_coupler->mpi_requests[p+recv_neighbors]));
565      #endif          #endif
566      j += i * block_size;          j += i * block_size;
567      }      }
568    }    }
569    #ifdef ESYS_MPI    #ifdef ESYS_MPI
570    MPI_Waitall(send_neighbors+recv_neighbors,    MPI_Waitall(send_neighbors+recv_neighbors,
571          P->col_coupler->mpi_requests,                  P->col_coupler->mpi_requests,
572          P->col_coupler->mpi_stati);                  P->col_coupler->mpi_stati);
573    #endif    #endif
574    ESYS_MPI_INC_COUNTER(*(P->mpi_info),size);    ESYS_MPI_INC_COUNTER(*(P->mpi_info),size);
575
# Line 587  void Paso_Preconditioner_AMG_CopyRemoteD Line 582  void Paso_Preconditioner_AMG_CopyRemoteD
582    *p_val = recv_val;    *p_val = recv_val;
583  }  }
584
585  Paso_SystemMatrix* Paso_Preconditioner_AMG_buildInterpolationOperator(  paso::SystemMatrix_ptr Paso_Preconditioner_AMG_buildInterpolationOperator(
586      Paso_SystemMatrix* A, Paso_SystemMatrix* P, Paso_SystemMatrix* R)          paso::SystemMatrix_ptr A, paso::SystemMatrix_ptr P,
587            paso::SystemMatrix_ptr R)
588  {  {
589     Esys_MPIInfo *mpi_info=Esys_MPIInfo_getReference(A->mpi_info);     Esys_MPIInfo *mpi_info=Esys_MPIInfo_getReference(A->mpi_info);
590     paso::SparseMatrix *R_main=NULL, *R_couple=NULL;     paso::SystemMatrix_ptr out;
591     Paso_SystemMatrix *out=NULL;     paso::SystemMatrixPattern_ptr pattern;
paso::SystemMatrixPattern *pattern=NULL;
592     paso::Distribution_ptr input_dist, output_dist;     paso::Distribution_ptr input_dist, output_dist;
593     Paso_SharedComponents *send =NULL, *recv=NULL;     paso::Connector_ptr col_connector, row_connector;
paso::Connector *col_connector=NULL, *row_connector=NULL;
paso::Pattern *main_pattern=NULL;
paso::Pattern *col_couple_pattern=NULL, *row_couple_pattern =NULL;
594     const dim_t row_block_size=A->row_block_size;     const dim_t row_block_size=A->row_block_size;
595     const dim_t col_block_size=A->col_block_size;     const dim_t col_block_size=A->col_block_size;
596     const dim_t block_size = A->block_size;     const dim_t block_size = A->block_size;
# Line 635  Paso_SystemMatrix* Paso_Preconditioner_A Line 627  Paso_SystemMatrix* Paso_Preconditioner_A
627     /* two sparse matrices R_main and R_couple will be generate, as the     /* two sparse matrices R_main and R_couple will be generate, as the
628        transpose of P_main and P_col_couple, respectively. Note that,        transpose of P_main and P_col_couple, respectively. Note that,
629        R_couple is actually the row_coupleBlock of R (R=P^T) */        R_couple is actually the row_coupleBlock of R (R=P^T) */
630     R_main = paso::SparseMatrix_getTranspose(P->mainBlock);     paso::SparseMatrix_ptr R_main(P->mainBlock->getTranspose());
631       paso::SparseMatrix_ptr R_couple;
632     if (size > 1)     if (size > 1)
633       R_couple = paso::SparseMatrix_getTranspose(P->col_coupleBlock);       R_couple = P->col_coupleBlock->getTranspose();
634
635     /* generate P_ext, i.e. portion of P that is stored on neighbor procs     /* generate P_ext, i.e. portion of P that is stored on neighbor procs
636        and needed locally for triple matrix product RAP        and needed locally for triple matrix product RAP
637        to be specific, P_ext (on processor k) are group of rows in P, where        to be specific, P_ext (on processor k) are group of rows in P, where
638        the list of rows from processor q is given by        the list of rows from processor q is given by
639      A->col_coupler->connector->send->shared[rPtr...]          A->col_coupler->connector->send->shared[rPtr...]
640      rPtr=A->col_coupler->connector->send->OffsetInShared[k]          rPtr=A->col_coupler->connector->send->OffsetInShared[k]
641        on q.        on q.
642        P_ext is represented by two sparse matrices P_ext_main and        P_ext is represented by two sparse matrices P_ext_main and
643        P_ext_couple */        P_ext_couple */
# Line 669  Paso_SystemMatrix* Paso_Preconditioner_A Line 662  Paso_SystemMatrix* Paso_Preconditioner_A
662     num_Pext_cols = 0;     num_Pext_cols = 0;
663     if (P->global_id) {     if (P->global_id) {
664       /* first count the number of cols "num_Pext_cols" in both P_ext_couple       /* first count the number of cols "num_Pext_cols" in both P_ext_couple
665      and P->col_coupleBlock */          and P->col_coupleBlock */
666       iptr = 0;       iptr = 0;
667       if (num_Pcouple_cols || sum > 0) {       if (num_Pcouple_cols || sum > 0) {
668      temp = new index_t[num_Pcouple_cols+sum];          temp = new index_t[num_Pcouple_cols+sum];
669      #pragma omp parallel for lastprivate(iptr) schedule(static)          #pragma omp parallel for lastprivate(iptr) schedule(static)
670      for (iptr=0; iptr<sum; iptr++){          for (iptr=0; iptr<sum; iptr++){
671        temp[iptr] = P->remote_coupleBlock->pattern->index[iptr];            temp[iptr] = P->remote_coupleBlock->pattern->index[iptr];
672      }          }
673      for (j=0; j<num_Pcouple_cols; j++, iptr++){          for (j=0; j<num_Pcouple_cols; j++, iptr++){
674        temp[iptr] = P->global_id[j];            temp[iptr] = P->global_id[j];
675      }          }
676       }       }
677       if (iptr) {       if (iptr) {
678      #ifdef USE_QSORTG            qsort(temp, (size_t)iptr, sizeof(index_t), paso::comparIndex);
679        qsortG(temp, (size_t)iptr, sizeof(index_t), paso::comparIndex);          num_Pext_cols = 1;
680      #else          i = temp[0];
681        qsort(temp, (size_t)iptr, sizeof(index_t), paso::comparIndex);          for (j=1; j<iptr; j++) {
682      #endif            if (temp[j] > i) {
683      num_Pext_cols = 1;              i = temp[j];
684      i = temp[0];              temp[num_Pext_cols++] = i;
685      for (j=1; j<iptr; j++) {            }
686        if (temp[j] > i) {          }
i = temp[j];
temp[num_Pext_cols++] = i;
}
}
687       }       }
688       /* resize the pattern of P_ext_couple */       /* resize the pattern of P_ext_couple */
689       if(num_Pext_cols){       if(num_Pext_cols){
690      global_id_P = new index_t[num_Pext_cols];          global_id_P = new index_t[num_Pext_cols];
691      #pragma omp parallel for private(i) schedule(static)          #pragma omp parallel for private(i) schedule(static)
692      for (i=0; i<num_Pext_cols; i++)          for (i=0; i<num_Pext_cols; i++)
693        global_id_P[i] = temp[i];            global_id_P[i] = temp[i];
694       }       }
695       if (num_Pcouple_cols || sum > 0)       if (num_Pcouple_cols || sum > 0)
696      delete[] temp;          delete[] temp;
697       #pragma omp parallel for private(i, where_p) schedule(static)       #pragma omp parallel for private(i, where_p) schedule(static)
698       for (i=0; i<sum; i++) {       for (i=0; i<sum; i++) {
699      where_p = (index_t *)bsearch(          where_p = (index_t *)bsearch(
700              &(P->remote_coupleBlock->pattern->index[i]),                          &(P->remote_coupleBlock->pattern->index[i]),
701              global_id_P, num_Pext_cols,                          global_id_P, num_Pext_cols,
702                          sizeof(index_t), paso::comparIndex);                          sizeof(index_t), paso::comparIndex);
703      P->remote_coupleBlock->pattern->index[i] =          P->remote_coupleBlock->pattern->index[i] =
704              (index_t)(where_p -global_id_P);                          (index_t)(where_p -global_id_P);
705       }         }
706
707       /* build the mapping */       /* build the mapping */
708       if (num_Pcouple_cols) {       if (num_Pcouple_cols) {
709      Pcouple_to_Pext = new index_t[num_Pcouple_cols];          Pcouple_to_Pext = new index_t[num_Pcouple_cols];
710      iptr = 0;          iptr = 0;
711      for (i=0; i<num_Pext_cols; i++)          for (i=0; i<num_Pext_cols; i++)
712        if (global_id_P[i] == P->global_id[iptr]) {            if (global_id_P[i] == P->global_id[iptr]) {
713          Pcouple_to_Pext[iptr++] = i;              Pcouple_to_Pext[iptr++] = i;
714          if (iptr == num_Pcouple_cols) break;              if (iptr == num_Pcouple_cols) break;
715        }            }
716       }       }
717     }     }
718
# Line 743  Paso_SystemMatrix* Paso_Preconditioner_A Line 732  Paso_SystemMatrix* Paso_Preconditioner_A
732       /* then, loop over elements in row i_r of R_couple */       /* then, loop over elements in row i_r of R_couple */
733       j1_ub = R_couple->pattern->ptr[i_r+1];       j1_ub = R_couple->pattern->ptr[i_r+1];
734       for (j1=R_couple->pattern->ptr[i_r]; j1<j1_ub; j1++){       for (j1=R_couple->pattern->ptr[i_r]; j1<j1_ub; j1++){
735      i1 = R_couple->pattern->index[j1];          i1 = R_couple->pattern->index[j1];
736      /* then, loop over elements in row i1 of A->col_coupleBlock */          /* then, loop over elements in row i1 of A->col_coupleBlock */
737      j2_ub = A->col_coupleBlock->pattern->ptr[i1+1];          j2_ub = A->col_coupleBlock->pattern->ptr[i1+1];
738      for (j2=A->col_coupleBlock->pattern->ptr[i1]; j2<j2_ub; j2++) {          for (j2=A->col_coupleBlock->pattern->ptr[i1]; j2<j2_ub; j2++) {
739        i2 = A->col_coupleBlock->pattern->index[j2];            i2 = A->col_coupleBlock->pattern->index[j2];
740
741        /* check whether entry RA[i_r, i2] has been previously visited.            /* check whether entry RA[i_r, i2] has been previously visited.
742           RAP new entry is possible only if entry RA[i_r, i2] has not               RAP new entry is possible only if entry RA[i_r, i2] has not
743           been visited yet */               been visited yet */
744        if (A_marker[i2] != i_r) {            if (A_marker[i2] != i_r) {
745          /* first, mark entry RA[i_r,i2] as visited */              /* first, mark entry RA[i_r,i2] as visited */
746          A_marker[i2] = i_r;              A_marker[i2] = i_r;
747
748          /* then loop over elements in row i2 of P_ext_main */              /* then loop over elements in row i2 of P_ext_main */
749          j3_ub = P->row_coupleBlock->pattern->ptr[i2+1];              j3_ub = P->row_coupleBlock->pattern->ptr[i2+1];
750          for (j3=P->row_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->row_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
751          i_c = P->row_coupleBlock->pattern->index[j3];                  i_c = P->row_coupleBlock->pattern->index[j3];
752
753          /* check whether entry RAP[i_r,i_c] has been created.                  /* check whether entry RAP[i_r,i_c] has been created.
754             If not yet created, create the entry and increase                     If not yet created, create the entry and increase
755             the total number of elements in RAP_ext */                     the total number of elements in RAP_ext */
756          if (P_marker[i_c] < row_marker) {                  if (P_marker[i_c] < row_marker) {
757            P_marker[i_c] = sum;                    P_marker[i_c] = sum;
758            sum++;                    sum++;
759          }                  }
760          }              }
761
762          /* loop over elements in row i2 of P_ext_couple, do the same */              /* loop over elements in row i2 of P_ext_couple, do the same */
763          j3_ub = P->remote_coupleBlock->pattern->ptr[i2+1];              j3_ub = P->remote_coupleBlock->pattern->ptr[i2+1];
764          for (j3=P->remote_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->remote_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
765          i_c = P->remote_coupleBlock->pattern->index[j3] + num_Pmain_cols;                  i_c = P->remote_coupleBlock->pattern->index[j3] + num_Pmain_cols;
766          if (P_marker[i_c] < row_marker) {                  if (P_marker[i_c] < row_marker) {
767            P_marker[i_c] = sum;                    P_marker[i_c] = sum;
768            sum++;                    sum++;
769          }                  }
770          }              }
771        }            }
772      }          }
773
774      /* now loop over elements in row i1 of A->mainBlock, repeat          /* now loop over elements in row i1 of A->mainBlock, repeat
775         the process we have done to block A->col_coupleBlock */             the process we have done to block A->col_coupleBlock */
776      j2_ub = A->mainBlock->pattern->ptr[i1+1];          j2_ub = A->mainBlock->pattern->ptr[i1+1];
777      for (j2=A->mainBlock->pattern->ptr[i1]; j2<j2_ub; j2++) {          for (j2=A->mainBlock->pattern->ptr[i1]; j2<j2_ub; j2++) {
778        i2 = A->mainBlock->pattern->index[j2];            i2 = A->mainBlock->pattern->index[j2];
779        if (A_marker[i2 + num_Acouple_cols] != i_r) {            if (A_marker[i2 + num_Acouple_cols] != i_r) {
780          A_marker[i2 + num_Acouple_cols] = i_r;              A_marker[i2 + num_Acouple_cols] = i_r;
781          j3_ub = P->mainBlock->pattern->ptr[i2+1];              j3_ub = P->mainBlock->pattern->ptr[i2+1];
782          for (j3=P->mainBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->mainBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
783          i_c = P->mainBlock->pattern->index[j3];                  i_c = P->mainBlock->pattern->index[j3];
784          if (P_marker[i_c] < row_marker) {                  if (P_marker[i_c] < row_marker) {
785            P_marker[i_c] = sum;                    P_marker[i_c] = sum;
786            sum++;                    sum++;
787          }                  }
788          }              }
789          j3_ub = P->col_coupleBlock->pattern->ptr[i2+1];              j3_ub = P->col_coupleBlock->pattern->ptr[i2+1];
790          for (j3=P->col_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->col_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
791          /* note that we need to map the column index in                  /* note that we need to map the column index in
792             P->col_coupleBlock back into the column index in                     P->col_coupleBlock back into the column index in
793             P_ext_couple */                     P_ext_couple */
794          i_c = Pcouple_to_Pext[P->col_coupleBlock->pattern->index[j3]] + num_Pmain_cols;                  i_c = Pcouple_to_Pext[P->col_coupleBlock->pattern->index[j3]] + num_Pmain_cols;
795          if (P_marker[i_c] < row_marker) {                  if (P_marker[i_c] < row_marker) {
796            P_marker[i_c] = sum;                    P_marker[i_c] = sum;
797            sum++;                    sum++;
798          }                  }
799          }              }
800        }            }
801      }          }
802       }       }
803     }     }
804
# Line 834  Paso_SystemMatrix* Paso_Preconditioner_A Line 823  Paso_SystemMatrix* Paso_Preconditioner_A
823       /* then, loop over elements in row i_r of R_couple */       /* then, loop over elements in row i_r of R_couple */
824       j1_ub = R_couple->pattern->ptr[i_r+1];       j1_ub = R_couple->pattern->ptr[i_r+1];
825       for (j1=R_couple->pattern->ptr[i_r]; j1<j1_ub; j1++){       for (j1=R_couple->pattern->ptr[i_r]; j1<j1_ub; j1++){
826      i1 = R_couple->pattern->index[j1];          i1 = R_couple->pattern->index[j1];
827      R_val = &(R_couple->val[j1*P_block_size]);          R_val = &(R_couple->val[j1*P_block_size]);
828
829      /* then, loop over elements in row i1 of A->col_coupleBlock */          /* then, loop over elements in row i1 of A->col_coupleBlock */
830      j2_ub = A->col_coupleBlock->pattern->ptr[i1+1];          j2_ub = A->col_coupleBlock->pattern->ptr[i1+1];
831      for (j2=A->col_coupleBlock->pattern->ptr[i1]; j2<j2_ub; j2++) {          for (j2=A->col_coupleBlock->pattern->ptr[i1]; j2<j2_ub; j2++) {
832        i2 = A->col_coupleBlock->pattern->index[j2];            i2 = A->col_coupleBlock->pattern->index[j2];
833        temp_val = &(A->col_coupleBlock->val[j2*block_size]);            temp_val = &(A->col_coupleBlock->val[j2*block_size]);
834        for (irb=0; irb<row_block_size; irb++)            for (irb=0; irb<row_block_size; irb++)
835              for (icb=0; icb<col_block_size; icb++)              for (icb=0; icb<col_block_size; icb++)
836                  RA_val[irb+row_block_size*icb] = ZERO;                  RA_val[irb+row_block_size*icb] = ZERO;
837        for (irb=0; irb<P_block_size; irb++) {            for (irb=0; irb<P_block_size; irb++) {
838          rtmp=R_val[irb];              rtmp=R_val[irb];
839          for (icb=0; icb<col_block_size; icb++) {              for (icb=0; icb<col_block_size; icb++) {
840          RA_val[irb+row_block_size*icb] += rtmp * temp_val[irb+col_block_size*icb];                  RA_val[irb+row_block_size*icb] += rtmp * temp_val[irb+col_block_size*icb];
841          }              }
842        }            }
843
844        /* check whether entry RA[i_r, i2] has been previously visited.            /* check whether entry RA[i_r, i2] has been previously visited.
845           RAP new entry is possible only if entry RA[i_r, i2] has not               RAP new entry is possible only if entry RA[i_r, i2] has not
846           been visited yet */               been visited yet */
847        if (A_marker[i2] != i_r) {            if (A_marker[i2] != i_r) {
848          /* first, mark entry RA[i_r,i2] as visited */              /* first, mark entry RA[i_r,i2] as visited */
849          A_marker[i2] = i_r;              A_marker[i2] = i_r;
850
851          /* then loop over elements in row i2 of P_ext_main */              /* then loop over elements in row i2 of P_ext_main */
852          j3_ub = P->row_coupleBlock->pattern->ptr[i2+1];              j3_ub = P->row_coupleBlock->pattern->ptr[i2+1];
853          for (j3=P->row_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->row_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
854          i_c = P->row_coupleBlock->pattern->index[j3];                  i_c = P->row_coupleBlock->pattern->index[j3];
855          temp_val = &(P->row_coupleBlock->val[j3*P_block_size]);                  temp_val = &(P->row_coupleBlock->val[j3*P_block_size]);
856          for (irb=0; irb<row_block_size; irb++)                  for (irb=0; irb<row_block_size; irb++)
857            for (icb=0; icb<col_block_size; icb++)                    for (icb=0; icb<col_block_size; icb++)
858              RAP_val[irb+row_block_size*icb] = ZERO;                      RAP_val[irb+row_block_size*icb] = ZERO;
859          for (icb=0; icb<P_block_size; icb++) {                  for (icb=0; icb<P_block_size; icb++) {
860            rtmp = temp_val[icb];                    rtmp = temp_val[icb];
861            for (irb=0; irb<row_block_size; irb++) {                    for (irb=0; irb<row_block_size; irb++) {
862              RAP_val[irb+row_block_size*icb] += RA_val[irb+row_block_size*icb] * rtmp;                      RAP_val[irb+row_block_size*icb] += RA_val[irb+row_block_size*icb] * rtmp;
863            }                    }
864          }                  }
865
866          /* check whether entry RAP[i_r,i_c] has been created.                  /* check whether entry RAP[i_r,i_c] has been created.
867             If not yet created, create the entry and increase                     If not yet created, create the entry and increase
868             the total number of elements in RAP_ext */                     the total number of elements in RAP_ext */
869          if (P_marker[i_c] < row_marker) {                  if (P_marker[i_c] < row_marker) {
870            P_marker[i_c] = sum;                    P_marker[i_c] = sum;
871            memcpy(&(RAP_ext_val[sum*block_size]), RAP_val, block_size*sizeof(double));                    memcpy(&(RAP_ext_val[sum*block_size]), RAP_val, block_size*sizeof(double));
872            RAP_ext_idx[sum] = i_c + offset;                    RAP_ext_idx[sum] = i_c + offset;
873            sum++;                    sum++;
874          } else {                  } else {
875            temp_val = &(RAP_ext_val[P_marker[i_c] * block_size]);                    temp_val = &(RAP_ext_val[P_marker[i_c] * block_size]);
876            for (ib=0; ib<block_size; ib++) {                    for (ib=0; ib<block_size; ib++) {
877              temp_val[ib] += RAP_val[ib];                      temp_val[ib] += RAP_val[ib];
878            }                    }
879          }                  }
880          }              }
881
882          /* loop over elements in row i2 of P_ext_couple, do the same */              /* loop over elements in row i2 of P_ext_couple, do the same */
883          j3_ub = P->remote_coupleBlock->pattern->ptr[i2+1];              j3_ub = P->remote_coupleBlock->pattern->ptr[i2+1];
884          for (j3=P->remote_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->remote_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
885          i_c = P->remote_coupleBlock->pattern->index[j3] + num_Pmain_cols;                  i_c = P->remote_coupleBlock->pattern->index[j3] + num_Pmain_cols;
886                  temp_val = &(P->remote_coupleBlock->val[j3*P_block_size]);                  temp_val = &(P->remote_coupleBlock->val[j3*P_block_size]);
887          for (irb=0; irb<row_block_size; irb++)                  for (irb=0; irb<row_block_size; irb++)
888            for (icb=0; icb<col_block_size; icb++)                    for (icb=0; icb<col_block_size; icb++)
889              RAP_val[irb+row_block_size*icb]=ZERO;                      RAP_val[irb+row_block_size*icb]=ZERO;
890                  for (icb=0; icb<P_block_size; icb++) {                  for (icb=0; icb<P_block_size; icb++) {
891            rtmp = temp_val[icb];                    rtmp = temp_val[icb];
892                    for (irb=0; irb<row_block_size; irb++) {                    for (irb=0; irb<row_block_size; irb++) {
893                      RAP_val[irb+row_block_size*icb] += RA_val[irb+row_block_size*icb] * rtmp;                      RAP_val[irb+row_block_size*icb] += RA_val[irb+row_block_size*icb] * rtmp;
894                    }                    }
895                  }                  }
896
897          if (P_marker[i_c] < row_marker) {                  if (P_marker[i_c] < row_marker) {
898            P_marker[i_c] = sum;                    P_marker[i_c] = sum;
899            memcpy(&(RAP_ext_val[sum*block_size]), RAP_val, block_size*sizeof(double));                    memcpy(&(RAP_ext_val[sum*block_size]), RAP_val, block_size*sizeof(double));
900            RAP_ext_idx[sum] = global_id_P[i_c - num_Pmain_cols];                    RAP_ext_idx[sum] = global_id_P[i_c - num_Pmain_cols];
901            sum++;                    sum++;
902          } else {                  } else {
903                    temp_val = &(RAP_ext_val[P_marker[i_c] * block_size]);                    temp_val = &(RAP_ext_val[P_marker[i_c] * block_size]);
904                    for (ib=0; ib<block_size; ib++) {                    for (ib=0; ib<block_size; ib++) {
905                      temp_val[ib] += RAP_val[ib];                      temp_val[ib] += RAP_val[ib];
906                    }                    }
907          }                  }
908          }              }
909
910        /* If entry RA[i_r, i2] is visited, no new RAP entry is created.            /* If entry RA[i_r, i2] is visited, no new RAP entry is created.
911           Only the contributions are added. */               Only the contributions are added. */
912        } else {            } else {
913          j3_ub = P->row_coupleBlock->pattern->ptr[i2+1];              j3_ub = P->row_coupleBlock->pattern->ptr[i2+1];
914          for (j3=P->row_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->row_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
915          i_c = P->row_coupleBlock->pattern->index[j3];                  i_c = P->row_coupleBlock->pattern->index[j3];
916                  temp_val = &(P->row_coupleBlock->val[j3*P_block_size]);                  temp_val = &(P->row_coupleBlock->val[j3*P_block_size]);
917          for (irb=0; irb<row_block_size; irb++)                  for (irb=0; irb<row_block_size; irb++)
918            for (icb=0; icb<col_block_size; icb++)                    for (icb=0; icb<col_block_size; icb++)
919              RAP_val[irb+row_block_size*icb]=ZERO;                      RAP_val[irb+row_block_size*icb]=ZERO;
920                  for (icb=0; icb<P_block_size; icb++) {                  for (icb=0; icb<P_block_size; icb++) {
921            rtmp = temp_val[icb];                    rtmp = temp_val[icb];
922                    for (irb=0; irb<row_block_size; irb++) {                    for (irb=0; irb<row_block_size; irb++) {
923                      RAP_val[irb+row_block_size*icb] += RA_val[irb+row_block_size*icb] * rtmp;                      RAP_val[irb+row_block_size*icb] += RA_val[irb+row_block_size*icb] * rtmp;
924                    }                    }
# Line 939  Paso_SystemMatrix* Paso_Preconditioner_A Line 928  Paso_SystemMatrix* Paso_Preconditioner_A
928                  for (ib=0; ib<block_size; ib++) {                  for (ib=0; ib<block_size; ib++) {
929                    temp_val[ib] += RAP_val[ib];                    temp_val[ib] += RAP_val[ib];
930                  }                  }
931          }              }
932          j3_ub = P->remote_coupleBlock->pattern->ptr[i2+1];              j3_ub = P->remote_coupleBlock->pattern->ptr[i2+1];
933          for (j3=P->remote_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->remote_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
934          i_c = P->remote_coupleBlock->pattern->index[j3] + num_Pmain_cols;                  i_c = P->remote_coupleBlock->pattern->index[j3] + num_Pmain_cols;
935                  temp_val = &(P->remote_coupleBlock->val[j3*P_block_size]);                  temp_val = &(P->remote_coupleBlock->val[j3*P_block_size]);
936          for (irb=0; irb<row_block_size; irb++)                  for (irb=0; irb<row_block_size; irb++)
937            for (icb=0; icb<col_block_size; icb++)                    for (icb=0; icb<col_block_size; icb++)
938              RAP_val[irb+row_block_size*icb]=ZERO;                      RAP_val[irb+row_block_size*icb]=ZERO;
939                  for (icb=0; icb<P_block_size; icb++) {                  for (icb=0; icb<P_block_size; icb++) {
940            rtmp = temp_val[icb];                    rtmp = temp_val[icb];
941                    for (irb=0; irb<row_block_size; irb++) {                    for (irb=0; irb<row_block_size; irb++) {
942                      RAP_val[irb+row_block_size*icb] += RA_val[irb+row_block_size*icb] * rtmp;                      RAP_val[irb+row_block_size*icb] += RA_val[irb+row_block_size*icb] * rtmp;
943                    }                    }
# Line 958  Paso_SystemMatrix* Paso_Preconditioner_A Line 947  Paso_SystemMatrix* Paso_Preconditioner_A
947                  for (ib=0; ib<block_size; ib++) {                  for (ib=0; ib<block_size; ib++) {
948                    temp_val[ib] += RAP_val[ib];                    temp_val[ib] += RAP_val[ib];
949                  }                  }
950          }              }
951        }            }
952      }          }
953
954      /* now loop over elements in row i1 of A->mainBlock, repeat          /* now loop over elements in row i1 of A->mainBlock, repeat
955         the process we have done to block A->col_coupleBlock */             the process we have done to block A->col_coupleBlock */
956      j2_ub = A->mainBlock->pattern->ptr[i1+1];          j2_ub = A->mainBlock->pattern->ptr[i1+1];
957      for (j2=A->mainBlock->pattern->ptr[i1]; j2<j2_ub; j2++) {          for (j2=A->mainBlock->pattern->ptr[i1]; j2<j2_ub; j2++) {
958        i2 = A->mainBlock->pattern->index[j2];            i2 = A->mainBlock->pattern->index[j2];
959            temp_val = &(A->mainBlock->val[j2*block_size]);            temp_val = &(A->mainBlock->val[j2*block_size]);
960        for (irb=0; irb<row_block_size; irb++)            for (irb=0; irb<row_block_size; irb++)
961          for (icb=0; icb<col_block_size; icb++)              for (icb=0; icb<col_block_size; icb++)
962          RA_val[irb+row_block_size*icb]=ZERO;                  RA_val[irb+row_block_size*icb]=ZERO;
963            for (irb=0; irb<P_block_size; irb++) {            for (irb=0; irb<P_block_size; irb++) {
964          rtmp=R_val[irb];              rtmp=R_val[irb];
965              for (icb=0; icb<col_block_size; icb++) {              for (icb=0; icb<col_block_size; icb++) {
966                  RA_val[irb+row_block_size*icb] += rtmp * temp_val[irb+col_block_size*icb];                  RA_val[irb+row_block_size*icb] += rtmp * temp_val[irb+col_block_size*icb];
967              }              }
968            }            }
969
970        if (A_marker[i2 + num_Acouple_cols] != i_r) {            if (A_marker[i2 + num_Acouple_cols] != i_r) {
971          A_marker[i2 + num_Acouple_cols] = i_r;              A_marker[i2 + num_Acouple_cols] = i_r;
972          j3_ub = P->mainBlock->pattern->ptr[i2+1];              j3_ub = P->mainBlock->pattern->ptr[i2+1];
973          for (j3=P->mainBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->mainBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
974          i_c = P->mainBlock->pattern->index[j3];                  i_c = P->mainBlock->pattern->index[j3];
975                  temp_val = &(P->mainBlock->val[j3*P_block_size]);                  temp_val = &(P->mainBlock->val[j3*P_block_size]);
976          for (irb=0; irb<row_block_size; irb++)                  for (irb=0; irb<row_block_size; irb++)
977            for (icb=0; icb<col_block_size; icb++)                    for (icb=0; icb<col_block_size; icb++)
978              RAP_val[irb+row_block_size*icb]=ZERO;                      RAP_val[irb+row_block_size*icb]=ZERO;
979                  for (icb=0; icb<P_block_size; icb++) {                  for (icb=0; icb<P_block_size; icb++) {
980            rtmp = temp_val[icb];                    rtmp = temp_val[icb];
981                    for (irb=0; irb<row_block_size; irb++) {                    for (irb=0; irb<row_block_size; irb++) {
982                      RAP_val[irb+row_block_size*icb] += RA_val[irb+row_block_size*icb] * rtmp;                      RAP_val[irb+row_block_size*icb] += RA_val[irb+row_block_size*icb] * rtmp;
983                    }                    }
984                  }                  }
985
986          if (P_marker[i_c] < row_marker) {                  if (P_marker[i_c] < row_marker) {
987            P_marker[i_c] = sum;                    P_marker[i_c] = sum;
988            memcpy(&(RAP_ext_val[sum*block_size]), RAP_val, block_size*sizeof(double));                    memcpy(&(RAP_ext_val[sum*block_size]), RAP_val, block_size*sizeof(double));
989            RAP_ext_idx[sum] = i_c + offset;                    RAP_ext_idx[sum] = i_c + offset;
990            sum++;                    sum++;
991          } else {                  } else {
992                    temp_val = &(RAP_ext_val[P_marker[i_c] * block_size]);                    temp_val = &(RAP_ext_val[P_marker[i_c] * block_size]);
993                    for (ib=0; ib<block_size; ib++) {                    for (ib=0; ib<block_size; ib++) {
994                      temp_val[ib] += RAP_val[ib];                      temp_val[ib] += RAP_val[ib];
995                    }                    }
996          }                  }
997          }              }
998          j3_ub = P->col_coupleBlock->pattern->ptr[i2+1];              j3_ub = P->col_coupleBlock->pattern->ptr[i2+1];
999          for (j3=P->col_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->col_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
1000          i_c = Pcouple_to_Pext[P->col_coupleBlock->pattern->index[j3]] + num_Pmain_cols;                  i_c = Pcouple_to_Pext[P->col_coupleBlock->pattern->index[j3]] + num_Pmain_cols;
1001                  temp_val = &(P->col_coupleBlock->val[j3*P_block_size]);                  temp_val = &(P->col_coupleBlock->val[j3*P_block_size]);
1002          for (irb=0; irb<row_block_size; irb++)                  for (irb=0; irb<row_block_size; irb++)
1003                    for (icb=0; icb<col_block_size; icb++)                    for (icb=0; icb<col_block_size; icb++)
1004              RAP_val[irb+row_block_size*icb]=ZERO;                      RAP_val[irb+row_block_size*icb]=ZERO;
1005                  for (icb=0; icb<P_block_size; icb++) {                  for (icb=0; icb<P_block_size; icb++) {
1006            rtmp=temp_val[icb];                    rtmp=temp_val[icb];
1007                    for (irb=0; irb<row_block_size; irb++) {                    for (irb=0; irb<row_block_size; irb++) {
1008                      RAP_val[irb+row_block_size*icb] += RA_val[irb+row_block_size*icb] * rtmp;                      RAP_val[irb+row_block_size*icb] += RA_val[irb+row_block_size*icb] * rtmp;
1009                    }                    }
1010                  }                  }
1011
1012          if (P_marker[i_c] < row_marker) {                  if (P_marker[i_c] < row_marker) {
1013            P_marker[i_c] = sum;                    P_marker[i_c] = sum;
1014            memcpy(&(RAP_ext_val[sum*block_size]), RAP_val, block_size*sizeof(double));                    memcpy(&(RAP_ext_val[sum*block_size]), RAP_val, block_size*sizeof(double));
1015                    RAP_ext_idx[sum] = global_id_P[i_c - num_Pmain_cols];                    RAP_ext_idx[sum] = global_id_P[i_c - num_Pmain_cols];
1016            sum++;                    sum++;
1017          } else {                  } else {
1018                    temp_val = &(RAP_ext_val[P_marker[i_c] * block_size]);                    temp_val = &(RAP_ext_val[P_marker[i_c] * block_size]);
1019                    for (ib=0; ib<block_size; ib++) {                    for (ib=0; ib<block_size; ib++) {
1020                      temp_val[ib] += RAP_val[ib];                      temp_val[ib] += RAP_val[ib];
1021                    }                    }
1022          }                  }
1023          }              }
1024        } else {            } else {
1025          j3_ub = P->mainBlock->pattern->ptr[i2+1];              j3_ub = P->mainBlock->pattern->ptr[i2+1];
1026          for (j3=P->mainBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->mainBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
1027          i_c = P->mainBlock->pattern->index[j3];                  i_c = P->mainBlock->pattern->index[j3];
1028                  temp_val = &(P->mainBlock->val[j3*P_block_size]);                  temp_val = &(P->mainBlock->val[j3*P_block_size]);
1029          for (irb=0; irb<row_block_size; irb++)                  for (irb=0; irb<row_block_size; irb++)
1030            for (icb=0; icb<col_block_size; icb++)                    for (icb=0; icb<col_block_size; icb++)
1031              RAP_val[irb+row_block_size*icb]=ZERO;                      RAP_val[irb+row_block_size*icb]=ZERO;
1032                  for (icb=0; icb<P_block_size; icb++) {                  for (icb=0; icb<P_block_size; icb++) {
1033            rtmp=temp_val[icb];                    rtmp=temp_val[icb];
1034                    for (irb=0; irb<row_block_size; irb++) {                    for (irb=0; irb<row_block_size; irb++) {
1035                      RAP_val[irb+row_block_size*icb] += RA_val[irb+row_block_size*icb] * rtmp;                      RAP_val[irb+row_block_size*icb] += RA_val[irb+row_block_size*icb] * rtmp;
1036                    }                    }
# Line 1051  Paso_SystemMatrix* Paso_Preconditioner_A Line 1040  Paso_SystemMatrix* Paso_Preconditioner_A
1040                  for (ib=0; ib<block_size; ib++) {                  for (ib=0; ib<block_size; ib++) {
1041                    temp_val[ib] += RAP_val[ib];                    temp_val[ib] += RAP_val[ib];
1042                  }                  }
1043          }              }
1044          j3_ub = P->col_coupleBlock->pattern->ptr[i2+1];              j3_ub = P->col_coupleBlock->pattern->ptr[i2+1];
1045          for (j3=P->col_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->col_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
1046          i_c = Pcouple_to_Pext[P->col_coupleBlock->pattern->index[j3]] + num_Pmain_cols;                  i_c = Pcouple_to_Pext[P->col_coupleBlock->pattern->index[j3]] + num_Pmain_cols;
1047                  temp_val = &(P->col_coupleBlock->val[j3*P_block_size]);                  temp_val = &(P->col_coupleBlock->val[j3*P_block_size]);
1048          for (irb=0; irb<row_block_size; irb++)                  for (irb=0; irb<row_block_size; irb++)
1049            for (icb=0; icb<col_block_size; icb++)                    for (icb=0; icb<col_block_size; icb++)
1050              RAP_val[irb+row_block_size*icb]=ZERO;                      RAP_val[irb+row_block_size*icb]=ZERO;
1051                  for (icb=0; icb<P_block_size; icb++) {                  for (icb=0; icb<P_block_size; icb++) {
1052              rtmp=temp_val[icb];                      rtmp=temp_val[icb];
1053                    for (irb=0; irb<row_block_size; irb++) {                    for (irb=0; irb<row_block_size; irb++) {
1054                      RAP_val[irb+row_block_size*icb] += RA_val[irb+row_block_size*icb] * rtmp;                      RAP_val[irb+row_block_size*icb] += RA_val[irb+row_block_size*icb] * rtmp;
1055                    }                    }
# Line 1070  Paso_SystemMatrix* Paso_Preconditioner_A Line 1059  Paso_SystemMatrix* Paso_Preconditioner_A
1059                  for (ib=0; ib<block_size; ib++) {                  for (ib=0; ib<block_size; ib++) {
1060                    temp_val[ib] += RAP_val[ib];                    temp_val[ib] += RAP_val[ib];
1061                  }                  }
1062          }              }
1063        }            }
1064      }          }
1065       }       }
1066       RAP_ext_ptr[i_r+1] = sum;       RAP_ext_ptr[i_r+1] = sum;
1067     }     }
# Line 1086  Paso_SystemMatrix* Paso_Preconditioner_A Line 1075  Paso_SystemMatrix* Paso_Preconditioner_A
1075        be passed to neighbouring processors, and added to corresponding        be passed to neighbouring processors, and added to corresponding
1076        RAP[r,c] entries in the neighbouring processors */        RAP[r,c] entries in the neighbouring processors */
1077     Paso_Preconditioner_AMG_CopyRemoteData(P, &RAP_ext_ptr, &RAP_ext_idx,     Paso_Preconditioner_AMG_CopyRemoteData(P, &RAP_ext_ptr, &RAP_ext_idx,
1078          &RAP_ext_val, global_id_P, block_size);                  &RAP_ext_val, global_id_P, block_size);
1079
1080     num_RAPext_rows = P->col_coupler->connector->send->offsetInShared[P->col_coupler->connector->send->numNeighbors];     num_RAPext_rows = P->col_coupler->connector->send->offsetInShared[P->col_coupler->connector->send->numNeighbors];
1081     sum = RAP_ext_ptr[num_RAPext_rows];     sum = RAP_ext_ptr[num_RAPext_rows];
# Line 1095  Paso_SystemMatrix* Paso_Preconditioner_A Line 1084  Paso_SystemMatrix* Paso_Preconditioner_A
1084       temp = new index_t[num_Pext_cols+sum];       temp = new index_t[num_Pext_cols+sum];
1085       j1_ub = offset + num_Pmain_cols;       j1_ub = offset + num_Pmain_cols;
1086       for (i=0, iptr=0; i<sum; i++) {       for (i=0, iptr=0; i<sum; i++) {
1087      if (RAP_ext_idx[i] < offset || RAP_ext_idx[i] >= j1_ub)  /* XXX */          if (RAP_ext_idx[i] < offset || RAP_ext_idx[i] >= j1_ub)  /* XXX */
1088        temp[iptr++] = RAP_ext_idx[i];          /* XXX */            temp[iptr++] = RAP_ext_idx[i];                  /* XXX */
1089       }       }
1090       for (j=0; j<num_Pext_cols; j++, iptr++){       for (j=0; j<num_Pext_cols; j++, iptr++){
1091      temp[iptr] = global_id_P[j];          temp[iptr] = global_id_P[j];
1092       }       }
1093
1094       if (iptr) {       if (iptr) {
1095      #ifdef USE_QSORTG            qsort(temp, (size_t)iptr, sizeof(index_t), paso::comparIndex);
1096        qsortG(temp, (size_t)iptr, sizeof(index_t), paso::comparIndex);          num_RAPext_cols = 1;
1097      #else          i = temp[0];
1098        qsort(temp, (size_t)iptr, sizeof(index_t), paso::comparIndex);          for (j=1; j<iptr; j++) {
1099      #endif            if (temp[j] > i) {
1100      num_RAPext_cols = 1;              i = temp[j];
1101      i = temp[0];              temp[num_RAPext_cols++] = i;
1102      for (j=1; j<iptr; j++) {            }
1103        if (temp[j] > i) {          }
i = temp[j];
temp[num_RAPext_cols++] = i;
}
}
1104       }       }
1105     }     }
1106
# Line 1124  Paso_SystemMatrix* Paso_Preconditioner_A Line 1109  Paso_SystemMatrix* Paso_Preconditioner_A
1109       global_id_RAP = new index_t[num_RAPext_cols];       global_id_RAP = new index_t[num_RAPext_cols];
1110       #pragma omp parallel for private(i) schedule(static)       #pragma omp parallel for private(i) schedule(static)
1111       for (i=0; i<num_RAPext_cols; i++)       for (i=0; i<num_RAPext_cols; i++)
1112      global_id_RAP[i] = temp[i];          global_id_RAP[i] = temp[i];
1113     }     }
1114     if (num_Pext_cols || sum > 0)     if (num_Pext_cols || sum > 0)
1115       delete[] temp;       delete[] temp;
# Line 1132  Paso_SystemMatrix* Paso_Preconditioner_A Line 1117  Paso_SystemMatrix* Paso_Preconditioner_A
1117     #pragma omp parallel for private(i, where_p) schedule(static)     #pragma omp parallel for private(i, where_p) schedule(static)
1118     for (i=0; i<sum; i++) {     for (i=0; i<sum; i++) {
1119       if (RAP_ext_idx[i] < offset || RAP_ext_idx[i] >= j1_ub){       if (RAP_ext_idx[i] < offset || RAP_ext_idx[i] >= j1_ub){
1120      where_p = (index_t *) bsearch(&(RAP_ext_idx[i]), global_id_RAP,          where_p = (index_t *) bsearch(&(RAP_ext_idx[i]), global_id_RAP,
1121  /*XXX*/         num_RAPext_cols, sizeof(index_t), paso::comparIndex);  /*XXX*/                 num_RAPext_cols, sizeof(index_t), paso::comparIndex);
1122      RAP_ext_idx[i] = num_Pmain_cols + (index_t)(where_p - global_id_RAP);          RAP_ext_idx[i] = num_Pmain_cols + (index_t)(where_p - global_id_RAP);
1123       } else       } else
1124      RAP_ext_idx[i] = RAP_ext_idx[i] - offset;          RAP_ext_idx[i] = RAP_ext_idx[i] - offset;
1125     }     }
1126
1127     /* build the mapping */     /* build the mapping */
# Line 1144  Paso_SystemMatrix* Paso_Preconditioner_A Line 1129  Paso_SystemMatrix* Paso_Preconditioner_A
1129       Pcouple_to_RAP = new index_t[num_Pcouple_cols];       Pcouple_to_RAP = new index_t[num_Pcouple_cols];
1130       iptr = 0;       iptr = 0;
1131       for (i=0; i<num_RAPext_cols; i++)       for (i=0; i<num_RAPext_cols; i++)
1132      if (global_id_RAP[i] == P->global_id[iptr]) {          if (global_id_RAP[i] == P->global_id[iptr]) {
1133        Pcouple_to_RAP[iptr++] = i;            Pcouple_to_RAP[iptr++] = i;
1134        if (iptr == num_Pcouple_cols) break;            if (iptr == num_Pcouple_cols) break;
1135      }          }
1136     }     }
1137
1138     if (num_Pext_cols) {     if (num_Pext_cols) {
1139       Pext_to_RAP = new index_t[num_Pext_cols];       Pext_to_RAP = new index_t[num_Pext_cols];
1140       iptr = 0;       iptr = 0;
1141       for (i=0; i<num_RAPext_cols; i++)       for (i=0; i<num_RAPext_cols; i++)
1142      if (global_id_RAP[i] == global_id_P[iptr]) {          if (global_id_RAP[i] == global_id_P[iptr]) {
1143        Pext_to_RAP[iptr++] = i;            Pext_to_RAP[iptr++] = i;
1144        if (iptr == num_Pext_cols) break;            if (iptr == num_Pext_cols) break;
1145      }          }
1146     }     }
1147
1148     if (global_id_P){     if (global_id_P){
# Line 1185  Paso_SystemMatrix* Paso_Preconditioner_A Line 1170  Paso_SystemMatrix* Paso_Preconditioner_A
1170       row_marker_ext = j;       row_marker_ext = j;
1171
1172       /* Mark the diagonal element RAP[i_r, i_r], and other elements       /* Mark the diagonal element RAP[i_r, i_r], and other elements
1173      in RAP_ext */          in RAP_ext */
1174       P_marker[i_r] = i;       P_marker[i_r] = i;
1175       i++;       i++;
1176       for (j1=0; j1<num_neighbors; j1++) {       for (j1=0; j1<num_neighbors; j1++) {
1177      for (j2=offsetInShared[j1]; j2<offsetInShared[j1+1]; j2++) {          for (j2=offsetInShared[j1]; j2<offsetInShared[j1+1]; j2++) {
1178        if (shared[j2] == i_r) {            if (shared[j2] == i_r) {
1179          for (k=RAP_ext_ptr[j2]; k<RAP_ext_ptr[j2+1]; k++) {              for (k=RAP_ext_ptr[j2]; k<RAP_ext_ptr[j2+1]; k++) {
1180            i_c = RAP_ext_idx[k];                i_c = RAP_ext_idx[k];
1181            if (i_c < num_Pmain_cols) {                if (i_c < num_Pmain_cols) {
1182          if (P_marker[i_c] < row_marker) {                  if (P_marker[i_c] < row_marker) {
1183            P_marker[i_c] = i;                    P_marker[i_c] = i;
1184            i++;                    i++;
1185          }                  }
1186            } else {                } else {
1187          if (P_marker[i_c] < row_marker_ext) {                  if (P_marker[i_c] < row_marker_ext) {
1188            P_marker[i_c] = j;                    P_marker[i_c] = j;
1189            j++;                    j++;
1190          }                  }
1191            }                }
1192          }              }
1193          break;              break;
1194        }            }
1195      }          }
1196       }       }
1197
1198       /* then, loop over elements in row i_r of R_main */       /* then, loop over elements in row i_r of R_main */
1199       j1_ub = R_main->pattern->ptr[i_r+1];       j1_ub = R_main->pattern->ptr[i_r+1];
1200       for (j1=R_main->pattern->ptr[i_r]; j1<j1_ub; j1++){       for (j1=R_main->pattern->ptr[i_r]; j1<j1_ub; j1++){
1201      i1 = R_main->pattern->index[j1];          i1 = R_main->pattern->index[j1];
1202
1203            /* then, loop over elements in row i1 of A->col_coupleBlock */
1204            j2_ub = A->col_coupleBlock->pattern->ptr[i1+1];
1205            for (j2=A->col_coupleBlock->pattern->ptr[i1]; j2<j2_ub; j2++) {
1206              i2 = A->col_coupleBlock->pattern->index[j2];
1207
1208              /* check whether entry RA[i_r, i2] has been previously visited.
1209                 RAP new entry is possible only if entry RA[i_r, i2] has not
1210                 been visited yet */
1211              if (A_marker[i2] != i_r) {
1212                /* first, mark entry RA[i_r,i2] as visited */
1213                A_marker[i2] = i_r;
1214
1215                /* then loop over elements in row i2 of P_ext_main */
1216                j3_ub = P->row_coupleBlock->pattern->ptr[i2+1];
1217                for (j3=P->row_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
1218                    i_c = P->row_coupleBlock->pattern->index[j3];
1219
1220                    /* check whether entry RAP[i_r,i_c] has been created.
1221                       If not yet created, create the entry and increase
1222                       the total number of elements in RAP_ext */
1223                    if (P_marker[i_c] < row_marker) {
1224                      P_marker[i_c] = i;
1225                      i++;
1226                    }
1227                }
1228
1229                /* loop over elements in row i2 of P_ext_couple, do the same */
1230                j3_ub = P->remote_coupleBlock->pattern->ptr[i2+1];
1231                for (j3=P->remote_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
1232                    i_c = Pext_to_RAP[P->remote_coupleBlock->pattern->index[j3]] + num_Pmain_cols;
1233                    if (P_marker[i_c] < row_marker_ext) {
1234                      P_marker[i_c] = j;
1235                      j++;
1236                    }
1237                }
1238              }
1239            }
1240
1241      /* then, loop over elements in row i1 of A->col_coupleBlock */          /* now loop over elements in row i1 of A->mainBlock, repeat
1242      j2_ub = A->col_coupleBlock->pattern->ptr[i1+1];             the process we have done to block A->col_coupleBlock */
1243      for (j2=A->col_coupleBlock->pattern->ptr[i1]; j2<j2_ub; j2++) {          j2_ub = A->mainBlock->pattern->ptr[i1+1];
1244        i2 = A->col_coupleBlock->pattern->index[j2];          for (j2=A->mainBlock->pattern->ptr[i1]; j2<j2_ub; j2++) {
1245              i2 = A->mainBlock->pattern->index[j2];
1246        /* check whether entry RA[i_r, i2] has been previously visited.            if (A_marker[i2 + num_Acouple_cols] != i_r) {
1247           RAP new entry is possible only if entry RA[i_r, i2] has not              A_marker[i2 + num_Acouple_cols] = i_r;
1248           been visited yet */              j3_ub = P->mainBlock->pattern->ptr[i2+1];
1249        if (A_marker[i2] != i_r) {              for (j3=P->mainBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
1250          /* first, mark entry RA[i_r,i2] as visited */                  i_c = P->mainBlock->pattern->index[j3];
1251          A_marker[i2] = i_r;                  if (P_marker[i_c] < row_marker) {
1252                      P_marker[i_c] = i;
1253          /* then loop over elements in row i2 of P_ext_main */                    i++;
1254          j3_ub = P->row_coupleBlock->pattern->ptr[i2+1];                  }
1255          for (j3=P->row_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              }
1256          i_c = P->row_coupleBlock->pattern->index[j3];              j3_ub = P->col_coupleBlock->pattern->ptr[i2+1];
1257                for (j3=P->col_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
1258          /* check whether entry RAP[i_r,i_c] has been created.                  /* note that we need to map the column index in
1259             If not yet created, create the entry and increase                     P->col_coupleBlock back into the column index in
1260             the total number of elements in RAP_ext */                     P_ext_couple */
1261          if (P_marker[i_c] < row_marker) {                  i_c = Pcouple_to_RAP[P->col_coupleBlock->pattern->index[j3]] + num_Pmain_cols;
1262            P_marker[i_c] = i;                  if (P_marker[i_c] < row_marker_ext) {
1263            i++;                    P_marker[i_c] = j;
1264          }                    j++;
1265          }                  }
1266                }
1267          /* loop over elements in row i2 of P_ext_couple, do the same */            }
1268          j3_ub = P->remote_coupleBlock->pattern->ptr[i2+1];          }
for (j3=P->remote_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
i_c = Pext_to_RAP[P->remote_coupleBlock->pattern->index[j3]] + num_Pmain_cols;
if (P_marker[i_c] < row_marker_ext) {
P_marker[i_c] = j;
j++;
}
}
}
}

/* now loop over elements in row i1 of A->mainBlock, repeat
the process we have done to block A->col_coupleBlock */
j2_ub = A->mainBlock->pattern->ptr[i1+1];
for (j2=A->mainBlock->pattern->ptr[i1]; j2<j2_ub; j2++) {
i2 = A->mainBlock->pattern->index[j2];
if (A_marker[i2 + num_Acouple_cols] != i_r) {
A_marker[i2 + num_Acouple_cols] = i_r;
j3_ub = P->mainBlock->pattern->ptr[i2+1];
for (j3=P->mainBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
i_c = P->mainBlock->pattern->index[j3];
if (P_marker[i_c] < row_marker) {
P_marker[i_c] = i;
i++;
}
}
j3_ub = P->col_coupleBlock->pattern->ptr[i2+1];
for (j3=P->col_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
/* note that we need to map the column index in
P->col_coupleBlock back into the column index in
P_ext_couple */
i_c = Pcouple_to_RAP[P->col_coupleBlock->pattern->index[j3]] + num_Pmain_cols;
if (P_marker[i_c] < row_marker_ext) {
P_marker[i_c] = j;
j++;
}
}
}
}
1269       }       }
1270     }     }
1271
# Line 1315  Paso_SystemMatrix* Paso_Preconditioner_A Line 1300  Paso_SystemMatrix* Paso_Preconditioner_A
1300       RAP_couple_ptr[i_r] = row_marker_ext;       RAP_couple_ptr[i_r] = row_marker_ext;
1301
1302       /* Mark and setup the diagonal element RAP[i_r, i_r], and elements       /* Mark and setup the diagonal element RAP[i_r, i_r], and elements
1303      in row i_r of RAP_ext */          in row i_r of RAP_ext */
1304       P_marker[i_r] = i;       P_marker[i_r] = i;
1305       for (ib=0; ib<block_size; ib++)       for (ib=0; ib<block_size; ib++)
1306         RAP_main_val[i*block_size+ib] = ZERO;         RAP_main_val[i*block_size+ib] = ZERO;
# Line 1323  Paso_SystemMatrix* Paso_Preconditioner_A Line 1308  Paso_SystemMatrix* Paso_Preconditioner_A
1308       i++;       i++;
1309
1310       for (j1=0; j1<num_neighbors; j1++) {       for (j1=0; j1<num_neighbors; j1++) {
1311      for (j2=offsetInShared[j1]; j2<offsetInShared[j1+1]; j2++) {          for (j2=offsetInShared[j1]; j2<offsetInShared[j1+1]; j2++) {
1312        if (shared[j2] == i_r) {            if (shared[j2] == i_r) {
1313          for (k=RAP_ext_ptr[j2]; k<RAP_ext_ptr[j2+1]; k++) {              for (k=RAP_ext_ptr[j2]; k<RAP_ext_ptr[j2+1]; k++) {
1314            i_c = RAP_ext_idx[k];                i_c = RAP_ext_idx[k];
1315            if (i_c < num_Pmain_cols) {                if (i_c < num_Pmain_cols) {
1316          if (P_marker[i_c] < row_marker) {                  if (P_marker[i_c] < row_marker) {
1317            P_marker[i_c] = i;                    P_marker[i_c] = i;
1318            memcpy(&(RAP_main_val[i*block_size]), &(RAP_ext_val[k*block_size]), block_size*sizeof(double));                    memcpy(&(RAP_main_val[i*block_size]), &(RAP_ext_val[k*block_size]), block_size*sizeof(double));
1319            RAP_main_idx[i] = i_c;                    RAP_main_idx[i] = i_c;
1320            i++;                    i++;
1321          } else {                  } else {
1322            t1_val = &(RAP_ext_val[k*block_size]);                    t1_val = &(RAP_ext_val[k*block_size]);
1323            t2_val = &(RAP_main_val[P_marker[i_c]*block_size]);                    t2_val = &(RAP_main_val[P_marker[i_c]*block_size]);
1324            for (ib=0; ib<block_size; ib++)                    for (ib=0; ib<block_size; ib++)
1325              t2_val[ib] += t1_val[ib];                      t2_val[ib] += t1_val[ib];
1326          }                  }
1327            } else {                } else {
1328          if (P_marker[i_c] < row_marker_ext) {                  if (P_marker[i_c] < row_marker_ext) {
1329            P_marker[i_c] = j;                    P_marker[i_c] = j;
1330            memcpy(&(RAP_couple_val[j*block_size]), &(RAP_ext_val[k*block_size]), block_size*sizeof(double));                    memcpy(&(RAP_couple_val[j*block_size]), &(RAP_ext_val[k*block_size]), block_size*sizeof(double));
1331            RAP_couple_idx[j] = i_c - num_Pmain_cols;                    RAP_couple_idx[j] = i_c - num_Pmain_cols;
1332            j++;                    j++;
1333          } else {                  } else {
1334            t1_val = &(RAP_ext_val[k*block_size]);                    t1_val = &(RAP_ext_val[k*block_size]);
1335            t2_val = &(RAP_couple_val[P_marker[i_c]*block_size]);                    t2_val = &(RAP_couple_val[P_marker[i_c]*block_size]);
1336            for (ib=0; ib<block_size; ib++)                    for (ib=0; ib<block_size; ib++)
1337              t2_val[ib] += t1_val[ib];                      t2_val[ib] += t1_val[ib];
1338          }                  }
1339            }                }
1340          }              }
1341          break;              break;
1342        }            }
1343      }          }
1344       }       }
1345
1346       /* then, loop over elements in row i_r of R_main */       /* then, loop over elements in row i_r of R_main */
1347       j1_ub = R_main->pattern->ptr[i_r+1];       j1_ub = R_main->pattern->ptr[i_r+1];
1348       for (j1=R_main->pattern->ptr[i_r]; j1<j1_ub; j1++){       for (j1=R_main->pattern->ptr[i_r]; j1<j1_ub; j1++){
1349      i1 = R_main->pattern->index[j1];          i1 = R_main->pattern->index[j1];
1350      R_val = &(R_main->val[j1*P_block_size]);          R_val = &(R_main->val[j1*P_block_size]);
1351
1352      /* then, loop over elements in row i1 of A->col_coupleBlock */          /* then, loop over elements in row i1 of A->col_coupleBlock */
1353      j2_ub = A->col_coupleBlock->pattern->ptr[i1+1];          j2_ub = A->col_coupleBlock->pattern->ptr[i1+1];
1354      for (j2=A->col_coupleBlock->pattern->ptr[i1]; j2<j2_ub; j2++) {          for (j2=A->col_coupleBlock->pattern->ptr[i1]; j2<j2_ub; j2++) {
1355        i2 = A->col_coupleBlock->pattern->index[j2];            i2 = A->col_coupleBlock->pattern->index[j2];
1356            temp_val = &(A->col_coupleBlock->val[j2*block_size]);            temp_val = &(A->col_coupleBlock->val[j2*block_size]);
1357        for (irb=0; irb<row_block_size; irb++)            for (irb=0; irb<row_block_size; irb++)
1358          for (icb=0; icb<col_block_size; icb++)              for (icb=0; icb<col_block_size; icb++)
1359          RA_val[irb+row_block_size*icb]=ZERO;                  RA_val[irb+row_block_size*icb]=ZERO;
1360            for (irb=0; irb<P_block_size; irb++) {            for (irb=0; irb<P_block_size; irb++) {
1361          rtmp=R_val[irb];              rtmp=R_val[irb];
1362              for (icb=0; icb<col_block_size; icb++) {              for (icb=0; icb<col_block_size; icb++) {
1363                  RA_val[irb+col_block_size*icb] += rtmp * temp_val[irb+col_block_size*icb];                  RA_val[irb+col_block_size*icb] += rtmp * temp_val[irb+col_block_size*icb];
1364              }              }
1365            }            }
1366
1367
1368        /* check whether entry RA[i_r, i2] has been previously visited.            /* check whether entry RA[i_r, i2] has been previously visited.
1369           RAP new entry is possible only if entry RA[i_r, i2] has not               RAP new entry is possible only if entry RA[i_r, i2] has not
1370           been visited yet */               been visited yet */
1371        if (A_marker[i2] != i_r) {            if (A_marker[i2] != i_r) {
1372          /* first, mark entry RA[i_r,i2] as visited */              /* first, mark entry RA[i_r,i2] as visited */
1373          A_marker[i2] = i_r;              A_marker[i2] = i_r;
1374
1375          /* then loop over elements in row i2 of P_ext_main */              /* then loop over elements in row i2 of P_ext_main */
1376          j3_ub = P->row_coupleBlock->pattern->ptr[i2+1];              j3_ub = P->row_coupleBlock->pattern->ptr[i2+1];
1377          for (j3=P->row_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->row_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
1378          i_c = P->row_coupleBlock->pattern->index[j3];                  i_c = P->row_coupleBlock->pattern->index[j3];
1379                  temp_val = &(P->row_coupleBlock->val[j3*P_block_size]);                  temp_val = &(P->row_coupleBlock->val[j3*P_block_size]);
1380          for (irb=0; irb<row_block_size; irb++)                  for (irb=0; irb<row_block_size; irb++)
1381            for (icb=0; icb<col_block_size; icb++)                    for (icb=0; icb<col_block_size; icb++)
1382              RAP_val[irb+row_block_size*icb]=ZERO;                      RAP_val[irb+row_block_size*icb]=ZERO;
1383                  for (icb=0; icb<P_block_size; icb++) {                  for (icb=0; icb<P_block_size; icb++) {
1384            rtmp=temp_val[icb];                    rtmp=temp_val[icb];
1385                    for (irb=0; irb<row_block_size; irb++) {                    for (irb=0; irb<row_block_size; irb++) {
1386                      RAP_val[irb+row_block_size*icb] += RA_val[irb+row_block_size*icb] * rtmp;                      RAP_val[irb+row_block_size*icb] += RA_val[irb+row_block_size*icb] * rtmp;
1387                    }                    }
1388                  }                  }
1389
1390
1391          /* check whether entry RAP[i_r,i_c] has been created.                  /* check whether entry RAP[i_r,i_c] has been created.
1392             If not yet created, create the entry and increase                     If not yet created, create the entry and increase
1393             the total number of elements in RAP_ext */                     the total number of elements in RAP_ext */
1394          if (P_marker[i_c] < row_marker) {                  if (P_marker[i_c] < row_marker) {
1395            P_marker[i_c] = i;                    P_marker[i_c] = i;
1396            memcpy(&(RAP_main_val[i*block_size]), RAP_val, block_size*sizeof(double));                    memcpy(&(RAP_main_val[i*block_size]), RAP_val, block_size*sizeof(double));
1397            RAP_main_idx[i] = i_c;                    RAP_main_idx[i] = i_c;
1398            i++;                    i++;
1399          } else {                  } else {
1400                    temp_val = &(RAP_main_val[P_marker[i_c] * block_size]);                    temp_val = &(RAP_main_val[P_marker[i_c] * block_size]);
1401                    for (ib=0; ib<block_size; ib++) {                    for (ib=0; ib<block_size; ib++) {
1402                      temp_val[ib] += RAP_val[ib];                      temp_val[ib] += RAP_val[ib];
1403                    }                    }
1404          }                  }
1405          }              }
1406
1407          /* loop over elements in row i2 of P_ext_couple, do the same */              /* loop over elements in row i2 of P_ext_couple, do the same */
1408          j3_ub = P->remote_coupleBlock->pattern->ptr[i2+1];              j3_ub = P->remote_coupleBlock->pattern->ptr[i2+1];
1409          for (j3=P->remote_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->remote_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
1410          i_c = Pext_to_RAP[P->remote_coupleBlock->pattern->index[j3]] + num_Pmain_cols;                  i_c = Pext_to_RAP[P->remote_coupleBlock->pattern->index[j3]] + num_Pmain_cols;
1411                  temp_val = &(P->remote_coupleBlock->val[j3*P_block_size]);                  temp_val = &(P->remote_coupleBlock->val[j3*P_block_size]);
1412          for (irb=0; irb<row_block_size; irb++)                  for (irb=0; irb<row_block_size; irb++)
1413            for (icb=0; icb<col_block_size; icb++)                    for (icb=0; icb<col_block_size; icb++)
1414              RAP_val[irb+row_block_size*icb]=ZERO;                      RAP_val[irb+row_block_size*icb]=ZERO;
1415                  for (icb=0; icb<P_block_size; icb++) {                  for (icb=0; icb<P_block_size; icb++) {
1416            rtmp=temp_val[icb];                    rtmp=temp_val[icb];
1417                    for (irb=0; irb<row_block_size; irb++) {                    for (irb=0; irb<row_block_size; irb++) {
1418                      RAP_val[irb+row_block_size*icb] += RA_val[irb+row_block_size*icb] * rtmp;                      RAP_val[irb+row_block_size*icb] += RA_val[irb+row_block_size*icb] * rtmp;
1419                    }                    }
1420                  }                  }
1421
1422          if (P_marker[i_c] < row_marker_ext) {                  if (P_marker[i_c] < row_marker_ext) {
1423            P_marker[i_c] = j;                    P_marker[i_c] = j;
1424            memcpy(&(RAP_couple_val[j*block_size]), RAP_val, block_size*sizeof(double));                    memcpy(&(RAP_couple_val[j*block_size]), RAP_val, block_size*sizeof(double));
1425            RAP_couple_idx[j] = i_c - num_Pmain_cols;                    RAP_couple_idx[j] = i_c - num_Pmain_cols;
1426            j++;                    j++;
1427          } else {                  } else {
1428                    temp_val = &(RAP_couple_val[P_marker[i_c] * block_size]);                    temp_val = &(RAP_couple_val[P_marker[i_c] * block_size]);
1429                    for (ib=0; ib<block_size; ib++) {                    for (ib=0; ib<block_size; ib++) {
1430                      temp_val[ib] += RAP_val[ib];                      temp_val[ib] += RAP_val[ib];
1431                    }                    }
1432          }                  }
1433          }              }
1434
1435        /* If entry RA[i_r, i2] is visited, no new RAP entry is created.            /* If entry RA[i_r, i2] is visited, no new RAP entry is created.
1436           Only the contributions are added. */               Only the contributions are added. */
1437        } else {            } else {
1438          j3_ub = P->row_coupleBlock->pattern->ptr[i2+1];              j3_ub = P->row_coupleBlock->pattern->ptr[i2+1];
1439          for (j3=P->row_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->row_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
1440          i_c = P->row_coupleBlock->pattern->index[j3];                  i_c = P->row_coupleBlock->pattern->index[j3];
1441                  temp_val = &(P->row_coupleBlock->val[j3*P_block_size]);                  temp_val = &(P->row_coupleBlock->val[j3*P_block_size]);
1442          for (irb=0; irb<row_block_size; irb++)                  for (irb=0; irb<row_block_size; irb++)
1443            for (icb=0; icb<col_block_size; icb++)                    for (icb=0; icb<col_block_size; icb++)
1444              RAP_val[irb+row_block_size*icb]=ZERO;                      RAP_val[irb+row_block_size*icb]=ZERO;
1445                  for (icb=0; icb<P_block_size; icb++) {                  for (icb=0; icb<P_block_size; icb++) {
1446            rtmp=temp_val[icb];                    rtmp=temp_val[icb];
1447                    for (irb=0; irb<row_block_size; irb++) {                    for (irb=0; irb<row_block_size; irb++) {
1448                      RAP_val[irb+row_block_size*icb] += RA_val[irb+row_block_size*icb] * rtmp;                      RAP_val[irb+row_block_size*icb] += RA_val[irb+row_block_size*icb] * rtmp;
1449                    }                    }
# Line 1468  Paso_SystemMatrix* Paso_Preconditioner_A Line 1453  Paso_SystemMatrix* Paso_Preconditioner_A
1453                  for (ib=0; ib<block_size; ib++) {                  for (ib=0; ib<block_size; ib++) {
1454                    temp_val[ib] += RAP_val[ib];                    temp_val[ib] += RAP_val[ib];
1455                  }                  }
1456          }              }
1457          j3_ub = P->remote_coupleBlock->pattern->ptr[i2+1];              j3_ub = P->remote_coupleBlock->pattern->ptr[i2+1];
1458          for (j3=P->remote_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->remote_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
1459          i_c = Pext_to_RAP[P->remote_coupleBlock->pattern->index[j3]] + num_Pmain_cols;                  i_c = Pext_to_RAP[P->remote_coupleBlock->pattern->index[j3]] + num_Pmain_cols;
1460                  temp_val = &(P->remote_coupleBlock->val[j3*P_block_size]);                  temp_val = &(P->remote_coupleBlock->val[j3*P_block_size]);
1461          for (irb=0; irb<row_block_size; irb++)                  for (irb=0; irb<row_block_size; irb++)
1462            for (icb=0; icb<col_block_size; icb++)                    for (icb=0; icb<col_block_size; icb++)
1463              RAP_val[irb+row_block_size*icb]=ZERO;                      RAP_val[irb+row_block_size*icb]=ZERO;
1464                  for (icb=0; icb<P_block_size; icb++) {                  for (icb=0; icb<P_block_size; icb++) {
1465            rtmp=temp_val[icb];                    rtmp=temp_val[icb];
1466                    for (irb=0; irb<row_block_size; irb++) {                    for (irb=0; irb<row_block_size; irb++) {
1467                      RAP_val[irb+row_block_size*icb] += RA_val[irb+row_block_size*icb] * rtmp;                      RAP_val[irb+row_block_size*icb] += RA_val[irb+row_block_size*icb] * rtmp;
1468                    }                    }
# Line 1487  Paso_SystemMatrix* Paso_Preconditioner_A Line 1472  Paso_SystemMatrix* Paso_Preconditioner_A
1472                  for (ib=0; ib<block_size; ib++) {                  for (ib=0; ib<block_size; ib++) {
1473                    temp_val[ib] += RAP_val[ib];                    temp_val[ib] += RAP_val[ib];
1474                  }                  }
1475          }              }
1476        }            }
1477      }          }
1478
1479      /* now loop over elements in row i1 of A->mainBlock, repeat          /* now loop over elements in row i1 of A->mainBlock, repeat
1480         the process we have done to block A->col_coupleBlock */             the process we have done to block A->col_coupleBlock */
1481      j2_ub = A->mainBlock->pattern->ptr[i1+1];          j2_ub = A->mainBlock->pattern->ptr[i1+1];
1482      for (j2=A->mainBlock->pattern->ptr[i1]; j2<j2_ub; j2++) {          for (j2=A->mainBlock->pattern->ptr[i1]; j2<j2_ub; j2++) {
1483        i2 = A->mainBlock->pattern->index[j2];            i2 = A->mainBlock->pattern->index[j2];
1484            temp_val = &(A->mainBlock->val[j2*block_size]);            temp_val = &(A->mainBlock->val[j2*block_size]);
1485        for (irb=0; irb<row_block_size; irb++)            for (irb=0; irb<row_block_size; irb++)
1486          for (icb=0; icb<col_block_size; icb++)              for (icb=0; icb<col_block_size; icb++)
1487          RA_val[irb+row_block_size*icb]=ZERO;                  RA_val[irb+row_block_size*icb]=ZERO;
1488            for (irb=0; irb<P_block_size; irb++) {            for (irb=0; irb<P_block_size; irb++) {
1489          rtmp=R_val[irb];              rtmp=R_val[irb];
1490              for (icb=0; icb<col_block_size; icb++) {              for (icb=0; icb<col_block_size; icb++) {
1491                  RA_val[irb+row_block_size*icb] += rtmp * temp_val[irb+col_block_size*icb];                  RA_val[irb+row_block_size*icb] += rtmp * temp_val[irb+col_block_size*icb];
1492              }              }
1493            }            }
1494
1495        if (A_marker[i2 + num_Acouple_cols] != i_r) {            if (A_marker[i2 + num_Acouple_cols] != i_r) {
1496          A_marker[i2 + num_Acouple_cols] = i_r;              A_marker[i2 + num_Acouple_cols] = i_r;
1497          j3_ub = P->mainBlock->pattern->ptr[i2+1];              j3_ub = P->mainBlock->pattern->ptr[i2+1];
1498          for (j3=P->mainBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->mainBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
1499          i_c = P->mainBlock->pattern->index[j3];                  i_c = P->mainBlock->pattern->index[j3];
1500                  temp_val = &(P->mainBlock->val[j3*P_block_size]);                  temp_val = &(P->mainBlock->val[j3*P_block_size]);
1501          for (irb=0; irb<row_block_size; irb++)                  for (irb=0; irb<row_block_size; irb++)
1502            for (icb=0; icb<col_block_size; icb++)                    for (icb=0; icb<col_block_size; icb++)
1503              RAP_val[irb+row_block_size*icb]=ZERO;                      RAP_val[irb+row_block_size*icb]=ZERO;
1504                  for (icb=0; icb<P_block_size; icb++) {                  for (icb=0; icb<P_block_size; icb++) {
1505            rtmp=temp_val[icb];                    rtmp=temp_val[icb];
1506                    for (irb=0; irb<row_block_size; irb++) {                    for (irb=0; irb<row_block_size; irb++) {
1507                      RAP_val[irb+row_block_size*icb] += RA_val[irb+row_block_size*icb] * rtmp;                      RAP_val[irb+row_block_size*icb] += RA_val[irb+row_block_size*icb] * rtmp;
1508                    }                    }
1509                  }                  }
1510          if (P_marker[i_c] < row_marker) {                  if (P_marker[i_c] < row_marker) {
1511            P_marker[i_c] = i;                    P_marker[i_c] = i;
1512            memcpy(&(RAP_main_val[i*block_size]), RAP_val, block_size*sizeof(double));                    memcpy(&(RAP_main_val[i*block_size]), RAP_val, block_size*sizeof(double));
1513            RAP_main_idx[i] = i_c;                    RAP_main_idx[i] = i_c;
1514            i++;                    i++;
1515          } else {                  } else {
1516                    temp_val = &(RAP_main_val[P_marker[i_c] * block_size]);                    temp_val = &(RAP_main_val[P_marker[i_c] * block_size]);
1517                    for (ib=0; ib<block_size; ib++) {                    for (ib=0; ib<block_size; ib++) {
1518                      temp_val[ib] += RAP_val[ib];                      temp_val[ib] += RAP_val[ib];
1519                    }                    }
1520          }                  }
1521          }              }
1522          j3_ub = P->col_coupleBlock->pattern->ptr[i2+1];              j3_ub = P->col_coupleBlock->pattern->ptr[i2+1];
1523          for (j3=P->col_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->col_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
1524          /* note that we need to map the column index in                  /* note that we need to map the column index in
1525             P->col_coupleBlock back into the column index in                     P->col_coupleBlock back into the column index in
1526             P_ext_couple */                     P_ext_couple */
1527          i_c = Pcouple_to_RAP[P->col_coupleBlock->pattern->index[j3]] + num_Pmain_cols;                  i_c = Pcouple_to_RAP[P->col_coupleBlock->pattern->index[j3]] + num_Pmain_cols;
1528                  temp_val = &(P->col_coupleBlock->val[j3*P_block_size]);                  temp_val = &(P->col_coupleBlock->val[j3*P_block_size]);
1529          for (irb=0; irb<row_block_size; irb++)                  for (irb=0; irb<row_block_size; irb++)
1530            for (icb=0; icb<col_block_size; icb++)                    for (icb=0; icb<col_block_size; icb++)
1531              RAP_val[irb+row_block_size*icb]=ZERO;                      RAP_val[irb+row_block_size*icb]=ZERO;
1532                  for (icb=0; icb<P_block_size; icb++) {                  for (icb=0; icb<P_block_size; icb++) {
1533            rtmp=temp_val[icb];                    rtmp=temp_val[icb];
1534                    for (irb=0; irb<row_block_size; irb++) {                    for (irb=0; irb<row_block_size; irb++) {
1535                      RAP_val[irb+row_block_size*icb] += RA_val[irb+row_block_size*icb] * rtmp;                      RAP_val[irb+row_block_size*icb] += RA_val[irb+row_block_size*icb] * rtmp;
1536                    }                    }
1537                  }                  }
1538
1539          if (P_marker[i_c] < row_marker_ext) {                  if (P_marker[i_c] < row_marker_ext) {
1540            P_marker[i_c] = j;                    P_marker[i_c] = j;
1541            memcpy(&(RAP_couple_val[j*block_size]), RAP_val, block_size*sizeof(double));                    memcpy(&(RAP_couple_val[j*block_size]), RAP_val, block_size*sizeof(double));
1542            RAP_couple_idx[j] = i_c - num_Pmain_cols;                    RAP_couple_idx[j] = i_c - num_Pmain_cols;
1543            j++;                    j++;
1544          } else {                  } else {
1545                    temp_val = &(RAP_couple_val[P_marker[i_c] * block_size]);                    temp_val = &(RAP_couple_val[P_marker[i_c] * block_size]);
1546                    for (ib=0; ib<block_size; ib++) {                    for (ib=0; ib<block_size; ib++) {
1547                      temp_val[ib] += RAP_val[ib];                      temp_val[ib] += RAP_val[ib];
1548                    }                    }
1549          }                  }
1550          }              }
1551
1552        } else {            } else {
1553          j3_ub = P->mainBlock->pattern->ptr[i2+1];              j3_ub = P->mainBlock->pattern->ptr[i2+1];
1554          for (j3=P->mainBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->mainBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
1555          i_c = P->mainBlock->pattern->index[j3];                  i_c = P->mainBlock->pattern->index[j3];
1556                  temp_val = &(P->mainBlock->val[j3*P_block_size]);                  temp_val = &(P->mainBlock->val[j3*P_block_size]);
1557          for (irb=0; irb<row_block_size; irb++)                  for (irb=0; irb<row_block_size; irb++)
1558            for (icb=0; icb<col_block_size; icb++)                    for (icb=0; icb<col_block_size; icb++)
1559              RAP_val[irb+row_block_size*icb]=ZERO;                      RAP_val[irb+row_block_size*icb]=ZERO;
1560                  for (icb=0; icb<P_block_size; icb++) {                  for (icb=0; icb<P_block_size; icb++) {
1561            rtmp=temp_val[icb];                    rtmp=temp_val[icb];
1562                    for (irb=0; irb<row_block_size; irb++) {                    for (irb=0; irb<row_block_size; irb++) {
1563                      RAP_val[irb+row_block_size*icb] += RA_val[irb+row_block_size*icb] * rtmp;                      RAP_val[irb+row_block_size*icb] += RA_val[irb+row_block_size*icb] * rtmp;
1564                    }                    }
# Line 1583  Paso_SystemMatrix* Paso_Preconditioner_A Line 1568  Paso_SystemMatrix* Paso_Preconditioner_A
1568                  for (ib=0; ib<block_size; ib++) {                  for (ib=0; ib<block_size; ib++) {
1569                    temp_val[ib] += RAP_val[ib];                    temp_val[ib] += RAP_val[ib];
1570                  }                  }
1571          }              }
1572          j3_ub = P->col_coupleBlock->pattern->ptr[i2+1];              j3_ub = P->col_coupleBlock->pattern->ptr[i2+1];
1573          for (j3=P->col_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->col_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
1574          i_c = Pcouple_to_RAP[P->col_coupleBlock->pattern->index[j3]] + num_Pmain_cols;                  i_c = Pcouple_to_RAP[P->col_coupleBlock->pattern->index[j3]] + num_Pmain_cols;
1575                  temp_val = &(P->col_coupleBlock->val[j3*P_block_size]);                  temp_val = &(P->col_coupleBlock->val[j3*P_block_size]);
1576          for (irb=0; irb<row_block_size; irb++)                  for (irb=0; irb<row_block_size; irb++)
1577            for (icb=0; icb<col_block_size; icb++)                    for (icb=0; icb<col_block_size; icb++)
1578              RAP_val[irb+row_block_size*icb]=ZERO;                      RAP_val[irb+row_block_size*icb]=ZERO;
1579                  for (icb=0; icb<P_block_size; icb++) {                  for (icb=0; icb<P_block_size; icb++) {
1580            rtmp = temp_val[icb];                    rtmp = temp_val[icb];
1581                    for (irb=0; irb<row_block_size; irb++) {                    for (irb=0; irb<row_block_size; irb++) {
1582                      RAP_val[irb+row_block_size*icb] += RA_val[irb+row_block_size*icb] * rtmp;                      RAP_val[irb+row_block_size*icb] += RA_val[irb+row_block_size*icb] * rtmp;
1583                    }                    }
# Line 1602  Paso_SystemMatrix* Paso_Preconditioner_A Line 1587  Paso_SystemMatrix* Paso_Preconditioner_A
1587                  for (ib=0; ib<block_size; ib++) {                  for (ib=0; ib<block_size; ib++) {
1588                    temp_val[ib] += RAP_val[ib];                    temp_val[ib] += RAP_val[ib];
1589                  }                  }
1590          }              }
1591        }            }
1592      }          }
1593       }       }
1594
1595       /* sort RAP_XXXX_idx and reorder RAP_XXXX_val accordingly */       /* sort RAP_XXXX_idx and reorder RAP_XXXX_val accordingly */
1596       if (i > row_marker) {       if (i > row_marker) {
1597      offset = i - row_marker;          offset = i - row_marker;
1598      temp = new index_t[offset];          temp = new index_t[offset];
1599      #pragma omp parallel for schedule(static) private(iptr)          #pragma omp parallel for schedule(static) private(iptr)
1600      for (iptr=0; iptr<offset; iptr++)          for (iptr=0; iptr<offset; iptr++)
1601        temp[iptr] = RAP_main_idx[row_marker+iptr];            temp[iptr] = RAP_main_idx[row_marker+iptr];
1602      if (offset > 0) {          if (offset > 0) {
1603        #ifdef USE_QSORTG              qsort(temp, (size_t)offset, sizeof(index_t), paso::comparIndex);
1604          qsortG(temp, (size_t)offset, sizeof(index_t), paso::comparIndex);          }
1605        #else          temp_val = new double[offset * block_size];
1606          qsort(temp, (size_t)offset, sizeof(index_t), paso::comparIndex);          #pragma omp parallel for schedule(static) private(iptr,k)
1607        #endif          for (iptr=0; iptr<offset; iptr++){
1608      }            k = P_marker[temp[iptr]];
1609      temp_val = new double[offset * block_size];            memcpy(&(temp_val[iptr*block_size]), &(RAP_main_val[k*block_size]), block_size*sizeof(double));
1610      #pragma omp parallel for schedule(static) private(iptr,k)            P_marker[temp[iptr]] = iptr + row_marker;
1611      for (iptr=0; iptr<offset; iptr++){          }
1612        k = P_marker[temp[iptr]];          #pragma omp parallel for schedule(static) private(iptr)
1613        memcpy(&(temp_val[iptr*block_size]), &(RAP_main_val[k*block_size]), block_size*sizeof(double));          for (iptr=0; iptr<offset; iptr++){
1614        P_marker[temp[iptr]] = iptr + row_marker;            RAP_main_idx[row_marker+iptr] = temp[iptr];
1615      }            memcpy(&(RAP_main_val[(row_marker+iptr)*block_size]), &(temp_val[iptr*block_size]), block_size*sizeof(double));
1616      #pragma omp parallel for schedule(static) private(iptr)          }
1617      for (iptr=0; iptr<offset; iptr++){          delete[] temp;
1618        RAP_main_idx[row_marker+iptr] = temp[iptr];          delete[] temp_val;
memcpy(&(RAP_main_val[(row_marker+iptr)*block_size]), &(temp_val[iptr*block_size]), block_size*sizeof(double));
}
delete[] temp;
delete[] temp_val;
1619       }       }
1620       if (j > row_marker_ext) {       if (j > row_marker_ext) {
1621          offset = j - row_marker_ext;          offset = j - row_marker_ext;
1622          temp = new index_t[offset];          temp = new index_t[offset];
1623      #pragma omp parallel for schedule(static) private(iptr)          #pragma omp parallel for schedule(static) private(iptr)
1624          for (iptr=0; iptr<offset; iptr++)          for (iptr=0; iptr<offset; iptr++)
1625            temp[iptr] = RAP_couple_idx[row_marker_ext+iptr];            temp[iptr] = RAP_couple_idx[row_marker_ext+iptr];
1626          if (offset > 0) {          if (offset > 0) {
#ifdef USE_QSORTG
qsortG(temp, (size_t)offset, sizeof(index_t), paso::comparIndex);
#else
1627              qsort(temp, (size_t)offset, sizeof(index_t), paso::comparIndex);              qsort(temp, (size_t)offset, sizeof(index_t), paso::comparIndex);
#endif
1628          }          }
1629          temp_val = new double[offset * block_size];          temp_val = new double[offset * block_size];
1630      #pragma omp parallel for schedule(static) private(iptr, k)          #pragma omp parallel for schedule(static) private(iptr, k)
1631          for (iptr=0; iptr<offset; iptr++){          for (iptr=0; iptr<offset; iptr++){
1632            k = P_marker[temp[iptr] + num_Pmain_cols];            k = P_marker[temp[iptr] + num_Pmain_cols];
1633        memcpy(&(temp_val[iptr*block_size]), &(RAP_couple_val[k*block_size]), block_size*sizeof(double));            memcpy(&(temp_val[iptr*block_size]), &(RAP_couple_val[k*block_size]), block_size*sizeof(double));
1634            P_marker[temp[iptr] + num_Pmain_cols] = iptr + row_marker_ext;            P_marker[temp[iptr] + num_Pmain_cols] = iptr + row_marker_ext;
1635          }          }
1636      #pragma omp parallel for schedule(static) private(iptr)          #pragma omp parallel for schedule(static) private(iptr)
1637          for (iptr=0; iptr<offset; iptr++){          for (iptr=0; iptr<offset; iptr++){
1638            RAP_couple_idx[row_marker_ext+iptr] = temp[iptr];            RAP_couple_idx[row_marker_ext+iptr] = temp[iptr];
1639        memcpy(&(RAP_couple_val[(row_marker_ext+iptr)*block_size]), &(temp_val[iptr*block_size]), block_size*sizeof(double));            memcpy(&(RAP_couple_val[(row_marker_ext+iptr)*block_size]), &(temp_val[iptr*block_size]), block_size*sizeof(double));
1640          }          }
1641          delete[] temp;          delete[] temp;
1642          delete[] temp_val;          delete[] temp_val;
# Line 1674  Paso_SystemMatrix* Paso_Preconditioner_A Line 1651  Paso_SystemMatrix* Paso_Preconditioner_A
1651     delete[] RAP_ext_ptr;     delete[] RAP_ext_ptr;
1652     delete[] RAP_ext_idx;     delete[] RAP_ext_idx;
1653     delete[] RAP_ext_val;     delete[] RAP_ext_val;
1654     paso::SparseMatrix_free(R_main);     R_main.reset();
1655     paso::SparseMatrix_free(R_couple);     R_couple.reset();
1656
1657     /* Check whether there are empty columns in RAP_couple */     /* Check whether there are empty columns in RAP_couple */
1658     #pragma omp parallel for schedule(static) private(i)     #pragma omp parallel for schedule(static) private(i)
# Line 1686  Paso_SystemMatrix* Paso_Preconditioner_A Line 1663  Paso_SystemMatrix* Paso_Preconditioner_A
1663     for (i=0; i<j; i++) {     for (i=0; i<j; i++) {
1664       i1 = RAP_couple_idx[i];       i1 = RAP_couple_idx[i];
1665       if (P_marker[i1]) {       if (P_marker[i1]) {
1666      P_marker[i1] = 0;          P_marker[i1] = 0;
1667      k++;          k++;
1668       }       }
1669     }     }
1670
# Line 1696  Paso_SystemMatrix* Paso_Preconditioner_A Line 1673  Paso_SystemMatrix* Paso_Preconditioner_A
1673       temp = new index_t[k];       temp = new index_t[k];
1674       k = 0;       k = 0;
1675       for (i=0; i<num_RAPext_cols; i++)       for (i=0; i<num_RAPext_cols; i++)
1676      if (!P_marker[i]) {          if (!P_marker[i]) {
1677        P_marker[i] = k;            P_marker[i] = k;
1678        temp[k] = global_id_RAP[i];            temp[k] = global_id_RAP[i];
1679        k++;            k++;
1680      }          }
1681       #pragma omp parallel for schedule(static) private(i, i1)       #pragma omp parallel for schedule(static) private(i, i1)
1682       for (i=0; i<j; i++) {       for (i=0; i<j; i++) {
1683      i1 = RAP_couple_idx[i];          i1 = RAP_couple_idx[i];
1684      RAP_couple_idx[i] = P_marker[i1];          RAP_couple_idx[i] = P_marker[i1];
1685       }       }
1686       num_RAPext_cols = k;       num_RAPext_cols = k;
1687       delete[] global_id_RAP;       delete[] global_id_RAP;
# Line 1729  Paso_SystemMatrix* Paso_Preconditioner_A Line 1706  Paso_SystemMatrix* Paso_Preconditioner_A
1706     for (i=0, j=0, k=dist[j+1]; i<num_RAPext_cols; i++) {     for (i=0, j=0, k=dist[j+1]; i<num_RAPext_cols; i++) {
1707       shared[i] = i + num_Pmain_cols;       shared[i] = i + num_Pmain_cols;
1708       if (k <= global_id_RAP[i]) {       if (k <= global_id_RAP[i]) {
1709      if (recv_len[j] > 0) {          if (recv_len[j] > 0) {
1710        neighbor[num_neighbors] = j;            neighbor[num_neighbors] = j;
1711        num_neighbors ++;            num_neighbors ++;
1712        offsetInShared[num_neighbors] = i;            offsetInShared[num_neighbors] = i;
1713      }          }
1714      while (k <= global_id_RAP[i]) {          while (k <= global_id_RAP[i]) {
1715        j++;            j++;
1716        k = dist[j+1];            k = dist[j+1];
1717      }          }
1718       }       }
1719       recv_len[j] ++;       recv_len[j] ++;
1720     }     }
# Line 1746  Paso_SystemMatrix* Paso_Preconditioner_A Line 1723  Paso_SystemMatrix* Paso_Preconditioner_A
1723       num_neighbors ++;       num_neighbors ++;
1724       offsetInShared[num_neighbors] = i;       offsetInShared[num_neighbors] = i;
1725     }     }
1726     recv = Paso_SharedComponents_alloc(num_Pmain_cols, num_neighbors,
1727              neighbor, shared, offsetInShared, 1, 0, mpi_info);     paso::SharedComponents_ptr recv(new paso::SharedComponents(
1728                   num_Pmain_cols, num_neighbors, neighbor, shared,
1729                   offsetInShared, 1, 0, mpi_info));
1730
1731     #ifdef ESYS_MPI     #ifdef ESYS_MPI
1732     MPI_Alltoall(recv_len, 1, MPI_INT, send_len, 1, MPI_INT, mpi_info->comm);     MPI_Alltoall(recv_len, 1, MPI_INT, send_len, 1, MPI_INT, mpi_info->comm);
# Line 1765  Paso_SystemMatrix* Paso_Preconditioner_A Line 1744  Paso_SystemMatrix* Paso_Preconditioner_A
1744     offsetInShared[0] = 0;     offsetInShared[0] = 0;
1745     for (i=0; i<size; i++) {     for (i=0; i<size; i++) {
1746       if (send_len[i] > 0) {       if (send_len[i] > 0) {
1747      neighbor[num_neighbors] = i;          neighbor[num_neighbors] = i;
1748      num_neighbors ++;          num_neighbors ++;
1749      j += send_len[i];          j += send_len[i];
1750      offsetInShared[num_neighbors] = j;          offsetInShared[num_neighbors] = j;
1751       }       }
1752     }     }
1753     delete[] shared;     delete[] shared;
# Line 1777  Paso_SystemMatrix* Paso_Preconditioner_A Line 1756  Paso_SystemMatrix* Paso_Preconditioner_A
1756       k = neighbor[i];       k = neighbor[i];
1757       #ifdef ESYS_MPI       #ifdef ESYS_MPI
1758       MPI_Irecv(&shared[j], send_len[k] , MPI_INT, k,       MPI_Irecv(&shared[j], send_len[k] , MPI_INT, k,
1759          mpi_info->msg_tag_counter+k,                  mpi_info->msg_tag_counter+k,
1760          mpi_info->comm, &mpi_requests[i]);                  mpi_info->comm, &mpi_requests[i]);
1761       #endif       #endif
1762       j += send_len[k];       j += send_len[k];
1763     }     }
# Line 1786  Paso_SystemMatrix* Paso_Preconditioner_A Line 1765  Paso_SystemMatrix* Paso_Preconditioner_A
1765       k = recv->neighbor[i];       k = recv->neighbor[i];
1766       #ifdef ESYS_MPI       #ifdef ESYS_MPI
1767       MPI_Issend(&(global_id_RAP[j]), recv_len[k], MPI_INT, k,       MPI_Issend(&(global_id_RAP[j]), recv_len[k], MPI_INT, k,
1768          mpi_info->msg_tag_counter+rank,                  mpi_info->msg_tag_counter+rank,
1769          mpi_info->comm, &mpi_requests[i+num_neighbors]);                  mpi_info->comm, &mpi_requests[i+num_neighbors]);
1770       #endif       #endif
1771       j += recv_len[k];       j += recv_len[k];
1772     }     }
1773     #ifdef ESYS_MPI     #ifdef ESYS_MPI
1774     MPI_Waitall(num_neighbors + recv->numNeighbors,     MPI_Waitall(num_neighbors + recv->numNeighbors,
1775          mpi_requests, mpi_stati);                  mpi_requests, mpi_stati);
1776     #endif     #endif
1777     ESYS_MPI_INC_COUNTER(*mpi_info, size);     ESYS_MPI_INC_COUNTER(*mpi_info, size);
1778
# Line 1801  Paso_SystemMatrix* Paso_Preconditioner_A Line 1780  Paso_SystemMatrix* Paso_Preconditioner_A
1780     offset = dist[rank];     offset = dist[rank];
1781     #pragma omp parallel for schedule(static) private(i)     #pragma omp parallel for schedule(static) private(i)
1782     for (i=0; i<j; i++) shared[i] = shared[i] - offset;     for (i=0; i<j; i++) shared[i] = shared[i] - offset;
send = Paso_SharedComponents_alloc(num_Pmain_cols, num_neighbors,
neighbor, shared, offsetInShared, 1, 0, mpi_info);
1783
1784     col_connector = paso::Connector_alloc(send, recv);     paso::SharedComponents_ptr send(new paso::SharedComponents(
1785                   num_Pmain_cols, num_neighbors, neighbor, shared,
1786                   offsetInShared, 1, 0, mpi_info));
1787
1788       col_connector.reset(new paso::Connector(send, recv));
1789     delete[] shared;     delete[] shared;
Paso_SharedComponents_free(recv);
Paso_SharedComponents_free(send);
1790
1791     /* now, create row distribution (output_distri) and col     /* now, create row distribution (output_distri) and col
1792        distribution (input_distribution) */        distribution (input_distribution) */
# Line 1833  Paso_SystemMatrix* Paso_Preconditioner_A Line 1812  Paso_SystemMatrix* Paso_Preconditioner_A
1812       i1 = RAP_couple_ptr[i_r];       i1 = RAP_couple_ptr[i_r];
1813       i2 = RAP_couple_ptr[i_r+1];       i2 = RAP_couple_ptr[i_r+1];
1814       if (i2 > i1) {       if (i2 > i1) {
1815      /* then row i_r will be in the sender of row_connector, now check          /* then row i_r will be in the sender of row_connector, now check
1816         how many neighbours i_r needs to be send to */             how many neighbours i_r needs to be send to */
1817      for (j=i1; j<i2; j++) {          for (j=i1; j<i2; j++) {
1818        i_c = RAP_couple_idx[j];            i_c = RAP_couple_idx[j];
1819        /* find out the corresponding neighbor "p" of column i_c */            /* find out the corresponding neighbor "p" of column i_c */
1820        for (p=0; p<recv->numNeighbors; p++) {            for (p=0; p<recv->numNeighbors; p++) {
1821          if (i_c < recv->offsetInShared[p+1]) {              if (i_c < recv->offsetInShared[p+1]) {
1822            k = recv->neighbor[p];                k = recv->neighbor[p];
1823            if (send_ptr[k][i_r] == 0) sum++;                if (send_ptr[k][i_r] == 0) sum++;
1824            send_ptr[k][i_r] ++;                send_ptr[k][i_r] ++;
1825            send_idx[k][len[k]] = global_id_RAP[i_c];                send_idx[k][len[k]] = global_id_RAP[i_c];
1826            len[k] ++;                len[k] ++;
1827            break;                break;
1828          }              }
1829        }            }
1830      }          }
1831       }       }
1832     }     }
1833     if (global_id_RAP) {     if (global_id_RAP) {
# Line 1863  Paso_SystemMatrix* Paso_Preconditioner_A Line 1842  Paso_SystemMatrix* Paso_Preconditioner_A
1842     offsetInShared[0] = 0;     offsetInShared[0] = 0;
1843     for (p=0, k=0; p<size; p++) {     for (p=0, k=0; p<size; p++) {
1844       for (i=0; i<num_Pmain_cols; i++) {       for (i=0; i<num_Pmain_cols; i++) {
1845      if (send_ptr[p][i] > 0) {          if (send_ptr[p][i] > 0) {
1846        shared[k] = i;            shared[k] = i;
1847        k++;            k++;
1848        send_ptr[p][send_len[p]] = send_ptr[p][i];            send_ptr[p][send_len[p]] = send_ptr[p][i];
1849        send_len[p]++;            send_len[p]++;
1850      }          }
1851       }       }
1852       if (k > offsetInShared[num_neighbors]) {       if (k > offsetInShared[num_neighbors]) {
1853      neighbor[num_neighbors] = p;          neighbor[num_neighbors] = p;
1854      num_neighbors ++;          num_neighbors ++;
1855      offsetInShared[num_neighbors] = k;          offsetInShared[num_neighbors] = k;
1856       }       }
1857     }     }
1858     send = Paso_SharedComponents_alloc(num_Pmain_cols, num_neighbors,     send.reset(new paso::SharedComponents(num_Pmain_cols, num_neighbors,
1859                          neighbor, shared, offsetInShared, 1, 0, mpi_info);                 neighbor, shared, offsetInShared, 1, 0, mpi_info));
1860
1861     /* send/recv number of rows will be sent from current proc     /* send/recv number of rows will be sent from current proc
1862        recover info for the receiver of row_connector from the sender */        recover info for the receiver of row_connector from the sender */
# Line 1889  Paso_SystemMatrix* Paso_Preconditioner_A Line 1868  Paso_SystemMatrix* Paso_Preconditioner_A
1868     j = 0;     j = 0;
1869     for (i=0; i<size; i++) {     for (i=0; i<size; i++) {
1870       if (i != rank && recv_len[i] > 0) {       if (i != rank && recv_len[i] > 0) {
1871      neighbor[num_neighbors] = i;          neighbor[num_neighbors] = i;
1872      num_neighbors ++;          num_neighbors ++;
1873      j += recv_len[i];          j += recv_len[i];
1874      offsetInShared[num_neighbors] = j;          offsetInShared[num_neighbors] = j;
1875       }       }
1876     }     }
1877     delete[] shared;     delete[] shared;
# Line 1903  Paso_SystemMatrix* Paso_Preconditioner_A Line 1882  Paso_SystemMatrix* Paso_Preconditioner_A
1882     for (i=0; i<k; i++) {     for (i=0; i<k; i++) {
1883       shared[i] = i + num_Pmain_cols;       shared[i] = i + num_Pmain_cols;
1884     }     }
1885     recv = Paso_SharedComponents_alloc(num_Pmain_cols, num_neighbors,     recv.reset(new paso::SharedComponents(num_Pmain_cols, num_neighbors,
1886                          neighbor, shared, offsetInShared, 1, 0, mpi_info);                 neighbor, shared, offsetInShared, 1, 0, mpi_info));
1887     row_connector = paso::Connector_alloc(send, recv);     row_connector.reset(new paso::Connector(send, recv));
1888     delete[] shared;     delete[] shared;
Paso_SharedComponents_free(recv);
Paso_SharedComponents_free(send);
1889
1890     /* send/recv pattern->ptr for rowCoupleBlock */     /* send/recv pattern->ptr for rowCoupleBlock */
1891     num_RAPext_rows = offsetInShared[num_neighbors];     num_RAPext_rows = offsetInShared[num_neighbors];
# Line 1918  Paso_SystemMatrix* Paso_Preconditioner_A Line 1895  Paso_SystemMatrix* Paso_Preconditioner_A
1895       i = offsetInShared[p+1];       i = offsetInShared[p+1];
1896       #ifdef ESYS_MPI       #ifdef ESYS_MPI
1897       MPI_Irecv(&(row_couple_ptr[j]), i-j, MPI_INT, neighbor[p],       MPI_Irecv(&(row_couple_ptr[j]), i-j, MPI_INT, neighbor[p],
1898          mpi_info->msg_tag_counter+neighbor[p],                  mpi_info->msg_tag_counter+neighbor[p],
1899          mpi_info->comm, &mpi_requests[p]);                  mpi_info->comm, &mpi_requests[p]);
1900       #endif       #endif
1901     }     }
1902     send = row_connector->send;     send = row_connector->send;
1903     for (p=0; p<send->numNeighbors; p++) {     for (p=0; p<send->numNeighbors; p++) {
1904       #ifdef ESYS_MPI       #ifdef ESYS_MPI
1905       MPI_Issend(send_ptr[send->neighbor[p]], send_len[send->neighbor[p]],       MPI_Issend(send_ptr[send->neighbor[p]], send_len[send->neighbor[p]],
1906          MPI_INT, send->neighbor[p],                  MPI_INT, send->neighbor[p],
1907          mpi_info->msg_tag_counter+rank,                  mpi_info->msg_tag_counter+rank,
1908          mpi_info->comm, &mpi_requests[p+num_neighbors]);                  mpi_info->comm, &mpi_requests[p+num_neighbors]);
1909       #endif       #endif
1910     }     }
1911     #ifdef ESYS_MPI     #ifdef ESYS_MPI
1912     MPI_Waitall(num_neighbors + send->numNeighbors,     MPI_Waitall(num_neighbors + send->numNeighbors,
1913      mpi_requests, mpi_stati);          mpi_requests, mpi_stati);
1914     #endif     #endif
1915     ESYS_MPI_INC_COUNTER(*mpi_info, size);     ESYS_MPI_INC_COUNTER(*mpi_info, size);
1916     delete[] send_len;     delete[] send_len;
# Line 1954  Paso_SystemMatrix* Paso_Preconditioner_A Line 1931  Paso_SystemMatrix* Paso_Preconditioner_A
1931       j2 = row_couple_ptr[offsetInShared[p+1]];       j2 = row_couple_ptr[offsetInShared[p+1]];
1932       #ifdef ESYS_MPI       #ifdef ESYS_MPI
1933       MPI_Irecv(&(row_couple_idx[j1]), j2-j1, MPI_INT, neighbor[p],       MPI_Irecv(&(row_couple_idx[j1]), j2-j1, MPI_INT, neighbor[p],
1934          mpi_info->msg_tag_counter+neighbor[p],                  mpi_info->msg_tag_counter+neighbor[p],
1935          mpi_info->comm, &mpi_requests[p]);                  mpi_info->comm, &mpi_requests[p]);
1936       #endif       #endif
1937     }     }
1938     for (p=0; p<send->numNeighbors; p++) {     for (p=0; p<send->numNeighbors; p++) {
1939       #ifdef ESYS_MPI       #ifdef ESYS_MPI
1940       MPI_Issend(send_idx[send->neighbor[p]], len[send->neighbor[p]],       MPI_Issend(send_idx[send->neighbor[p]], len[send->neighbor[p]],
1941          MPI_INT, send->neighbor[p],                  MPI_INT, send->neighbor[p],
1942          mpi_info->msg_tag_counter+rank,                  mpi_info->msg_tag_counter+rank,
1943          mpi_info->comm, &mpi_requests[p+num_neighbors]);                  mpi_info->comm, &mpi_requests[p+num_neighbors]);
1944       #endif       #endif
1945     }     }
1946     #ifdef ESYS_MPI     #ifdef ESYS_MPI
1947     MPI_Waitall(num_neighbors + send->numNeighbors,     MPI_Waitall(num_neighbors + send->numNeighbors,
1948          mpi_requests, mpi_stati);                  mpi_requests, mpi_stati);
1949     #endif     #endif
1950     ESYS_MPI_INC_COUNTER(*mpi_info, size);     ESYS_MPI_INC_COUNTER(*mpi_info, size);
1951
1952     offset = input_dist->first_component[rank];      offset = input_dist->first_component[rank];
1953     k = row_couple_ptr[num_RAPext_rows];      k = row_couple_ptr[num_RAPext_rows];
1954     #pragma omp parallel for schedule(static) private(i)      #pragma omp parallel for schedule(static) private(i)
1955     for (i=0; i<k; i++) {      for (i=0; i<k; i++) {
1956       row_couple_idx[i] -= offset;          row_couple_idx[i] -= offset;
1957     }      }
1958     #pragma omp parallel for schedule(static) private(i)      #pragma omp parallel for schedule(static) private(i)
1959     for (i=0; i<size; i++) {      for (i=0; i<size; i++) {
1960       delete[] send_ptr[i];          delete[] send_ptr[i];
1961       delete[] send_idx[i];          delete[] send_idx[i];
1962     }      }
1963     delete[] send_ptr;      delete[] send_ptr;
1964     delete[] send_idx;      delete[] send_idx;
1965     delete[] len;      delete[] len;
1966     delete[] offsetInShared;      delete[] offsetInShared;
1967     delete[] neighbor;      delete[] neighbor;
1968     delete[] mpi_requests;      delete[] mpi_requests;
1969     delete[] mpi_stati;      delete[] mpi_stati;
1970     Esys_MPIInfo_free(mpi_info);      Esys_MPIInfo_free(mpi_info);
1971
1972     /* Now, we can create pattern for mainBlock and coupleBlock */      /* Now, we can create pattern for mainBlock and coupleBlock */
1973     main_pattern = paso::Pattern_alloc(MATRIX_FORMAT_DEFAULT, num_Pmain_cols,      paso::Pattern_ptr main_pattern(new paso::Pattern(MATRIX_FORMAT_DEFAULT,
1974              num_Pmain_cols, RAP_main_ptr, RAP_main_idx);                 num_Pmain_cols, num_Pmain_cols, RAP_main_ptr, RAP_main_idx));
1975     col_couple_pattern = paso::Pattern_alloc(MATRIX_FORMAT_DEFAULT,      paso::Pattern_ptr col_couple_pattern(new paso::Pattern(
1976              num_Pmain_cols, num_RAPext_cols,                 MATRIX_FORMAT_DEFAULT, num_Pmain_cols, num_RAPext_cols,
1977              RAP_couple_ptr, RAP_couple_idx);                 RAP_couple_ptr, RAP_couple_idx));
1978     row_couple_pattern = paso::Pattern_alloc(MATRIX_FORMAT_DEFAULT,      paso::Pattern_ptr row_couple_pattern(new paso::Pattern(
1979              num_RAPext_rows, num_Pmain_cols,                 MATRIX_FORMAT_DEFAULT, num_RAPext_rows, num_Pmain_cols,
1980              row_couple_ptr, row_couple_idx);                 row_couple_ptr, row_couple_idx));
1981
1982     /* next, create the system matrix */      /* next, create the system matrix */
1983     pattern = new paso::SystemMatrixPattern(MATRIX_FORMAT_DEFAULT,      pattern.reset(new paso::SystemMatrixPattern(MATRIX_FORMAT_DEFAULT,
1984                  output_dist, input_dist, main_pattern, col_couple_pattern,                  output_dist, input_dist, main_pattern, col_couple_pattern,
1985                  row_couple_pattern, col_connector, row_connector);                  row_couple_pattern, col_connector, row_connector));
1986     out = Paso_SystemMatrix_alloc(A->type, pattern,      out.reset(new paso::SystemMatrix(A->type, pattern, row_block_size,
1987                  row_block_size, col_block_size, FALSE);                                      col_block_size, false));
1988
1989     /* finally, fill in the data*/      /* finally, fill in the data*/
1990     memcpy(out->mainBlock->val, RAP_main_val,      memcpy(out->mainBlock->val, RAP_main_val,
1991          out->mainBlock->len* sizeof(double));                  out->mainBlock->len* sizeof(double));
1992     memcpy(out->col_coupleBlock->val, RAP_couple_val,      memcpy(out->col_coupleBlock->val, RAP_couple_val,
1993          out->col_coupleBlock->len * sizeof(double));                  out->col_coupleBlock->len * sizeof(double));
1994
1995     /* Clean up */      delete[] RAP_main_val;
1996     paso::SystemMatrixPattern_free(pattern);      delete[] RAP_couple_val;
1997     paso::Pattern_free(main_pattern);      if (!Esys_noError()) {
1998     paso::Pattern_free(col_couple_pattern);          out.reset();
1999     paso::Pattern_free(row_couple_pattern);      }
2000     paso::Connector_free(col_connector);      return out;
paso::Connector_free(row_connector);
delete[] RAP_main_val;
delete[] RAP_couple_val;
if (Esys_noError()) {
return out;
} else {
Paso_SystemMatrix_free(out);
return NULL;
}
2001  }  }
2002
2003
2004  Paso_SystemMatrix* Paso_Preconditioner_AMG_buildInterpolationOperatorBlock(  paso::SystemMatrix_ptr Paso_Preconditioner_AMG_buildInterpolationOperatorBlock(
2005      Paso_SystemMatrix* A, Paso_SystemMatrix* P, Paso_SystemMatrix* R)          paso::SystemMatrix_ptr A, paso::SystemMatrix_ptr P,
2006            paso::SystemMatrix_ptr R)
2007  {  {
2008     Esys_MPIInfo *mpi_info=Esys_MPIInfo_getReference(A->mpi_info);     Esys_MPIInfo *mpi_info=Esys_MPIInfo_getReference(A->mpi_info);
2009     paso::SparseMatrix *R_main=NULL, *R_couple=NULL;     paso::SystemMatrix_ptr out;
2010     Paso_SystemMatrix *out=NULL;     paso::SystemMatrixPattern_ptr pattern;
paso::SystemMatrixPattern *pattern=NULL;
2011     paso::Distribution_ptr input_dist, output_dist;     paso::Distribution_ptr input_dist, output_dist;
2012     Paso_SharedComponents *send =NULL, *recv=NULL;     paso::SharedComponents_ptr send, recv;
2013     paso::Connector *col_connector=NULL, *row_connector=NULL;     paso::Connector_ptr col_connector, row_connector;
paso::Pattern *main_pattern=NULL;
paso::Pattern *col_couple_pattern=NULL, *row_couple_pattern =NULL;
2014     const dim_t row_block_size=A->row_block_size;     const dim_t row_block_size=A->row_block_size;
2015     const dim_t col_block_size=A->col_block_size;     const dim_t col_block_size=A->col_block_size;
2016     const dim_t block_size = A->block_size;     const dim_t block_size = A->block_size;
# Line 2077  Paso_SystemMatrix* Paso_Preconditioner_A Line 2043  Paso_SystemMatrix* Paso_Preconditioner_A
2043     /* two sparse matrices R_main and R_couple will be generate, as the     /* two sparse matrices R_main and R_couple will be generate, as the
2044        transpose of P_main and P_col_couple, respectively. Note that,        transpose of P_main and P_col_couple, respectively. Note that,
2045        R_couple is actually the row_coupleBlock of R (R=P^T) */        R_couple is actually the row_coupleBlock of R (R=P^T) */
2046     R_main = paso::SparseMatrix_getTranspose(P->mainBlock);     paso::SparseMatrix_ptr R_main(P->mainBlock->getTranspose());
2047     R_couple = paso::SparseMatrix_getTranspose(P->col_coupleBlock);     paso::SparseMatrix_ptr R_couple(P->col_coupleBlock->getTranspose());
2048
2049     /* generate P_ext, i.e. portion of P that is stored on neighbor procs     /* generate P_ext, i.e. portion of P that is stored on neighbor procs
2050        and needed locally for triple matrix product RAP        and needed locally for triple matrix product RAP
2051        to be specific, P_ext (on processor k) are group of rows in P, where        to be specific, P_ext (on processor k) are group of rows in P, where
2052        the list of rows from processor q is given by        the list of rows from processor q is given by
2053      A->col_coupler->connector->send->shared[rPtr...]          A->col_coupler->connector->send->shared[rPtr...]
2054      rPtr=A->col_coupler->connector->send->OffsetInShared[k]          rPtr=A->col_coupler->connector->send->OffsetInShared[k]
2055        on q.        on q.
2056        P_ext is represented by two sparse matrices P_ext_main and        P_ext is represented by two sparse matrices P_ext_main and
2057        P_ext_couple */        P_ext_couple */
# Line 2104  Paso_SystemMatrix* Paso_Preconditioner_A Line 2070  Paso_SystemMatrix* Paso_Preconditioner_A
2070     num_Pext_cols = 0;     num_Pext_cols = 0;
2071     if (P->global_id) {     if (P->global_id) {
2072       /* first count the number of cols "num_Pext_cols" in both P_ext_couple       /* first count the number of cols "num_Pext_cols" in both P_ext_couple
2073      and P->col_coupleBlock */          and P->col_coupleBlock */
2074       iptr = 0;       iptr = 0;
2075       if (num_Pcouple_cols || sum > 0) {       if (num_Pcouple_cols || sum > 0) {
2076      temp = new index_t[num_Pcouple_cols+sum];          temp = new index_t[num_Pcouple_cols+sum];
2077      for (; iptr<sum; iptr++){          for (; iptr<sum; iptr++){
2078        temp[iptr] = P->remote_coupleBlock->pattern->index[iptr];            temp[iptr] = P->remote_coupleBlock->pattern->index[iptr];
2079      }          }
2080      for (j=0; j<num_Pcouple_cols; j++, iptr++){          for (j=0; j<num_Pcouple_cols; j++, iptr++){
2081        temp[iptr] = P->global_id[j];            temp[iptr] = P->global_id[j];
2082      }          }
2083       }       }
2084       if (iptr) {       if (iptr) {
2085      #ifdef USE_QSORTG            qsort(temp, (size_t)iptr, sizeof(index_t), paso::comparIndex);
2086        qsortG(temp, (size_t)iptr, sizeof(index_t), paso::comparIndex);          num_Pext_cols = 1;
2087      #else          i = temp[0];
2088        qsort(temp, (size_t)iptr, sizeof(index_t), paso::comparIndex);          for (j=1; j<iptr; j++) {
2089      #endif            if (temp[j] > i) {
2090      num_Pext_cols = 1;              i = temp[j];
2091      i = temp[0];              temp[num_Pext_cols++] = i;
2092      for (j=1; j<iptr; j++) {            }
2093        if (temp[j] > i) {          }
i = temp[j];
temp[num_Pext_cols++] = i;
}
}
2094       }       }
2095
2096       /* resize the pattern of P_ext_couple */       /* resize the pattern of P_ext_couple */
2097       if(num_Pext_cols){       if(num_Pext_cols){
2098      global_id_P = new index_t[num_Pext_cols];          global_id_P = new index_t[num_Pext_cols];
2099      for (i=0; i<num_Pext_cols; i++)          for (i=0; i<num_Pext_cols; i++)
2100        global_id_P[i] = temp[i];            global_id_P[i] = temp[i];
2101       }       }
2102       if (num_Pcouple_cols || sum > 0)       if (num_Pcouple_cols || sum > 0)
2103      delete[] temp;          delete[] temp;
2104       for (i=0; i<sum; i++) {       for (i=0; i<sum; i++) {
2105      where_p = (index_t *)bsearch(          where_p = (index_t *)bsearch(
2106              &(P->remote_coupleBlock->pattern->index[i]),                          &(P->remote_coupleBlock->pattern->index[i]),
2107              global_id_P, num_Pext_cols,                          global_id_P, num_Pext_cols,
2108                          sizeof(index_t), paso::comparIndex);                          sizeof(index_t), paso::comparIndex);
2109      P->remote_coupleBlock->pattern->index[i] =          P->remote_coupleBlock->pattern->index[i] =
2110              (index_t)(where_p -global_id_P);                          (index_t)(where_p -global_id_P);
2111       }         }
2112
2113       /* build the mapping */       /* build the mapping */
2114       if (num_Pcouple_cols) {       if (num_Pcouple_cols) {
2115      Pcouple_to_Pext = new index_t[num_Pcouple_cols];          Pcouple_to_Pext = new index_t[num_Pcouple_cols];
2116      iptr = 0;          iptr = 0;
2117      for (i=0; i<num_Pext_cols; i++)          for (i=0; i<num_Pext_cols; i++)
2118        if (global_id_P[i] == P->global_id[iptr]) {            if (global_id_P[i] == P->global_id[iptr]) {
2119          Pcouple_to_Pext[iptr++] = i;              Pcouple_to_Pext[iptr++] = i;
2120          if (iptr == num_Pcouple_cols) break;              if (iptr == num_Pcouple_cols) break;
2121        }            }
2122       }       }
2123     }     }
2124
# Line 2176  Paso_SystemMatrix* Paso_Preconditioner_A Line 2138  Paso_SystemMatrix* Paso_Preconditioner_A
2138       /* then, loop over elements in row i_r of R_couple */       /* then, loop over elements in row i_r of R_couple */
2139       j1_ub = R_couple->pattern->ptr[i_r+1];       j1_ub = R_couple->pattern->ptr[i_r+1];
2140       for (j1=R_couple->pattern->ptr[i_r]; j1<j1_ub; j1++){       for (j1=R_couple->pattern->ptr[i_r]; j1<j1_ub; j1++){
2141      i1 = R_couple->pattern->index[j1];          i1 = R_couple->pattern->index[j1];
2142      /* then, loop over elements in row i1 of A->col_coupleBlock */          /* then, loop over elements in row i1 of A->col_coupleBlock */
2143      j2_ub = A->col_coupleBlock->pattern->ptr[i1+1];          j2_ub = A->col_coupleBlock->pattern->ptr[i1+1];
2144      for (j2=A->col_coupleBlock->pattern->ptr[i1]; j2<j2_ub; j2++) {          for (j2=A->col_coupleBlock->pattern->ptr[i1]; j2<j2_ub; j2++) {
2145        i2 = A->col_coupleBlock->pattern->index[j2];            i2 = A->col_coupleBlock->pattern->index[j2];
2146
2147        /* check whether entry RA[i_r, i2] has been previously visited.            /* check whether entry RA[i_r, i2] has been previously visited.
2148           RAP new entry is possible only if entry RA[i_r, i2] has not               RAP new entry is possible only if entry RA[i_r, i2] has not
2149           been visited yet */               been visited yet */
2150        if (A_marker[i2] != i_r) {            if (A_marker[i2] != i_r) {
2151          /* first, mark entry RA[i_r,i2] as visited */              /* first, mark entry RA[i_r,i2] as visited */
2152          A_marker[i2] = i_r;              A_marker[i2] = i_r;
2153
2154          /* then loop over elements in row i2 of P_ext_main */              /* then loop over elements in row i2 of P_ext_main */
2155          j3_ub = P->row_coupleBlock->pattern->ptr[i2+1];              j3_ub = P->row_coupleBlock->pattern->ptr[i2+1];
2156          for (j3=P->row_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->row_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
2157          i_c = P->row_coupleBlock->pattern->index[j3];                  i_c = P->row_coupleBlock->pattern->index[j3];
2158
2159          /* check whether entry RAP[i_r,i_c] has been created.                  /* check whether entry RAP[i_r,i_c] has been created.
2160             If not yet created, create the entry and increase                     If not yet created, create the entry and increase
2161             the total number of elements in RAP_ext */                     the total number of elements in RAP_ext */
2162          if (P_marker[i_c] < row_marker) {                  if (P_marker[i_c] < row_marker) {
2163            P_marker[i_c] = sum;                    P_marker[i_c] = sum;
2164            sum++;                    sum++;
2165          }                  }
2166          }              }
2167
2168          /* loop over elements in row i2 of P_ext_couple, do the same */              /* loop over elements in row i2 of P_ext_couple, do the same */
2169          j3_ub = P->remote_coupleBlock->pattern->ptr[i2+1];              j3_ub = P->remote_coupleBlock->pattern->ptr[i2+1];
2170          for (j3=P->remote_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->remote_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
2171          i_c = P->remote_coupleBlock->pattern->index[j3] + num_Pmain_cols;                  i_c = P->remote_coupleBlock->pattern->index[j3] + num_Pmain_cols;
2172          if (P_marker[i_c] < row_marker) {                  if (P_marker[i_c] < row_marker) {
2173            P_marker[i_c] = sum;                    P_marker[i_c] = sum;
2174            sum++;                    sum++;
2175          }                  }
2176          }              }
2177        }            }
2178      }          }
2179
2180      /* now loop over elements in row i1 of A->mainBlock, repeat          /* now loop over elements in row i1 of A->mainBlock, repeat
2181         the process we have done to block A->col_coupleBlock */             the process we have done to block A->col_coupleBlock */
2182      j2_ub = A->mainBlock->pattern->ptr[i1+1];          j2_ub = A->mainBlock->pattern->ptr[i1+1];
2183      for (j2=A->mainBlock->pattern->ptr[i1]; j2<j2_ub; j2++) {          for (j2=A->mainBlock->pattern->ptr[i1]; j2<j2_ub; j2++) {
2184        i2 = A->mainBlock->pattern->index[j2];            i2 = A->mainBlock->pattern->index[j2];
2185        if (A_marker[i2 + num_Acouple_cols] != i_r) {            if (A_marker[i2 + num_Acouple_cols] != i_r) {
2186          A_marker[i2 + num_Acouple_cols] = i_r;              A_marker[i2 + num_Acouple_cols] = i_r;
2187          j3_ub = P->mainBlock->pattern->ptr[i2+1];              j3_ub = P->mainBlock->pattern->ptr[i2+1];
2188          for (j3=P->mainBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->mainBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
2189          i_c = P->mainBlock->pattern->index[j3];                  i_c = P->mainBlock->pattern->index[j3];
2190          if (P_marker[i_c] < row_marker) {                  if (P_marker[i_c] < row_marker) {
2191            P_marker[i_c] = sum;                    P_marker[i_c] = sum;
2192            sum++;                    sum++;
2193          }                  }
2194          }              }
2195          j3_ub = P->col_coupleBlock->pattern->ptr[i2+1];              j3_ub = P->col_coupleBlock->pattern->ptr[i2+1];
2196          for (j3=P->col_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->col_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
2197          /* note that we need to map the column index in                  /* note that we need to map the column index in
2198             P->col_coupleBlock back into the column index in                     P->col_coupleBlock back into the column index in
2199             P_ext_couple */                     P_ext_couple */
2200          i_c = Pcouple_to_Pext[P->col_coupleBlock->pattern->index[j3]] + num_Pmain_cols;                  i_c = Pcouple_to_Pext[P->col_coupleBlock->pattern->index[j3]] + num_Pmain_cols;
2201          if (P_marker[i_c] < row_marker) {                  if (P_marker[i_c] < row_marker) {
2202            P_marker[i_c] = sum;                    P_marker[i_c] = sum;
2203            sum++;                    sum++;
2204          }                  }
2205          }              }
2206        }            }
2207      }          }
2208       }       }
2209     }     }
2210
# Line 2267  Paso_SystemMatrix* Paso_Preconditioner_A Line 2229  Paso_SystemMatrix* Paso_Preconditioner_A
2229       /* then, loop over elements in row i_r of R_couple */       /* then, loop over elements in row i_r of R_couple */
2230       j1_ub = R_couple->pattern->ptr[i_r+1];       j1_ub = R_couple->pattern->ptr[i_r+1];
2231       for (j1=R_couple->pattern->ptr[i_r]; j1<j1_ub; j1++){       for (j1=R_couple->pattern->ptr[i_r]; j1<j1_ub; j1++){
2232      i1 = R_couple->pattern->index[j1];          i1 = R_couple->pattern->index[j1];
2233      R_val = &(R_couple->val[j1*block_size]);          R_val = &(R_couple->val[j1*block_size]);
2234
2235            /* then, loop over elements in row i1 of A->col_coupleBlock */
2236            j2_ub = A->col_coupleBlock->pattern->ptr[i1+1];
2237            for (j2=A->col_coupleBlock->pattern->ptr[i1]; j2<j2_ub; j2++) {
2238              i2 = A->col_coupleBlock->pattern->index[j2];
2239              temp_val = &(A->col_coupleBlock->val[j2*block_size]);
2240              for (irb=0; irb<row_block_size; irb++) {
2241                for (icb=0; icb<col_block_size; icb++) {
2242                    rtmp = ZERO;
2243                    for (ib=0; ib<col_block_size; ib++) {
2244                      rtmp+= R_val[irb+row_block_size*ib]*temp_val[ib+col_block_size*icb];
2245                    }
2246                    RA_val[irb+row_block_size*icb]=rtmp;
2247                }
2248              }
2249
2250      /* then, loop over elements in row i1 of A->col_coupleBlock */            /* check whether entry RA[i_r, i2] has been previously visited.
2251      j2_ub = A->col_coupleBlock->pattern->ptr[i1+1];               RAP new entry is possible only if entry RA[i_r, i2] has not
2252      for (j2=A->col_coupleBlock->pattern->ptr[i1]; j2<j2_ub; j2++) {               been visited yet */
2253        i2 = A->col_coupleBlock->pattern->index[j2];            if (A_marker[i2] != i_r) {
2254        temp_val = &(A->col_coupleBlock->val[j2*block_size]);              /* first, mark entry RA[i_r,i2] as visited */
2255        for (irb=0; irb<row_block_size; irb++) {              A_marker[i2] = i_r;
2256          for (icb=0; icb<col_block_size; icb++) {
2257          rtmp = ZERO;              /* then loop over elements in row i2 of P_ext_main */
2258          for (ib=0; ib<col_block_size; ib++) {              j3_ub = P->row_coupleBlock->pattern->ptr[i2+1];
2259            rtmp+= R_val[irb+row_block_size*ib]*temp_val[ib+col_block_size*icb];              for (j3=P->row_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
2260          }                  i_c = P->row_coupleBlock->pattern->index[j3];
2261          RA_val[irb+row_block_size*icb]=rtmp;                  temp_val = &(P->row_coupleBlock->val[j3*block_size]);
2262          }                  for (irb=0; irb<row_block_size; irb++) {
2263        }                    for (icb=0; icb<col_block_size; icb++) {
2264                        rtmp = ZERO;
2265        /* check whether entry RA[i_r, i2] has been previously visited.                      for (ib=0; ib<col_block_size; ib++) {
2266           RAP new entry is possible only if entry RA[i_r, i2] has not                          rtmp+= RA_val[irb+row_block_size*ib]*temp_val[ib+col_block_size*icb];
2267           been visited yet */                      }
2268        if (A_marker[i2] != i_r) {                      RAP_val[irb+row_block_size*icb]=rtmp;
2269          /* first, mark entry RA[i_r,i2] as visited */                    }
2270          A_marker[i2] = i_r;                  }
2271
2272          /* then loop over elements in row i2 of P_ext_main */
2273          j3_ub = P->row_coupleBlock->pattern->ptr[i2+1];                  /* check whether entry RAP[i_r,i_c] has been created.
2274          for (j3=P->row_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {                     If not yet created, create the entry and increase
2275          i_c = P->row_coupleBlock->pattern->index[j3];                     the total number of elements in RAP_ext */
2276          temp_val = &(P->row_coupleBlock->val[j3*block_size]);                  if (P_marker[i_c] < row_marker) {
2277          for (irb=0; irb<row_block_size; irb++) {                    P_marker[i_c] = sum;
2278            for (icb=0; icb<col_block_size; icb++) {                    memcpy(&(RAP_ext_val[sum*block_size]), RAP_val, block_size*sizeof(double));
2279              rtmp = ZERO;                    RAP_ext_idx[sum] = i_c + offset;
2280              for (ib=0; ib<col_block_size; ib++) {                    sum++;
2281              rtmp+= RA_val[irb+row_block_size*ib]*temp_val[ib+col_block_size*icb];                  } else {
2282              }                    temp_val = &(RAP_ext_val[P_marker[i_c] * block_size]);
2283              RAP_val[irb+row_block_size*icb]=rtmp;                    for (ib=0; ib<block_size; ib++) {
2284            }                      temp_val[ib] += RAP_val[ib];
2285          }                    }
2286                    }
2287                }
2288          /* check whether entry RAP[i_r,i_c] has been created.
2289             If not yet created, create the entry and increase              /* loop over elements in row i2 of P_ext_couple, do the same */
2290             the total number of elements in RAP_ext */              j3_ub = P->remote_coupleBlock->pattern->ptr[i2+1];
2291          if (P_marker[i_c] < row_marker) {              for (j3=P->remote_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
2292            P_marker[i_c] = sum;                  i_c = P->remote_coupleBlock->pattern->index[j3] + num_Pmain_cols;
memcpy(&(RAP_ext_val[sum*block_size]), RAP_val, block_size*sizeof(double));
RAP_ext_idx[sum] = i_c + offset;
sum++;
} else {
temp_val = &(RAP_ext_val[P_marker[i_c] * block_size]);
for (ib=0; ib<block_size; ib++) {
temp_val[ib] += RAP_val[ib];
}
}
}

/* loop over elements in row i2 of P_ext_couple, do the same */
j3_ub = P->remote_coupleBlock->pattern->ptr[i2+1];
for (j3=P->remote_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
i_c = P->remote_coupleBlock->pattern->index[j3] + num_Pmain_cols;
2293                  temp_val = &(P->remote_coupleBlock->val[j3*block_size]);                  temp_val = &(P->remote_coupleBlock->val[j3*block_size]);
2294                  for (irb=0; irb<row_block_size; irb++) {                  for (irb=0; irb<row_block_size; irb++) {
2295                    for (icb=0; icb<col_block_size; icb++) {                    for (icb=0; icb<col_block_size; icb++) {
# Line 2339  Paso_SystemMatrix* Paso_Preconditioner_A Line 2301  Paso_SystemMatrix* Paso_Preconditioner_A
2301                    }                    }
2302                  }                  }
2303
2304          if (P_marker[i_c] < row_marker) {                  if (P_marker[i_c] < row_marker) {
2305            P_marker[i_c] = sum;                    P_marker[i_c] = sum;
2306            memcpy(&(RAP_ext_val[sum*block_size]), RAP_val, block_size*sizeof(double));                    memcpy(&(RAP_ext_val[sum*block_size]), RAP_val, block_size*sizeof(double));
2307            RAP_ext_idx[sum] = global_id_P[i_c - num_Pmain_cols];                    RAP_ext_idx[sum] = global_id_P[i_c - num_Pmain_cols];
2308            sum++;                    sum++;
2309          } else {                  } else {
2310                    temp_val = &(RAP_ext_val[P_marker[i_c] * block_size]);                    temp_val = &(RAP_ext_val[P_marker[i_c] * block_size]);
2311                    for (ib=0; ib<block_size; ib++) {                    for (ib=0; ib<block_size; ib++) {
2312                      temp_val[ib] += RAP_val[ib];                      temp_val[ib] += RAP_val[ib];
2313                    }                    }
2314          }                  }
2315          }              }
2316
2317        /* If entry RA[i_r, i2] is visited, no new RAP entry is created.            /* If entry RA[i_r, i2] is visited, no new RAP entry is created.
2318           Only the contributions are added. */               Only the contributions are added. */
2319        } else {            } else {
2320          j3_ub = P->row_coupleBlock->pattern->ptr[i2+1];              j3_ub = P->row_coupleBlock->pattern->ptr[i2+1];
2321          for (j3=P->row_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->row_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
2322          i_c = P->row_coupleBlock->pattern->index[j3];                  i_c = P->row_coupleBlock->pattern->index[j3];
2323                  temp_val = &(P->row_coupleBlock->val[j3*block_size]);                  temp_val = &(P->row_coupleBlock->val[j3*block_size]);
2324                  for (irb=0; irb<row_block_size; irb++) {                  for (irb=0; irb<row_block_size; irb++) {
2325                    for (icb=0; icb<col_block_size; icb++) {                    for (icb=0; icb<col_block_size; icb++) {
# Line 2373  Paso_SystemMatrix* Paso_Preconditioner_A Line 2335  Paso_SystemMatrix* Paso_Preconditioner_A
2335                  for (ib=0; ib<block_size; ib++) {                  for (ib=0; ib<block_size; ib++) {
2336                    temp_val[ib] += RAP_val[ib];                    temp_val[ib] += RAP_val[ib];
2337                  }                  }
2338          }              }
2339          j3_ub = P->remote_coupleBlock->pattern->ptr[i2+1];              j3_ub = P->remote_coupleBlock->pattern->ptr[i2+1];
2340          for (j3=P->remote_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->remote_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
2341          i_c = P->remote_coupleBlock->pattern->index[j3] + num_Pmain_cols;                  i_c = P->remote_coupleBlock->pattern->index[j3] + num_Pmain_cols;
2342                  temp_val = &(P->remote_coupleBlock->val[j3*block_size]);                  temp_val = &(P->remote_coupleBlock->val[j3*block_size]);
2343                  for (irb=0; irb<row_block_size; irb++) {                  for (irb=0; irb<row_block_size; irb++) {
2344                    for (icb=0; icb<col_block_size; icb++) {                    for (icb=0; icb<col_block_size; icb++) {
# Line 2392  Paso_SystemMatrix* Paso_Preconditioner_A Line 2354  Paso_SystemMatrix* Paso_Preconditioner_A
2354                  for (ib=0; ib<block_size; ib++) {                  for (ib=0; ib<block_size; ib++) {
2355                    temp_val[ib] += RAP_val[ib];                    temp_val[ib] += RAP_val[ib];
2356                  }                  }
2357          }              }
2358        }            }
2359      }          }
2360
2361      /* now loop over elements in row i1 of A->mainBlock, repeat          /* now loop over elements in row i1 of A->mainBlock, repeat
2362         the process we have done to block A->col_coupleBlock */             the process we have done to block A->col_coupleBlock */
2363      j2_ub = A->mainBlock->pattern->ptr[i1+1];          j2_ub = A->mainBlock->pattern->ptr[i1+1];
2364      for (j2=A->mainBlock->pattern->ptr[i1]; j2<j2_ub; j2++) {          for (j2=A->mainBlock->pattern->ptr[i1]; j2<j2_ub; j2++) {
2365        i2 = A->mainBlock->pattern->index[j2];            i2 = A->mainBlock->pattern->index[j2];
2366            temp_val = &(A->mainBlock->val[j2*block_size]);            temp_val = &(A->mainBlock->val[j2*block_size]);
2367            for (irb=0; irb<row_block_size; irb++) {            for (irb=0; irb<row_block_size; irb++) {
2368              for (icb=0; icb<col_block_size; icb++) {              for (icb=0; icb<col_block_size; icb++) {
# Line 2412  Paso_SystemMatrix* Paso_Preconditioner_A Line 2374  Paso_SystemMatrix* Paso_Preconditioner_A
2374              }              }
2375            }            }
2376
2377        if (A_marker[i2 + num_Acouple_cols] != i_r) {            if (A_marker[i2 + num_Acouple_cols] != i_r) {
2378          A_marker[i2 + num_Acouple_cols] = i_r;              A_marker[i2 + num_Acouple_cols] = i_r;
2379          j3_ub = P->mainBlock->pattern->ptr[i2+1];              j3_ub = P->mainBlock->pattern->ptr[i2+1];
2380          for (j3=P->mainBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->mainBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
2381          i_c = P->mainBlock->pattern->index[j3];                  i_c = P->mainBlock->pattern->index[j3];
2382                  temp_val = &(P->mainBlock->val[j3*block_size]);                  temp_val = &(P->mainBlock->val[j3*block_size]);
2383                  for (irb=0; irb<row_block_size; irb++) {                  for (irb=0; irb<row_block_size; irb++) {
2384                    for (icb=0; icb<col_block_size; icb++) {                    for (icb=0; icb<col_block_size; icb++) {
# Line 2428  Paso_SystemMatrix* Paso_Preconditioner_A Line 2390  Paso_SystemMatrix* Paso_Preconditioner_A
2390                    }                    }
2391                  }                  }
2392
2393          if (P_marker[i_c] < row_marker) {                  if (P_marker[i_c] < row_marker) {
2394            P_marker[i_c] = sum;                    P_marker[i_c] = sum;
2395            memcpy(&(RAP_ext_val[sum*block_size]), RAP_val, block_size*sizeof(double));                    memcpy(&(RAP_ext_val[sum*block_size]), RAP_val, block_size*sizeof(double));
2396            RAP_ext_idx[sum] = i_c + offset;                    RAP_ext_idx[sum] = i_c + offset;
2397            sum++;                    sum++;
2398          } else {                  } else {
2399                    temp_val = &(RAP_ext_val[P_marker[i_c] * block_size]);                    temp_val = &(RAP_ext_val[P_marker[i_c] * block_size]);
2400                    for (ib=0; ib<block_size; ib++) {                    for (ib=0; ib<block_size; ib++) {
2401                      temp_val[ib] += RAP_val[ib];                      temp_val[ib] += RAP_val[ib];
2402                    }                    }
2403          }                  }
2404          }              }
2405          j3_ub = P->col_coupleBlock->pattern->ptr[i2+1];              j3_ub = P->col_coupleBlock->pattern->ptr[i2+1];
2406          for (j3=P->col_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->col_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
2407          i_c = Pcouple_to_Pext[P->col_coupleBlock->pattern->index[j3]] + num_Pmain_cols;                  i_c = Pcouple_to_Pext[P->col_coupleBlock->pattern->index[j3]] + num_Pmain_cols;
2408                  temp_val = &(P->col_coupleBlock->val[j3*block_size]);                  temp_val = &(P->col_coupleBlock->val[j3*block_size]);
2409                  for (irb=0; irb<row_block_size; irb++) {                  for (irb=0; irb<row_block_size; irb++) {
2410                    for (icb=0; icb<col_block_size; icb++) {                    for (icb=0; icb<col_block_size; icb++) {
# Line 2454  Paso_SystemMatrix* Paso_Preconditioner_A Line 2416  Paso_SystemMatrix* Paso_Preconditioner_A
2416                    }                    }
2417                  }                  }
2418
2419          if (P_marker[i_c] < row_marker) {                  if (P_marker[i_c] < row_marker) {
2420            P_marker[i_c] = sum;                    P_marker[i_c] = sum;
2421            memcpy(&(RAP_ext_val[sum*block_size]), RAP_val, block_size*sizeof(double));                    memcpy(&(RAP_ext_val[sum*block_size]), RAP_val, block_size*sizeof(double));
2422                    RAP_ext_idx[sum] = global_id_P[i_c - num_Pmain_cols];                    RAP_ext_idx[sum] = global_id_P[i_c - num_Pmain_cols];
2423            sum++;                    sum++;
2424          } else {                  } else {
2425                    temp_val = &(RAP_ext_val[P_marker[i_c] * block_size]);                    temp_val = &(RAP_ext_val[P_marker[i_c] * block_size]);
2426                    for (ib=0; ib<block_size; ib++) {                    for (ib=0; ib<block_size; ib++) {
2427                      temp_val[ib] += RAP_val[ib];                      temp_val[ib] += RAP_val[ib];
2428                    }                    }
2429          }                  }
2430          }              }
2431        } else {            } else {
2432          j3_ub = P->mainBlock->pattern->ptr[i2+1];              j3_ub = P->mainBlock->pattern->ptr[i2+1];
2433          for (j3=P->mainBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->mainBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
2434          i_c = P->mainBlock->pattern->index[j3];                  i_c = P->mainBlock->pattern->index[j3];
2435                  temp_val = &(P->mainBlock->val[j3*block_size]);                  temp_val = &(P->mainBlock->val[j3*block_size]);
2436                  for (irb=0; irb<row_block_size; irb++) {                  for (irb=0; irb<row_block_size; irb++) {
2437                    for (icb=0; icb<col_block_size; icb++) {                    for (icb=0; icb<col_block_size; icb++) {
# Line 2485  Paso_SystemMatrix* Paso_Preconditioner_A Line 2447  Paso_SystemMatrix* Paso_Preconditioner_A
2447                  for (ib=0; ib<block_size; ib++) {                  for (ib=0; ib<block_size; ib++) {
2448                    temp_val[ib] += RAP_val[ib];                    temp_val[ib] += RAP_val[ib];
2449                  }                  }
2450          }              }
2451          j3_ub = P->col_coupleBlock->pattern->ptr[i2+1];              j3_ub = P->col_coupleBlock->pattern->ptr[i2+1];
2452          for (j3=P->col_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->col_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
2453          i_c = Pcouple_to_Pext[P->col_coupleBlock->pattern->index[j3]] + num_Pmain_cols;                  i_c = Pcouple_to_Pext[P->col_coupleBlock->pattern->index[j3]] + num_Pmain_cols;
2454                  temp_val = &(P->col_coupleBlock->val[j3*block_size]);                  temp_val = &(P->col_coupleBlock->val[j3*block_size]);
2455                  for (irb=0; irb<row_block_size; irb++) {                  for (irb=0; irb<row_block_size; irb++) {
2456                    for (icb=0; icb<col_block_size; icb++) {                    for (icb=0; icb<col_block_size; icb++) {
# Line 2504  Paso_SystemMatrix* Paso_Preconditioner_A Line 2466  Paso_SystemMatrix* Paso_Preconditioner_A
2466                  for (ib=0; ib<block_size; ib++) {                  for (ib=0; ib<block_size; ib++) {
2467                    temp_val[ib] += RAP_val[ib];                    temp_val[ib] += RAP_val[ib];
2468                  }                  }
2469          }              }
2470        }            }
2471      }          }
2472       }       }
2473       RAP_ext_ptr[i_r+1] = sum;       RAP_ext_ptr[i_r+1] = sum;
2474     }     }
# Line 2520  Paso_SystemMatrix* Paso_Preconditioner_A Line 2482  Paso_SystemMatrix* Paso_Preconditioner_A
2482        be passed to neighbouring processors, and added to corresponding        be passed to neighbouring processors, and added to corresponding
2483        RAP[r,c] entries in the neighbouring processors */        RAP[r,c] entries in the neighbouring processors */
2484     Paso_Preconditioner_AMG_CopyRemoteData(P, &RAP_ext_ptr, &RAP_ext_idx,     Paso_Preconditioner_AMG_CopyRemoteData(P, &RAP_ext_ptr, &RAP_ext_idx,
2485          &RAP_ext_val, global_id_P, block_size);                  &RAP_ext_val, global_id_P, block_size);
2486
2487     num_RAPext_rows = P->col_coupler->connector->send->offsetInShared[P->col_coupler->connector->send->numNeighbors];     num_RAPext_rows = P->col_coupler->connector->send->offsetInShared[P->col_coupler->connector->send->numNeighbors];
2488     sum = RAP_ext_ptr[num_RAPext_rows];     sum = RAP_ext_ptr[num_RAPext_rows];
# Line 2529  Paso_SystemMatrix* Paso_Preconditioner_A Line 2491  Paso_SystemMatrix* Paso_Preconditioner_A
2491       temp = new index_t[num_Pext_cols+sum];       temp = new index_t[num_Pext_cols+sum];
2492       j1_ub = offset + num_Pmain_cols;       j1_ub = offset + num_Pmain_cols;
2493       for (i=0, iptr=0; i<sum; i++) {       for (i=0, iptr=0; i<sum; i++) {
2494      if (RAP_ext_idx[i] < offset || RAP_ext_idx[i] >= j1_ub)  /* XXX */          if (RAP_ext_idx[i] < offset || RAP_ext_idx[i] >= j1_ub)  /* XXX */
2495        temp[iptr++] = RAP_ext_idx[i];          /* XXX */            temp[iptr++] = RAP_ext_idx[i];                  /* XXX */
2496       }       }
2497       for (j=0; j<num_Pext_cols; j++, iptr++){       for (j=0; j<num_Pext_cols; j++, iptr++){
2498      temp[iptr] = global_id_P[j];          temp[iptr] = global_id_P[j];
2499       }       }
2500
2501       if (iptr) {       if (iptr) {
2502      #ifdef USE_QSORTG            qsort(temp, (size_t)iptr, sizeof(index_t), paso::comparIndex);
2503        qsortG(temp, (size_t)iptr, sizeof(index_t), paso::comparIndex);          num_RAPext_cols = 1;
2504      #else          i = temp[0];
2505        qsort(temp, (size_t)iptr, sizeof(index_t), paso::comparIndex);          for (j=1; j<iptr; j++) {
2506      #endif            if (temp[j] > i) {
2507      num_RAPext_cols = 1;              i = temp[j];
2508      i = temp[0];              temp[num_RAPext_cols++] = i;
2509      for (j=1; j<iptr; j++) {            }
2510        if (temp[j] > i) {          }
i = temp[j];
temp[num_RAPext_cols++] = i;
}
}
2511       }       }
2512     }     }
2513
# Line 2557  Paso_SystemMatrix* Paso_Preconditioner_A Line 2515  Paso_SystemMatrix* Paso_Preconditioner_A
2515     if(num_RAPext_cols){     if(num_RAPext_cols){
2516       global_id_RAP = new index_t[num_RAPext_cols];       global_id_RAP = new index_t[num_RAPext_cols];
2517       for (i=0; i<num_RAPext_cols; i++)       for (i=0; i<num_RAPext_cols; i++)
2518      global_id_RAP[i] = temp[i];          global_id_RAP[i] = temp[i];
2519     }     }
2520     if (num_Pext_cols || sum > 0)     if (num_Pext_cols || sum > 0)
2521       delete[] temp;       delete[] temp;
2522     j1_ub = offset + num_Pmain_cols;     j1_ub = offset + num_Pmain_cols;
2523     for (i=0; i<sum; i++) {     for (i=0; i<sum; i++) {
2524       if (RAP_ext_idx[i] < offset || RAP_ext_idx[i] >= j1_ub){       if (RAP_ext_idx[i] < offset || RAP_ext_idx[i] >= j1_ub){
2525      where_p = (index_t *) bsearch(&(RAP_ext_idx[i]), global_id_RAP,          where_p = (index_t *) bsearch(&(RAP_ext_idx[i]), global_id_RAP,
2526  /*XXX*/         num_RAPext_cols, sizeof(index_t), paso::comparIndex);  /*XXX*/                 num_RAPext_cols, sizeof(index_t), paso::comparIndex);
2527      RAP_ext_idx[i] = num_Pmain_cols + (index_t)(where_p - global_id_RAP);          RAP_ext_idx[i] = num_Pmain_cols + (index_t)(where_p - global_id_RAP);
2528       } else       } else
2529      RAP_ext_idx[i] = RAP_ext_idx[i] - offset;          RAP_ext_idx[i] = RAP_ext_idx[i] - offset;
2530     }     }
2531
2532     /* build the mapping */     /* build the mapping */
# Line 2576  Paso_SystemMatrix* Paso_Preconditioner_A Line 2534  Paso_SystemMatrix* Paso_Preconditioner_A
2534       Pcouple_to_RAP = new index_t[num_Pcouple_cols];       Pcouple_to_RAP = new index_t[num_Pcouple_cols];
2535       iptr = 0;       iptr = 0;
2536       for (i=0; i<num_RAPext_cols; i++)       for (i=0; i<num_RAPext_cols; i++)
2537      if (global_id_RAP[i] == P->global_id[iptr]) {          if (global_id_RAP[i] == P->global_id[iptr]) {
2538        Pcouple_to_RAP[iptr++] = i;            Pcouple_to_RAP[iptr++] = i;
2539        if (iptr == num_Pcouple_cols) break;            if (iptr == num_Pcouple_cols) break;
2540      }          }
2541     }     }
2542
2543     if (num_Pext_cols) {     if (num_Pext_cols) {
2544       Pext_to_RAP = new index_t[num_Pext_cols];       Pext_to_RAP = new index_t[num_Pext_cols];
2545       iptr = 0;       iptr = 0;
2546       for (i=0; i<num_RAPext_cols; i++)       for (i=0; i<num_RAPext_cols; i++)
2547      if (global_id_RAP[i] == global_id_P[iptr]) {          if (global_id_RAP[i] == global_id_P[iptr]) {
2548        Pext_to_RAP[iptr++] = i;            Pext_to_RAP[iptr++] = i;
2549        if (iptr == num_Pext_cols) break;            if (iptr == num_Pext_cols) break;
2550      }          }
2551     }     }
2552
2553     if (global_id_P){     if (global_id_P){
# Line 2617  Paso_SystemMatrix* Paso_Preconditioner_A Line 2575  Paso_SystemMatrix* Paso_Preconditioner_A
2575       row_marker_ext = j;       row_marker_ext = j;
2576
2577       /* Mark the diagonal element RAP[i_r, i_r], and other elements       /* Mark the diagonal element RAP[i_r, i_r], and other elements
2578      in RAP_ext */          in RAP_ext */
2579       P_marker[i_r] = i;       P_marker[i_r] = i;
2580       i++;       i++;
2581       for (j1=0; j1<num_neighbors; j1++) {       for (j1=0; j1<num_neighbors; j1++) {
2582      for (j2=offsetInShared[j1]; j2<offsetInShared[j1+1]; j2++) {          for (j2=offsetInShared[j1]; j2<offsetInShared[j1+1]; j2++) {
2583        if (shared[j2] == i_r) {            if (shared[j2] == i_r) {
2584          for (k=RAP_ext_ptr[j2]; k<RAP_ext_ptr[j2+1]; k++) {              for (k=RAP_ext_ptr[j2]; k<RAP_ext_ptr[j2+1]; k++) {
2585            i_c = RAP_ext_idx[k];                i_c = RAP_ext_idx[k];
2586            if (i_c < num_Pmain_cols) {                if (i_c < num_Pmain_cols) {
2587          if (P_marker[i_c] < row_marker) {                  if (P_marker[i_c] < row_marker) {
2588            P_marker[i_c] = i;                    P_marker[i_c] = i;
2589            i++;                    i++;
2590          }                  }
2591            } else {                } else {
2592          if (P_marker[i_c] < row_marker_ext) {                  if (P_marker[i_c] < row_marker_ext) {
2593            P_marker[i_c] = j;                    P_marker[i_c] = j;
2594            j++;                    j++;
2595          }                  }
2596            }                }
2597          }              }
2598          break;              break;
2599        }            }
2600      }          }
2601       }       }
2602
2603       /* then, loop over elements in row i_r of R_main */       /* then, loop over elements in row i_r of R_main */
2604       j1_ub = R_main->pattern->ptr[i_r+1];       j1_ub = R_main->pattern->ptr[i_r+1];
2605       for (j1=R_main->pattern->ptr[i_r]; j1<j1_ub; j1++){       for (j1=R_main->pattern->ptr[i_r]; j1<j1_ub; j1++){
2606      i1 = R_main->pattern->index[j1];          i1 = R_main->pattern->index[j1];
2607
2608      /* then, loop over elements in row i1 of A->col_coupleBlock */          /* then, loop over elements in row i1 of A->col_coupleBlock */
2609      j2_ub = A->col_coupleBlock->pattern->ptr[i1+1];          j2_ub = A->col_coupleBlock->pattern->ptr[i1+1];
2610      for (j2=A->col_coupleBlock->pattern->ptr[i1]; j2<j2_ub; j2++) {          for (j2=A->col_coupleBlock->pattern->ptr[i1]; j2<j2_ub; j2++) {
2611        i2 = A->col_coupleBlock->pattern->index[j2];            i2 = A->col_coupleBlock->pattern->index[j2];
2612
2613        /* check whether entry RA[i_r, i2] has been previously visited.            /* check whether entry RA[i_r, i2] has been previously visited.
2614           RAP new entry is possible only if entry RA[i_r, i2] has not               RAP new entry is possible only if entry RA[i_r, i2] has not
2615           been visited yet */               been visited yet */
2616        if (A_marker[i2] != i_r) {            if (A_marker[i2] != i_r) {
2617          /* first, mark entry RA[i_r,i2] as visited */              /* first, mark entry RA[i_r,i2] as visited */
2618          A_marker[i2] = i_r;              A_marker[i2] = i_r;
2619
2620          /* then loop over elements in row i2 of P_ext_main */              /* then loop over elements in row i2 of P_ext_main */
2621          j3_ub = P->row_coupleBlock->pattern->ptr[i2+1];              j3_ub = P->row_coupleBlock->pattern->ptr[i2+1];
2622          for (j3=P->row_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->row_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
2623          i_c = P->row_coupleBlock->pattern->index[j3];                  i_c = P->row_coupleBlock->pattern->index[j3];
2624
2625          /* check whether entry RAP[i_r,i_c] has been created.                  /* check whether entry RAP[i_r,i_c] has been created.
2626             If not yet created, create the entry and increase                     If not yet created, create the entry and increase
2627             the total number of elements in RAP_ext */                     the total number of elements in RAP_ext */
2628          if (P_marker[i_c] < row_marker) {                  if (P_marker[i_c] < row_marker) {
2629            P_marker[i_c] = i;                    P_marker[i_c] = i;
2630            i++;                    i++;
2631          }                  }
2632          }              }
2633
2634          /* loop over elements in row i2 of P_ext_couple, do the same */              /* loop over elements in row i2 of P_ext_couple, do the same */
2635          j3_ub = P->remote_coupleBlock->pattern->ptr[i2+1];              j3_ub = P->remote_coupleBlock->pattern->ptr[i2+1];
2636          for (j3=P->remote_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->remote_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
2637          i_c = Pext_to_RAP[P->remote_coupleBlock->pattern->index[j3]] + num_Pmain_cols;                  i_c = Pext_to_RAP[P->remote_coupleBlock->pattern->index[j3]] + num_Pmain_cols;
2638          if (P_marker[i_c] < row_marker_ext) {                  if (P_marker[i_c] < row_marker_ext) {
2639            P_marker[i_c] = j;                    P_marker[i_c] = j;
2640            j++;                    j++;
2641          }                  }
2642          }              }
2643        }            }
2644      }          }
2645
2646      /* now loop over elements in row i1 of A->mainBlock, repeat          /* now loop over elements in row i1 of A->mainBlock, repeat
2647         the process we have done to block A->col_coupleBlock */             the process we have done to block A->col_coupleBlock */
2648      j2_ub = A->mainBlock->pattern->ptr[i1+1];          j2_ub = A->mainBlock->pattern->ptr[i1+1];
2649      for (j2=A->mainBlock->pattern->ptr[i1]; j2<j2_ub; j2++) {          for (j2=A->mainBlock->pattern->ptr[i1]; j2<j2_ub; j2++) {
2650        i2 = A->mainBlock->pattern->index[j2];            i2 = A->mainBlock->pattern->index[j2];
2651        if (A_marker[i2 + num_Acouple_cols] != i_r) {            if (A_marker[i2 + num_Acouple_cols] != i_r) {
2652          A_marker[i2 + num_Acouple_cols] = i_r;              A_marker[i2 + num_Acouple_cols] = i_r;
2653          j3_ub = P->mainBlock->pattern->ptr[i2+1];              j3_ub = P->mainBlock->pattern->ptr[i2+1];
2654          for (j3=P->mainBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->mainBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
2655          i_c = P->mainBlock->pattern->index[j3];                  i_c = P->mainBlock->pattern->index[j3];
2656          if (P_marker[i_c] < row_marker) {                  if (P_marker[i_c] < row_marker) {
2657            P_marker[i_c] = i;                    P_marker[i_c] = i;
2658            i++;                    i++;
2659          }                  }
2660          }              }
2661          j3_ub = P->col_coupleBlock->pattern->ptr[i2+1];              j3_ub = P->col_coupleBlock->pattern->ptr[i2+1];
2662          for (j3=P->col_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->col_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
2663          /* note that we need to map the column index in                  /* note that we need to map the column index in
2664             P->col_coupleBlock back into the column index in                     P->col_coupleBlock back into the column index in
2665             P_ext_couple */                     P_ext_couple */
2666          i_c = Pcouple_to_RAP[P->col_coupleBlock->pattern->index[j3]] + num_Pmain_cols;                  i_c = Pcouple_to_RAP[P->col_coupleBlock->pattern->index[j3]] + num_Pmain_cols;
2667          if (P_marker[i_c] < row_marker_ext) {                  if (P_marker[i_c] < row_marker_ext) {
2668            P_marker[i_c] = j;                    P_marker[i_c] = j;
2669            j++;                    j++;
2670          }                  }
2671          }              }
2672        }            }
2673      }          }
2674       }       }
2675     }     }
2676
# Line 2747  Paso_SystemMatrix* Paso_Preconditioner_A Line 2705  Paso_SystemMatrix* Paso_Preconditioner_A
2705       RAP_couple_ptr[i_r] = row_marker_ext;       RAP_couple_ptr[i_r] = row_marker_ext;
2706
2707       /* Mark and setup the diagonal element RAP[i_r, i_r], and elements       /* Mark and setup the diagonal element RAP[i_r, i_r], and elements
2708      in row i_r of RAP_ext */          in row i_r of RAP_ext */
2709       P_marker[i_r] = i;       P_marker[i_r] = i;
2710       RAP_main_val[i] = ZERO;       RAP_main_val[i] = ZERO;
2711       RAP_main_idx[i] = i_r;       RAP_main_idx[i] = i_r;
2712       i++;       i++;
2713
2714       for (j1=0; j1<num_neighbors; j1++) {       for (j1=0; j1<num_neighbors; j1++) {
2715      for (j2=offsetInShared[j1]; j2<offsetInShared[j1+1]; j2++) {          for (j2=offsetInShared[j1]; j2<offsetInShared[j1+1]; j2++) {
2716        if (shared[j2] == i_r) {            if (shared[j2] == i_r) {
2717          for (k=RAP_ext_ptr[j2]; k<RAP_ext_ptr[j2+1]; k++) {              for (k=RAP_ext_ptr[j2]; k<RAP_ext_ptr[j2+1]; k++) {
2718            i_c = RAP_ext_idx[k];                i_c = RAP_ext_idx[k];
2719            if (i_c < num_Pmain_cols) {                if (i_c < num_Pmain_cols) {
2720          if (P_marker[i_c] < row_marker) {                  if (P_marker[i_c] < row_marker) {
2721            P_marker[i_c] = i;                    P_marker[i_c] = i;
2722            memcpy(&(RAP_main_val[i*block_size]), &(RAP_ext_val[k*block_size]), block_size*sizeof(double));                    memcpy(&(RAP_main_val[i*block_size]), &(RAP_ext_val[k*block_size]), block_size*sizeof(double));
2723            RAP_main_idx[i] = i_c;                    RAP_main_idx[i] = i_c;
2724            i++;                    i++;
2725          } else {                  } else {
2726            temp_val = &(RAP_ext_val[k*block_size]);                    temp_val = &(RAP_ext_val[k*block_size]);
2727            RAP_val = &(RAP_main_val[P_marker[i_c]*block_size]);                    RAP_val = &(RAP_main_val[P_marker[i_c]*block_size]);
2728            for (ib=0; ib<block_size; ib++)                    for (ib=0; ib<block_size; ib++)
2729              RAP_val[ib] += temp_val[ib];                      RAP_val[ib] += temp_val[ib];
2730          }                  }
2731            } else {                } else {
2732          if (P_marker[i_c] < row_marker_ext) {                  if (P_marker[i_c] < row_marker_ext) {
2733            P_marker[i_c] = j;                    P_marker[i_c] = j;
2734            memcpy(&(RAP_couple_val[j*block_size]), &(RAP_ext_val[k*block_size]), block_size*sizeof(double));                    memcpy(&(RAP_couple_val[j*block_size]), &(RAP_ext_val[k*block_size]), block_size*sizeof(double));
2735            RAP_couple_idx[j] = i_c - num_Pmain_cols;                    RAP_couple_idx[j] = i_c - num_Pmain_cols;
2736            j++;                    j++;
2737          } else {                  } else {
2738            temp_val = &(RAP_ext_val[k*block_size]);                    temp_val = &(RAP_ext_val[k*block_size]);
2739            RAP_val = &(RAP_couple_val[P_marker[i_c]*block_size]);                    RAP_val = &(RAP_couple_val[P_marker[i_c]*block_size]);
2740            for (ib=0; ib<block_size; ib++)                    for (ib=0; ib<block_size; ib++)
2741              RAP_val[ib] += temp_val[ib];                      RAP_val[ib] += temp_val[ib];
2742          }                  }
2743            }                }
2744          }              }
2745          break;              break;
2746        }            }
2747      }          }
2748       }       }
2749
2750       /* then, loop over elements in row i_r of R_main */       /* then, loop over elements in row i_r of R_main */
2751       j1_ub = R_main->pattern->ptr[i_r+1];       j1_ub = R_main->pattern->ptr[i_r+1];
2752       for (j1=R_main->pattern->ptr[i_r]; j1<j1_ub; j1++){       for (j1=R_main->pattern->ptr[i_r]; j1<j1_ub; j1++){
2753      i1 = R_main->pattern->index[j1];          i1 = R_main->pattern->index[j1];
2754      R_val = &(R_main->val[j1*block_size]);          R_val = &(R_main->val[j1*block_size]);
2755
2756      /* then, loop over elements in row i1 of A->col_coupleBlock */          /* then, loop over elements in row i1 of A->col_coupleBlock */
2757      j2_ub = A->col_coupleBlock->pattern->ptr[i1+1];          j2_ub = A->col_coupleBlock->pattern->ptr[i1+1];
2758      for (j2=A->col_coupleBlock->pattern->ptr[i1]; j2<j2_ub; j2++) {          for (j2=A->col_coupleBlock->pattern->ptr[i1]; j2<j2_ub; j2++) {
2759        i2 = A->col_coupleBlock->pattern->index[j2];            i2 = A->col_coupleBlock->pattern->index[j2];
2760            temp_val = &(A->col_coupleBlock->val[j2*block_size]);            temp_val = &(A->col_coupleBlock->val[j2*block_size]);
2761            for (irb=0; irb<row_block_size; irb++) {            for (irb=0; irb<row_block_size; irb++) {
2762              for (icb=0; icb<col_block_size; icb++) {              for (icb=0; icb<col_block_size; icb++) {
# Line 2811  Paso_SystemMatrix* Paso_Preconditioner_A Line 2769  Paso_SystemMatrix* Paso_Preconditioner_A
2769            }            }
2770
2771
2772        /* check whether entry RA[i_r, i2] has been previously visited.            /* check whether entry RA[i_r, i2] has been previously visited.
2773           RAP new entry is possible only if entry RA[i_r, i2] has not               RAP new entry is possible only if entry RA[i_r, i2] has not
2774           been visited yet */               been visited yet */
2775        if (A_marker[i2] != i_r) {            if (A_marker[i2] != i_r) {
2776          /* first, mark entry RA[i_r,i2] as visited */              /* first, mark entry RA[i_r,i2] as visited */
2777          A_marker[i2] = i_r;              A_marker[i2] = i_r;
2778
2779          /* then loop over elements in row i2 of P_ext_main */              /* then loop over elements in row i2 of P_ext_main */
2780          j3_ub = P->row_coupleBlock->pattern->ptr[i2+1];              j3_ub = P->row_coupleBlock->pattern->ptr[i2+1];
2781          for (j3=P->row_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->row_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
2782          i_c = P->row_coupleBlock->pattern->index[j3];                  i_c = P->row_coupleBlock->pattern->index[j3];
2783                  temp_val = &(P->row_coupleBlock->val[j3*block_size]);                  temp_val = &(P->row_coupleBlock->val[j3*block_size]);
2784                  for (irb=0; irb<row_block_size; irb++) {                  for (irb=0; irb<row_block_size; irb++) {
2785                    for (icb=0; icb<col_block_size; icb++) {                    for (icb=0; icb<col_block_size; icb++) {
# Line 2834  Paso_SystemMatrix* Paso_Preconditioner_A Line 2792  Paso_SystemMatrix* Paso_Preconditioner_A
2792                  }                  }
2793
2794
2795          /* check whether entry RAP[i_r,i_c] has been created.                  /* check whether entry RAP[i_r,i_c] has been created.
2796             If not yet created, create the entry and increase                     If not yet created, create the entry and increase
2797             the total number of elements in RAP_ext */                     the total number of elements in RAP_ext */
2798          if (P_marker[i_c] < row_marker) {                  if (P_marker[i_c] < row_marker) {
2799            P_marker[i_c] = i;                    P_marker[i_c] = i;
2800            memcpy(&(RAP_main_val[i*block_size]), RAP_val, block_size*sizeof(double));                    memcpy(&(RAP_main_val[i*block_size]), RAP_val, block_size*sizeof(double));
2801            RAP_main_idx[i] = i_c;                    RAP_main_idx[i] = i_c;
2802            i++;                    i++;
2803          } else {                  } else {
2804                    temp_val = &(RAP_main_val[P_marker[i_c] * block_size]);                    temp_val = &(RAP_main_val[P_marker[i_c] * block_size]);
2805                    for (ib=0; ib<block_size; ib++) {                    for (ib=0; ib<block_size; ib++) {
2806                      temp_val[ib] += RAP_val[ib];                      temp_val[ib] += RAP_val[ib];
2807                    }                    }
2808          }                  }
2809          }              }
2810
2811          /* loop over elements in row i2 of P_ext_couple, do the same */              /* loop over elements in row i2 of P_ext_couple, do the same */
2812          j3_ub = P->remote_coupleBlock->pattern->ptr[i2+1];              j3_ub = P->remote_coupleBlock->pattern->ptr[i2+1];
2813          for (j3=P->remote_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->remote_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
2814          i_c = Pext_to_RAP[P->remote_coupleBlock->pattern->index[j3]] + num_Pmain_cols;                  i_c = Pext_to_RAP[P->remote_coupleBlock->pattern->index[j3]] + num_Pmain_cols;
2815                  temp_val = &(P->remote_coupleBlock->val[j3*block_size]);                  temp_val = &(P->remote_coupleBlock->val[j3*block_size]);
2816                  for (irb=0; irb<row_block_size; irb++) {                  for (irb=0; irb<row_block_size; irb++) {
2817                    for (icb=0; icb<col_block_size; icb++) {                    for (icb=0; icb<col_block_size; icb++) {
# Line 2865  Paso_SystemMatrix* Paso_Preconditioner_A Line 2823  Paso_SystemMatrix* Paso_Preconditioner_A
2823                    }                    }
2824                  }                  }
2825
2826          if (P_marker[i_c] < row_marker_ext) {                  if (P_marker[i_c] < row_marker_ext) {
2827            P_marker[i_c] = j;                    P_marker[i_c] = j;
2828            memcpy(&(RAP_couple_val[j*block_size]), RAP_val, block_size*sizeof(double));                    memcpy(&(RAP_couple_val[j*block_size]), RAP_val, block_size*sizeof(double));
2829            RAP_couple_idx[j] = i_c - num_Pmain_cols;                    RAP_couple_idx[j] = i_c - num_Pmain_cols;
2830            j++;                    j++;
2831          } else {                  } else {
2832                    temp_val = &(RAP_couple_val[P_marker[i_c] * block_size]);                    temp_val = &(RAP_couple_val[P_marker[i_c] * block_size]);
2833                    for (ib=0; ib<block_size; ib++) {                    for (ib=0; ib<block_size; ib++) {
2834                      temp_val[ib] += RAP_val[ib];                      temp_val[ib] += RAP_val[ib];
2835                    }                    }
2836          }                  }
2837          }              }
2838
2839        /* If entry RA[i_r, i2] is visited, no new RAP entry is created.            /* If entry RA[i_r, i2] is visited, no new RAP entry is created.
2840           Only the contributions are added. */               Only the contributions are added. */
2841        } else {            } else {
2842          j3_ub = P->row_coupleBlock->pattern->ptr[i2+1];              j3_ub = P->row_coupleBlock->pattern->ptr[i2+1];
2843          for (j3=P->row_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->row_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
2844          i_c = P->row_coupleBlock->pattern->index[j3];                  i_c = P->row_coupleBlock->pattern->index[j3];
2845                  temp_val = &(P->row_coupleBlock->val[j3*block_size]);                  temp_val = &(P->row_coupleBlock->val[j3*block_size]);
2846                  for (irb=0; irb<row_block_size; irb++) {                  for (irb=0; irb<row_block_size; irb++) {
2847                    for (icb=0; icb<col_block_size; icb++) {                    for (icb=0; icb<col_block_size; icb++) {
# Line 2899  Paso_SystemMatrix* Paso_Preconditioner_A Line 2857  Paso_SystemMatrix* Paso_Preconditioner_A
2857                  for (ib=0; ib<block_size; ib++) {                  for (ib=0; ib<block_size; ib++) {
2858                    temp_val[ib] += RAP_val[ib];                    temp_val[ib] += RAP_val[ib];
2859                  }                  }
2860          }              }
2861          j3_ub = P->remote_coupleBlock->pattern->ptr[i2+1];              j3_ub = P->remote_coupleBlock->pattern->ptr[i2+1];
2862          for (j3=P->remote_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->remote_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
2863          i_c = Pext_to_RAP[P->remote_coupleBlock->pattern->index[j3]] + num_Pmain_cols;                  i_c = Pext_to_RAP[P->remote_coupleBlock->pattern->index[j3]] + num_Pmain_cols;
2864                  temp_val = &(P->remote_coupleBlock->val[j3*block_size]);                  temp_val = &(P->remote_coupleBlock->val[j3*block_size]);
2865                  for (irb=0; irb<row_block_size; irb++) {                  for (irb=0; irb<row_block_size; irb++) {
2866                    for (icb=0; icb<col_block_size; icb++) {                    for (icb=0; icb<col_block_size; icb++) {
# Line 2918  Paso_SystemMatrix* Paso_Preconditioner_A Line 2876  Paso_SystemMatrix* Paso_Preconditioner_A
2876                  for (ib=0; ib<block_size; ib++) {                  for (ib=0; ib<block_size; ib++) {
2877                    temp_val[ib] += RAP_val[ib];                    temp_val[ib] += RAP_val[ib];
2878                  }                  }
2879          }              }
2880        }            }
2881      }          }
2882
2883      /* now loop over elements in row i1 of A->mainBlock, repeat          /* now loop over elements in row i1 of A->mainBlock, repeat
2884         the process we have done to block A->col_coupleBlock */             the process we have done to block A->col_coupleBlock */
2885      j2_ub = A->mainBlock->pattern->ptr[i1+1];          j2_ub = A->mainBlock->pattern->ptr[i1+1];
2886      for (j2=A->mainBlock->pattern->ptr[i1]; j2<j2_ub; j2++) {          for (j2=A->mainBlock->pattern->ptr[i1]; j2<j2_ub; j2++) {
2887        i2 = A->mainBlock->pattern->index[j2];            i2 = A->mainBlock->pattern->index[j2];
2888            temp_val = &(A->mainBlock->val[j2*block_size]);            temp_val = &(A->mainBlock->val[j2*block_size]);
2889            for (irb=0; irb<row_block_size; irb++) {            for (irb=0; irb<row_block_size; irb++) {
2890              for (icb=0; icb<col_block_size; icb++) {              for (icb=0; icb<col_block_size; icb++) {
# Line 2938  Paso_SystemMatrix* Paso_Preconditioner_A Line 2896  Paso_SystemMatrix* Paso_Preconditioner_A
2896              }              }
2897            }            }
2898
2899        if (A_marker[i2 + num_Acouple_cols] != i_r) {            if (A_marker[i2 + num_Acouple_cols] != i_r) {
2900          A_marker[i2 + num_Acouple_cols] = i_r;              A_marker[i2 + num_Acouple_cols] = i_r;
2901          j3_ub = P->mainBlock->pattern->ptr[i2+1];              j3_ub = P->mainBlock->pattern->ptr[i2+1];
2902          for (j3=P->mainBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->mainBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
2903          i_c = P->mainBlock->pattern->index[j3];                  i_c = P->mainBlock->pattern->index[j3];
2904                  temp_val = &(P->mainBlock->val[j3*block_size]);                  temp_val = &(P->mainBlock->val[j3*block_size]);
2905                  for (irb=0; irb<row_block_size; irb++) {                  for (irb=0; irb<row_block_size; irb++) {
2906                    for (icb=0; icb<col_block_size; icb++) {                    for (icb=0; icb<col_block_size; icb++) {
# Line 2954  Paso_SystemMatrix* Paso_Preconditioner_A Line 2912  Paso_SystemMatrix* Paso_Preconditioner_A
2912                    }                    }
2913                  }                  }
2914
2915          if (P_marker[i_c] < row_marker) {                  if (P_marker[i_c] < row_marker) {
2916            P_marker[i_c] = i;                    P_marker[i_c] = i;
2917            memcpy(&(RAP_main_val[i*block_size]), RAP_val, block_size*sizeof(double));                    memcpy(&(RAP_main_val[i*block_size]), RAP_val, block_size*sizeof(double));
2918            RAP_main_idx[i] = i_c;                    RAP_main_idx[i] = i_c;
2919            i++;                    i++;
2920          } else {                  } else {
2921                    temp_val = &(RAP_main_val[P_marker[i_c] * block_size]);                    temp_val = &(RAP_main_val[P_marker[i_c] * block_size]);
2922                    for (ib=0; ib<block_size; ib++) {                    for (ib=0; ib<block_size; ib++) {
2923                      temp_val[ib] += RAP_val[ib];                      temp_val[ib] += RAP_val[ib];
2924                    }                    }
2925          }                  }
2926          }              }
2927          j3_ub = P->col_coupleBlock->pattern->ptr[i2+1];              j3_ub = P->col_coupleBlock->pattern->ptr[i2+1];
2928          for (j3=P->col_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->col_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
2929          /* note that we need to map the column index in                  /* note that we need to map the column index in
2930             P->col_coupleBlock back into the column index in                     P->col_coupleBlock back into the column index in
2931             P_ext_couple */                     P_ext_couple */
2932          i_c = Pcouple_to_RAP[P->col_coupleBlock->pattern->index[j3]] + num_Pmain_cols;                  i_c = Pcouple_to_RAP[P->col_coupleBlock->pattern->index[j3]] + num_Pmain_cols;
2933                  temp_val = &(P->col_coupleBlock->val[j3*block_size]);                  temp_val = &(P->col_coupleBlock->val[j3*block_size]);
2934                  for (irb=0; irb<row_block_size; irb++) {                  for (irb=0; irb<row_block_size; irb++) {
2935                    for (icb=0; icb<col_block_size; icb++) {                    for (icb=0; icb<col_block_size; icb++) {
# Line 2983  Paso_SystemMatrix* Paso_Preconditioner_A Line 2941  Paso_SystemMatrix* Paso_Preconditioner_A
2941                    }                    }
2942                  }                  }
2943
2944          if (P_marker[i_c] < row_marker_ext) {                  if (P_marker[i_c] < row_marker_ext) {
2945            P_marker[i_c] = j;                    P_marker[i_c] = j;
2946            memcpy(&(RAP_couple_val[j*block_size]), RAP_val, block_size*sizeof(double));                    memcpy(&(RAP_couple_val[j*block_size]), RAP_val, block_size*sizeof(double));
2947            RAP_couple_idx[j] = i_c - num_Pmain_cols;                    RAP_couple_idx[j] = i_c - num_Pmain_cols;
2948            j++;                    j++;
2949          } else {                  } else {
2950                    temp_val = &(RAP_couple_val[P_marker[i_c] * block_size]);                    temp_val = &(RAP_couple_val[P_marker[i_c] * block_size]);
2951                    for (ib=0; ib<block_size; ib++) {                    for (ib=0; ib<block_size; ib++) {
2952                      temp_val[ib] += RAP_val[ib];                      temp_val[ib] += RAP_val[ib];
2953                    }                    }
2954          }                  }
2955          }              }
2956
2957        } else {            } else {
2958          j3_ub = P->mainBlock->pattern->ptr[i2+1];              j3_ub = P->mainBlock->pattern->ptr[i2+1];
2959          for (j3=P->mainBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->mainBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
2960          i_c = P->mainBlock->pattern->index[j3];                  i_c = P->mainBlock->pattern->index[j3];
2961                  temp_val = &(P->mainBlock->val[j3*block_size]);                  temp_val = &(P->mainBlock->val[j3*block_size]);
2962                  for (irb=0; irb<row_block_size; irb++) {                  for (irb=0; irb<row_block_size; irb++) {
2963                    for (icb=0; icb<col_block_size; icb++) {                    for (icb=0; icb<col_block_size; icb++) {
# Line 3015  Paso_SystemMatrix* Paso_Preconditioner_A Line 2973  Paso_SystemMatrix* Paso_Preconditioner_A
2973                  for (ib=0; ib<block_size; ib++) {                  for (ib=0; ib<block_size; ib++) {
2974                    temp_val[ib] += RAP_val[ib];                    temp_val[ib] += RAP_val[ib];
2975                  }                  }
2976          }              }
2977          j3_ub = P->col_coupleBlock->pattern->ptr[i2+1];              j3_ub = P->col_coupleBlock->pattern->ptr[i2+1];
2978          for (j3=P->col_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {              for (j3=P->col_coupleBlock->pattern->ptr[i2]; j3<j3_ub; j3++) {
2979          i_c = Pcouple_to_RAP[P->col_coupleBlock->pattern->index[j3]] + num_Pmain_cols;                  i_c = Pcouple_to_RAP[P->col_coupleBlock->pattern->index[j3]] + num_Pmain_cols;
2980                  temp_val = &(P->col_coupleBlock->val[j3*block_size]);                  temp_val = &(P->col_coupleBlock->val[j3*block_size]);
2981                  for (irb=0; irb<row_block_size; irb++) {                  for (irb=0; irb<row_block_size; irb++) {
2982                    for (icb=0; icb<col_block_size; icb++) {                    for (icb=0; icb<col_block_size; icb++) {
# Line 3034  Paso_SystemMatrix* Paso_Preconditioner_A Line 2992  Paso_SystemMatrix* Paso_Preconditioner_A
2992                  for (ib=0; ib<block_size; ib++) {                  for (ib=0; ib<block_size; ib++) {
2993                    temp_val[ib] += RAP_val[ib];                    temp_val[ib] += RAP_val[ib];
2994                  }                  }
2995          }              }
2996        }            }
2997      }          }
2998       }       }
2999
3000       /* sort RAP_XXXX_idx and reorder RAP_XXXX_val accordingly */       /* sort RAP_XXXX_idx and reorder RAP_XXXX_val accordingly */
3001       if (i > row_marker) {       if (i > row_marker) {
3002      offset = i - row_marker;          offset = i - row_marker;
3003      temp = new index_t[offset];          temp = new index_t[offset];
3004      for (iptr=0; iptr<offset; iptr++)          for (iptr=0; iptr<offset; iptr++)
3005        temp[iptr] = RAP_main_idx[row_marker+iptr];            temp[iptr] = RAP_main_idx[row_marker+iptr];
3006      if (offset > 0) {          if (offset > 0) {
3007        #ifdef USE_QSORTG              qsort(temp, (size_t)offset, sizeof(index_t), paso::comparIndex);
3008          qsortG(temp, (size_t)offset, sizeof(index_t), paso::comparIndex);          }
3009        #else          temp_val = new double[offset * block_size];
3010          qsort(temp, (size_t)offset, sizeof(index_t), paso::comparIndex);          for (iptr=0; iptr<offset; iptr++){
3011        #endif            k = P_marker[temp[iptr]];
3012      }            memcpy(&(temp_val[iptr*block_size]), &(RAP_main_val[k*block_size]), block_size*sizeof(double));
3013      temp_val = new double[offset * block_size];            P_marker[temp[iptr]] = iptr + row_marker;
3014      for (iptr=0; iptr<offset; iptr++){          }
3015        k = P_marker[temp[iptr]];          for (iptr=0; iptr<offset; iptr++){
3016        memcpy(&(temp_val[iptr*block_size]), &(RAP_main_val[k*block_size]), block_size*sizeof(double));            RAP_main_idx[row_marker+iptr] = temp[iptr];
3017        P_marker[temp[iptr]] = iptr + row_marker;            memcpy(&(RAP_main_val[(row_marker+iptr)*block_size]), &(temp_val[iptr*block_size]), block_size*sizeof(double));
3018      }          }
3019      for (iptr=0; iptr<offset; iptr++){          delete[] temp;
3020        RAP_main_idx[row_marker+iptr] = temp[iptr];          delete[] temp_val;
memcpy(&(RAP_main_val[(row_marker+iptr)*block_size]), &(temp_val[iptr*block_size]), block_size*sizeof(double));
}
delete[] temp;
delete[] temp_val;
3021       }       }
3022       if (j > row_marker_ext) {       if (j > row_marker_ext) {
3023          offset = j - row_marker_ext;          offset = j - row_marker_ext;
# Line 3071  Paso_SystemMatrix* Paso_Preconditioner_A Line 3025  Paso_SystemMatrix* Paso_Preconditioner_A
3025          for (iptr=0; iptr<offset; iptr++)          for (iptr=0; iptr<offset; iptr++)
3026            temp[iptr] = RAP_couple_idx[row_marker_ext+iptr];            temp[iptr] = RAP_couple_idx[row_marker_ext+iptr];
3027          if (offset > 0) {          if (offset > 0) {
#ifdef USE_QSORTG
qsortG(temp, (size_t)offset, sizeof(index_t), paso::comparIndex);
#else
3028              qsort(temp, (size_t)offset, sizeof(index_t), paso::comparIndex);              qsort(temp, (size_t)offset, sizeof(index_t), paso::comparIndex);
#endif
3029          }          }
3030          temp_val = new double[offset * block_size];          temp_val = new double[offset * block_size];
3031          for (iptr=0; iptr<offset; iptr++){          for (iptr=0; iptr<offset; iptr++){
3032            k = P_marker[temp[iptr] + num_Pmain_cols];            k = P_marker[temp[iptr] + num_Pmain_cols];
3033        memcpy(&(temp_val[iptr*block_size]), &(RAP_couple_val[k*block_size]), block_size*sizeof(double));            memcpy(&(temp_val[iptr*block_size]), &(RAP_couple_val[k*block_size]), block_size*sizeof(double));
3034            P_marker[temp[iptr] + num_Pmain_cols] = iptr + row_marker_ext;            P_marker[temp[iptr] + num_Pmain_cols] = iptr + row_marker_ext;
3035          }          }
3036          for (iptr=0; iptr<offset; iptr++){          for (iptr=0; iptr<offset; iptr++){
3037            RAP_couple_idx[row_marker_ext+iptr] = temp[iptr];            RAP_couple_idx[row_marker_ext+iptr] = temp[iptr];
3038        memcpy(&(RAP_couple_val[(row_marker_ext+iptr)*block_size]), &(temp_val[iptr*block_size]), block_size*sizeof(double));            memcpy(&(RAP_couple_val[(row_marker_ext+iptr)*block_size]), &(temp_val[iptr*block_size]), block_size*sizeof(double));
3039          }          }
3040          delete[] temp;          delete[] temp;
3041          delete[] temp_val;          delete[] temp_val;
# Line 3100  Paso_SystemMatrix* Paso_Preconditioner_A Line 3050  Paso_SystemMatrix* Paso_Preconditioner_A
3050     delete[] RAP_ext_ptr;     delete[] RAP_ext_ptr;
3051     delete[] RAP_ext_idx;     delete[] RAP_ext_idx;
3052     delete[] RAP_ext_val;     delete[] RAP_ext_val;
3053     paso::SparseMatrix_free(R_main);     R_main.reset();
3054     paso::SparseMatrix_free(R_couple);     R_couple.reset();
3055
3056     /* Check whether there are empty columns in RAP_couple */     /* Check whether there are empty columns in RAP_couple */
3057     #pragma omp parallel for schedule(static) private(i)     #pragma omp parallel for schedule(static) private(i)
# Line 3112  Paso_SystemMatrix* Paso_Preconditioner_A Line 3062  Paso_SystemMatrix* Paso_Preconditioner_A
3062     for (i=0; i<j; i++) {     for (i=0; i<j; i++) {
3063       i1 = RAP_couple_idx[i];       i1 = RAP_couple_idx[i];
3064       if (P_marker[i1]) {       if (P_marker[i1]) {
3065      P_marker[i1] = 0;          P_marker[i1] = 0;
3066      k++;          k++;
3067       }       }
3068     }     }
3069
# Line 3122  Paso_SystemMatrix* Paso_Preconditioner_A Line 3072  Paso_SystemMatrix* Paso_Preconditioner_A
3072       temp = new index_t[k];       temp = new index_t[k];
3073       k = 0;       k = 0;
3074       for (i=0; i<num_RAPext_cols; i++)       for (i=0; i<num_RAPext_cols; i++)
3075      if (!P_marker[i]) {          if (!P_marker[i]) {
3076        P_marker[i] = k;            P_marker[i] = k;
3077        temp[k] = global_id_RAP[i];            temp[k] = global_id_RAP[i];
3078        k++;            k++;
3079      }          }
3080       for (i=0; i<j; i++) {       for (i=0; i<j; i++) {
3081      i1 = RAP_couple_idx[i];          i1 = RAP_couple_idx[i];
3082      RAP_couple_idx[i] = P_marker[i1];          RAP_couple_idx[i] = P_marker[i1];
3083       }       }
3084       num_RAPext_cols = k;       num_RAPext_cols = k;
3085       delete[] global_id_RAP;       delete[] global_id_RAP;
# Line 3154  Paso_SystemMatrix* Paso_Preconditioner_A Line 3104  Paso_SystemMatrix* Paso_Preconditioner_A
3104     for (i=0, j=0, k=dist[j+1]; i<num_RAPext_cols; i++) {     for (i=0, j=0, k=dist[j+1]; i<num_RAPext_cols; i++) {
3105       shared[i] = i + num_Pmain_cols;       shared[i] = i + num_Pmain_cols;
3106       if (k <= global_id_RAP[i]) {       if (k <= global_id_RAP[i]) {
3107      if (recv_len[j] > 0) {          if (recv_len[j] > 0) {
3108        neighbor[num_neighbors] = j;            neighbor[num_neighbors] = j;
3109        num_neighbors ++;            num_neighbors ++;
3110        offsetInShared[num_neighbors] = i;            offsetInShared[num_neighbors] = i;
3111      }          }
3112      while (k <= global_id_RAP[i]) {          while (k <= global_id_RAP[i]) {
3113        j++;            j++;
3114        k = dist[j+1];            k = dist[j+1];
3115      }          }
3116       }       }
3117       recv_len[j] ++;       recv_len[j] ++;
3118     }     }
# Line 3171  Paso_SystemMatrix* Paso_Preconditioner_A Line 3121  Paso_SystemMatrix* Paso_Preconditioner_A
3121       num_neighbors ++;       num_neighbors ++;
3122       offsetInShared[num_neighbors] = i;       offsetInShared[num_neighbors] = i;
3123     }     }
3124     recv = Paso_SharedComponents_alloc(num_Pmain_cols, num_neighbors,     recv.reset(new paso::SharedComponents(num_Pmain_cols, num_neighbors,
3125              neighbor, shared, offsetInShared, 1, 0, mpi_info);                 neighbor, shared, offsetInShared, 1, 0, mpi_info));
3126
3127     #ifdef ESYS_MPI     #ifdef ESYS_MPI
3128     MPI_Alltoall(recv_len, 1, MPI_INT, send_len, 1, MPI_INT, mpi_info->comm);     MPI_Alltoall(recv_len, 1, MPI_INT, send_len, 1, MPI_INT, mpi_info->comm);
# Line 3190  Paso_SystemMatrix* Paso_Preconditioner_A Line 3140  Paso_SystemMatrix* Paso_Preconditioner_A
3140     offsetInShared[0] = 0;     offsetInShared[0] = 0;
3141     for (i=0; i<size; i++) {     for (i=0; i<size; i++) {
3142       if (send_len[i] > 0) {       if (send_len[i] > 0) {
3143      neighbor[num_neighbors] = i;          neighbor[num_neighbors] = i;
3144      num_neighbors ++;          num_neighbors ++;
3145      j += send_len[i];          j += send_len[i];
3146      offsetInShared[num_neighbors] = j;          offsetInShared[num_neighbors] = j;
3147       }       }
3148     }     }
3149     delete[] shared;     delete[] shared;
# Line 3202  Paso_SystemMatrix* Paso_Preconditioner_A Line 3152  Paso_SystemMatrix* Paso_Preconditioner_A
3152       k = neighbor[i];       k = neighbor[i];
3153       #ifdef ESYS_MPI       #ifdef ESYS_MPI
3154       MPI_Irecv(&shared[j], send_len[k] , MPI_INT, k,       MPI_Irecv(&shared[j], send_len[k] , MPI_INT, k,
3155          mpi_info->msg_tag_counter+k,                  mpi_info->msg_tag_counter+k,
3156          mpi_info->comm, &mpi_requests[i]);                  mpi_info->comm, &mpi_requests[i]);
3157       #endif       #endif
3158       j += send_len[k];       j += send_len[k];
3159     }     }
# Line 3211  Paso_SystemMatrix* Paso_Preconditioner_A Line 3161  Paso_SystemMatrix* Paso_Preconditioner_A
3161       k = recv->neighbor[i];       k = recv->neighbor[i];
3162       #ifdef ESYS_MPI       #ifdef ESYS_MPI
3163       MPI_Issend(&(global_id_RAP[j]), recv_len[k], MPI_INT, k,       MPI_Issend(&(global_id_RAP[j]), recv_len[k], MPI_INT, k,
3164          mpi_info->msg_tag_counter+rank,                  mpi_info->msg_tag_counter+rank,
3165          mpi_info->comm, &mpi_requests[i+num_neighbors]);                  mpi_info->comm, &mpi_requests[i+num_neighbors]);
3166       #endif       #endif
3167       j += recv_len[k];       j += recv_len[k];
3168     }     }
3169     #ifdef ESYS_MPI     #ifdef ESYS_MPI
3170     MPI_Waitall(num_neighbors + recv->numNeighbors,     MPI_Waitall(num_neighbors + recv->numNeighbors,
3171          mpi_requests, mpi_stati);                  mpi_requests, mpi_stati);
3172     #endif     #endif
3173     ESYS_MPI_INC_COUNTER(*mpi_info, size);     ESYS_MPI_INC_COUNTER(*mpi_info, size);
3174
3175     j = offsetInShared[num_neighbors];     j = offsetInShared[num_neighbors];
3176     offset = dist[rank];     offset = dist[rank];
3177     for (i=0; i<j; i++) shared[i] = shared[i] - offset;     for (i=0; i<j; i++) shared[i] = shared[i] - offset;
3178     send = Paso_SharedComponents_alloc(num_Pmain_cols, num_neighbors,     send.reset(new paso::SharedComponents(num_Pmain_cols, num_neighbors,
3179              neighbor, shared, offsetInShared, 1, 0, mpi_info);                 neighbor, shared, offsetInShared, 1, 0, mpi_info));
3180
3181     col_connector = paso::Connector_alloc(send, recv);     col_connector.reset(new paso::Connector(send, recv));
3182     delete[] shared;     delete[] shared;
Paso_SharedComponents_free(recv);
Paso_SharedComponents_free(send);
3183
3184     /* now, create row distribution (output_distri) and col     /* now, create row distribution (output_distri) and col
3185        distribution (input_distribution) */        distribution (input_distribution) */
# Line 3256  Paso_SystemMatrix* Paso_Preconditioner_A Line 3204  Paso_SystemMatrix* Paso_Preconditioner_A
3204       i1 = RAP_couple_ptr[i_r];       i1 = RAP_couple_ptr[i_r];
3205       i2 = RAP_couple_ptr[i_r+1];       i2 = RAP_couple_ptr[i_r+1];
3206       if (i2 > i1) {       if (i2 > i1) {
3207      /* then row i_r will be in the sender of row_connector, now check          /* then row i_r will be in the sender of row_connector, now check
3208         how many neighbours i_r needs to be send to */             how many neighbours i_r needs to be send to */
3209      for (j=i1; j<i2; j++) {          for (j=i1; j<i2; j++) {
3210        i_c = RAP_couple_idx[j];            i_c = RAP_couple_idx[j];
3211        /* find out the corresponding neighbor "p" of column i_c */            /* find out the corresponding neighbor "p" of column i_c */
3212        for (p=0; p<recv->numNeighbors; p++) {            for (p=0; p<recv->numNeighbors; p++) {
3213          if (i_c < recv->offsetInShared[p+1]) {              if (i_c < recv->offsetInShared[p+1]) {
3214            k = recv->neighbor[p];                k = recv->neighbor[p];
3215            if (send_ptr[k][i_r] == 0) sum++;                if (send_ptr[k][i_r] == 0) sum++;
3216            send_ptr[k][i_r] ++;                send_ptr[k][i_r] ++;
3217            send_idx[k][len[k]] = global_id_RAP[i_c];                send_idx[k][len[k]] = global_id_RAP[i_c];
3218            len[k] ++;                len[k] ++;
3219            break;                break;
3220          }              }
3221        }            }
3222      }          }
3223       }       }
3224     }     }
3225     if (global_id_RAP) {     if (global_id_RAP) {
# Line 3286  Paso_SystemMatrix* Paso_Preconditioner_A Line 3234  Paso_SystemMatrix* Paso_Preconditioner_A
3234     offsetInShared[0] = 0;     offsetInShared[0] = 0;
3235     for (p=0, k=0; p<size; p++) {     for (p=0, k=0; p<size; p++) {
3236       for (i=0; i<num_Pmain_cols; i++) {       for (i=0; i<num_Pmain_cols; i++) {
3237      if (send_ptr[p][i] > 0) {          if (send_ptr[p][i] > 0) {
3238        shared[k] = i;            shared[k] = i;
3239        k++;            k++;
3240        send_ptr[p][send_len[p]] = send_ptr[p][i];            send_ptr[p][send_len[p]] = send_ptr[p][i];
3241        send_len[p]++;            send_len[p]++;
3242      }          }
3243       }       }
3244       if (k > offsetInShared[num_neighbors]) {       if (k > offsetInShared[num_neighbors]) {
3245      neighbor[num_neighbors] = p;          neighbor[num_neighbors] = p;
3246      num_neighbors ++;          num_neighbors ++;
3247      offsetInShared[num_neighbors] = k;          offsetInShared[num_neighbors] = k;
3248       }       }
3249     }     }
3250     send = Paso_SharedComponents_alloc(num_Pmain_cols, num_neighbors,     send.reset(new paso::SharedComponents(num_Pmain_cols, num_neighbors,
3251                          neighbor, shared, offsetInShared, 1, 0, mpi_info);                 neighbor, shared, offsetInShared, 1, 0, mpi_info));
3252
3253     /* send/recv number of rows will be sent from current proc     /* send/recv number of rows will be sent from current proc
3254        recover info for the receiver of row_connector from the sender */        recover info for the receiver of row_connector from the sender */
# Line 3312  Paso_SystemMatrix* Paso_Preconditioner_A Line 3260  Paso_SystemMatrix* Paso_Preconditioner_A
3260     j = 0;     j = 0;
3261     for (i=0; i<size; i++) {     for (i=0; i<size; i++) {
3262       if (i != rank && recv_len[i] > 0) {       if (i != rank && recv_len[i] > 0) {
3263      neighbor[num_neighbors] = i;          neighbor[num_neighbors] = i;
3264      num_neighbors ++;          num_neighbors ++;
3265      j += recv_len[i];          j += recv_len[i];
3266      offsetInShared[num_neighbors] = j;          offsetInShared[num_neighbors] = j;
3267       }       }
3268     }     }
3269     delete[] shared;     delete[] shared;
# Line 3325  Paso_SystemMatrix* Paso_Preconditioner_A Line 3273  Paso_SystemMatrix* Paso_Preconditioner_A
3273     for (i=0; i<k; i++) {     for (i=0; i<k; i++) {
3274       shared[i] = i + num_Pmain_cols;       shared[i] = i + num_Pmain_cols;
3275     }     }
3276     recv = Paso_SharedComponents_alloc(num_Pmain_cols, num_neighbors,     recv.reset(new paso::SharedComponents(num_Pmain_cols, num_neighbors,
3277                          neighbor, shared, offsetInShared, 1, 0, mpi_info);                 neighbor, shared, offsetInShared, 1, 0, mpi_info));
3278     row_connector = paso::Connector_alloc(send, recv);     row_connector.reset(new paso::Connector(send, recv));
3279     delete[] shared;     delete[] shared;
Paso_SharedComponents_free(recv);
Paso_SharedComponents_free(send);
3280
3281     /* send/recv pattern->ptr for rowCoupleBlock */     /* send/recv pattern->ptr for rowCoupleBlock */
3282     num_RAPext_rows = offsetInShared[num_neighbors];     num_RAPext_rows = offsetInShared[num_neighbors];
# Line 3340  Paso_SystemMatrix* Paso_Preconditioner_A Line 3286  Paso_SystemMatrix* Paso_Preconditioner_A
3286       i = offsetInShared[p+1];       i = offsetInShared[p+1];
3287       #ifdef ESYS_MPI       #ifdef ESYS_MPI
3288       MPI_Irecv(&(row_couple_ptr[j]), i-j, MPI_INT, neighbor[p],       MPI_Irecv(&(row_couple_ptr[j]), i-j, MPI_INT, neighbor[p],
3289          mpi_info->msg_tag_counter+neighbor[p],                  mpi_info->msg_tag_counter+neighbor[p],
3290          mpi_info->comm, &mpi_requests[p]);                  mpi_info->comm, &mpi_requests[p]);
3291       #endif       #endif
3292     }     }
3293     send = row_connector->send;     send = row_connector->send;
3294     for (p=0; p<send->numNeighbors; p++) {     for (p=0; p<send->numNeighbors; p++) {
3295       #ifdef ESYS_MPI       #ifdef ESYS_MPI
3296       MPI_Issend(send_ptr[send->neighbor[p]], send_len[send->neighbor[p]],       MPI_Issend(send_ptr[send->neighbor[p]], send_len[send->neighbor[p]],
3297          MPI_INT, send->neighbor[p],                  MPI_INT, send->neighbor[p],
3298          mpi_info->msg_tag_counter+rank,                  mpi_info->msg_tag_counter+rank,
3299          mpi_info->comm, &mpi_requests[p+num_neighbors]);                  mpi_info->comm, &mpi_requests[p+num_neighbors]);
3300       #endif       #endif
3301     }     }
3302     #ifdef ESYS_MPI     #ifdef ESYS_MPI
3303     MPI_Waitall(num_neighbors + send->numNeighbors,     MPI_Waitall(num_neighbors + send->numNeighbors,
3304      mpi_requests, mpi_stati);          mpi_requests, mpi_stati);
3305     #endif     #endif
3306     ESYS_MPI_INC_COUNTER(*mpi_info, size);     ESYS_MPI_INC_COUNTER(*mpi_info, size);
3307     delete[] send_len;     delete[] send_len;
# Line 3376  Paso_SystemMatrix* Paso_Preconditioner_A Line 3322  Paso_SystemMatrix* Paso_Preconditioner_A
3322       j2 = row_couple_ptr[offsetInShared[p+1]];       j2 = row_couple_ptr[offsetInShared[p+1]];
3323       #ifdef ESYS_MPI       #ifdef ESYS_MPI
3324       MPI_Irecv(&(row_couple_idx[j1]), j2-j1, MPI_INT, neighbor[p],       MPI_Irecv(&(row_couple_idx[j1]), j2-j1, MPI_INT, neighbor[p],
3325          mpi_info->msg_tag_counter+neighbor[p],                  mpi_info->msg_tag_counter+neighbor[p],
3326          mpi_info->comm, &mpi_requests[p]);                  mpi_info->comm, &mpi_requests[p]);
3327       #endif       #endif
3328     }     }
3329     for (p=0; p<send->numNeighbors; p++) {     for (p=0; p<send->numNeighbors; p++) {
3330       #ifdef ESYS_MPI       #ifdef ESYS_MPI
3331       MPI_Issend(send_idx[send->neighbor[p]], len[send->neighbor[p]],       MPI_Issend(send_idx[send->neighbor[p]], len[send->neighbor[p]],
3332          MPI_INT, send->neighbor[p],                  MPI_INT, send->neighbor[p],
3333          mpi_info->msg_tag_counter+rank,                  mpi_info->msg_tag_counter+rank,
3334          mpi_info->comm, &mpi_requests[p+num_neighbors]);                  mpi_info->comm, &mpi_requests[p+num_neighbors]);
3335       #endif       #endif
3336     }     }
3337     #ifdef ESYS_MPI     #ifdef ESYS_MPI
3338     MPI_Waitall(num_neighbors + send->numNeighbors,     MPI_Waitall(num_neighbors + send->numNeighbors,
3339          mpi_requests, mpi_stati);                  mpi_requests, mpi_stati);
3340     #endif     #endif
3341     ESYS_MPI_INC_COUNTER(*mpi_info, size);     ESYS_MPI_INC_COUNTER(*mpi_info, size);
3342
# Line 3414  Paso_SystemMatrix* Paso_Preconditioner_A Line 3360  Paso_SystemMatrix* Paso_Preconditioner_A
3360     Esys_MPIInfo_free(mpi_info);     Esys_MPIInfo_free(mpi_info);
3361
3362     /* Now, we can create pattern for mainBlock and coupleBlock */     /* Now, we can create pattern for mainBlock and coupleBlock */
3363     main_pattern = paso::Pattern_alloc(MATRIX_FORMAT_DEFAULT, num_Pmain_cols,     paso::Pattern_ptr main_pattern(new paso::Pattern(MATRIX_FORMAT_DEFAULT,
3364              num_Pmain_cols, RAP_main_ptr, RAP_main_idx);                 num_Pmain_cols, num_Pmain_cols, RAP_main_ptr, RAP_main_idx));
3365     col_couple_pattern = paso::Pattern_alloc(MATRIX_FORMAT_DEFAULT,     paso::Pattern_ptr col_couple_pattern(new paso::Pattern(
3366              num_Pmain_cols, num_RAPext_cols,                 MATRIX_FORMAT_DEFAULT, num_Pmain_cols, num_RAPext_cols,
3367              RAP_couple_ptr, RAP_couple_idx);                 RAP_couple_ptr, RAP_couple_idx));
3368     row_couple_pattern = paso::Pattern_alloc(MATRIX_FORMAT_DEFAULT,     paso::Pattern_ptr row_couple_pattern(new paso::Pattern(
3369              num_RAPext_rows, num_Pmain_cols,                 MATRIX_FORMAT_DEFAULT, num_RAPext_rows, num_Pmain_cols,
3370              row_couple_ptr, row_couple_idx);                 row_couple_ptr, row_couple_idx));
3371
3372     /* next, create the system matrix */     /* next, create the system matrix */
3373     pattern = new paso::SystemMatrixPattern(MATRIX_FORMAT_DEFAULT,     pattern.reset(new paso::SystemMatrixPattern(MATRIX_FORMAT_DEFAULT,
3374                  output_dist, input_dist, main_pattern, col_couple_pattern,                  output_dist, input_dist, main_pattern, col_couple_pattern,
3375                  row_couple_pattern, col_connector, row_connector);                  row_couple_pattern, col_connector, row_connector));
3376     out = Paso_SystemMatrix_alloc(A->type, pattern,     out.reset(new paso::SystemMatrix(A->type, pattern, row_block_size,
3377                  row_block_size, col_block_size, FALSE);                                      col_block_size, false));
3378
3379     /* finally, fill in the data*/     /* finally, fill in the data*/
3380     memcpy(out->mainBlock->val, RAP_main_val,     memcpy(out->mainBlock->val, RAP_main_val,