228 |
case Nodes: |
case Nodes: |
229 |
case ReducedNodes: //FIXME: reduced |
case ReducedNodes: //FIXME: reduced |
230 |
return &m_nodeId[0]; |
return &m_nodeId[0]; |
231 |
|
case DegreesOfFreedom: |
232 |
|
case ReducedDegreesOfFreedom: //FIXME: reduced |
233 |
|
return &m_dofId[0]; |
234 |
case Elements: |
case Elements: |
235 |
case ReducedElements: |
case ReducedElements: |
236 |
return &m_elementId[0]; |
return &m_elementId[0]; |
744 |
const IndexVector faces=getNumFacesPerBoundary(); |
const IndexVector faces=getNumFacesPerBoundary(); |
745 |
const index_t left = (m_offset0==0 ? 0 : 1); |
const index_t left = (m_offset0==0 ? 0 : 1); |
746 |
const index_t bottom = (m_offset1==0 ? 0 : 1); |
const index_t bottom = (m_offset1==0 ? 0 : 1); |
747 |
|
const int numDOF=getNumDOF(); |
748 |
|
vector<IndexVector> colIndices(numDOF); // for the couple blocks |
749 |
|
int dofCounter=numDOF; |
750 |
|
|
751 |
// corner node from bottom-left |
// corner node from bottom-left |
752 |
if (faces[0] == 0 && faces[2] == 0) { |
if (faces[0] == 0 && faces[2] == 0) { |
753 |
neighbour.push_back(m_mpiInfo->rank-m_NX-1); |
neighbour.push_back(m_mpiInfo->rank-m_NX-1); |
754 |
offsetInShared.push_back(offsetInShared.back()+1); |
offsetInShared.push_back(offsetInShared.back()+1); |
755 |
sendShared.push_back(m_nodeId[m_N0+left]); |
sendShared.push_back(-1); |
756 |
recvShared.push_back(m_nodeId[0]); |
recvShared.push_back(dofCounter); |
757 |
|
colIndices[0].push_back(dofCounter-numDOF); |
758 |
|
++dofCounter; |
759 |
} |
} |
760 |
// bottom edge |
// bottom edge |
761 |
if (faces[2] == 0) { |
if (faces[2] == 0) { |
762 |
neighbour.push_back(m_mpiInfo->rank-m_NX); |
neighbour.push_back(m_mpiInfo->rank-m_NX); |
763 |
offsetInShared.push_back(offsetInShared.back()+m_N0-left); |
offsetInShared.push_back(offsetInShared.back()+m_N0-left); |
764 |
for (dim_t i=left; i<m_N0; i++) { |
for (dim_t i=0; i<m_N0-left; i++, dofCounter++) { |
765 |
// easy case, we know the neighbour id's |
sendShared.push_back(i); |
766 |
sendShared.push_back(m_nodeId[m_N0+i]); |
recvShared.push_back(dofCounter); |
767 |
recvShared.push_back(m_nodeId[i]); |
if (i>0) |
768 |
|
colIndices[i-1].push_back(dofCounter-numDOF); |
769 |
|
colIndices[i].push_back(dofCounter-numDOF); |
770 |
|
if (i<m_N0-left-1) |
771 |
|
colIndices[i+1].push_back(dofCounter-numDOF); |
772 |
} |
} |
773 |
} |
} |
774 |
// corner node from bottom-right |
// corner node from bottom-right |
775 |
if (faces[1] == 0 && faces[2] == 0) { |
if (faces[1] == 0 && faces[2] == 0) { |
776 |
neighbour.push_back(m_mpiInfo->rank-m_NX+1); |
neighbour.push_back(m_mpiInfo->rank-m_NX+1); |
|
const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1); |
|
|
const index_t N1=(neighbour.back()/m_NX == 0 ? m_N1 : m_N1-1); |
|
|
const index_t first=m_nodeDistribution[neighbour.back()]; |
|
777 |
offsetInShared.push_back(offsetInShared.back()+1); |
offsetInShared.push_back(offsetInShared.back()+1); |
778 |
sendShared.push_back(m_nodeId[(bottom+1)*m_N0-1]); |
sendShared.push_back(-1); |
779 |
recvShared.push_back(first+N0*(N1-1)); |
recvShared.push_back(dofCounter); |
780 |
} |
colIndices[m_N0-left-2].push_back(dofCounter-numDOF); |
781 |
// left edge |
colIndices[m_N0-left-1].push_back(dofCounter-numDOF); |
782 |
if (faces[0] == 0) { |
++dofCounter; |
|
neighbour.push_back(m_mpiInfo->rank-1); |
|
|
offsetInShared.push_back(offsetInShared.back()+m_N1-bottom); |
|
|
for (dim_t i=bottom; i<m_N1; i++) { |
|
|
// easy case, we know the neighbour id's |
|
|
sendShared.push_back(m_nodeId[i*m_N0+1]); |
|
|
recvShared.push_back(m_nodeId[i*m_N0]); |
|
|
} |
|
783 |
} |
} |
784 |
// right edge |
// right edge |
785 |
if (faces[1] == 0) { |
if (faces[1] == 0) { |
786 |
neighbour.push_back(m_mpiInfo->rank+1); |
neighbour.push_back(m_mpiInfo->rank+1); |
|
const index_t rightN0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1); |
|
|
const index_t first=m_nodeDistribution[neighbour.back()]; |
|
787 |
offsetInShared.push_back(offsetInShared.back()+m_N1-bottom); |
offsetInShared.push_back(offsetInShared.back()+m_N1-bottom); |
788 |
for (dim_t i=bottom; i<m_N1; i++) { |
for (dim_t i=0; i<m_N1-bottom; i++, dofCounter++) { |
789 |
sendShared.push_back(m_nodeId[(i+1)*m_N0-1]); |
sendShared.push_back((i+1)*(m_N0-left)-1); |
790 |
recvShared.push_back(first+rightN0*(i-bottom)); |
recvShared.push_back(dofCounter); |
791 |
|
if (i>0) |
792 |
|
colIndices[i*(m_N0-left)-1].push_back(dofCounter-numDOF); |
793 |
|
colIndices[(i+1)*(m_N0-left)-1].push_back(dofCounter-numDOF); |
794 |
|
if (i<m_N1-bottom-1) |
795 |
|
colIndices[(i+2)*(m_N0-left)-1].push_back(dofCounter-numDOF); |
796 |
} |
} |
797 |
} |
} |
798 |
// corner node from top-left |
// corner node from top-right |
799 |
if (faces[0] == 0 && faces[3] == 0) { |
if (faces[1] == 0 && faces[3] == 0) { |
800 |
neighbour.push_back(m_mpiInfo->rank+m_NX-1); |
neighbour.push_back(m_mpiInfo->rank+m_NX+1); |
|
const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1); |
|
|
const index_t first=m_nodeDistribution[neighbour.back()]; |
|
801 |
offsetInShared.push_back(offsetInShared.back()+1); |
offsetInShared.push_back(offsetInShared.back()+1); |
802 |
sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+left]); |
sendShared.push_back(numDOF-1); |
803 |
recvShared.push_back(first+N0-1); |
recvShared.push_back(dofCounter); |
804 |
|
colIndices[numDOF-1].push_back(dofCounter-numDOF); |
805 |
|
++dofCounter; |
806 |
} |
} |
807 |
// top edge |
// top edge |
808 |
if (faces[3] == 0) { |
if (faces[3] == 0) { |
809 |
neighbour.push_back(m_mpiInfo->rank+m_NX); |
neighbour.push_back(m_mpiInfo->rank+m_NX); |
|
const index_t first=m_nodeDistribution[neighbour.back()]; |
|
810 |
offsetInShared.push_back(offsetInShared.back()+m_N0-left); |
offsetInShared.push_back(offsetInShared.back()+m_N0-left); |
811 |
for (dim_t i=left; i<m_N0; i++) { |
for (dim_t i=0; i<m_N0-left; i++, dofCounter++) { |
812 |
sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+i]); |
sendShared.push_back(numDOF-m_N0+left+i); |
813 |
recvShared.push_back(first+i-left); |
recvShared.push_back(dofCounter); |
814 |
|
if (i>0) |
815 |
|
colIndices[numDOF-m_N0+left+i-1].push_back(dofCounter-numDOF); |
816 |
|
colIndices[numDOF-m_N0+left+i].push_back(dofCounter-numDOF); |
817 |
|
if (i<m_N0-left-1) |
818 |
|
colIndices[numDOF-m_N0+left+i+1].push_back(dofCounter-numDOF); |
819 |
} |
} |
820 |
} |
} |
821 |
// corner node from top-right |
// corner node from top-left |
822 |
if (faces[1] == 0 && faces[3] == 0) { |
if (faces[0] == 0 && faces[3] == 0) { |
823 |
neighbour.push_back(m_mpiInfo->rank+m_NX+1); |
neighbour.push_back(m_mpiInfo->rank+m_NX-1); |
|
const index_t first=m_nodeDistribution[neighbour.back()]; |
|
824 |
offsetInShared.push_back(offsetInShared.back()+1); |
offsetInShared.push_back(offsetInShared.back()+1); |
825 |
sendShared.push_back(m_nodeId[m_N0*m_N1-1]); |
sendShared.push_back(numDOF-m_N0+left); |
826 |
recvShared.push_back(first); |
recvShared.push_back(dofCounter); |
827 |
|
colIndices[numDOF-m_N0+left].push_back(dofCounter-numDOF); |
828 |
|
++dofCounter; |
829 |
} |
} |
830 |
const int numDOF=m_nodeDistribution[m_mpiInfo->rank+1]-m_nodeDistribution[m_mpiInfo->rank]; |
// left edge |
831 |
/* |
if (faces[0] == 0) { |
832 |
|
neighbour.push_back(m_mpiInfo->rank-1); |
833 |
|
offsetInShared.push_back(offsetInShared.back()+m_N1-bottom); |
834 |
|
for (dim_t i=0; i<m_N1-bottom; i++, dofCounter++) { |
835 |
|
sendShared.push_back(-1); |
836 |
|
recvShared.push_back(dofCounter); |
837 |
|
if (i>0) |
838 |
|
colIndices[(i-1)*(m_N0-left)].push_back(dofCounter-numDOF); |
839 |
|
colIndices[i*(m_N0-left)].push_back(dofCounter-numDOF); |
840 |
|
if (i<m_N1-bottom-1) |
841 |
|
colIndices[(i+1)*(m_N0-left)].push_back(dofCounter-numDOF); |
842 |
|
} |
843 |
|
} |
844 |
|
/**/ |
845 |
cout << "--- rcv_shcomp ---" << endl; |
cout << "--- rcv_shcomp ---" << endl; |
846 |
cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl; |
cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl; |
847 |
for (size_t i=0; i<neighbour.size(); i++) { |
for (size_t i=0; i<neighbour.size(); i++) { |
855 |
for (size_t i=0; i<sendShared.size(); i++) { |
for (size_t i=0; i<sendShared.size(); i++) { |
856 |
cout << "shared[" << i << "]=" << sendShared[i] << endl; |
cout << "shared[" << i << "]=" << sendShared[i] << endl; |
857 |
} |
} |
858 |
*/ |
/**/ |
859 |
|
|
860 |
Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc( |
Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc( |
861 |
numDOF, neighbour.size(), &neighbour[0], &sendShared[0], |
numDOF, neighbour.size(), &neighbour[0], &sendShared[0], |
899 |
} |
} |
900 |
*/ |
*/ |
901 |
|
|
902 |
ptr.clear(); |
// column & row couple patterns |
903 |
|
ptr.assign(1, 0); |
904 |
index.clear(); |
index.clear(); |
905 |
|
|
906 |
// column & row couple patterns |
for (index_t i=0; i<numDOF; i++) { |
907 |
Paso_Pattern *colCouplePattern, *rowCouplePattern; |
index.insert(index.end(), colIndices[i].begin(), colIndices[i].end()); |
908 |
generateCouplePatterns(&colCouplePattern, &rowCouplePattern); |
ptr.push_back(ptr.back()+colIndices[i].size()); |
909 |
|
} |
910 |
|
|
911 |
|
// paso will manage the memory |
912 |
|
indexC = MEMALLOC(index.size(), index_t); |
913 |
|
ptrC = MEMALLOC(ptr.size(), index_t); |
914 |
|
copy(index.begin(), index.end(), indexC); |
915 |
|
copy(ptr.begin(), ptr.end(), ptrC); |
916 |
|
M=ptr.size()-1; |
917 |
|
N=dofCounter-numDOF; |
918 |
|
Paso_Pattern *colCouplePattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, |
919 |
|
M, N, ptrC, indexC); |
920 |
|
|
921 |
|
/**/ |
922 |
|
cout << "--- colCouple_pattern ---" << endl; |
923 |
|
cout << "M=" << M << ", N=" << N << endl; |
924 |
|
for (size_t i=0; i<ptr.size(); i++) { |
925 |
|
cout << "ptr[" << i << "]=" << ptr[i] << endl; |
926 |
|
} |
927 |
|
for (size_t i=0; i<index.size(); i++) { |
928 |
|
cout << "index[" << i << "]=" << index[i] << endl; |
929 |
|
} |
930 |
|
/**/ |
931 |
|
|
932 |
|
// now build the row couple pattern |
933 |
|
IndexVector ptr2(1,0); |
934 |
|
IndexVector index2; |
935 |
|
for (dim_t id=0; id<N; id++) { |
936 |
|
dim_t n=0; |
937 |
|
for (dim_t i=0; i<M; i++) { |
938 |
|
for (dim_t j=ptr[i]; j<ptr[i+1]; j++) { |
939 |
|
if (index[j]==id) { |
940 |
|
index2.push_back(i); |
941 |
|
n++; |
942 |
|
break; |
943 |
|
} |
944 |
|
} |
945 |
|
} |
946 |
|
ptr2.push_back(ptr2.back()+n); |
947 |
|
} |
948 |
|
|
949 |
|
// paso will manage the memory |
950 |
|
indexC = MEMALLOC(index2.size(), index_t); |
951 |
|
ptrC = MEMALLOC(ptr2.size(), index_t); |
952 |
|
copy(index2.begin(), index2.end(), indexC); |
953 |
|
copy(ptr2.begin(), ptr2.end(), ptrC); |
954 |
|
Paso_Pattern *rowCouplePattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, |
955 |
|
N, M, ptrC, indexC); |
956 |
|
|
957 |
|
/**/ |
958 |
|
cout << "--- rowCouple_pattern ---" << endl; |
959 |
|
cout << "M=" << N << ", N=" << M << endl; |
960 |
|
for (size_t i=0; i<ptr2.size(); i++) { |
961 |
|
cout << "ptr[" << i << "]=" << ptr2[i] << endl; |
962 |
|
} |
963 |
|
for (size_t i=0; i<index2.size(); i++) { |
964 |
|
cout << "index[" << i << "]=" << index2[i] << endl; |
965 |
|
} |
966 |
|
/**/ |
967 |
|
|
968 |
// allocate paso distribution |
// allocate paso distribution |
969 |
Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo, |
Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo, |
1042 |
} |
} |
1043 |
|
|
1044 |
//protected |
//protected |
1045 |
|
dim_t Rectangle::getNumDOF() const |
1046 |
|
{ |
1047 |
|
return m_nodeDistribution[m_mpiInfo->rank+1] |
1048 |
|
-m_nodeDistribution[m_mpiInfo->rank]; |
1049 |
|
} |
1050 |
|
|
1051 |
|
//protected |
1052 |
dim_t Rectangle::getNumFaceElements() const |
dim_t Rectangle::getNumFaceElements() const |
1053 |
{ |
{ |
1054 |
const IndexVector faces = getNumFacesPerBoundary(); |
const IndexVector faces = getNumFacesPerBoundary(); |
1107 |
} |
} |
1108 |
m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal(); |
m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal(); |
1109 |
|
|
1110 |
|
m_dofId.resize(getNumDOF()); |
1111 |
m_nodeId.resize(getNumNodes()); |
m_nodeId.resize(getNumNodes()); |
1112 |
|
|
1113 |
// the bottom row and left column are not owned by this rank so the |
// the bottom row and left column are not owned by this rank so the |
1145 |
#pragma omp parallel for |
#pragma omp parallel for |
1146 |
for (dim_t i1=bottom; i1<m_N1; i1++) { |
for (dim_t i1=bottom; i1<m_N1; i1++) { |
1147 |
for (dim_t i0=left; i0<m_N0; i0++) { |
for (dim_t i0=left; i0<m_N0; i0++) { |
1148 |
m_nodeId[i0+i1*m_N0] = firstId+i0-left+(i1-bottom)*(m_N0-left); |
const index_t idx=i0-left+(i1-bottom)*(m_N0-left); |
1149 |
|
m_nodeId[i0+i1*m_N0] = firstId+idx; |
1150 |
|
m_dofId[idx] = firstId+idx; |
1151 |
} |
} |
1152 |
} |
} |
1153 |
m_nodeTags.assign(getNumNodes(), 0); |
m_nodeTags.assign(getNumNodes(), 0); |
1242 |
return num; |
return num; |
1243 |
} |
} |
1244 |
|
|
|
//private |
|
|
void Rectangle::generateCouplePatterns(Paso_Pattern** colPattern, Paso_Pattern** rowPattern) const |
|
|
{ |
|
|
IndexVector ptr(1,0); |
|
|
IndexVector index; |
|
|
const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1); |
|
|
const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1); |
|
|
const IndexVector faces=getNumFacesPerBoundary(); |
|
|
|
|
|
// bottom edge |
|
|
dim_t n=0; |
|
|
if (faces[0] == 0) { |
|
|
index.push_back(2*(myN0+myN1+1)); |
|
|
index.push_back(2*(myN0+myN1+1)+1); |
|
|
n+=2; |
|
|
if (faces[2] == 0) { |
|
|
index.push_back(0); |
|
|
index.push_back(1); |
|
|
index.push_back(2); |
|
|
n+=3; |
|
|
} |
|
|
} else if (faces[2] == 0) { |
|
|
index.push_back(1); |
|
|
index.push_back(2); |
|
|
n+=2; |
|
|
} |
|
|
// n=neighbours of bottom-left corner node |
|
|
ptr.push_back(ptr.back()+n); |
|
|
n=0; |
|
|
if (faces[2] == 0) { |
|
|
for (dim_t i=1; i<myN0-1; i++) { |
|
|
index.push_back(i); |
|
|
index.push_back(i+1); |
|
|
index.push_back(i+2); |
|
|
ptr.push_back(ptr.back()+3); |
|
|
} |
|
|
index.push_back(myN0-1); |
|
|
index.push_back(myN0); |
|
|
n+=2; |
|
|
if (faces[1] == 0) { |
|
|
index.push_back(myN0+1); |
|
|
index.push_back(myN0+2); |
|
|
index.push_back(myN0+3); |
|
|
n+=3; |
|
|
} |
|
|
} else { |
|
|
for (dim_t i=1; i<myN0-1; i++) { |
|
|
ptr.push_back(ptr.back()); |
|
|
} |
|
|
if (faces[1] == 0) { |
|
|
index.push_back(myN0+2); |
|
|
index.push_back(myN0+3); |
|
|
n+=2; |
|
|
} |
|
|
} |
|
|
// n=neighbours of bottom-right corner node |
|
|
ptr.push_back(ptr.back()+n); |
|
|
|
|
|
// 2nd row to 2nd last row |
|
|
for (dim_t i=1; i<myN1-1; i++) { |
|
|
// left edge |
|
|
if (faces[0] == 0) { |
|
|
index.push_back(2*(myN0+myN1+2)-i); |
|
|
index.push_back(2*(myN0+myN1+2)-i-1); |
|
|
index.push_back(2*(myN0+myN1+2)-i-2); |
|
|
ptr.push_back(ptr.back()+3); |
|
|
} else { |
|
|
ptr.push_back(ptr.back()); |
|
|
} |
|
|
for (dim_t j=1; j<myN0-1; j++) { |
|
|
ptr.push_back(ptr.back()); |
|
|
} |
|
|
// right edge |
|
|
if (faces[1] == 0) { |
|
|
index.push_back(myN0+i+1); |
|
|
index.push_back(myN0+i+2); |
|
|
index.push_back(myN0+i+3); |
|
|
ptr.push_back(ptr.back()+3); |
|
|
} else { |
|
|
ptr.push_back(ptr.back()); |
|
|
} |
|
|
} |
|
|
|
|
|
// top edge |
|
|
n=0; |
|
|
if (faces[0] == 0) { |
|
|
index.push_back(2*myN0+myN1+5); |
|
|
index.push_back(2*myN0+myN1+4); |
|
|
n+=2; |
|
|
if (faces[3] == 0) { |
|
|
index.push_back(2*myN0+myN1+3); |
|
|
index.push_back(2*myN0+myN1+2); |
|
|
index.push_back(2*myN0+myN1+1); |
|
|
n+=3; |
|
|
} |
|
|
} else if (faces[3] == 0) { |
|
|
index.push_back(2*myN0+myN1+2); |
|
|
index.push_back(2*myN0+myN1+1); |
|
|
n+=2; |
|
|
} |
|
|
// n=neighbours of top-left corner node |
|
|
ptr.push_back(ptr.back()+n); |
|
|
n=0; |
|
|
if (faces[3] == 0) { |
|
|
for (dim_t i=1; i<myN0-1; i++) { |
|
|
index.push_back(2*myN0+myN1+i+1); |
|
|
index.push_back(2*myN0+myN1+i); |
|
|
index.push_back(2*myN0+myN1+i-1); |
|
|
ptr.push_back(ptr.back()+3); |
|
|
} |
|
|
index.push_back(myN0+myN1+4); |
|
|
index.push_back(myN0+myN1+3); |
|
|
n+=2; |
|
|
if (faces[1] == 0) { |
|
|
index.push_back(myN0+myN1+2); |
|
|
index.push_back(myN0+myN1+1); |
|
|
index.push_back(myN0+myN1); |
|
|
n+=3; |
|
|
} |
|
|
} else { |
|
|
for (dim_t i=1; i<myN0-1; i++) { |
|
|
ptr.push_back(ptr.back()); |
|
|
} |
|
|
if (faces[1] == 0) { |
|
|
index.push_back(myN0+myN1+1); |
|
|
index.push_back(myN0+myN1); |
|
|
n+=2; |
|
|
} |
|
|
} |
|
|
// n=neighbours of top-right corner node |
|
|
ptr.push_back(ptr.back()+n); |
|
|
|
|
|
dim_t M=ptr.size()-1; |
|
|
map<index_t,index_t> idMap; |
|
|
dim_t N=0; |
|
|
for (IndexVector::iterator it=index.begin(); it!=index.end(); it++) { |
|
|
if (idMap.find(*it)==idMap.end()) { |
|
|
idMap[*it]=N; |
|
|
*it=N++; |
|
|
} else { |
|
|
*it=idMap[*it]; |
|
|
} |
|
|
} |
|
|
|
|
|
/* |
|
|
cout << "--- colCouple_pattern ---" << endl; |
|
|
cout << "M=" << M << ", N=" << N << endl; |
|
|
for (size_t i=0; i<ptr.size(); i++) { |
|
|
cout << "ptr[" << i << "]=" << ptr[i] << endl; |
|
|
} |
|
|
for (size_t i=0; i<index.size(); i++) { |
|
|
cout << "index[" << i << "]=" << index[i] << endl; |
|
|
} |
|
|
*/ |
|
|
|
|
|
// now build the row couple pattern |
|
|
IndexVector ptr2(1,0); |
|
|
IndexVector index2; |
|
|
for (dim_t id=0; id<N; id++) { |
|
|
n=0; |
|
|
for (dim_t i=0; i<M; i++) { |
|
|
for (dim_t j=ptr[i]; j<ptr[i+1]; j++) { |
|
|
if (index[j]==id) { |
|
|
index2.push_back(i); |
|
|
n++; |
|
|
break; |
|
|
} |
|
|
} |
|
|
} |
|
|
ptr2.push_back(ptr2.back()+n); |
|
|
} |
|
|
|
|
|
/* |
|
|
cout << "--- rowCouple_pattern ---" << endl; |
|
|
cout << "M=" << N << ", N=" << M << endl; |
|
|
for (size_t i=0; i<ptr2.size(); i++) { |
|
|
cout << "ptr[" << i << "]=" << ptr2[i] << endl; |
|
|
} |
|
|
for (size_t i=0; i<index2.size(); i++) { |
|
|
cout << "index[" << i << "]=" << index2[i] << endl; |
|
|
} |
|
|
*/ |
|
|
|
|
|
// paso will manage the memory |
|
|
index_t* indexC = MEMALLOC(index.size(), index_t); |
|
|
index_t* ptrC = MEMALLOC(ptr.size(), index_t); |
|
|
copy(index.begin(), index.end(), indexC); |
|
|
copy(ptr.begin(), ptr.end(), ptrC); |
|
|
*colPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, M, N, ptrC, indexC); |
|
|
|
|
|
// paso will manage the memory |
|
|
indexC = MEMALLOC(index2.size(), index_t); |
|
|
ptrC = MEMALLOC(ptr2.size(), index_t); |
|
|
copy(index2.begin(), index2.end(), indexC); |
|
|
copy(ptr2.begin(), ptr2.end(), ptrC); |
|
|
*rowPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, N, M, ptrC, indexC); |
|
|
} |
|
|
|
|
1245 |
//protected |
//protected |
1246 |
void Rectangle::interpolateNodesOnElements(escript::Data& out, |
void Rectangle::interpolateNodesOnElements(escript::Data& out, |
1247 |
escript::Data& in, bool reduced) const |
escript::Data& in, bool reduced) const |
1405 |
} |
} |
1406 |
|
|
1407 |
//protected |
//protected |
1408 |
|
void Rectangle::nodesToDOF(escript::Data& out, escript::Data& in) const |
1409 |
|
{ |
1410 |
|
const dim_t numComp = in.getDataPointSize(); |
1411 |
|
out.requireWrite(); |
1412 |
|
|
1413 |
|
const index_t left = (m_offset0==0 ? 0 : 1); |
1414 |
|
const index_t bottom = (m_offset1==0 ? 0 : 1); |
1415 |
|
index_t n=0; |
1416 |
|
for (index_t i=bottom; i<m_N1; i++) { |
1417 |
|
for (index_t j=left; j<m_N0; j++, n++) { |
1418 |
|
const double* src=in.getSampleDataRO(j+i*m_N0); |
1419 |
|
copy(src, src+numComp, out.getSampleDataRW(n)); |
1420 |
|
} |
1421 |
|
} |
1422 |
|
} |
1423 |
|
|
1424 |
|
//protected |
1425 |
void Rectangle::assemblePDESingle(Paso_SystemMatrix* mat, |
void Rectangle::assemblePDESingle(Paso_SystemMatrix* mat, |
1426 |
escript::Data& rhs, const escript::Data& A, const escript::Data& B, |
escript::Data& rhs, const escript::Data& A, const escript::Data& B, |
1427 |
const escript::Data& C, const escript::Data& D, |
const escript::Data& C, const escript::Data& D, |
2132 |
/* GENERATOR SNIP_PDE_SINGLE BOTTOM */ |
/* GENERATOR SNIP_PDE_SINGLE BOTTOM */ |
2133 |
|
|
2134 |
// add to matrix (if add_EM_S) and RHS (if add_EM_F) |
// add to matrix (if add_EM_S) and RHS (if add_EM_F) |
2135 |
|
const index_t left = (m_offset0==0 ? 0 : 1); |
2136 |
|
const index_t bottom = (m_offset1==0 ? 0 : 1); |
2137 |
|
const index_t firstNode=(m_N0-bottom)*k1+k0-left; |
2138 |
|
const int numDOF=getNumDOF(); |
2139 |
|
int dof=numDOF; |
2140 |
|
|
2141 |
IndexVector rowIndex; |
IndexVector rowIndex; |
2142 |
const index_t firstNode=k1*m_N0+k0; |
if (m_offset0==0) { |
2143 |
rowIndex.push_back(firstNode); |
if (m_offset1>0 && k1==0) { |
2144 |
rowIndex.push_back(firstNode+1); |
rowIndex.push_back(dof++); |
2145 |
rowIndex.push_back(firstNode+m_N0); |
rowIndex.push_back(dof++); |
2146 |
rowIndex.push_back(firstNode+1+m_N0); |
rowIndex.push_back(firstNode); |
2147 |
if (add_EM_S) { |
rowIndex.push_back(firstNode+1); |
2148 |
addToSystemMatrix(mat, 4, rowIndex, 1, 4, rowIndex, 1, EM_S); |
} else { |
2149 |
|
rowIndex.push_back(firstNode); |
2150 |
|
rowIndex.push_back(firstNode+1); |
2151 |
|
rowIndex.push_back(firstNode+m_N0-left); |
2152 |
|
rowIndex.push_back(firstNode+m_N0-left+1); |
2153 |
|
} |
2154 |
|
} else { |
2155 |
|
if (k0==0) { |
2156 |
|
rowIndex.push_back(dof++); |
2157 |
|
if (m_offset1>0 && k1==0) |
2158 |
|
rowIndex.push_back(dof++); |
2159 |
|
else |
2160 |
|
rowIndex.push_back(firstNode); |
2161 |
|
rowIndex.push_back(dof++); |
2162 |
|
rowIndex.push_back(firstNode+m_N0-left); |
2163 |
|
} else { |
2164 |
|
rowIndex.push_back(firstNode); |
2165 |
|
rowIndex.push_back(firstNode+1); |
2166 |
|
rowIndex.push_back(firstNode+m_N0-left); |
2167 |
|
rowIndex.push_back(firstNode+m_N0-left+1); |
2168 |
|
} |
2169 |
|
} |
2170 |
|
Paso_Coupler_startCollect(mat->col_coupler, &EM_F[0]); |
2171 |
|
double* recvBuffer=Paso_Coupler_finishCollect(mat->col_coupler); |
2172 |
|
for (int i=0; i<mat->col_coupler->connector->send->numSharedComponents; i++) { |
2173 |
|
if (mat->col_coupler->connector->send->shared[i]>=0) |
2174 |
|
EM_F[mat->col_coupler->connector->send->shared[i]]+=recvBuffer[i]; |
2175 |
} |
} |
2176 |
|
|
2177 |
|
IndexVector colIndex=rowIndex; |
2178 |
|
/* |
2179 |
|
Paso_Coupler* coupler=Paso_Coupler_alloc(mat->col_coupler->connector, 4); |
2180 |
|
Paso_Coupler_startCollect(coupler, &EM_S[0]); |
2181 |
|
recvBuffer=Paso_Coupler_finishCollect(coupler); |
2182 |
|
for (int i=0; i<coupler->connector->recv->numSharedComponents; i++) { |
2183 |
|
if (coupler->connector->recv->shared[i]>=0) { |
2184 |
|
for (int j=0; j<4; j++) |
2185 |
|
EM_S.push_back(recvBuffer[i+4*j]); |
2186 |
|
colIndex.push_back(coupler->connector->recv->shared[i]); |
2187 |
|
} |
2188 |
|
} |
2189 |
|
Paso_Coupler_free(coupler);*/ |
2190 |
|
|
2191 |
if (add_EM_F) { |
if (add_EM_F) { |
2192 |
|
cout << "-- AddtoRHS -- " << endl; |
2193 |
double *F_p=rhs.getSampleDataRW(0); |
double *F_p=rhs.getSampleDataRW(0); |
2194 |
|
cout << "F:"<<endl; |
2195 |
for (index_t i=0; i<4; i++) { |
for (index_t i=0; i<4; i++) { |
2196 |
if (rowIndex[i]<getNumNodes()) |
if (rowIndex[i]<getNumDOF()) { |
2197 |
F_p[rowIndex[i]]+=EM_F[i]; |
F_p[rowIndex[i]]+=EM_F[i]; |
2198 |
|
cout << "F[" << rowIndex[i] << "]=" << F_p[rowIndex[i]] << endl; |
2199 |
|
} |
2200 |
} |
} |
2201 |
|
cout << "---"<<endl; |
2202 |
} |
} |
2203 |
|
if (add_EM_S) { |
2204 |
|
cout << "-- AddtoSystem -- " << endl; |
2205 |
|
addToSystemMatrix(mat, rowIndex, 1, colIndex, 1, EM_S); |
2206 |
|
} |
2207 |
|
|
2208 |
} // end k0 loop |
} // end k0 loop |
2209 |
} // end k1 loop |
} // end k1 loop |
2210 |
} // end of coloring |
} // end of coloring |
2211 |
} // end of parallel region |
} // end of parallel region |
2212 |
} |
} |
2213 |
|
|
2214 |
void Rectangle::addToSystemMatrix(Paso_SystemMatrix* in, dim_t NN_Eq, |
void Rectangle::addToSystemMatrix(Paso_SystemMatrix* mat, |
2215 |
const IndexVector& nodes_Eq, dim_t num_Eq, dim_t NN_Sol, |
const IndexVector& nodes_Eq, dim_t num_Eq, const IndexVector& nodes_Sol, |
2216 |
const IndexVector& nodes_Sol, dim_t num_Sol, const vector<double>& array) const |
dim_t num_Sol, const vector<double>& array) const |
2217 |
{ |
{ |
2218 |
const dim_t row_block_size = in->row_block_size; |
const dim_t numMyCols = mat->pattern->mainPattern->numInput; |
2219 |
const dim_t col_block_size = in->col_block_size; |
const dim_t numMyRows = mat->pattern->mainPattern->numOutput; |
2220 |
const dim_t block_size = in->block_size; |
|
2221 |
const dim_t num_subblocks_Eq = num_Eq / row_block_size; |
const index_t* mainBlock_ptr = mat->mainBlock->pattern->ptr; |
2222 |
const dim_t num_subblocks_Sol = num_Sol / col_block_size; |
const index_t* mainBlock_index = mat->mainBlock->pattern->index; |
2223 |
const dim_t numMyCols = in->pattern->mainPattern->numInput; |
double* mainBlock_val = mat->mainBlock->val; |
2224 |
const dim_t numMyRows = in->pattern->mainPattern->numOutput; |
const index_t* col_coupleBlock_ptr = mat->col_coupleBlock->pattern->ptr; |
2225 |
|
const index_t* col_coupleBlock_index = mat->col_coupleBlock->pattern->index; |
2226 |
const index_t* mainBlock_ptr = in->mainBlock->pattern->ptr; |
double* col_coupleBlock_val = mat->col_coupleBlock->val; |
2227 |
const index_t* mainBlock_index = in->mainBlock->pattern->index; |
const index_t* row_coupleBlock_ptr = mat->row_coupleBlock->pattern->ptr; |
2228 |
double* mainBlock_val = in->mainBlock->val; |
const index_t* row_coupleBlock_index = mat->row_coupleBlock->pattern->index; |
2229 |
const index_t* col_coupleBlock_ptr = in->col_coupleBlock->pattern->ptr; |
double* row_coupleBlock_val = mat->row_coupleBlock->val; |
2230 |
const index_t* col_coupleBlock_index = in->col_coupleBlock->pattern->index; |
|
2231 |
double* col_coupleBlock_val = in->col_coupleBlock->val; |
for (dim_t k_Eq = 0; k_Eq < nodes_Eq.size(); ++k_Eq) { |
|
const index_t* row_coupleBlock_ptr = in->row_coupleBlock->pattern->ptr; |
|
|
const index_t* row_coupleBlock_index = in->row_coupleBlock->pattern->index; |
|
|
double* row_coupleBlock_val = in->row_coupleBlock->val; |
|
|
for (dim_t k_Eq = 0; k_Eq < NN_Eq; ++k_Eq) { |
|
2232 |
// down columns of array |
// down columns of array |
2233 |
const dim_t j_Eq = nodes_Eq[k_Eq]; |
const dim_t j_Eq = nodes_Eq[k_Eq]; |
2234 |
for (dim_t l_row = 0; l_row < num_subblocks_Eq; ++l_row) { |
const dim_t i_row = j_Eq; |
2235 |
const dim_t i_row = j_Eq * num_subblocks_Eq + l_row; |
printf("row:%d\n", i_row); |
2236 |
// only look at the matrix rows stored on this processor |
// only look at the matrix rows stored on this processor |
2237 |
if (i_row < numMyRows) { |
if (i_row < numMyRows) { |
2238 |
for (dim_t k_Sol = 0; k_Sol < NN_Sol; ++k_Sol) { |
for (dim_t k_Sol = 0; k_Sol < nodes_Sol.size(); ++k_Sol) { |
2239 |
// across rows of array |
const dim_t i_col = nodes_Sol[k_Sol]; |
2240 |
const dim_t j_Sol = nodes_Sol[k_Sol]; |
if (i_col < numMyCols) { |
2241 |
for (dim_t l_col = 0; l_col < num_subblocks_Sol; ++l_col) { |
for (dim_t k = mainBlock_ptr[i_row]; k < mainBlock_ptr[i_row + 1]; ++k) { |
2242 |
// only look at the matrix rows stored on this processor |
if (mainBlock_index[k] == i_col) { |
2243 |
const dim_t i_col = j_Sol * num_subblocks_Sol + l_col; |
mainBlock_val[k] += array[INDEX2(k_Eq, k_Sol, nodes_Eq.size())]; |
2244 |
if (i_col < numMyCols) { |
break; |
2245 |
for (dim_t k = mainBlock_ptr[i_row]; |
} |
2246 |
k < mainBlock_ptr[i_row + 1]; ++k) { |
} |
2247 |
if (mainBlock_index[k] == i_col) { |
} else { |
2248 |
// entry array(k_Sol, j_Eq) is a block |
for (dim_t k = col_coupleBlock_ptr[i_row]; k < col_coupleBlock_ptr[i_row + 1]; ++k) { |
2249 |
// (row_block_size x col_block_size) |
cout << "col:" << i_col-numMyCols << " colIdx:" << col_coupleBlock_index[k] << endl; |
2250 |
for (dim_t ic = 0; ic < col_block_size; ++ic) { |
if (col_coupleBlock_index[k] == i_col - numMyCols) { |
2251 |
const dim_t i_Sol = ic + col_block_size * l_col; |
col_coupleBlock_val[k] += array[INDEX2(k_Eq, k_Sol, nodes_Eq.size())]; |
2252 |
for (dim_t ir = 0; ir < row_block_size; ++ir) { |
break; |
|
const dim_t i_Eq = ir + row_block_size * l_row; |
|
|
mainBlock_val[k*block_size + ir + row_block_size*ic] += |
|
|
array[INDEX4 |
|
|
(i_Eq, i_Sol, k_Eq, k_Sol, num_Eq, num_Sol, NN_Eq)]; |
|
|
} |
|
|
} |
|
|
break; |
|
|
} |
|
|
} |
|
|
} else { |
|
|
for (dim_t k = col_coupleBlock_ptr[i_row]; |
|
|
k < col_coupleBlock_ptr[i_row + 1]; ++k) { |
|
|
if (col_coupleBlock_index[k] == i_col - numMyCols) { |
|
|
// entry array(k_Sol, j_Eq) is a block |
|
|
// (row_block_size x col_block_size) |
|
|
for (dim_t ic = 0; ic < col_block_size; ++ic) { |
|
|
const dim_t i_Sol = ic + col_block_size * l_col; |
|
|
for (dim_t ir = 0; ir < row_block_size; ++ir) { |
|
|
const dim_t i_Eq = ir + row_block_size * l_row; |
|
|
col_coupleBlock_val[k * block_size + ir + row_block_size * ic] += |
|
|
array[INDEX4 |
|
|
(i_Eq, i_Sol, k_Eq, k_Sol, num_Eq, num_Sol, NN_Eq)]; |
|
|
} |
|
|
} |
|
|
break; |
|
|
} |
|
|
} |
|
2253 |
} |
} |
2254 |
} |
} |
2255 |
} |
} |
2256 |
} else { |
} |
2257 |
for (dim_t k_Sol = 0; k_Sol < NN_Sol; ++k_Sol) { |
} else { |
2258 |
// across rows of array |
for (dim_t k_Sol = 0; k_Sol < nodes_Sol.size(); ++k_Sol) { |
2259 |
const dim_t j_Sol = nodes_Sol[k_Sol]; |
// across rows of array |
2260 |
for (dim_t l_col = 0; l_col < num_subblocks_Sol; ++l_col) { |
const dim_t i_col = nodes_Sol[k_Sol]; |
2261 |
const dim_t i_col = j_Sol * num_subblocks_Sol + l_col; |
if (i_col < numMyCols) { |
2262 |
if (i_col < numMyCols) { |
for (dim_t k = row_coupleBlock_ptr[i_row - numMyRows]; |
2263 |
for (dim_t k = row_coupleBlock_ptr[i_row - numMyRows]; |
k < row_coupleBlock_ptr[i_row - numMyRows + 1]; ++k) |
2264 |
k < row_coupleBlock_ptr[i_row - numMyRows + 1]; ++k) |
{ |
2265 |
{ |
cout << "col:" << i_col << " rowIdx:" << row_coupleBlock_index[k] << endl; |
2266 |
if (row_coupleBlock_index[k] == i_col) { |
if (row_coupleBlock_index[k] == i_col) { |
2267 |
// entry array(k_Sol, j_Eq) is a block |
row_coupleBlock_val[k] += array[INDEX2(k_Eq, k_Sol, nodes_Eq.size())]; |
2268 |
// (row_block_size x col_block_size) |
break; |
|
for (dim_t ic = 0; ic < col_block_size; ++ic) { |
|
|
const dim_t i_Sol = ic + col_block_size * l_col; |
|
|
for (dim_t ir = 0; ir < row_block_size; ++ir) { |
|
|
const dim_t i_Eq = ir + row_block_size * l_row; |
|
|
row_coupleBlock_val[k * block_size + ir + row_block_size * ic] += |
|
|
array[INDEX4 |
|
|
(i_Eq, i_Sol, k_Eq, k_Sol, num_Eq, num_Sol, NN_Eq)]; |
|
|
} |
|
|
} |
|
|
break; |
|
|
} |
|
|
} |
|
2269 |
} |
} |
2270 |
} |
} |
2271 |
} |
} |