#include "binaryIO.h" #include "getmatrix.h" #include "userset.h" #include "utils.h" #include "omp.h" #include #include #include #include // contains clock() #include #include #include #include #include #include #include using namespace std; // #################################################################### Matrix::Matrix(int const nrows, int const ncols) : _nrows(nrows), _ncols(ncols), _dd(0) {} Matrix::~Matrix() {} vector const & Matrix::GetDiag() const { //bool ddEmpty; ////#pragma omp critical //ddEmpty= (_dd.empty()); // local variable! // GH: Move allocation etc. to constructor !? if ( _dd.empty() ) { //#pragma omp single //std::cout << "PPPPPPPPPPPPPPPPPPPP\n"; #pragma omp barrier #pragma omp single _dd.resize(Nrows()); //#pragma omp barrier this->GetDiag(_dd); } assert( Nrows()==static_cast(_dd.size()) ); //#pragma omp master //std::cout << "."; return _dd; } // #################################################################### CRS_Matrix::CRS_Matrix() : Matrix(0, 0), _nnz(0), _id(0), _ik(0), _sk(0) {} CRS_Matrix::CRS_Matrix(const std::string &file) : Matrix(0, 0), _nnz(0), _id(0), _ik(0), _sk(0) { readBinary(file); _nrows = static_cast(size(_id) - 1); _ncols = _nrows; } CRS_Matrix::~CRS_Matrix() {} void CRS_Matrix::Mult(vector &w, vector const &u) const { assert( _ncols == static_cast(u.size()) ); // compatibility of inner dimensions assert( _nrows == static_cast(w.size()) ); // compatibility of outer dimensions #pragma omp parallel for for (int row = 0; row < _nrows; ++row) { double wi = 0.0; for (int ij = _id[row]; ij < _id[row + 1]; ++ij) { wi += _sk[ij] * u[ _ik[ij] ]; } w[row] = wi; } return; } void CRS_Matrix::Defect(vector &w, vector const &f, vector const &u) const { assert( _ncols == static_cast(u.size()) ); // compatibility of inner dimensions assert( _nrows == static_cast(w.size()) ); // compatibility of outer dimensions assert( w.size() == f.size() ); //#pragma omp parallel for #pragma omp for for (int row = 0; row < _nrows; ++row) { double wi = f[row]; for (int ij = _id[row]; ij < _id[row + 1]; ++ij) { wi -= _sk[ij] * u[ _ik[ij] ]; } w[row] = wi; } return; } void CRS_Matrix::JacobiSmoother(std::vector const &f, std::vector &u, std::vector &r, int nsmooth, double const omega, bool zero) const { // ToDO: ensure compatible dimensions //#pragma omp master //cout << "Jac in\n"; assert(_ncols==_nrows); assert( _ncols == static_cast(u.size()) ); // compatibility of inner dimensions assert( _nrows == static_cast(r.size()) ); // compatibility of outer dimensions assert( r.size() == f.size() ); auto const &D = Matrix::GetDiag(); // accumulated diagonal of matrix @p SK. //#pragma omp barrier //#pragma omp master //cout << "Matrix::GetDiag finished\n"; if (zero) { // assumes initial solution is zero #pragma omp for for (int k = 0; k < _nrows; ++k) { // u := u + om*D^{-1}*f u[k] = omega*f[k] / D[k]; // MPI: distributed to accumulated vector needed } //#pragma omp single --nsmooth; // first smoothing sweep done } //cout << zero << endl; //cout << nsmooth << endl; for (int ns = 1; ns <= nsmooth; ++ns) { //Defect(r, f, u); // r := f - K*u #pragma omp for for (int row = 0; row < _nrows; ++row) { double wi = f[row]; for (int ij = _id[row]; ij < _id[row + 1]; ++ij) { wi -= _sk[ij] * u[ _ik[ij] ]; } r[row] = wi; } #pragma omp for for (int k = 0; k < _nrows; ++k) { // u := u + om*D^{-1}*r u[k] = u[k] + omega * r[k] / D[k]; // MPI: distributed to accumulated vector needed } } //#pragma omp master //cout << "Jac out\n"; return; } void CRS_Matrix::GetDiag(vector &d) const { // be carefull when using a rectangular matrix int const nm = min(_nrows, _ncols); assert( nm == static_cast(d.size()) ); // instead of stopping we could resize d and warn the user //#pragma omp parallel for #pragma omp for for (int row = 0; row < nm; ++row) { const int ia = fetch(row, row); // Find diagonal entry of row assert(ia >= 0); d[row] = _sk[ia]; } cout << ">>>>> CRS_Matrix::GetDiag <<<<<" << endl; return; } inline int CRS_Matrix::fetch(int const row, int const col) const { int const id2 = _id[row + 1]; // end and int ip = _id[row]; // start of recent row (global index) while (ip < id2 && _ik[ip] != col) { // find index col (global index) ++ip; } if (ip >= id2) { ip = -1; #ifndef NDEBUG // compiler option -DNDEBUG switches off the check cout << "No column " << col << " in row " << row << endl; assert(ip >= id2); #endif } return ip; } void CRS_Matrix::Debug() const { // ID points to first entry of row // no symmetry assumed cout << "\nMatrix (" << _nrows << " x " << _ncols << " with nnz = " << _id[_nrows] << ")\n"; for (int row = 0; row < _nrows; ++row) { cout << "Row " << row << " : "; int const id1 = _id[row]; int const id2 = _id[row + 1]; for (int j = id1; j < id2; ++j) { cout.setf(ios::right, ios::adjustfield); cout << "[" << setw(2) << _ik[j] << "] " << setw(4) << _sk[j] << " "; } cout << endl; } return; } bool CRS_Matrix::Compare2Old(int nnode, int const id[], int const ik[], double const sk[]) const { bool bn = (nnode == _nrows); // number of rows if (!bn) { cout << "######### Error: " << "number of rows" << endl; } bool bz = (id[nnode] == _nnz); // number of non zero elements if (!bz) { cout << "######### Error: " << "number of non zero elements" << endl; } bool bd = equal(id, id + nnode + 1, _id.cbegin()); // row starts if (!bd) { cout << "######### Error: " << "row starts" << endl; } bool bk = equal(ik, ik + id[nnode], _ik.cbegin()); // column indices if (!bk) { cout << "######### Error: " << "column indices" << endl; } bool bv = equal(sk, sk + id[nnode], _sk.cbegin()); // values if (!bv) { cout << "######### Error: " << "values" << endl; } return bn && bz && bd && bk && bv; } void CRS_Matrix::writeBinary(const std::string &file) { vector cnt(size(_id) - 1); for (size_t k = 0; k < size(cnt); ++k) { cnt[k] = _id[k + 1] - _id[k]; } //adjacent_difference( cbegin(_id)+1, cend(_id), cnt ); write_binMatrix(file, cnt, _ik, _sk); } void CRS_Matrix::readBinary(const std::string &file) { vector cnt; read_binMatrix(file, cnt, _ik, _sk); _id.resize(size(cnt) + 1); _id[0] = 0; for (size_t k = 0; k < size(cnt); ++k) { _id[k + 1] = _id[k] + cnt[k]; } //partial_sum( cbegin(cnt), cend(cnt), begin(_id)+1 ); } // #################################################################### FEM_Matrix::FEM_Matrix(Mesh const &mesh) : CRS_Matrix(), _mesh(mesh) { Derive_Matrix_Pattern(); return; } FEM_Matrix::~FEM_Matrix() {} void FEM_Matrix::Derive_Matrix_Pattern_fast() { cout << "\n############ FEM_Matrix::Derive_Matrix_Pattern "; MyTimer tstart; //tstart.tic(); int const nelem(_mesh.Nelems()); int const ndof_e(_mesh.NdofsElement()); auto const &ia(_mesh.GetConnectivity()); // Determine the number of matrix rows _nrows = *max_element(ia.cbegin(), ia.cbegin() + ndof_e * nelem); ++_nrows; // node numberng: 0 ... nnode-1 assert(*min_element(ia.cbegin(), ia.cbegin() + ndof_e * nelem) == 0); // numbering starts with 0 ? // CSR data allocation _id.resize(_nrows + 1); // Allocate memory for CSR row pointer //########################################################################## auto const v2v = _mesh.Node2NodeGraph(); _nnz = 0; // number of connections _id[0] = 0; // start of matrix row zero for (size_t v = 0; v < v2v.size(); ++v ) { _id[v + 1] = _id[v] + v2v[v].size(); _nnz += v2v[v].size(); } assert(_nnz == _id[_nrows]); _sk.resize(_nnz); // Allocate memory for CSR column index vector // CSR data allocation _ik.resize(_nnz); // Allocate memory for CSR column index vector // Copy column indices int kk = 0; for (const auto & v : v2v) { for (size_t vi = 0; vi < v.size(); ++vi) { _ik[kk] = v[vi]; ++kk; } } _ncols = *max_element(_ik.cbegin(), _ik.cend()); // maximal column number ++_ncols; // node numbering: 0 ... nnode-1 //cout << _nrows << " " << _ncols << endl; assert(_ncols == _nrows); cout << "finished in " << tstart.toc() << " sec. ########\n"; return; } void FEM_Matrix::Derive_Matrix_Pattern_slow() { cout << "\n############ FEM_Matrix::Derive_Matrix_Pattern slow "; auto tstart = clock(); int const nelem(_mesh.Nelems()); int const ndof_e(_mesh.NdofsElement()); auto const &ia(_mesh.GetConnectivity()); // Determine the number of matrix rows _nrows = *max_element(ia.cbegin(), ia.cbegin() + ndof_e * nelem); ++_nrows; // node numberng: 0 ... nnode-1 assert(*min_element(ia.cbegin(), ia.cbegin() + ndof_e * nelem) == 0); // numbering starts with 0 ? // Collect for each node those nodes it is connected to (multiple entries) // Detect the neighboring nodes vector< list > cc(_nrows); // cc[i] is the list of nodes a node 'i' is connected to for (int i = 0; i < nelem; ++i) { int const idx = ndof_e * i; for (int k = 0; k < ndof_e; ++k) { list &cck = cc[ia[idx + k]]; cck.insert( cck.end(), ia.cbegin() + idx, ia.cbegin() + idx + ndof_e ); } } // Delete the multiple entries _nnz = 0; for (auto &it : cc) { it.sort(); it.unique(); _nnz += it.size(); // cout << it.size() << " :: "; copy(it->begin(),it->end(), ostream_iterator(cout," ")); cout << endl; } // CSR data allocation _id.resize(_nrows + 1); // Allocate memory for CSR row pointer _ik.resize(_nnz); // Allocate memory for CSR column index vector // copy CSR data _id[0] = 0; // begin of first row for (size_t i = 0; i < cc.size(); ++i) { //cout << i << " " << nid.at(i) << endl;; const list &ci = cc[i]; const auto nci = static_cast(ci.size()); _id[i + 1] = _id[i] + nci; // begin of next line copy(ci.begin(), ci.end(), _ik.begin() + _id[i] ); } assert(_nnz == _id[_nrows]); _sk.resize(_nnz); // Allocate memory for CSR column index vector _ncols = *max_element(_ik.cbegin(), _ik.cend()); // maximal column number ++_ncols; // node numbering: 0 ... nnode-1 //cout << _nrows << " " << _ncols << endl; assert(_ncols == _nrows); double duration = static_cast(clock() - tstart) / CLOCKS_PER_SEC; // ToDo: change to systemclock cout << "finished in " << duration << " sec. ########\n"; return; } void FEM_Matrix::CalculateLaplace_mult(vector &f) { cout << "\n############ FEM_Matrix::CalculateLaplace_mult "; double tstart = omp_get_wtime(); // OpenMP assert(_mesh.NdofsElement() == 3); // only for triangular, linear elements //cout << _nnz << " vs. " << _id[_nrows] << " " << _nrows<< endl; assert(_nnz == _id[_nrows]); for (int k = 0; k < _nrows; ++k) { _sk[k] = 0.0; } double ske[3][3], fe[3]; // Loop over all elements auto const nelem = _mesh.Nelems(); auto const &ia = _mesh.GetConnectivity(); auto const &xc = _mesh.GetCoords(); const vector sd_vec = _mesh.ElementSubdomains; #pragma omp parallel for private(ske,fe) for (int i = 0; i < nelem; ++i) { auto subdomain = sd_vec[i]; double lambda = ThermalConductivity(subdomain); //cout << subdomain << endl; CalcElemSpecific(ia.data() + 3 * i, xc.data(), lambda, ske); //AddElem(ia.data()+3 * i, ske, fe, _id.data(), _ik.data(), _sk.data(), f.data()); // GH: deprecated AddElem_3(ia.data() + 3 * i, ske, fe, f); } double duration = omp_get_wtime() - tstart; // OpenMP cout << "finished in " << duration << " sec. ########\n"; // ToDo: change to systemclock //Debug(); return; } double FEM_Matrix::ThermalConductivity(const int subdomain) { double lambda = 0.0; switch (subdomain) { // ceramic mug case 0: lambda = 3.0; // anything from 1 to 4 break; // water case 1: lambda = 0.6; break; // air case 2: lambda = 0.026; // depends on temperature actually break; default: lambda = 1.0; break; } return lambda; } double FEM_Matrix::VolumetricHeatCapacity(const int subdomain) { double c = 0.0; switch (subdomain) { // ceramic mug case 1: c = 2.0 * 1e6; break; // water case 2: c = 4.184 * 1e6; break; // air case 3: c = 1.2 * 1e3; break; default: c = 1.0; break; } return c; } void FEM_Matrix::AddMass_mult(vector &f) { cout << "\n############ FEM_Matrix::AddMass_mult "; double tstart = omp_get_wtime(); // OpenMP assert(_mesh.NdofsElement() == 3); // only for triangular, linear elements //cout << _nnz << " vs. " << _id[_nrows] << " " << _nrows<< endl; assert(_nnz == _id[_nrows]); double ske[3][3], fe[3]; // Loop over all elements auto const nelem = _mesh.Nelems(); auto const &ia = _mesh.GetConnectivity(); auto const &xc = _mesh.GetCoords(); const vector sd_vec = _mesh.ElementSubdomains; #pragma omp parallel for private(ske,fe) for (int i = 0; i < nelem; ++i) { auto subdomain = sd_vec[i]; double c = VolumetricHeatCapacity(subdomain); //cout << subdomain << endl; CalcElem_MasseSpecific(ia.data() + 3 * i, xc.data(), c, ske); //AddElem(ia.data()+3 * i, ske, fe, _id.data(), _ik.data(), _sk.data(), f.data()); // GH: deprecated AddElem_3(ia.data() + 3 * i, ske, fe, f); } double duration = omp_get_wtime() - tstart; // OpenMP cout << "finished in " << duration << " sec. ########\n"; // ToDo: change to systemclock //Debug(); return; } void FEM_Matrix::CalculateLaplace(vector &f) { cout << "\n############ FEM_Matrix::CalculateLaplace "; //double tstart = clock(); double tstart = omp_get_wtime(); // OpenMP assert(_mesh.NdofsElement() == 3); // only for triangular, linear elements //cout << _nnz << " vs. " << _id[_nrows] << " " << _nrows<< endl; assert(_nnz == _id[_nrows]); for (int k = 0; k < _nrows; ++k) { _sk[k] = 0.0; } for (int k = 0; k < _nrows; ++k) { f[k] = 0.0; } double ske[3][3], fe[3]; // Loop over all elements auto const nelem = _mesh.Nelems(); auto const &ia = _mesh.GetConnectivity(); auto const &xc = _mesh.GetCoords(); #pragma omp parallel for private(ske,fe) for (int i = 0; i < nelem; ++i) { CalcElem(ia.data() + 3 * i, xc.data(), ske, fe); //AddElem(ia.data()+3 * i, ske, fe, _id.data(), _ik.data(), _sk.data(), f.data()); // GH: deprecated AddElem_3(ia.data() + 3 * i, ske, fe, f); } //double duration = (clock() - tstart) / CLOCKS_PER_SEC; double duration = omp_get_wtime() - tstart; // OpenMP cout << "finished in " << duration << " sec. ########\n"; // ToDo: change to systemclock //Debug(); return; } void FEM_Matrix::CalculateRHS(vector &f, const std::function &func) { cout << "\n############ FEM_Matrix::CalculateRHS "; //double tstart = clock(); double tstart = omp_get_wtime(); // OpenMP assert(_mesh.NdofsElement() == 3); // only for triangular, linear elements //cout << _nnz << " vs. " << _id[_nrows] << " " << _nrows<< endl; assert(_nnz == _id[_nrows]); for (int k = 0; k < _nrows; ++k) { f[k] = 0.0; } double fe[3]; // Loop over all elements auto const nelem = _mesh.Nelems(); auto const &ia = _mesh.GetConnectivity(); auto const &xc = _mesh.GetCoords(); #pragma omp parallel for private(fe) for (int i = 0; i < nelem; ++i) { CalcElem_RHS(ia.data() + 3 * i, xc.data(), fe, func); AddElemRHS_3(ia.data() + 3 * i, fe, f); } //double duration = (clock() - tstart) / CLOCKS_PER_SEC; double duration = omp_get_wtime() - tstart; // OpenMP cout << "finished in " << duration << " sec. ########\n"; // ToDo: change to systemclock //Debug(); return; } void FEM_Matrix::ApplyDirichletBC(std::vector const &u, std::vector &f) { auto const idx = _mesh.Index_DirichletNodes(); int const nidx = idx.size(); for (int i = 0; i < nidx; ++i) { int const row = idx[i]; for (int ij = _id[row]; ij < _id[row + 1]; ++ij) { int const col = _ik[ij]; if (col == row) { _sk[ij] = 1.0; f[row] = u[row]; } else { int const id1 = fetch(col, row); // Find entry (col,row) assert(id1 >= 0); f[col] -= _sk[id1] * u[row]; _sk[id1] = 0.0; _sk[ij] = 0.0; } } } return; } void FEM_Matrix::ApplyRobinBC_mult(std::vector &f, const double u_out) { auto const RobinEdges = _mesh.OuterEdges; auto const RobinEdgesSubdomains = _mesh.OuterEdgesSubdomains; auto const BoundaryEdges = _mesh.BoundaryEdges(); auto const BoundaryEdgeNodes = _mesh.BoundaryEdgeNodes(); assert (BoundaryEdgeNodes.size() == 2* BoundaryEdges.size()); vector Coordinates = _mesh.GetCoords(); for (size_t i = 0; i < RobinEdges.size(); ++i) { //cout << "Edge number " << RobinEdges[i] << ", subdomain: " << RobinEdgesSubdomains[i] << " " << endl; double alpha = Heat_transfer_coefficient(RobinEdgesSubdomains[i]); int const EdgeNode1 = BoundaryEdgeNodes[2*i]; int const EdgeNode2 = BoundaryEdgeNodes[2*i + 1]; double x_1 = Coordinates[EdgeNode1]; double y_1 = Coordinates[EdgeNode1 + 1]; double x_2 = Coordinates[EdgeNode2]; double y_2 = Coordinates[EdgeNode2 + 1]; double EdgeLength = sqrt((x_2 - x_1)*(x_2 - x_1) + (y_2 - y_1)*(y_2 - y_1)); int ii = _id[EdgeNode1]; int jj = _id[EdgeNode2]; int ij = fetch(_ik[_id[EdgeNode1]], EdgeNode1); int ji = fetch(_ik[_id[EdgeNode2]], EdgeNode2); _sk[ii] += EdgeLength*alpha/3; _sk[jj] += EdgeLength*alpha/3; _sk[ij] += EdgeLength*alpha/6; _sk[ji] += EdgeLength*alpha/6; f[ii] += EdgeLength*alpha*u_out/2; f[jj] += EdgeLength*alpha*u_out/2; } return; } double FEM_Matrix::Heat_transfer_coefficient(const int subdomain) { int matlab_sd_index = subdomain - 1; double alpha = 0.0; switch (matlab_sd_index) { // outside case 0: alpha = 10.0; break; // ceramic case 1: alpha = 1.0; break; // water case 2: alpha = 500.0; break; // air case 3: alpha = 10.0; break; default: alpha = 1.0; break; } return alpha; } void FEM_Matrix::AddElem_3(int const ial[3], double const ske[3][3], double const fe[3], vector &f) { for (int i = 0; i < 3; ++i) { const int ii = ial[i]; // row ii (global index) for (int j = 0; j < 3; ++j) { // no symmetry assumed const int jj = ial[j]; // column jj (global index) const int ip = fetch(ii, jj); // find column entry jj in row ii #ifndef NDEBUG // compiler option -DNDEBUG switches off the check if (ip < 0) { // no entry found !! cout << "Error in AddElem: (" << ii << "," << jj << ") [" << ial[0] << "," << ial[1] << "," << ial[2] << "]\n"; assert(ip >= 0); } #endif #pragma omp atomic _sk[ip] += ske[i][j]; } #pragma omp atomic f[ii] += fe[i]; } } void FEM_Matrix::AddElemRHS_3(int const ial[3], double const fe[3], vector &f) { for (int i = 0; i < 3; ++i) { const int ii = ial[i]; // row ii (global index) #pragma omp atomic f[ii] += fe[i]; } } bool CRS_Matrix::CheckSymmetry() const { cout << "+++ Check matrix symmetry +++" << endl; bool bs{true}; #pragma omp parallel for reduction(&&:bs) for (int row = 0; row < Nrows(); ++row) { for (int ij = _id[row]; ij < _id[row + 1]; ++ij) { const int col = _ik[ij]; // column col (global index) const int ip = fetch(col, row); // find column entry row in row col if (ip < 0) { // no entry found !! cout << "Matrix has non-symmetric pattern at (" << row << "," << col << ")" << endl; bs = false; //assert(ip >= 0); } if ( std::abs(_sk[ij] - _sk[ip]) > 1e-13) { cout << "Matrix has non-symmetric entries at (" << row << "," << col << ")" << endl; bs = false; } } } return bs; } bool CRS_Matrix::CheckRowSum() const { cout << "+++ Check row sum +++" << endl; vector rhs(Ncols(), 1.0); vector res(Nrows()); Mult(res, rhs); bool bb{true}; #pragma omp parallel for reduction(&&:bb) for (size_t k = 0; k < res.size(); ++k) { //if (std::abs(res[k]) != 0.0) if (std::abs(res[k]) > 1e-14) { cout << "!! Nonzero row " << k << " : sum = " << res[k] << endl; bb = false; } } return bb; } bool CRS_Matrix::CheckMproperty() const { cout << "+++ Check M property +++" << endl; bool bm{true}; //#pragma omp parallel for reduction(&&:bm) for (int row = 0; row < Nrows(); ++row) { for (int ij = _id[row]; ij < _id[row + 1]; ++ij) { bool b_diag{true}, b_off{true}; if (_ik[ij] == row) { b_diag = _sk[ij] > 0.0; if (!b_diag) { cout << "## negative diag in row " << row << " : " << _sk[ij] << endl; bm = false; } } else { b_off = _sk[ij] <= 0.0; if (!b_off) { //cout << "!! positive off-diag [" << row << "," << _ik[ij] << "] : " << _sk[ij] << endl; bm = false; } } } } return bm; } bool CRS_Matrix::ForceMproperty() { cout << "+++ Force M property +++" << endl; bool bm{false}; #pragma omp parallel for reduction(&&:bm) for (int row = 0; row < Nrows(); ++row) { double corr{0.0}; int idiag = {-1}; for (int ij = _id[row]; ij < _id[row + 1]; ++ij) { if (_ik[ij] != row && _sk[ij] > 0.0) { corr += _sk[ij]; _sk[ij] = 0.0; bm = true; } if (_ik[ij] == row) { idiag = ij; } } assert(idiag >= 0); _sk[idiag] += corr; } return bm; } bool CRS_Matrix::CheckMatrix() const { bool b0 = CheckSymmetry(); if (!b0) { cout << " !!!! N O S Y M M E T R Y" << endl; } bool b1 = CheckRowSum(); if (!b1) { cout << " !!!! R O W S U M E R R O R" << endl; } bool b2 = CheckMproperty(); if (!b2) { cout << " !!!! N O M - M A T R I X" << endl; } return b1 && b2; } void CRS_Matrix::GetDiag_M(vector &d) const { // be carefull when using a rectangular matrix //int const nm = min(_nrows, _ncols); #pragma omp single assert( min(_nrows, _ncols) == static_cast(d.size()) ); // instead of stopping we could resize d and warn the user #pragma omp for for (int row = 0; row < Nrows(); ++row) { d[row] = 0.0; double v_ii{-1.0}; for (int ij = _id[row]; ij < _id[row + 1]; ++ij) { if (_ik[ij] != row) { d[row] += std::abs(_sk[ij]); } else { v_ii = _sk[ij]; } } if ( d[row] < v_ii ) { d[row] = v_ii; } } #pragma omp master cout << "<<<<<<< GetDiag_M (finished) >>>>>>>>>" << endl; return; } // general routine for lin. triangular elements void CalcElem(int const ial[3], double const xc[], double ske[3][3], double fe[3]) //void CalcElem(const int* __restrict__ ial, const double* __restrict__ xc, double* __restrict__ ske[3], double* __restrict__ fe) { const int i1 = 2 * ial[0], i2 = 2 * ial[1], i3 = 2 * ial[2]; const double x13 = xc[i3 + 0] - xc[i1 + 0], y13 = xc[i3 + 1] - xc[i1 + 1], x21 = xc[i1 + 0] - xc[i2 + 0], y21 = xc[i1 + 1] - xc[i2 + 1], x32 = xc[i2 + 0] - xc[i3 + 0], y32 = xc[i2 + 1] - xc[i3 + 1]; const double jac = fabs(x21 * y13 - x13 * y21); ske[0][0] = 0.5 / jac * (y32 * y32 + x32 * x32); ske[0][1] = 0.5 / jac * (y13 * y32 + x13 * x32); ske[0][2] = 0.5 / jac * (y21 * y32 + x21 * x32); ske[1][0] = ske[0][1]; ske[1][1] = 0.5 / jac * (y13 * y13 + x13 * x13); ske[1][2] = 0.5 / jac * (y21 * y13 + x21 * x13); ske[2][0] = ske[0][2]; ske[2][1] = ske[1][2]; ske[2][2] = 0.5 / jac * (y21 * y21 + x21 * x21); const double xm = (xc[i1 + 0] + xc[i2 + 0] + xc[i3 + 0]) / 3.0, ym = (xc[i1 + 1] + xc[i2 + 1] + xc[i3 + 1]) / 3.0; //fe[0] = fe[1] = fe[2] = 0.5 * jac * FunctF(xm, ym) / 3.0; fe[0] = fe[1] = fe[2] = 0.5 * jac * fNice(xm, ym) / 3.0; } void CalcElemSpecific(int const ial[3], double const xc[], double const lambda, double ske[3][3]) { const int i1 = 2 * ial[0], i2 = 2 * ial[1], i3 = 2 * ial[2]; const double x13 = xc[i3 + 0] - xc[i1 + 0], y13 = xc[i3 + 1] - xc[i1 + 1], x21 = xc[i1 + 0] - xc[i2 + 0], y21 = xc[i1 + 1] - xc[i2 + 1], x32 = xc[i2 + 0] - xc[i3 + 0], y32 = xc[i2 + 1] - xc[i3 + 1]; const double jac = fabs(x21 * y13 - x13 * y21); ske[0][0] = lambda * 0.5 / jac * (y32 * y32 + x32 * x32); ske[0][1] = lambda * 0.5 / jac * (y13 * y32 + x13 * x32); ske[0][2] = lambda * 0.5 / jac * (y21 * y32 + x21 * x32); ske[1][0] = ske[0][1]; ske[1][1] = lambda * 0.5 / jac * (y13 * y13 + x13 * x13); ske[1][2] = lambda * 0.5 / jac * (y21 * y13 + x21 * x13); ske[2][0] = ske[0][2]; ske[2][1] = ske[1][2]; ske[2][2] = lambda * 0.5 / jac * (y21 * y21 + x21 * x21); } void CalcElem_RHS(int const ial[3], double const xc[], double fe[3], const std::function &func) { const int i1 = 2 * ial[0], i2 = 2 * ial[1], i3 = 2 * ial[2]; const double x13 = xc[i3 + 0] - xc[i1 + 0], y13 = xc[i3 + 1] - xc[i1 + 1], x21 = xc[i1 + 0] - xc[i2 + 0], y21 = xc[i1 + 1] - xc[i2 + 1]; //x32 = xc[i2 + 0] - xc[i3 + 0], y32 = xc[i2 + 1] - xc[i3 + 1]; const double jac = fabs(x21 * y13 - x13 * y21); const double xm = (xc[i1 + 0] + xc[i2 + 0] + xc[i3 + 0]) / 3.0, ym = (xc[i1 + 1] + xc[i2 + 1] + xc[i3 + 1]) / 3.0; fe[0] = fe[1] = fe[2] = 0.5 * jac * func(xm, ym) / 3.0; } void CalcElem_Masse(int const ial[3], double const xc[], double ske[3][3]) { const int i1 = 2 * ial[0], i2 = 2 * ial[1], i3 = 2 * ial[2]; const double x13 = xc[i3 + 0] - xc[i1 + 0], y13 = xc[i3 + 1] - xc[i1 + 1], x21 = xc[i1 + 0] - xc[i2 + 0], y21 = xc[i1 + 1] - xc[i2 + 1]; //x32 = xc[i2 + 0] - xc[i3 + 0], y32 = xc[i2 + 1] - xc[i3 + 1]; const double jac = fabs(x21 * y13 - x13 * y21); ske[0][0] += jac / 12.0; ske[0][1] += jac / 24.0; ske[0][2] += jac / 24.0; ske[1][0] += jac / 24.0; ske[1][1] += jac / 12.0; ske[1][2] += jac / 24.0; ske[2][0] += jac / 24.0; ske[2][1] += jac / 24.0; ske[2][2] += jac / 12.0; return; } void CalcElem_MasseSpecific(int const ial[3], double const xc[], double const c, double ske[3][3]) { const int i1 = 2 * ial[0], i2 = 2 * ial[1], i3 = 2 * ial[2]; const double x13 = xc[i3 + 0] - xc[i1 + 0], y13 = xc[i3 + 1] - xc[i1 + 1], x21 = xc[i1 + 0] - xc[i2 + 0], y21 = xc[i1 + 1] - xc[i2 + 1]; //x32 = xc[i2 + 0] - xc[i3 + 0], y32 = xc[i2 + 1] - xc[i3 + 1]; const double jac = fabs(x21 * y13 - x13 * y21); ske[0][0] += c * jac / 12.0; ske[0][1] += c * jac / 24.0; ske[0][2] += c * jac / 24.0; ske[1][0] += c * jac / 24.0; ske[1][1] += c * jac / 12.0; ske[1][2] += c * jac / 24.0; ske[2][0] += c * jac / 24.0; ske[2][1] += c * jac / 24.0; ske[2][2] += c * jac / 12.0; return; } // ##################################################################### BisectInterpolation::BisectInterpolation() : Matrix( 0, 0 ), _iv(), _vv() { } BisectInterpolation::BisectInterpolation(std::vector const &fathers) : Matrix( static_cast(fathers.size()) / 2, 1 + * max_element(fathers.cbegin(), fathers.cend()) ), _iv(fathers), _vv(fathers.size(), 0.5) { } BisectInterpolation::~BisectInterpolation() {} void BisectInterpolation::GetDiag(vector &d) const { assert( Nrows() == static_cast(d.size()) ); for (int k = 0; k < Nrows(); ++k) { if ( _iv[2 * k] == _iv[2 * k + 1] ) { d[k] = 1.0; } else { d[k] = 0.0; } } return; } void BisectInterpolation::Mult(vector &wf, vector const &uc) const { assert( Nrows() == static_cast(wf.size()) ); assert( Ncols() == static_cast(uc.size()) ); //#pragma omp parallel for #pragma omp for for (int k = 0; k < Nrows(); ++k) { wf[k] = _vv[2 * k] * uc[_iv[2 * k]] + _vv[2 * k + 1] * uc[_iv[2 * k + 1]]; } return; } void BisectInterpolation::MultT(vector const &wf, vector &uc) const { assert( Nrows() == static_cast( wf.size()) ); assert( Ncols() == static_cast( uc.size()) ); assert(2*Nrows() == static_cast(_iv.size()) ); assert(2*Nrows() == static_cast(_vv.size()) ); //#pragma omp single //cout << "xxx\n"; #pragma omp for for (int k = 0; k < Ncols(); ++k) uc[k] = 0.0; //#pragma omp single //cout << "yyy\n"; // GH: atomic slows down the code ==> use different storage for MultT operation (CRS-matrix?) //#pragma omp parallel for #pragma omp for for (int k = 0; k < Nrows(); ++k) { int const j1=_iv[2 * k ]; int const j2=_iv[2 * k + 1]; //#pragma omp critical //cout << uc.size() << " " << j1 << " " << j2 << "\n"; #pragma omp atomic uc[j1] += _vv[2 * k ] * wf[k]; //#pragma omp critical //cout << " aa\n"; #pragma omp atomic uc[j2] += _vv[2 * k + 1] * wf[k]; } //#pragma omp single //cout << "zzz\n"; return; } void BisectInterpolation::MultT_Full(vector const &wf, vector &uc) const { assert( Nrows() == static_cast(wf.size()) ); assert( Ncols() == static_cast(uc.size()) ); // GH: atomic slows down the code ==> use different storage for MultT operation (CRS-matrix?) ////#pragma omp parallel for for (int k = 0; k < Ncols(); ++k) uc[k] = 0.0; vector full(uc.size(),0.0); //#pragma omp parallel for for (int k = 0; k < Nrows(); ++k) { if (_iv[2 * k] != _iv[2 * k + 1]) { //#pragma omp atomic uc[_iv[2 * k] ] += _vv[2 * k ] * wf[k]; //#pragma omp atomic uc[_iv[2 * k + 1]] += _vv[2 * k + 1] * wf[k]; full[_iv[2 * k ]] += _vv[2 * k ]; full[_iv[2 * k + 1]] += _vv[2 * k + 1]; } else { //#pragma omp atomic uc[_iv[2 * k] ] += 2.0*_vv[2 * k ] * wf[k]; // uses a property of class BisectInterpolation //uc[_iv[2 * k] ] += _vv[2 * k ] * wf[k]; // uses a property of class BisectInterpolation full[_iv[2 * k] ] += 2.0*_vv[2 * k ]; } } for (size_t k=0; k &w, vector const &f, vector const &u) const { assert( Nrows() == static_cast(w.size()) ); assert( Ncols() == static_cast(u.size()) ); assert( w.size() == f.size() ); for (int k = 0; k < Nrows(); ++k) { w[k] = f[k] - _vv[2 * k] * u[_iv[2 * k]] + _vv[2 * k + 1] * u[_iv[2 * k + 1]]; } return; } void BisectInterpolation::Debug() const { for (int k = 0; k < Nrows(); ++k) { cout << k << " : fathers(" << _iv[2 * k] << "," << _iv[2 * k + 1] << ") "; cout << "weights(" << _vv[2 * k] << "," << _vv[2 * k + 1] << endl; } cout << endl; return; } int BisectInterpolation::fetch(int row, int col) const { int idx(-1); if (_iv[2 * row ] == col) idx = 2 * row; if (_iv[2 * row + 1] == col) idx = 2 * row + 1; assert(idx >= 0); return idx; } // ##################################################################### //BisectIntDirichlet::BisectIntDirichlet(std::vector const &fathers, std::vector const &idxc_dir) //: BisectInterpolation(fathers) //{ //vector bdir(Ncols(), false); // Indicator for Dirichlet coarse nodes //for (size_t kc = 0; kc < idxc_dir.size(); ++kc) { //bdir.at(idxc_dir[kc]) = true; // Mark Dirichlet node from coarse mesh //} //for (size_t j = 0; j < _iv.size(); ++j) { //if ( bdir.at(_iv[j]) ) _vv[j] = 0.0; // set weight to zero iff (at least) one father is Dirichlet node //} //return; //} BisectIntDirichlet::BisectIntDirichlet(std::vector const &fathers, std::vector idxc_dir) : BisectInterpolation(fathers), _idxDir(std::move(idxc_dir)) { //vector bdir(Ncols(), false); // Indicator for Dirichlet coarse nodes //for (size_t kc = 0; kc < idxc_dir.size(); ++kc) { //bdir.at(idxc_dir[kc]) = true; // Mark Dirichlet node from coarse mesh //} //for (size_t j = 0; j < _iv.size(); ++j) { //if ( bdir.at(_iv[j]) ) _vv[j] = 0.0; // set weight to zero iff (at least) one father is Dirichlet node //} return; } BisectIntDirichlet::~BisectIntDirichlet() {} void BisectIntDirichlet::MultT(vector const &wf, vector &uc) const { BisectInterpolation::MultT(wf, uc); for (int kc : _idxDir) { uc.at(kc) = 0.0; // Set Dirichlet node on coarse mesh to Zero } return; } // ##################################################################### void DefectRestrict(CRS_Matrix const &SK, BisectInterpolation const &P, vector &fc, vector &ff, vector &uf) { assert( P.Nrows() == static_cast(ff.size()) ); assert( P.Ncols() == static_cast(fc.size()) ); assert( ff.size() == uf.size() ); assert( P.Nrows() == SK.Nrows() ); //#pragma omp parallel for #pragma omp for for (int k = 0; k < P.Ncols(); ++k) fc[k] = 0.0; // GH: atomic slows down the code ==> use different storage for MultT operation (CRS-matrix?) //#pragma omp parallel for #pragma omp for for (int row = 0; row < SK._nrows; ++row) { double wi = ff[row]; for (int ij = SK._id[row]; ij < SK._id[row + 1]; ++ij) { wi -= SK._sk[ij] * uf[ SK._ik[ij] ]; } const int i1 = P._iv[2 * row]; const int i2 = P._iv[2 * row + 1]; //if (i1 != i2) { #pragma omp atomic fc[i1] += P._vv[2 * row ] * wi; #pragma omp atomic fc[i2] += P._vv[2 * row + 1] * wi; } //else { //#pragma omp atomic //fc[i1] += 2.0 * P._vv[2 * row ] * wi; // uses a property of class BisectInterpolation //} } return; }