// Exercise 5 – Parallelization of Exercise 3 //HOW TO RUN IT ON MY MAC: //cd ~/Desktop/jacobi_oo_stl/jacobi_oo_stl //export CPPFLAGS="-I/opt/homebrew/opt/libomp/include" //export LDFLAGS="-L/opt/homebrew/opt/libomp/lib" //export PATH="/opt/homebrew/opt/llvm/bin:$PATH" //clang++ -std=c++17 -O3 \ -Xpreprocessor -fopenmp \ $CPPFLAGS \ main.cpp \ jacsolve.cpp \ getmatrix.cpp \ geom.cpp \ vdop.cpp \ userset.cpp \ $LDFLAGS -lomp \ -o Ex5 //./Ex5 #include "geom.h" #include "getmatrix.h" //in here it is defined the parallel function #include "jacsolve.h" //in here it is defined the parallel function #include "userset.h" #include "vdop.h" #include #include #include #include using namespace std; using namespace std::chrono; int main(int, char **) { // ------------------------------------------------------------ // OpenMP info // ------------------------------------------------------------ #pragma omp parallel { #pragma omp single cout << "Using " << omp_get_num_threads() << " OpenMP threads\n\n"; } // ------------------------------------------------------------ // Domain decomposition (MPI-style layout kept, MPI=1) // ------------------------------------------------------------ const int numprocs = 1; const int myrank = 0; if (myrank == 0) cout << "There are " << numprocs << " processes running.\n\n"; const int procx = static_cast(sqrt(numprocs + 0.0)); const int procy = procx; if (procx * procy != numprocs) { cout << "Wrong number of processors!\n"; return 1; } // ------------------------------------------------------------ // Problem size // ------------------------------------------------------------ int nx = 100; int ny = 100; int NXglob = nx * procx; int NYglob = ny * procy; cout << "Intervals: " << NXglob << " x " << NYglob << "\n"; // ##################### STL ########################################### { Mesh_2d_3_square const mesh(nx, ny); //mesh.Debug(); CRS_Matrix SK(mesh); // sparse FEM matrix //SK.Debug(); vector u(SK.Nrows(), 0.0); // solution vector f(SK.Nrows(), 0.0); // RHS // Parallel FEM matrix assembly auto t0 = high_resolution_clock::now(); SK.CalculateLaplace_omp(f); // PARALLEL assembly //SK.Debug(); auto t1 = high_resolution_clock::now(); cout << "FEM assembly time (parallel): " << duration(t1 - t0).count() << " s\n"; // Initial condition mesh.SetValues(u, [](double x, double y) -> double {return 0.0 * x *y;} ); // lambda function // Apply Dirichlet BC SK.ApplyDirichletBC(u, f); //SK.Debug(); // Parallel Jacobi solver t0 = high_resolution_clock::now(); JacobiSolve_omp(SK, f, u); // PARALLEL Jacobi t1 = high_resolution_clock::now(); cout << "JacobiSolve time (parallel): " << duration(t1 - t0).count() << " s\n"; } // ============================================================ { Mesh_2d_3_matlab const mesh("square_100.txt"); CRS_Matrix SK(mesh); //SK.Debug(); vector u(SK.Nrows(), 0.0); vector f(SK.Nrows(), 0.0); auto t0 = high_resolution_clock::now(); SK.CalculateLaplace_omp(f); // PARALLEL assembly //SK.Debug(); auto t1 = high_resolution_clock::now(); cout << "FEM assembly time (parallel, matlab mesh): " << duration(t1 - t0).count() << " s\n"; mesh.SetValues(u, [](double x, double y) -> double {return 0.0 * x *y;} ); // lambda function SK.ApplyDirichletBC(u, f); //SK.Debug(); t0 = high_resolution_clock::now(); JacobiSolve_omp(SK, f, u); // PARALLEL Jacobi t1 = high_resolution_clock::now(); cout << "JacobiSolve time (parallel, matlab mesh): " << duration(t1 - t0).count() << " s\n"; } return 0; }