// HOW TO COMPILE ON MAC (paste these lines in a terminal):
//   export CPPFLAGS="-I/opt/homebrew/opt/libomp/include"
//   export LDFLAGS="-L/opt/homebrew/opt/libomp/lib"
//   clang++ -std=c++17 -O3 -Xpreprocessor -fopenmp $CPPFLAGS mainEx6.cpp bench_funcs.cpp $LDFLAGS -lomp -o Ex6
//   ./Ex6
//
// You can set OMP_NUM_THREADS in the shell to control the number of threads.

#include <chrono>
#include <cmath>
#include <iostream>
#include <vector>

#include <omp.h>

#include "bench_funcs.h"

using namespace std;
using namespace std::chrono;

int main()
|
|
{
|
|
size_t n = 200000;
|
|
size_t maxit = 5000;
|
|
double omega = 1.0;
|
|
double tol = 1e-8;
|
|
|
|
// show threads
|
|
#pragma omp parallel
|
|
{
|
|
#pragma omp single
|
|
cout << "Using " << omp_get_num_threads() << " OpenMP threads\n";
|
|
}
|
|
|
|
CSR K_atomic, K_noatom;
|
|
vector<double> f;
|
|
|
|
cout << "\n--- Build with ATOMICS (parallel over elements with atomic adds) ---\n";
|
|
auto t0 = high_resolution_clock::now();
|
|
build_fem_system_atomic(n, K_atomic, f);
|
|
auto t1 = high_resolution_clock::now();
|
|
cout << "Assembly (atomic) time = " << duration<double>(t1 - t0).count() << " s\n";
|
|
|
|
cout << "\nBuild with NO ATOMICS (grouped / per-row accumulation)\n";
|
|
t0 = high_resolution_clock::now();
|
|
build_fem_system_no_atomic(n, K_noatom, f);
|
|
t1 = high_resolution_clock::now();
|
|
cout << "Assembly (no-atomic) time = " << duration<double>(t1 - t0).count() << " s\n";
|
|
|
|
// quick check: compare val arrays (they should be equal)
|
|
bool same = true;
|
|
if (K_atomic.val.size() == K_noatom.val.size()) {
|
|
for (size_t i = 0; i < K_atomic.val.size(); ++i) {
|
|
if (std::fabs(K_atomic.val[i] - K_noatom.val[i]) > 1e-12) {
|
|
same = false; break;
|
|
}
|
|
}
|
|
} else same = false;
|
|
|
|
cout << "\nMatrix equality check (atomic vs no-atomic): " << (same ? "OK" : "DIFFER") << "\n";
|
|
|
|
// Run Jacobi solver on the no-atomic assembled matrix
|
|
cout << "\nRun Jacobi on NO-ATOMIC matrix\n";
|
|
vector<double> u;
|
|
t0 = high_resolution_clock::now();
|
|
jacobi_csr_parallel(K_noatom, f, u, maxit, omega, tol);
|
|
t1 = high_resolution_clock::now();
|
|
cout << "Jacobi time = " << duration<double>(t1 - t0).count() << " s\n";
|
|
|
|
cout << "\nDone.\n";
|
|
return 0;
|
|
}
|