//HOW TO RUN IT ON MY TERMINAL (MAC) //export LDFLAGS="-L/opt/homebrew/opt/libomp/lib" //export CPPFLAGS="-I/opt/homebrew/opt/libomp/include" //clang++ -std=c++17 -O2 -Xpreprocessor -fopenmp $CPPFLAGS main.cpp mylib.cpp -L/opt/homebrew/opt/libomp/lib -lomp -o dotprod //./dotprod #include "check_env.h" #include "mylib.h" #include // atoi() #include // strncmp() #include #include #include // OpenMP #include #include using namespace std; int main(int argc, char const *argv[]) { unsigned int N = 5000001; // smaller default for testing int const NLOOPS = 5; // smaller loop for quick test //########################################################################## // Read Parameter from command line cout << "=== Checking command line parameters ===" << endl; for (int i = 1; i < argc; i++) { string ss(argv[i]); if ("-n"==ss && i + 1 < argc) N = static_cast(atoi(argv[i + 1])); else cout << "Corect call: " << argv[0] << " -n \n"; } cout << "\nVector size N = " << N << endl; //########################################################################## check_env(argc, argv); //########################################################################## cout << "\n=== Starting OpenMP test ===" << endl; int nthreads; #pragma omp parallel default(none) shared(cout,nthreads) { int const th_id = omp_get_thread_num(); int const nthrds = omp_get_num_threads(); #pragma omp critical cout << "Hello from thread " << th_id << " out of " << nthrds << endl; #pragma omp master nthreads = nthrds; } cout << "Total threads started: " << nthreads << endl; //########################################################################## cout << "\n=== Memory allocation ===" << endl; vector x(N), y(N); cout << "Allocated memory for vectors x and y (" << N << " doubles each)" << endl; //########################################################################## cout << "\n=== Data initialization ===" << endl; for (unsigned int i = 0; i < N; ++i) { x[i] = i + 1; y[i] = 1.0 / x[i]; } cout << "Vectors initialized: x[i] = i+1, y[i] = 1/x[i]" << endl; //########################################################################## cout << "\n=== Start Benchmarking inner product ===" << endl; double tstart = omp_get_wtime(); double sk(0.0); for (int i = 0; i < NLOOPS; ++i) { sk = scalar(x, y); sk = scalar_trans(x, y); } double t1 = (omp_get_wtime() - tstart) / NLOOPS; cout << " = " << sk << endl; if (static_cast(sk) != N) cout << "!! WRONG result !!" << endl; cout << "Timing (average per loop) in sec: " << t1 << endl; cout << "GFLOPS: " << 2.0 * N / t1 / 1024 / 1024 / 1024 << endl; cout << "Memory throughput (GiByte/s): " << 2.0 * N / t1 / 1024 / 1024 / 1024 * sizeof(x[0]) << endl; //########################################################################## cout << "\n=== Testing reduction_vec (combining vectors across threads) ===" << endl; cout << "Each thread initializes a vector of size 100; then vectors are combined." << endl; auto vr = reduction_vec(100); cout << "Resulting combined vector:" << endl; cout << vr << endl; cout << "\n=== Program finished ===" << endl; return 0; }