// HOW TO COMPILE ON MAC // export CPPFLAGS="-I/opt/homebrew/opt/libomp/include" // export LDFLAGS="-L/opt/homebrew/opt/libomp/lib" // clang++ -std=c++17 -O3 -Xpreprocessor -fopenmp $CPPFLAGS main.cpp bench_funcs.cpp $LDFLAGS -lomp -o Ex4 // ./Ex4 #include #include #include #include #include #include "bench_funcs.h" #include using namespace std; using namespace std::chrono; void gen_vector_x_y(size_t N, vector& x, vector& y) { x.resize(N); y.resize(N); for (size_t i = 0; i < N; ++i) { x[i] = (i % 219) + 1; y[i] = 1.0 / x[i]; } } void gen_matrix_A(size_t M, size_t N, vector& A) { A.resize(M * N); for (size_t i = 0; i < M; ++i) for (size_t j = 0; j < N; ++j) A[i * N + j] = ((i + j) % 219) + 1; } high_resolution_clock::time_point tic_timer; void tic() { tic_timer = high_resolution_clock::now(); } double toc() { auto t1 = high_resolution_clock::now(); duration elapsed = t1 - tic_timer; return elapsed.count(); } // CHANGE THE flag VARIABLE IN main() TO CHOOSE A, B, C, D. int main() { size_t N = 1'000'000; size_t M = 3000, L = 3000; // Check OpenMP threads #pragma omp parallel { #pragma omp single cout << "Using " << omp_get_num_threads() << " OpenMP threads\n"; } // Show menu cout << "Choose an option:\n"; cout << " 1) SUM + DOT PRODUCT\n"; cout << " 2) MATRIX-VECTOR PRODUCT\n"; cout << " 3) MATRIX-MATRIX PRODUCT\n"; cout << " 4) POLYNOMIAL EVALUATION\n"; cout << "Enter your choice (1-4): "; int flag; cin >> flag; // validate input if(flag < 1 || flag > 4){ cout << "Invalid choice. Exiting.\n"; return 1; } // A) SUM + DOT PRODUCT (parallel) if (flag == 1) { vector x, y; gen_vector_x_y(N, x, y); cout << "Running parallel SUM" << endl; tic(); double ssum = sum_basic(x); (void)ssum; double dt_sum = toc(); cout << " sum time = " << dt_sum << " s\n"; cout << "Running parallel DOT PRODUCT" << endl; tic(); double sdot = dot_basic(x, y); (void)sdot; double dt_dot = toc(); cout << " dot time = " << dt_dot << " s\n"; double flops_dot = 2.0 * N; double gflops_dot = (flops_dot / dt_dot) / 1e9; cout << "A: N=" << N << "\n"; cout << " SUM time=" << dt_sum << " s\n"; cout << " DOT time=" << dt_dot << " s GFLOPS=" << gflops_dot << "\n"; } // B) MATRIX–VECTOR PRODUCT (parallel) else if (flag == 2) { size_t m = M, n = 5000; vector A, x, b; gen_matrix_A(m, n, A); x.resize(n); for (size_t j = 0; j < n; ++j) x[j] = 1.0 / (((17 + j) % 219) + 1); cout << "Running Matrix times vector (parallel)\n"; tic(); matvec_rowmajor(A, m, n, x, b); double dt = toc(); double flops = 2.0 * m * n; double gflops = (flops / dt) / 1e9; cout << "B: M=" << m << " N=" << n << " time=" << dt << " s GFLOPS=" << gflops << endl; } // C) MATRIX–MATRIX PRODUCT (parallel) else if (flag == 3) { size_t m = M, l = L, n = 500; vector A, B, C; gen_matrix_A(m, l, A); gen_matrix_A(l, n, B); cout << "Running Multiplication of matrices (parallel)\n"; tic(); matmul_rowmajor(A, m, l, B, n, C); double dt = toc(); double flops = 2.0 * m * l * n; double gflops = (flops / dt) / 1e9; cout << "C: M=" << m << " L=" << l << " N=" << n << " time=" << dt << " s GFLOPS=" << gflops << endl; } // D) POLYNOMIAL EVALUATION (parallel Horner) else if (flag == 4) { size_t p = 100; // degree vector a(p+1), x(N), y; for (size_t k = 0; k <= p; ++k) a[k] = 1.0 / (k+1); for (size_t i = 0; i < N; ++i) x[i] = (i % 219) * 0.001 + 1.0; cout << "Running polynomial function (parallel)\n"; tic(); polyp_horner(a, x, y); double dt = toc(); double flops = 2.0 * p * N; double gflops = (flops / dt) / 1e9; cout << "D: p=" << p << " N=" << N << " time=" << dt << " s GFLOPS=" << gflops << endl; } else { cout << "Invalid flag — choose 1, 2, 3, or 4.\n"; } cout << "\nDone \n"; return 0; }