168 lines
4.5 KiB
C++
168 lines
4.5 KiB
C++
// How to compile on macOS (OpenMP via Homebrew's libomp):
//   export CPPFLAGS="-I/opt/homebrew/opt/libomp/include"
//   export LDFLAGS="-L/opt/homebrew/opt/libomp/lib"
//   clang++ -std=c++17 -O3 -Xpreprocessor -fopenmp $CPPFLAGS main.cpp bench_funcs.cpp $LDFLAGS -lomp -o Ex4
//   ./Ex4
|
||
|
||
|
||
#include <iostream>
|
||
#include <vector>
|
||
#include <cmath>
|
||
#include <iomanip>
|
||
#include <chrono>
|
||
#include "bench_funcs.h"
|
||
#include <omp.h>
|
||
|
||
using namespace std;
|
||
using namespace std::chrono;
|
||
|
||
/// Fills two test vectors of length N.
///
/// x receives the repeating pattern 1, 2, ..., 219, 1, 2, ... and y receives
/// the element-wise reciprocal of x, so every product x[i] * y[i] is ~1.
///
/// @param N  number of elements to generate
/// @param x  output vector, resized to N
/// @param y  output vector, resized to N
void gen_vector_x_y(std::size_t N, std::vector<double>& x, std::vector<double>& y) {
    x.resize(N);
    y.resize(N);

    for (std::size_t idx = 0; idx != N; ++idx) {
        // Pattern value is always in [1, 219], so the reciprocal is well defined.
        const double value = static_cast<double>(idx % 219 + 1);
        x[idx] = value;
        y[idx] = 1.0 / value;
    }
}
|
||
|
||
/// Fills A with an M x N test matrix stored contiguously, row after row.
///
/// Entry (i, j) lives at A[i * N + j] and is set to ((i + j) % 219) + 1,
/// i.e. a value in [1, 219].
///
/// @param M  number of rows
/// @param N  number of columns
/// @param A  output storage, resized to M * N elements
void gen_matrix_A(std::size_t M, std::size_t N, std::vector<double>& A) {
    A.resize(M * N);

    // `flat` walks the storage in order and always equals row * N + col.
    std::size_t flat = 0;
    for (std::size_t row = 0; row < M; ++row) {
        for (std::size_t col = 0; col < N; ++col, ++flat) {
            A[flat] = static_cast<double>((row + col) % 219 + 1);
        }
    }
}
|
||
|
||
// Start instant shared by tic() and toc(). There is a single slot, so a new
// tic() overwrites the previous start time (tic/toc pairs do not nest).
//
// steady_clock is used instead of high_resolution_clock because the latter is
// not guaranteed to be monotonic (it may alias system_clock), which can yield
// wrong or even negative intervals if the wall clock is adjusted mid-run.
std::chrono::steady_clock::time_point tic_timer;

/// Records the current instant as the start of a timed interval.
void tic() { tic_timer = std::chrono::steady_clock::now(); }

/// Returns the time elapsed since the most recent tic(), in seconds.
/// Does not reset the start instant, so it may be called repeatedly.
double toc() {
    const std::chrono::duration<double> elapsed =
        std::chrono::steady_clock::now() - tic_timer;
    return elapsed.count();
}
|
||
|
||
// The benchmark to run (options 1-4, i.e. parts A-D) is chosen at runtime: main() reads the selection from standard input.
|
||
int main() {
|
||
size_t N = 1'000'000;
|
||
size_t M = 3000, L = 3000;
|
||
|
||
// Check OpenMP threads
|
||
#pragma omp parallel
|
||
{
|
||
#pragma omp single
|
||
cout << "Using " << omp_get_num_threads() << " OpenMP threads\n";
|
||
}
|
||
|
||
// Show menu
|
||
cout << "Choose an option:\n";
|
||
cout << " 1) SUM + DOT PRODUCT\n";
|
||
cout << " 2) MATRIX-VECTOR PRODUCT\n";
|
||
cout << " 3) MATRIX-MATRIX PRODUCT\n";
|
||
cout << " 4) POLYNOMIAL EVALUATION\n";
|
||
cout << "Enter your choice (1-4): ";
|
||
|
||
int flag;
|
||
cin >> flag;
|
||
|
||
// validate input
|
||
if(flag < 1 || flag > 4){
|
||
cout << "Invalid choice. Exiting.\n";
|
||
return 1;
|
||
}
|
||
|
||
// A) SUM + DOT PRODUCT (parallel)
|
||
if (flag == 1) {
|
||
vector<double> x, y;
|
||
gen_vector_x_y(N, x, y);
|
||
|
||
cout << "Running parallel SUM" << endl;
|
||
tic();
|
||
double ssum = sum_basic(x); (void)ssum;
|
||
double dt_sum = toc();
|
||
cout << " sum time = " << dt_sum << " s\n";
|
||
|
||
cout << "Running parallel DOT PRODUCT" << endl;
|
||
tic();
|
||
double sdot = dot_basic(x, y); (void)sdot;
|
||
double dt_dot = toc();
|
||
cout << " dot time = " << dt_dot << " s\n";
|
||
|
||
double flops_dot = 2.0 * N;
|
||
double gflops_dot = (flops_dot / dt_dot) / 1e9;
|
||
|
||
cout << "A: N=" << N << "\n";
|
||
cout << " SUM time=" << dt_sum << " s\n";
|
||
cout << " DOT time=" << dt_dot << " s GFLOPS=" << gflops_dot << "\n";
|
||
}
|
||
|
||
// B) MATRIX–VECTOR PRODUCT (parallel)
|
||
else if (flag == 2) {
|
||
size_t m = M, n = 5000;
|
||
vector<double> A, x, b;
|
||
|
||
gen_matrix_A(m, n, A);
|
||
x.resize(n);
|
||
for (size_t j = 0; j < n; ++j)
|
||
x[j] = 1.0 / (((17 + j) % 219) + 1);
|
||
|
||
cout << "Running Matrix times vector (parallel)\n";
|
||
tic();
|
||
matvec_rowmajor(A, m, n, x, b);
|
||
double dt = toc();
|
||
|
||
double flops = 2.0 * m * n;
|
||
double gflops = (flops / dt) / 1e9;
|
||
|
||
cout << "B: M=" << m << " N=" << n
|
||
<< " time=" << dt << " s GFLOPS=" << gflops << endl;
|
||
}
|
||
|
||
// C) MATRIX–MATRIX PRODUCT (parallel)
|
||
else if (flag == 3) {
|
||
size_t m = M, l = L, n = 500;
|
||
vector<double> A, B, C;
|
||
|
||
gen_matrix_A(m, l, A);
|
||
gen_matrix_A(l, n, B);
|
||
|
||
cout << "Running Multiplication of matrices (parallel)\n";
|
||
tic();
|
||
matmul_rowmajor(A, m, l, B, n, C);
|
||
double dt = toc();
|
||
|
||
double flops = 2.0 * m * l * n;
|
||
double gflops = (flops / dt) / 1e9;
|
||
|
||
cout << "C: M=" << m << " L=" << l << " N=" << n
|
||
<< " time=" << dt << " s GFLOPS=" << gflops << endl;
|
||
}
|
||
|
||
// D) POLYNOMIAL EVALUATION (parallel Horner)
|
||
else if (flag == 4) {
|
||
size_t p = 100; // degree
|
||
vector<double> a(p+1), x(N), y;
|
||
|
||
for (size_t k = 0; k <= p; ++k)
|
||
a[k] = 1.0 / (k+1);
|
||
|
||
for (size_t i = 0; i < N; ++i)
|
||
x[i] = (i % 219) * 0.001 + 1.0;
|
||
|
||
cout << "Running polynomial function (parallel)\n";
|
||
tic();
|
||
polyp_horner(a, x, y);
|
||
double dt = toc();
|
||
|
||
double flops = 2.0 * p * N;
|
||
double gflops = (flops / dt) / 1e9;
|
||
|
||
cout << "D: p=" << p << " N=" << N
|
||
<< " time=" << dt << " s GFLOPS=" << gflops << endl;
|
||
}
|
||
|
||
else {
|
||
cout << "Invalid flag — choose 1, 2, 3, or 4.\n";
|
||
}
|
||
|
||
cout << "\nDone \n";
|
||
return 0;
|
||
}
|