// bench_funcs.cpp #include "bench_funcs.h" #include #include #include #include #include // A: parallel sum double sum_basic(const std::vector& x) { double sum = 0.0; #pragma omp parallel for reduction(+:sum) for (std::size_t i = 0; i < x.size(); ++i) sum += x[i]; return sum; } // A: parallel dot product double dot_basic(const std::vector& x, const std::vector& y) { std::size_t N = x.size(); double sum = 0.0; #pragma omp parallel for reduction(+:sum) for (std::size_t i = 0; i < N; ++i) sum += x[i] * y[i]; return sum; } // Kahan remains same double dot_kahan(const std::vector& x, const std::vector& y) { double sum = 0.0; double c = 0.0; for (std::size_t i = 0; i < x.size(); ++i) { double prod = x[i] * y[i]; double yk = prod - c; double t = sum + yk; c = (t - sum) - yk; sum = t; } return sum; } // Norm (sum of squares) double norm_basic(const std::vector& x) { double sumsq = 0.0; #pragma omp parallel for reduction(+:sumsq) for (std::size_t i = 0; i < x.size(); ++i) sumsq += x[i]*x[i]; return sumsq; } // B: matvec (row-major) void matvec_rowmajor(const std::vector& A, std::size_t M, std::size_t N, const std::vector& x, std::vector& b) { b.assign(M, 0.0); #pragma omp parallel for for (std::size_t i = 0; i < M; ++i) { double tmp = 0.0; const double* Ai = &A[i*N]; for (std::size_t j = 0; j < N; ++j) tmp += Ai[j] * x[j]; b[i] = tmp; } } // C: matmul (row-major) void matmul_rowmajor(const std::vector& A, std::size_t M, std::size_t L, const std::vector& B, std::size_t N, std::vector& C) { C.assign(M * N, 0.0); // Parallelize over output rows (i) #pragma omp parallel for for (std::size_t i = 0; i < M; ++i) { for (std::size_t k = 0; k < L; ++k) { double Aik = A[i*L + k]; const double* Bk = &B[k*N]; double* Ci = &C[i*N]; for (std::size_t j = 0; j < N; ++j) { Ci[j] += Aik * Bk[j]; } } } } // D: polynomial evaluation (Horner), parallel over x points void polyp_horner(const std::vector& a, const std::vector& x, std::vector& y) { std::size_t p = a.size() - 1; std::size_t N = x.size(); y.assign(N, 0.0); #pragma omp parallel for for (std::size_t i = 0; i < N; ++i) { double xi = x[i]; double val = a[p]; for (std::size_t k = p; k-- > 0; ) { val = val * xi + a[k]; } y[i] = val; } }