diff --git a/Sheet3/bench_funcs_blas.cpp b/Sheet3/bench_funcs_blas.cpp
new file mode 100644
index 0000000..deaa719
--- /dev/null
+++ b/Sheet3/bench_funcs_blas.cpp
@@ -0,0 +1,81 @@
+#include "bench_funcs_blas.h"
+
+#include <cblas.h>
+
+#include <limits>
+#include <stdexcept>
+
+namespace {
+
+// Converts a size to the `int` used for the CBLAS dimension arguments below,
+// throwing instead of silently truncating when the value does not fit.
+int to_blas_int(std::size_t n) {
+    if (n > static_cast<std::size_t>(std::numeric_limits<int>::max())) {
+        throw std::length_error("dimension does not fit in a BLAS int");
+    }
+    return static_cast<int>(n);
+}
+
+}  // namespace
+
+// (A2) Dot product of x and y via cblas_ddot, unit stride on both vectors.
+// The length is taken from x; throws std::invalid_argument if y is shorter,
+// because cblas_ddot would otherwise read past the end of y.
+double dot_cblas(const std::vector<double>& x, const std::vector<double>& y) {
+    if (y.size() < x.size()) {
+        throw std::invalid_argument("dot_cblas: y is shorter than x");
+    }
+    return cblas_ddot(to_blas_int(x.size()), x.data(), 1, y.data(), 1);
+}
+
+// (B2) Computes b = A * x via cblas_dgemv, where A is an M x N row-major
+// matrix stored contiguously (leading dimension N). b is resized to M.
+// Throws std::invalid_argument if A or x holds fewer elements than M and N
+// imply, and std::length_error if M or N does not fit in an int.
+void matvec_cblas(const std::vector<double>& A, std::size_t M, std::size_t N,
+                  const std::vector<double>& x, std::vector<double>& b) {
+    const int rows = to_blas_int(M);
+    const int cols = to_blas_int(N);
+    if (A.size() < M * N || x.size() < N) {
+        throw std::invalid_argument("matvec_cblas: A or x is too small for M x N");
+    }
+    if (M == 0 || N == 0) {
+        // Empty product: the result is the zero vector of length M. Handled
+        // here because a leading dimension of 0 is not a valid BLAS argument.
+        b.assign(M, 0.0);
+        return;
+    }
+    b.resize(M);
+    cblas_dgemv(CblasRowMajor, CblasNoTrans,
+                rows, cols,
+                1.0, A.data(), cols,
+                x.data(), 1,
+                0.0, b.data(), 1);
+}
+
+// (C2) Computes C = A * B via cblas_dgemm, where A is M x L, B is L x N and
+// C is M x N, all row-major and stored contiguously. C is resized to M * N.
+// Throws std::invalid_argument if A or B holds fewer elements than the
+// dimensions imply, and std::length_error if a dimension does not fit in an int.
+void matmul_cblas(const std::vector<double>& A, std::size_t M, std::size_t L,
+                  const std::vector<double>& B, std::size_t N,
+                  std::vector<double>& C) {
+    const int m = to_blas_int(M);
+    const int n = to_blas_int(N);
+    const int k = to_blas_int(L);
+    if (A.size() < M * L || B.size() < L * N) {
+        throw std::invalid_argument("matmul_cblas: A or B is too small for the given dimensions");
+    }
+    if (M == 0 || N == 0 || L == 0) {
+        // Empty product: every entry of C is zero. Handled here because a
+        // leading dimension of 0 is not a valid BLAS argument.
+        C.assign(M * N, 0.0);
+        return;
+    }
+    C.resize(M * N);
+    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
+                m, n, k,
+                1.0, A.data(), k,
+                B.data(), n,
+                0.0, C.data(), n);
+}
diff --git a/Sheet3/bench_funcs_blas.h b/Sheet3/bench_funcs_blas.h
new file mode 100644
index 0000000..e835be6
--- /dev/null
+++ b/Sheet3/bench_funcs_blas.h
@@ -0,0 +1,24 @@
+#ifndef BENCH_FUNCS_BLAS_H
+#define BENCH_FUNCS_BLAS_H
+
+#include <cstddef>
+#include <vector>
+
+// ===== BLAS-based benchmark functions =====
+// All matrices are dense, row-major and stored contiguously in a std::vector.
+
+// (A2) cBLAS dot product of x and y; the length is taken from x.
+double dot_cblas(const std::vector<double>& x, const std::vector<double>& y);
+
+// (B2) cBLAS matrix–vector product b = A * x, with A of size M x N.
+// b is resized to M.
+void matvec_cblas(const std::vector<double>& A, std::size_t M, std::size_t N,
+                  const std::vector<double>& x, std::vector<double>& b);
+
+// (C2) cBLAS matrix–matrix product C = A * B, with A of size M x L and
+// B of size L x N. C is resized to M * N.
+void matmul_cblas(const std::vector<double>& A, std::size_t M, std::size_t L,
+                  const std::vector<double>& B, std::size_t N,
+                  std::vector<double>& C);
+
+#endif
diff --git a/Sheet3/main_blas.cpp b/Sheet3/main_blas.cpp
new file mode 100644
index 0000000..5d7bff1
--- /dev/null
+++ b/Sheet3/main_blas.cpp
@@ -0,0 +1,129 @@
+#include <chrono>
+#include <cstdlib>
+#include <iomanip>
+#include <iostream>
+#include <vector>
+
+#include "bench_funcs_blas.h"
+
+using namespace std;
+using namespace std::chrono;
+
+// Fills x and y with N entries each: x_i = (i mod 219) + 1 and y_i = 1 / x_i.
+void gen_vector_x_y(std::size_t N, std::vector<double>& x, std::vector<double>& y) {
+    x.resize(N);
+    y.resize(N);
+    for (std::size_t i = 0; i < N; ++i) {
+        x[i] = static_cast<double>((i % 219) + 1);  // xi := (i mod 219) + 1
+        y[i] = 1.0 / x[i];                          // yi := 1/xi
+    }
+}
+
+// Fills the row-major M x N matrix A with A(i, j) = ((i + j) mod 219) + 1.
+void gen_matrix_A(std::size_t M, std::size_t N, std::vector<double>& A) {
+    A.resize(M * N);
+    for (std::size_t i = 0; i < M; ++i) {
+        for (std::size_t j = 0; j < N; ++j) {
+            A[i * N + j] = static_cast<double>(((i + j) % 219) + 1);
+        }
+    }
+}
+
+// Start time of the running measurement. steady_clock is used because it is
+// monotonic; high_resolution_clock may alias a clock that can be adjusted.
+steady_clock::time_point tic_timer;
+
+// Starts (or restarts) the stopwatch.
+void tic() { tic_timer = steady_clock::now(); }
+
+// Returns the seconds elapsed since the last call to tic().
+double toc() {
+    const auto t1 = steady_clock::now();
+    const duration<double> elapsed = t1 - tic_timer;
+    return elapsed.count();
+}
+
+// Runs one benchmark and prints its time, GFLOPS and GiB/s.
+// The benchmark is selected by the first command-line argument
+// (1=A2 dot, 2=B2 matvec, 3=C2 matmul); without an argument B2 is run.
+int main(int argc, char** argv) {
+    cout << fixed << setprecision(6);
+    int flag = 2;  // 1=A2 (dot), 2=B2 (matvec), 3=C2 (matmul)
+    if (argc > 1) {
+        flag = std::atoi(argv[1]);  // non-numeric input yields 0 -> "Invalid flag"
+    }
+
+    if (flag == 1) {  // A2) DOT via cBLAS
+        size_t N = 5000000;
+        vector<double> x, y;
+        gen_vector_x_y(N, x, y);
+        cout << "Running cBLAS dot (A2)\n";
+        tic();
+        double s = dot_cblas(x, y);
+        double dt = toc();
+        static_cast<void>(s);  // result is not reported; only the timing is
+
+        double flops = 2.0 * N;
+        double gflops = (flops / dt) / 1e9;
+        double traffic_bytes = 2.0 * N * sizeof(double);
+        double gib_s = (traffic_bytes / dt) / (1024.0 * 1024.0 * 1024.0);
+
+        cout << "A2 (BLAS): N=" << N << " time=" << dt
+             << " s GFLOPS=" << gflops << " GiB/s=" << gib_s << "\n";
+    }
+
+    else if (flag == 2) {  // B2) GEMV via cBLAS
+        size_t M = 10000, N = 10000;
+        vector<double> A, x;
+        // Pre-size b so the resize inside matvec_cblas does not allocate
+        // within the timed region.
+        vector<double> b(M);
+        gen_matrix_A(M, N, A);
+        x.resize(N);
+        for (size_t j = 0; j < N; ++j)
+            x[j] = 1.0 / (((17 + j) % 219) + 1);
+
+        cout << "Running cBLAS matvec (B2)\n";
+        tic();
+        matvec_cblas(A, M, N, x, b);
+        double dt = toc();
+
+        double flops = 2.0 * M * N;
+        double gflops = (flops / dt) / 1e9;
+        double traffic_bytes = (M * N + N + M) * sizeof(double);
+        double gib_s = (traffic_bytes / dt) / (1024.0 * 1024.0 * 1024.0);
+
+        cout << "B2 (BLAS): M=" << M << " N=" << N << " time=" << dt
+             << " s GFLOPS=" << gflops << " GiB/s=" << gib_s << "\n";
+    }
+
+    else if (flag == 3) {  // C2) GEMM via cBLAS
+        size_t M = 500, L = 500, N = 500;
+        vector<double> A, B;
+        // Pre-size C so the resize inside matmul_cblas does not allocate
+        // within the timed region.
+        vector<double> C(M * N);
+        gen_matrix_A(M, L, A);
+        gen_matrix_A(L, N, B);
+
+        cout << "Running cBLAS matmul (C2)\n";
+        tic();
+        matmul_cblas(A, M, L, B, N, C);
+        double dt = toc();
+
+        double flops = 2.0 * M * L * N;
+        double gflops = (flops / dt) / 1e9;
+        double traffic_bytes = (M * L + L * N + M * N) * sizeof(double);
+        double gib_s = (traffic_bytes / dt) / (1024.0 * 1024.0 * 1024.0);
+
+        cout << "C2 (BLAS): M=" << M << " L=" << L << " N=" << N << " time=" << dt
+             << " s GFLOPS=" << gflops << " GiB/s=" << gib_s << "\n";
+    }
+
+    else {
+        cout << "Invalid flag. Choose 1–3.\n";
+    }
+
+    cout << "\nDone\n";
+    return 0;
+}