#include #include #include #include #include #include "bench_funcs_blas.h" using namespace std; using namespace std::chrono; void gen_vector_x_y(std::size_t N, std::vector& x, std::vector& y) { x.resize(N); y.resize(N); for (std::size_t i = 0; i < N; ++i) { x[i] = static_cast((i % 219) + 1); // xi := (i mod 219) + 1 y[i] = 1.0 / x[i]; // yi := 1/xi } } void gen_matrix_A(std::size_t M, std::size_t N, std::vector& A) { A.resize(M * N); for (std::size_t i = 0; i < M; ++i) { for (std::size_t j = 0; j < N; ++j) { A[i * N + j] = static_cast(((i + j) % 219) + 1); } } } high_resolution_clock::time_point tic_timer; void tic() { tic_timer = high_resolution_clock::now(); } double toc() { auto t1 = high_resolution_clock::now(); duration elapsed = t1 - tic_timer; return elapsed.count(); } //CHANGE FLAG BASED ON WHAT YOU WANT TO DO //int main() { //cout << fixed << setprecision(6); //int flag = 2; // 1=A2 (dot), 2=B2 (matvec), 3=C2 (matmul) // GH int main(const int argc, const char *argv[]) { const unsigned int flag = (argc > 1) ? atoi(argv[1]) : 2; // HG cout << fixed << setprecision(6); if (flag == 1) {// A2) DOT via cBLAS size_t N = 5000000; vector x, y; gen_vector_x_y(N, x, y); cout << "Running cBLAS dot (A2)\n"; tic(); double s = dot_cblas(x, y); double dt = toc(); double flops = 2.0 * N; double gflops = (flops / dt) / 1e9; double traffic_bytes = 2.0 * N * sizeof(double); double gib_s = (traffic_bytes / dt) / (1024.0 * 1024.0 * 1024.0); cout << "A2 (BLAS): N=" << N << " time=" << dt << " s GFLOPS=" << gflops << " GiB/s=" << gib_s << "\n"; } else if (flag == 2) { // B2) GEMV via cBLAS size_t M = 10000, N = 10000; vector A, x, b; gen_matrix_A(M, N, A); x.resize(N); for (size_t j = 0; j < N; ++j) x[j] = 1.0 / (((17 + j) % 219) + 1); cout << "Running cBLAS matvec (B2)\n"; tic(); matvec_cblas(A, M, N, x, b); double dt = toc(); double flops = 2.0 * M * N; double gflops = (flops / dt) / 1e9; double traffic_bytes = (M * N + N + M) * sizeof(double); double gib_s = (traffic_bytes / dt) / (1024.0 * 1024.0 * 1024.0); cout << "B2 (BLAS): M=" << M << " N=" << N << " time=" << dt << " s GFLOPS=" << gflops << " GiB/s=" << gib_s << "\n"; } else if (flag == 3) { // C2) GEMM via cBLAS size_t M = 500, L = 500, N = 500; // GH: Use also different sizes vector A, B, C; gen_matrix_A(M, L, A); gen_matrix_A(L, N, B); cout << "Running cBLAS matmul (C2)\n"; tic(); matmul_cblas(A, M, L, B, N, C); double dt = toc(); double flops = 2.0 * M * L * N; double gflops = (flops / dt) / 1e9; double traffic_bytes = (M * L + L * N + M * N) * sizeof(double); double gib_s = (traffic_bytes / dt) / (1024.0 * 1024.0 * 1024.0); cout << "C2 (BLAS): M=" << M << " L=" << L << " N=" << N << " time=" << dt << " s GFLOPS=" << gflops << " GiB/s=" << gib_s << "\n"; } else { cout << "Invalid flag. Choose 1–3.\n"; } cout << "\nDone\n"; return 0; }