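// Benchmark driver: times the cBLAS-backed dot product (A2), matrix-vector
// product (B2) and matrix-matrix product (C2) and reports GFLOPS and GiB/s.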
#include <chrono>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <vector>

#include "bench_funcs_blas.h"

using namespace std;
using namespace std::chrono;

// Fills two test vectors of length N for the dot-product benchmark.
//
//   x[i] = (i mod 219) + 1      (values cycle through 1..219)
//   y[i] = 1 / x[i]
//
// Both vectors are resized to N, so any previous contents are overwritten.
// N == 0 leaves both vectors empty.
void gen_vector_x_y(std::size_t N, std::vector<double>& x, std::vector<double>& y) {
    x.resize(N);
    y.resize(N);
    for (std::size_t i = 0; i < N; ++i) {
        x[i] = static_cast<double>((i % 219) + 1);  // xi := (i mod 219) + 1
        y[i] = 1.0 / x[i];                          // yi := 1/xi
    }
}

// Fills A with an M-by-N test matrix stored contiguously in row-major order:
// element (i, j) lives at A[i * N + j] and holds ((i + j) mod 219) + 1.
//
// A is resized to M * N, so any previous contents are overwritten.
// If either dimension is 0 the result is an empty vector.
void gen_matrix_A(std::size_t M, std::size_t N, std::vector<double>& A) {
    A.resize(M * N);
    for (std::size_t i = 0; i < M; ++i) {
        for (std::size_t j = 0; j < N; ++j) {
            A[i * N + j] = static_cast<double>(((i + j) % 219) + 1);
        }
    }
}

// Start instant of the current measurement; written by tic(), read by toc().
//
// steady_clock is used because it is guaranteed monotonic. The standard does
// not require high_resolution_clock to be steady (it may alias system_clock),
// in which case a wall-clock adjustment during a run would corrupt the timing.
std::chrono::steady_clock::time_point tic_timer;

// Marks the start of a timed region.
void tic() { tic_timer = std::chrono::steady_clock::now(); }

// Returns the time elapsed since the most recent tic(), in seconds.
// If tic() was never called, the interval is measured from the
// default-constructed time point (the clock's epoch).
double toc() {
    const auto now = std::chrono::steady_clock::now();
    const std::chrono::duration<double> elapsed = now - tic_timer;
    return elapsed.count();
}

//CHANGE FLAG BASED ON WHAT YOU WANT TO DO
|
||
//int main() {
|
||
//cout << fixed << setprecision(6);
|
||
//int flag = 2; // 1=A2 (dot), 2=B2 (matvec), 3=C2 (matmul)
|
||
// GH
|
||
int main(const int argc, const char *argv[])
|
||
{
|
||
const unsigned int flag = (argc > 1) ? atoi(argv[1]) : 2;
|
||
// HG
|
||
cout << fixed << setprecision(6);
|
||
if (flag == 1) {// A2) DOT via cBLAS
|
||
size_t N = 5000000;
|
||
vector<double> x, y;
|
||
gen_vector_x_y(N, x, y);
|
||
cout << "Running cBLAS dot (A2)\n";
|
||
tic();
|
||
double s = dot_cblas(x, y);
|
||
double dt = toc();
|
||
|
||
double flops = 2.0 * N;
|
||
double gflops = (flops / dt) / 1e9;
|
||
double traffic_bytes = 2.0 * N * sizeof(double);
|
||
double gib_s = (traffic_bytes / dt) / (1024.0 * 1024.0 * 1024.0);
|
||
|
||
cout << "A2 (BLAS): N=" << N << " time=" << dt
|
||
<< " s GFLOPS=" << gflops << " GiB/s=" << gib_s << "\n";
|
||
}
|
||
|
||
else if (flag == 2) { // B2) GEMV via cBLAS
|
||
size_t M = 10000, N = 10000;
|
||
vector<double> A, x, b;
|
||
gen_matrix_A(M, N, A);
|
||
x.resize(N);
|
||
for (size_t j = 0; j < N; ++j)
|
||
x[j] = 1.0 / (((17 + j) % 219) + 1);
|
||
|
||
cout << "Running cBLAS matvec (B2)\n";
|
||
tic();
|
||
matvec_cblas(A, M, N, x, b);
|
||
double dt = toc();
|
||
|
||
double flops = 2.0 * M * N;
|
||
double gflops = (flops / dt) / 1e9;
|
||
double traffic_bytes = (M * N + N + M) * sizeof(double);
|
||
double gib_s = (traffic_bytes / dt) / (1024.0 * 1024.0 * 1024.0);
|
||
|
||
cout << "B2 (BLAS): M=" << M << " N=" << N << " time=" << dt
|
||
<< " s GFLOPS=" << gflops << " GiB/s=" << gib_s << "\n";
|
||
}
|
||
|
||
else if (flag == 3) { // C2) GEMM via cBLAS
|
||
size_t M = 500, L = 500, N = 500; // GH: Use also different sizes
|
||
vector<double> A, B, C;
|
||
gen_matrix_A(M, L, A);
|
||
gen_matrix_A(L, N, B);
|
||
|
||
cout << "Running cBLAS matmul (C2)\n";
|
||
tic();
|
||
matmul_cblas(A, M, L, B, N, C);
|
||
double dt = toc();
|
||
|
||
double flops = 2.0 * M * L * N;
|
||
double gflops = (flops / dt) / 1e9;
|
||
double traffic_bytes = (M * L + L * N + M * N) * sizeof(double);
|
||
double gib_s = (traffic_bytes / dt) / (1024.0 * 1024.0 * 1024.0);
|
||
|
||
cout << "C2 (BLAS): M=" << M << " L=" << L << " N=" << N << " time=" << dt
|
||
<< " s GFLOPS=" << gflops << " GiB/s=" << gib_s << "\n";
|
||
}
|
||
|
||
else {
|
||
cout << "Invalid flag. Choose 1–3.\n";
|
||
}
|
||
|
||
cout << "\nDone\n";
|
||
return 0;
|
||
}
|