Task 6, Sheet 3
This commit is contained in:
parent
44e8b9d13b
commit
0ff49e29d3
3 changed files with 155 additions and 0 deletions
108
Sheet3/main_blas.cpp
Normal file
108
Sheet3/main_blas.cpp
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
#include <chrono>
#include <cmath>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <vector>

#include "bench_funcs_blas.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace std::chrono;
|
||||
|
||||
// Fills the two benchmark input vectors.
//
// Both vectors are resized to N elements (any previous contents are
// overwritten) and set to
//     x[i] = (i mod 219) + 1
//     y[i] = 1 / x[i]
// so every product x[i] * y[i] is approximately 1.
//
// N : number of elements to generate; N == 0 leaves both vectors empty.
// x : output, receives the x values.
// y : output, receives the reciprocals of the x values.
void gen_vector_x_y(std::size_t N, std::vector<double>& x, std::vector<double>& y) {
    x.resize(N);
    y.resize(N);
    for (std::size_t i = 0; i < N; ++i) {
        const double xi = static_cast<double>((i % 219) + 1);  // always in [1, 219]
        x[i] = xi;
        y[i] = 1.0 / xi;  // xi >= 1, so the division is always defined
    }
}
|
||||
|
||||
// Fills A with an M x N benchmark matrix stored row-major in a flat vector.
//
// A is resized to M * N elements (any previous contents are overwritten) and
// the entry in row i, column j is stored at A[i * N + j] with the value
//     ((i + j) mod 219) + 1
//
// M : number of rows.
// N : number of columns.
// A : output, receives the M * N matrix entries; empty if M or N is 0.
void gen_matrix_A(std::size_t M, std::size_t N, std::vector<double>& A) {
    A.resize(M * N);
    for (std::size_t row = 0; row < M; ++row) {
        const std::size_t row_start = row * N;  // offset of this row in the flat storage
        for (std::size_t col = 0; col < N; ++col) {
            A[row_start + col] = static_cast<double>(((row + col) % 219) + 1);
        }
    }
}
|
||||
|
||||
// Start of the interval currently being measured: written by tic(), read by
// toc(). steady_clock is used because it is monotonic; high_resolution_clock
// is allowed to alias the wall clock, which can jump while a benchmark runs.
std::chrono::steady_clock::time_point tic_timer;

// Marks the current instant as the start of the measured interval.
void tic() { tic_timer = std::chrono::steady_clock::now(); }

// Returns the time in seconds elapsed since the most recent tic().
// The start mark is left untouched, so repeated calls report a growing
// interval. If tic() was never called, the interval is measured from the
// default-constructed time point (the clock's epoch).
double toc() {
    const auto now = std::chrono::steady_clock::now();
    const std::chrono::duration<double> elapsed = now - tic_timer;
    return elapsed.count();
}
|
||||
|
||||
//CHANGE FLAG BASED ON WHAT YOU WANT TO DO
|
||||
int main() {
|
||||
cout << fixed << setprecision(6);
|
||||
int flag = 2; // 1=A2 (dot), 2=B2 (matvec), 3=C2 (matmul)
|
||||
|
||||
if (flag == 1) {// A2) DOT via cBLAS
|
||||
size_t N = 5000000;
|
||||
vector<double> x, y;
|
||||
gen_vector_x_y(N, x, y);
|
||||
cout << "Running cBLAS dot (A2)\n";
|
||||
tic();
|
||||
double s = dot_cblas(x, y);
|
||||
double dt = toc();
|
||||
|
||||
double flops = 2.0 * N;
|
||||
double gflops = (flops / dt) / 1e9;
|
||||
double traffic_bytes = 2.0 * N * sizeof(double);
|
||||
double gib_s = (traffic_bytes / dt) / (1024.0 * 1024.0 * 1024.0);
|
||||
|
||||
cout << "A2 (BLAS): N=" << N << " time=" << dt
|
||||
<< " s GFLOPS=" << gflops << " GiB/s=" << gib_s << "\n";
|
||||
}
|
||||
|
||||
else if (flag == 2) { // B2) GEMV via cBLAS
|
||||
size_t M = 10000, N = 10000;
|
||||
vector<double> A, x, b;
|
||||
gen_matrix_A(M, N, A);
|
||||
x.resize(N);
|
||||
for (size_t j = 0; j < N; ++j)
|
||||
x[j] = 1.0 / (((17 + j) % 219) + 1);
|
||||
|
||||
cout << "Running cBLAS matvec (B2)\n";
|
||||
tic();
|
||||
matvec_cblas(A, M, N, x, b);
|
||||
double dt = toc();
|
||||
|
||||
double flops = 2.0 * M * N;
|
||||
double gflops = (flops / dt) / 1e9;
|
||||
double traffic_bytes = (M * N + N + M) * sizeof(double);
|
||||
double gib_s = (traffic_bytes / dt) / (1024.0 * 1024.0 * 1024.0);
|
||||
|
||||
cout << "B2 (BLAS): M=" << M << " N=" << N << " time=" << dt
|
||||
<< " s GFLOPS=" << gflops << " GiB/s=" << gib_s << "\n";
|
||||
}
|
||||
|
||||
else if (flag == 3) { // C2) GEMM via cBLAS
|
||||
size_t M = 500, L = 500, N = 500;
|
||||
vector<double> A, B, C;
|
||||
gen_matrix_A(M, L, A);
|
||||
gen_matrix_A(L, N, B);
|
||||
|
||||
cout << "Running cBLAS matmul (C2)\n";
|
||||
tic();
|
||||
matmul_cblas(A, M, L, B, N, C);
|
||||
double dt = toc();
|
||||
|
||||
double flops = 2.0 * M * L * N;
|
||||
double gflops = (flops / dt) / 1e9;
|
||||
double traffic_bytes = (M * L + L * N + M * N) * sizeof(double);
|
||||
double gib_s = (traffic_bytes / dt) / (1024.0 * 1024.0 * 1024.0);
|
||||
|
||||
cout << "C2 (BLAS): M=" << M << " L=" << L << " N=" << N << " time=" << dt
|
||||
<< " s GFLOPS=" << gflops << " GiB/s=" << gib_s << "\n";
|
||||
}
|
||||
|
||||
else {
|
||||
cout << "Invalid flag. Choose 1–3.\n";
|
||||
}
|
||||
|
||||
cout << "\nDone\n";
|
||||
return 0;
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue