scf_celebic/ex3/code/task_4+6.cpp
dino.celebic e662f3c84b fixes
2025-11-13 13:17:09 +01:00

148 lines
No EOL
4.3 KiB
C++

#include "task_3.h"
#include "task_4+6.h"
#include "timing.h"
#include <cblas.h> // cBLAS Library
#include <iostream>
#include <vector>
using namespace std;
void print_performance(double sec, size_t memory, size_t flops, unsigned int size) {
printf("Memory allocated : %.3f GByte\n", 1.0 * memory / 1024 / 1024 / 1024 * size);
printf("Duration per loop : %.3f sec\n", sec);
printf("GFLOPS : %.3f\n", 1.0 * flops / sec / 1024 / 1024 / 1024);
printf("GiByte/s : %.3f\n", 1.0 * memory / sec / 1024 / 1024 / 1024 * size);
}
tuple<vector<double>, vector<double>> init_A(size_t N) {
vector<double> x(N), y(N);
for (size_t i = 0; i < N; ++i) {
x[i] = i%219 + 1.0;
y[i] = 1.0 / x[i];
}
return make_tuple(x, y);
}
void benchmark_A(vector<double> const &x, vector<double> const &y, size_t NLOOPS, bool cblas) {
size_t N = x.size();
double s(0.0), sum(0.0);
if (cblas == false) {
for (size_t i = 0; i < NLOOPS; ++i) {
s = scalar(x, y);
sum += s;
}
} else if (cblas == true) {
for (size_t i = 0; i < NLOOPS; ++i) {
s = cblas_ddot(N, x.data(), 1, y.data(), 1);
sum += s;
}
}
// Check correctness
if (static_cast<size_t>(sum) != N*NLOOPS) {printf(" !! W R O N G result !!\n");}
}
tuple<vector<double>, vector<double>> init_B(size_t M, size_t N) {
vector<double> A(M*N), x(N);
for (size_t i = 0; i < M; ++i) {
for (size_t j = 0; j < N; ++j) {
A[i*N + j] = (i+j)%219 + 1.0;
}
}
for (size_t j = 0; j < N; ++j) {
x[j] = 1.0/A[17*N + j];
}
return make_tuple(A, x);
}
void benchmark_B(vector<double> const &A, vector<double> const &x, size_t NLOOPS, bool cblas) {
size_t N = x.size();
size_t M = A.size() / N;
vector<double> b(M);
double sum(0.0);
if (cblas == false) {
for (size_t i = 0; i < NLOOPS; ++i) {
b = matrix_vec(A,x);
sum += b[17];
}
} else if (cblas == true) {
for (size_t i = 0; i < NLOOPS; ++i) {
cblas_dgemv(CblasRowMajor, CblasNoTrans, M, N, 1.0, A.data(), N, x.data(), 1, 0, b.data(), 1);
sum += b[17];
}
}
// Check correctness
if (static_cast<size_t>(sum) != N*NLOOPS) {printf(" !! W R O N G result !!\n");}
}
tuple<vector<double>, vector<double>> init_C(size_t M, size_t N, size_t L) {
vector<double> A(M*L), B(L*N);
for (size_t i = 0; i < M; ++i) {
for (size_t j = 0; j < L; ++j) {
A[i*L + j] = (i+j)%219 + 1.0;
}
}
// B chosen such that C[0,17]=L
// so B[i,17] = 1/A[0,i]
for (size_t i = 0; i < L; ++i) {
for (size_t j = 0; j < N; ++j) {
if (j==17) {
B[i*N + 17] = 1.0/A[i];
} else {
B[i*N + j] = (i+j)%219 + 1.0;
}
}
}
return make_tuple(A, B);
}
void benchmark_C(vector<double> const &A, vector<double> const &B, size_t L, size_t NLOOPS, bool cblas) {
size_t M = A.size() / L;
size_t N = B.size() / L;
vector<double> C(M*N);
double sum(0.0);
if (cblas == false) {
for (size_t i = 0; i < NLOOPS; ++i) {
C = matrix_matrix(A,B,M);
sum += C[17];
}
} else if (cblas == true) {
for (size_t i = 0; i < NLOOPS; ++i) {
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, L, 1.0, A.data(), L, B.data(), N, 0.0, C.data(), N);
sum += C[17];
}
}
// Check correctness
if (static_cast<size_t>(sum) != L*NLOOPS) {printf(" !! W R O N G result !!\n");}
}
tuple<vector<double>, vector<double>> init_D(size_t N, size_t p) {
// x_i = i/N for i=0,...,N-1
// a_j = 1 for j=0,...,p-1
vector<double> x(N), a(p);
for (size_t i = 0; i < N; ++i) {
x[i] = static_cast<double>(i) / N;
}
for (size_t j = 0; j < p; ++j) {
a[j] = 1.0;
}
return make_tuple(x, a);
}
void benchmark_D(vector<double> const &x, vector<double> const &a, size_t NLOOPS) {
size_t N = x.size();
vector<double> y(N);
double sum(0.0);
for (size_t i = 0; i < NLOOPS; ++i) {
y = poly(x,a);
sum += y[0];
}
// Check correctness
if (static_cast<size_t>(sum) != NLOOPS) {printf(" !! W R O N G result sum = %f !!\n", sum);}
}