SciFEM_Schratter/ex3_benchmarks/benchmark_tests.cpp
2025-11-12 13:58:25 +01:00

333 lines
9.6 KiB
C++

#include "benchmark_tests.h"
#include "benchmarks.h"
#include <chrono>
#include <iostream>
#include <math.h>
using namespace std::chrono;
vector<double> test_A(const size_t &NLOOPS, const size_t &N, const function<double(const vector<double>&, const vector<double>&)>& scalar_function)
{
cout << "#################### (A) ####################" << endl;
cout << "\nLOOPS = " << NLOOPS << endl;
cout << "\nN = " << N << endl;
// Memory allocation
cout << "Memory allocation\n";
vector<double> x(N), y(N);
cout.precision(2);
cout << 2.0*N *sizeof(x[0]) / 1024 / 1024 / 1024 << " GByte Memory allocated\n";
cout.precision(6);
// Data initialization
// Special: x_i = i+1; y_i = 1/x_i ==> <x,y> == N
for (size_t i = 0; i < N; ++i)
{
x[i] = i % 219 + 1;
y[i] = 1.0/x[i];
}
cout << "\nStart Benchmarking scalar\n";
auto t1 = system_clock::now(); // start timer
// Do calculation
double check(0.0),ss(0.0);
for (size_t i = 0; i < NLOOPS; ++i)
{
check = scalar_function(x, y);
ss += check; // prevents the optimizer from removing unused calculation results.
}
auto t2 = system_clock::now(); // stop timer
auto duration = duration_cast<microseconds>(t2 - t1); // duration in microseconds
double t_diff = static_cast<double>(duration.count()) / 1e6; // overall duration in seconds
t_diff = t_diff/NLOOPS; // duration per loop seconds
// Check the correct result
cout << "\n <x,y> = " << check << endl;
if (static_cast<unsigned int>(check) != N)
cout << " !! W R O N G result !!\n";
cout << endl;
// Timings and Performance
cout << endl;
cout.precision(2);
double Gflops = 2.0*N / t_diff / 1024 / 1024 / 1024;
double MemBandwidth = 2.0*N / t_diff / 1024 / 1024 / 1024 * sizeof(x[0]);
cout << "Total duration : " << t_diff*NLOOPS << endl;
cout << "Timing in sec. : " << t_diff << endl;
cout << "GFLOPS : " << Gflops << endl;
cout << "GiByte/s : " << MemBandwidth << endl;
//##########################################################################
cout << "\nStart Benchmarking norm\n";
auto t3 = system_clock::now(); // start timer
// Do calculation
double ss2(0.0);
for (size_t i = 0; i < NLOOPS; ++i)
{
auto sk1 = sqrt(scalar(x, x));
ss2 += sk1; // prevents the optimizer from removing unused calculation results.
}
auto t4 = system_clock::now(); // stop timer
auto duration2 = duration_cast<microseconds>(t4 - t3); // duration in microseconds
double t_diff2 = static_cast<double>(duration2.count()) / 1e6; // overall duration in seconds
t_diff2 = t_diff2/NLOOPS; // duration per loop seconds
cout << "ss(norm): " << ss2 << endl;
cout << "Timing in sec. : " << t_diff2 << endl;
return vector<double>{t_diff, Gflops, MemBandwidth};
}
/**
 * Benchmark (B): matrix-vector product b = A*x with a row-major M x N matrix.
 *
 * A is filled with A[i,j] = ((i + j) mod 219) + 1 and x with the reciprocals of
 * one row of A (row 17, or the last row if the matrix has fewer than 18 rows),
 * so the corresponding entry of b must equal N. MatVec_function is called
 * NLOOPS times and timed. All progress and result information is printed to
 * std::cout.
 *
 * @param NLOOPS           number of timed repetitions; the per-call time is the
 *                         total time divided by NLOOPS, so it should be > 0
 * @param N                number of matrix columns (length of x)
 * @param M                number of matrix rows (expected length of b)
 * @param MatVec_function  matrix-vector implementation under test, called as
 *                         MatVec_function(A, x)
 * @return {seconds per call, GFLOPS, GiByte/s}; both rates are divided by 1024^3
 */
std::vector<double> test_B(const std::size_t &NLOOPS, const std::size_t &N, const std::size_t &M, const std::function<std::vector<double>(const std::vector<double>&, const std::vector<double>&)>& MatVec_function)
{
    // A monotonic clock: system_clock may jump when the wall time is adjusted.
    using bench_clock = std::chrono::steady_clock;

    // Row used for the plausibility check. Clamped so that small matrices
    // (M <= 17) are not indexed out of bounds.
    const std::size_t check_row = (M > 17) ? 17 : (M > 0 ? M - 1 : 0);

    std::cout << "#################### (B) ####################" << std::endl;
    std::cout << "\nLOOPS = " << NLOOPS << std::endl;
    std::cout << "\nN = " << N << std::endl;
    std::cout << "\nM = " << M << std::endl;

    // Memory allocation
    std::cout << "Memory allocation\n";
    std::vector<double> A(M*N);
    std::vector<double> x(N);
    std::cout.precision(2);
    std::cout << (1.0*M*N + N) * sizeof(double) / 1024 / 1024 / 1024 << " GByte Memory allocated\n";
    std::cout.precision(6);

    // Data initialization
    for (std::size_t i = 0; i < M; ++i)
        for (std::size_t j = 0; j < N; ++j)
            A[N*i + j] = static_cast<double>((i + j) % 219 + 1);
    // x_j = 1/A[check_row,j], computed from the fill formula so that it is
    // also well defined for an empty matrix.
    for (std::size_t j = 0; j < N; ++j)
        x[j] = 1.0/static_cast<double>((check_row + j) % 219 + 1);

    std::cout << "\nStart Benchmarking MatVec\n";
    const auto t_start = bench_clock::now();            // start timer
    std::vector<double> b(M);
    for (std::size_t i = 0; i < NLOOPS; ++i)
    {
        b = MatVec_function(A, x);
    }
    const auto t_stop = bench_clock::now();             // stop timer
    // Floating-point seconds keep sub-microsecond resolution.
    const double t_total = std::chrono::duration<double>(t_stop - t_start).count();
    const double t_diff  = t_total/NLOOPS;              // duration per loop in seconds

    // Check the correct result: <A[check_row,*],x> has to be N.
    if (check_row < b.size())
    {
        std::cout << "\n <A[" << check_row << ",*],x> = " << b[check_row] << std::endl;
        // Compare with rounding; truncation would turn N - 1e-15 into N - 1.
        const double deviation = b[check_row] - static_cast<double>(N);
        if (!(deviation > -0.5 && deviation < 0.5))
        {
            std::cout << " !! W R O N G result !!\n";
        }
    }
    else if (M > 0)
    {
        // The implementation returned fewer entries than the matrix has rows.
        std::cout << " !! W R O N G result !!\n";
    }
    std::cout << std::endl;

    // Timings and Performance
    std::cout << std::endl;
    std::cout.precision(2);
    const double Gflops       = (2.0*N*M) / t_diff / 1024 / 1024 / 1024;
    const double MemBandwidth = (2.0*N*M + M)/ t_diff / 1024 / 1024 / 1024 * sizeof(double);
    std::cout << "Total duration : " << t_total << std::endl;
    std::cout << "Timing in sec. : " << t_diff << std::endl;
    std::cout << "GFLOPS : " << Gflops << std::endl;
    std::cout << "GiByte/s : " << MemBandwidth << std::endl;

    return std::vector<double>{t_diff, Gflops, MemBandwidth};
}
/**
 * Benchmark (C): matrix-matrix product C = A*B with row-major matrices,
 * A being M x L and B being L x N.
 *
 * A is filled with A[i,k] = ((i + k) mod 219) + 1 and every column of B with
 * the reciprocals of one row of A (row 17, or the last row if A has fewer than
 * 18 rows), so the first entry of that row of C must equal L. MatMat_function
 * is called NLOOPS times and timed. All progress and result information is
 * printed to std::cout.
 *
 * @param NLOOPS           number of timed repetitions; the per-call time is the
 *                         total time divided by NLOOPS, so it should be > 0
 * @param L                shared (inner) dimension
 * @param M                number of rows of A and C
 * @param N                number of columns of B and C
 * @param MatMat_function  matrix-matrix implementation under test, called as
 *                         MatMat_function(A, B, L)
 * @return {seconds per call, GFLOPS, GiByte/s}; both rates are divided by 1024^3
 */
std::vector<double> test_C(const std::size_t &NLOOPS, const std::size_t &L, const std::size_t &M, const std::size_t &N, const std::function<std::vector<double>(const std::vector<double>&, const std::vector<double>&, std::size_t const &shared_dim)>& MatMat_function)
{
    // A monotonic clock: system_clock may jump when the wall time is adjusted.
    using bench_clock = std::chrono::steady_clock;

    // Row used for the plausibility check. Clamped so that small matrices
    // (M <= 17) are not indexed out of bounds.
    const std::size_t check_row = (M > 17) ? 17 : (M > 0 ? M - 1 : 0);

    std::cout << "#################### (C) ####################" << std::endl;
    std::cout << "\nLOOPS = " << NLOOPS << std::endl;
    std::cout << "\nL = " << L << std::endl;
    std::cout << "\nM = " << M << std::endl;
    std::cout << "\nN = " << N << std::endl;

    // Memory allocation
    std::cout << "Memory allocation\n";
    std::vector<double> A(M*L);
    std::vector<double> B(L*N);
    std::cout.precision(2);
    std::cout << (1.0*M*L + 1.0*L*N) *sizeof(double) / 1024 / 1024 / 1024 << " GByte Memory allocated\n";
    std::cout.precision(6);

    // Data initialization
    for (std::size_t i = 0; i < M; ++i)
        for (std::size_t k = 0; k < L; ++k)
            A[L*i + k] = static_cast<double>((i + k) % 219 + 1);
    // B[k,j] = 1/A[check_row,k], computed from the fill formula so that it is
    // also well defined for an empty A.
    for (std::size_t k = 0; k < L; ++k)
    {
        const double reciprocal = 1.0/static_cast<double>((check_row + k) % 219 + 1);
        for (std::size_t j = 0; j < N; ++j)
            B[N*k + j] = reciprocal;
    }

    std::cout << "\nStart Benchmarking MatMat\n";
    const auto t_start = bench_clock::now();            // start timer
    std::vector<double> C(M*N);
    const std::size_t check_idx = N*check_row;          // index of C[check_row,0]
    double check     = 0.0;
    double check_sum = 0.0;   // must start at zero; it was read uninitialized before
    for (std::size_t i = 0; i < NLOOPS; ++i)
    {
        C = MatMat_function(A, B, L);
        if (check_idx < C.size())
        {
            check = C[check_idx];
        }
        check_sum += check;   // prevents the optimizer from removing unused calculation results.
    }
    const auto t_stop = bench_clock::now();             // stop timer
    // Printed after the timer is stopped so console I/O is not part of the measurement.
    std::cout << check_sum;
    // Floating-point seconds keep sub-microsecond resolution.
    const double t_total = std::chrono::duration<double>(t_stop - t_start).count();
    const double t_diff  = t_total/NLOOPS;              // duration per loop in seconds

    // Check the correct result: C[check_row,0] has to be L.
    std::cout << "\n C[" << check_row << ",0] = " << check << std::endl;
    if (M > 0 && N > 0)
    {
        // Compare with rounding instead of a truncating 32-bit cast.
        const double deviation = check - static_cast<double>(L);
        if (!(deviation > -0.5 && deviation < 0.5))
        {
            std::cout << " !! W R O N G result !!, should be " << L <<"\n";
        }
    }
    std::cout << std::endl;

    // Timings and Performance
    std::cout << std::endl;
    std::cout.precision(2);
    const double Gflops       = (2.0*L*N*M) / t_diff / 1024 / 1024 / 1024;
    const double MemBandwidth = (2.0*L*N*M + 1.0*M*N)/ t_diff / 1024 / 1024 / 1024 * sizeof(double);
    std::cout << "Total duration : " << t_total << std::endl;
    std::cout << "Timing in sec. : " << t_diff << std::endl;
    std::cout << "GFLOPS : " << Gflops << std::endl;
    std::cout << "GiByte/s : " << MemBandwidth << std::endl;

    return std::vector<double>{t_diff, Gflops, MemBandwidth};
}
vector<double> test_D(const size_t &NLOOPS, const size_t &N, const size_t &p)
{
cout << "#################### (D) ####################" << endl;
cout << "\nLOOPS = " << NLOOPS << endl;
cout << "\nN = " << N << endl;
cout << "\np = " << p << endl;
// Memory allocation
cout << "Memory allocation\n";
vector<double> a(p + 1, 0);
vector<double> x(N);
cout.precision(2);
cout << (1.0*(p + 1) + N) *sizeof(x[0]) / 1024 / 1024 / 1024 << " GByte Memory allocated\n";
cout.precision(6);
// Data initialization
for (size_t j = 0; j < N; ++j)
x[j] = 1.0*j;
for (size_t k = 0; k < p + 1; ++k)
a[k] = pow(-1.0, k); // poly(x) = 1 - x + x^2 - x^3 + x^4 - ...
cout << "\nStart Benchmarking poly\n";
auto t1 = system_clock::now(); // start timer
// Do calculation
vector<double> y(N);
double check;
double check_sum;
for (size_t i = 0; i < NLOOPS; ++i)
{
y = poly(a, x);
check = y[0];
check_sum += check; // prevents the optimizer from removing unused calculation results.
}
auto t2 = system_clock::now(); // stop timer
auto duration = duration_cast<microseconds>(t2 - t1); // duration in microseconds
double t_diff = static_cast<double>(duration.count()) / 1e6; // overall duration in seconds
t_diff = t_diff/NLOOPS; // duration per loop seconds
// Check the correct result
cout << "\n poly(" << x[0] << ") = " << check << endl;
if (abs(check - 1.0) > 1.0/1e6)
{
cout << " !! W R O N G result !!\n";
}
cout << endl;
// Timings and Performance
cout << endl;
cout.precision(2);
double Gflops = (N*(p + 1)*3.0) / t_diff / 1024 / 1024 / 1024;
double MemBandwidth = (N*(2.0 + 3.0*(p + 1)))/ t_diff / 1024 / 1024 / 1024 * sizeof(x[0]);
cout << "Total duration : " << t_diff*NLOOPS << endl;
cout << "Timing in sec. : " << t_diff << endl;
cout << "GFLOPS : " << Gflops << endl;
cout << "GiByte/s : " << MemBandwidth << endl;
return vector<double>{t_diff, Gflops, MemBandwidth};
}