From ce217b844fbdd506db16b9641f842464f8bbbb7a Mon Sep 17 00:00:00 2001 From: "g.mandl" Date: Thu, 13 Nov 2025 02:29:23 +0100 Subject: [PATCH] exercises 2 to 5 --- BSP_3_2to5/bsp_3_lib_bench.cpp | 519 +++++++++++++++++++++++++++++++ BSP_3_2to5/bsp_3_lib_bench.h | 136 ++++++++ BSP_3_2to5/bsp_3_results_2-5.txt | 106 +++++++ BSP_3_2to5/bsp_3_x.cbp | 42 +++ BSP_3_2to5/main.cpp | 17 + 5 files changed, 820 insertions(+) create mode 100644 BSP_3_2to5/bsp_3_lib_bench.cpp create mode 100644 BSP_3_2to5/bsp_3_lib_bench.h create mode 100644 BSP_3_2to5/bsp_3_results_2-5.txt create mode 100644 BSP_3_2to5/bsp_3_x.cbp create mode 100644 BSP_3_2to5/main.cpp diff --git a/BSP_3_2to5/bsp_3_lib_bench.cpp b/BSP_3_2to5/bsp_3_lib_bench.cpp new file mode 100644 index 0000000..3c99ac2 --- /dev/null +++ b/BSP_3_2to5/bsp_3_lib_bench.cpp @@ -0,0 +1,519 @@ +#include "bsp_3_lib_bench.h" +#include +#include +#include +#include +#include + +using namespace std; +using namespace std::chrono; // timing + +double scalar(vector const &x, vector const &y) +{ + assert(x.size() == y.size()); // switch off via compile flag: -DNDEBUG + size_t const N = x.size(); + double sum = 0.0; + for (size_t i = 0; i < N; ++i) + { + sum += x[i] * y[i]; + //sum += exp(x[i])*log(y[i]); + } + return sum; +} + + +double scalar_kahan(vector const &x, vector const &y) +{ + assert(x.size() == y.size()); // switch off via compile flag: -DNDEBUG + size_t const N = x.size(); + double sum = 0.0; + double c = 0.0; + for (size_t i = 0; i < N; ++i) + { + double yk = x[i] * y[i] - c; + double t = sum + yk; + c = t - sum - yk; + sum = t; + //sum += exp(x[i])*log(y[i]); + } + return sum; +} + + +double norm_eucl(std::vector const &x) +{ + size_t const N = x.size(); + double sum = 0.0; + for (size_t i = 0; i < N; ++i) + { + sum += x[i]*x[i]; + //sum += exp(x[i])*log(y[i]); + } + sum = sqrt(sum); + return sum; +} + + +vector MatVec(vector const & a, vector const & x) // row wise access +{ + int const nelem = static_cast(a.size()); // #elements in matrix + int const mcols = static_cast(x.size()); // #elements in vector <==> #columns in matrix + + assert(nelem % mcols == 0); // nelem has to be a multiple of mcols (==> #rows) + int const nrows = nelem/mcols; // integer division! + + vector b(nrows); // allocate resulting vector + + for(size_t i = 0; i < nrows; ++i) + { + double tmp = 0.0; + for(size_t j = 0; j < mcols; ++j) + { + tmp = tmp + a[i*mcols+j] * x[j]; + } + b[i] = tmp; + } + + return b; +} + + +vector MatVec_column(vector const & a, vector const & x) // column wise access +{ + int const nelem = static_cast(a.size()); // #elements in matrix + int const mcols = static_cast(x.size()); // #elements in vector <==> #columns in matrix + + assert(nelem % mcols == 0); // nelem has to be a multiple of mcols (==> #rows) + int const nrows = nelem/mcols; // integer division! + + vector b(nrows); // allocate resulting vector + + // if we do it directly we have cache issues - not optimal + // to make the code more efficient we change the two loops and put the b[i] inside the inner loop + // b is not so large compared to a, so higher amount of writing operations to not matter that much + for(size_t j = 0; j < mcols; ++j) + { + double xj = x[j]; + for(size_t i = 0; i < nrows; ++i) + { + b[i] += a[j*nrows+i] * xj; + } + } + + return b; +} + + +vector MatMatProd(vector const & a, vector const & b, int const & L) +{ + size_t const a_nelem = a.size(); + size_t const b_nelem = b.size(); + + assert(static_cast(a_nelem) % L == 0 && static_cast(b_nelem) % L == 0); + + size_t M = a_nelem/L; + size_t N = b_nelem/L; + + vector c(N*M,0); + + for(size_t i = 0; i < M; ++i) + { + for(size_t k = 0; k < L; ++k) + { + for(size_t j = 0; j < N; ++j) + { + c[i*M+j] = c[i*M+j] + a[i*L+k]*b[k*N+j]; + } + } + } + + return c; +} + + +vector PolynomEval(vector const & a, vector const & x) +{ + // we want to use the Horner-scheme + vector sol(x.size(),0); + + for(size_t i = 0; i < x.size(); ++i) + { + double tmp = a[a.size()-1]; + for(int k = static_cast(a.size())-2; k >= 0; --k) + { + tmp = tmp*x[i] + a[k]; + } + sol[i] = tmp; + } + + return sol; +} + + +void benchmark_A(int const & N, int const & Nloops) +{ + //########################################################################## + cout << "\nStart Benchmarking A: scalar product\n"; + + vector x(N), y(N); + for(size_t k = 0; k < x.size(); ++k) + { + x[k] = (k % 219) + 1; + y[k] = 1.0/x[k]; + } + + auto t1 = system_clock::now(); // start timer +// Do calculation + double sk(0.0), ss(0.0); + for (int i = 0; i < Nloops; ++i) + { + sk = scalar(x, y); + ss += sk; // prevents the optimizer from removing unused calculation results. + } + + auto t2 = system_clock::now(); // stop timer + auto duration = duration_cast(t2 - t1); // duration in microseconds + double t_diff = static_cast(duration.count()) / 1e6; // overall duration in seconds + t_diff = t_diff/Nloops; // duration per loop seconds + + //assert(std::abs(ss/NLOOPS-sk)<1e-5); // avoids unsafe floating point comparison "==" + +//########################################################################## +// Check the correct result + cout << "\n = " << sk << endl; + if (static_cast(sk) != N) + { + cout << " !! W R O N G result !!\n"; + } + cout << endl; + +//########################################################################## +// Timings and Performance + cout << endl; + cout.precision(2); + cout << "N = " << N << endl; + cout << "Time for Nloops: " << t_diff*Nloops << endl; + cout << "Timing in sec. : " << t_diff << endl; + cout << "GFLOPS : " << 2.0 * N / t_diff / 1024 / 1024 / 1024 << endl; + cout << "GiByte/s : " << 2.0 * N / t_diff / 1024 / 1024 / 1024 * sizeof(x[0]) << endl; + cout << endl << endl; + + return; +} + + +void benchmark_A_kahan(int const & N, int const & Nloops) +{ + //########################################################################## + cout << "\nStart Benchmarking A: scalar product with Kahan summation\n"; + + vector x(N), y(N); + for(size_t k = 0; k < x.size(); ++k) + { + x[k] = (k % 219) + 1; + y[k] = 1.0/x[k]; + } + + auto t1 = system_clock::now(); // start timer +// Do calculation + double sk(0.0), ss(0.0); + for (int i = 0; i < Nloops; ++i) + { + sk = scalar(x, y); + ss += sk; // prevents the optimizer from removing unused calculation results. + } + + auto t2 = system_clock::now(); // stop timer + auto duration = duration_cast(t2 - t1); // duration in microseconds + double t_diff = static_cast(duration.count()) / 1e6; // overall duration in seconds + t_diff = t_diff/Nloops; // duration per loop seconds + + //assert(std::abs(ss/NLOOPS-sk)<1e-5); // avoids unsafe floating point comparison "==" + +//########################################################################## +// Check the correct result + cout << "\n = " << sk << endl; + if (static_cast(sk) != N) + { + cout << " !! W R O N G result !!\n"; + } + cout << endl; + +//########################################################################## +// Timings and Performance + cout << endl; + cout.precision(2); + cout << "N = " << N << endl; + cout << "Time for Nloops: " << t_diff*Nloops << endl; + cout << "Timing in sec. : " << t_diff << endl; + //cout << "GFLOPS : " << 2.0 * N / t_diff / 1024 / 1024 / 1024 << endl; + //cout << "GiByte/s : " << 2.0 * N / t_diff / 1024 / 1024 / 1024 * sizeof(x[0]) << endl; + cout << endl << endl; + + return; +} + + +void benchmark_A_norm(int const & N, int const & Nloops) +{ + //########################################################################## + cout << "\nStart Benchmarking A_norm: euclidean norm\n"; + + vector x(N,1.0); + + auto t1 = system_clock::now(); // start timer +// Do calculation + double sk(0.0), ss(0.0); + for (int i = 0; i < Nloops; ++i) + { + sk = norm_eucl(x); + ss += sk; // prevents the optimizer from removing unused calculation results. + } + + auto t2 = system_clock::now(); // stop timer + auto duration = duration_cast(t2 - t1); // duration in microseconds + double t_diff = static_cast(duration.count()) / 1e6; // overall duration in seconds + t_diff = t_diff/Nloops; // duration per loop seconds + + //assert(std::abs(ss/NLOOPS-sk)<1e-5); // avoids unsafe floating point comparison "==" + +//########################################################################## +// Check the correct result + cout << "\n ||x|| = " << sk << endl; + if (sk - sqrt(N) > 1e-7) + { + cout << " !! W R O N G result !!\n"; + } + cout << endl; + +//########################################################################## +// Timings and Performance + cout << endl; + cout.precision(2); + cout << "N = " << N << endl; + cout << "Time for Nloops: " << t_diff*Nloops << endl; + cout << "Timing in sec. : " << t_diff << endl; + cout << "GFLOPS : " << 2.0 * N / t_diff / 1024 / 1024 / 1024 << endl; + cout << "GiByte/s : " << N / t_diff / 1024 / 1024 / 1024 * sizeof(x[0]) << endl; + cout << endl << endl; + + return; +} + + +void benchmark_B(int const & N, int const & M, int const & Nloops) +{ + //########################################################################## + cout << "\nStart Benchmarking B: Matrix-Vector Product (row wise access)\n"; + + vector x(N), b(M), a(N*M); + // initialize data + for(size_t i = 0; i < M; ++i) + { + for(size_t j = 0; j < N; ++j) + { + a[i*N+j] = (i+j) % 219 + 1; + } + } + for(size_t i = 0; i < N; ++i) + { + x[i] = 1.0/a[17*N+i]; + } + + auto t1 = system_clock::now(); // start timer +// Do calculation + double ss(0.0); + for (int i = 0; i < Nloops; ++i) + { + b = MatVec(a,x); + ss += b[0]; // prevents the optimizer from removing unused calculation results. + } + + auto t2 = system_clock::now(); // stop timer + auto duration = duration_cast(t2 - t1); // duration in microseconds + double t_diff = static_cast(duration.count()) / 1e6; // overall duration in seconds + t_diff = t_diff/Nloops; // duration per loop seconds + + //assert(std::abs(ss/NLOOPS-sk)<1e-5); // avoids unsafe floating point comparison "==" + +//########################################################################## +// Check the correct result + cout << "\n = " << b[17] << endl; + if (static_cast(b[17]) != N) + { + cout << " !! W R O N G result !!\n"; + } + cout << endl; + +//########################################################################## +// Timings and Performance + cout << endl; + cout.precision(2); + cout << "N = " << N << "\t M = " << M << endl; + cout << "Time for Nloops: " << t_diff*Nloops << endl; + cout << "Timing in sec. : " << t_diff << endl; + cout << "GFLOPS : " << 2.0 * N * M / t_diff / 1024 / 1024 / 1024 << endl; + cout << "GiByte/s : " << (2.0 * N * M + M) / t_diff / 1024 / 1024 / 1024 * sizeof(x[0]) << endl; + cout << endl << endl; + + return; +} + + +void benchmark_B_column(int const & N, int const & M, int const & Nloops) +{ + //########################################################################## + cout << "\nStart Benchmarking B: Matrix-Vector Product (column wise access)\n"; + + vector x(N), b(M), a(N*M); + // initialize data + for(size_t i = 0; i < M; ++i) + { + for(size_t j = 0; j < N; ++j) + { + a[i*N+j] = (i+j) % 219 + 1; + } + } + for(size_t i = 0; i < N; ++i) + { + x[i] = 1.0/a[17*N+i]; + } + + auto t1 = system_clock::now(); // start timer +// Do calculation + double ss(0.0); + for (int i = 0; i < Nloops; ++i) + { + b = MatVec_column(a,x); + ss += b[0]; // prevents the optimizer from removing unused calculation results. + } + + auto t2 = system_clock::now(); // stop timer + auto duration = duration_cast(t2 - t1); // duration in microseconds + double t_diff = static_cast(duration.count()) / 1e6; // overall duration in seconds + t_diff = t_diff/Nloops; // duration per loop seconds + + //assert(std::abs(ss/NLOOPS-sk)<1e-5); // avoids unsafe floating point comparison "==" + +//########################################################################## +// Check the correct result + cout << "\n = " << b[17] << endl; + if (static_cast(b[17]) != N) + { + cout << " !! W R O N G result !!\n"; + } + cout << endl; + +//########################################################################## +// Timings and Performance + cout << endl; + cout.precision(2); + cout << "N = " << N << "\t M = " << M << endl; + cout << "Time for Nloops: " << t_diff*Nloops << endl; + cout << "Timing in sec. : " << t_diff << endl; + cout << "GFLOPS : " << 2.0 * N * M / t_diff / 1024 / 1024 / 1024 << endl; + cout << "GiByte/s : " << (2.0 * N * M + M) / t_diff / 1024 / 1024 / 1024 * sizeof(x[0]) << endl; + cout << endl << endl; + + return; +} + + +void benchmark_C(int const & N, int const & M, int const & L, int const & Nloops) +{ + //########################################################################## + cout << "\nStart Benchmarking C: Matrix-Matrix Product\n"; + + vector a(M*L,1.0), b(L*N,1.0), c(N*M); + // with this data we get C[i,j] = L for all i and j + + auto t1 = system_clock::now(); // start timer +// Do calculation + double ss(0.0); + for (int i = 0; i < Nloops; ++i) + { + c = MatMatProd(a,b,L); + ss += c[0]; // prevents the optimizer from removing unused calculation results. + } + + auto t2 = system_clock::now(); // stop timer + auto duration = duration_cast(t2 - t1); // duration in microseconds + double t_diff = static_cast(duration.count()) / 1e6; // overall duration in seconds + t_diff = t_diff/Nloops; // duration per loop seconds + + //assert(std::abs(ss/NLOOPS-sk)<1e-5); // avoids unsafe floating point comparison "==" + +//########################################################################## +// Check the correct result + cout << "\n C[10,15] = " << c[10*N+15] << endl; + if (static_cast(c[10*N+15]) != L) + { + cout << " !! W R O N G result !!\n"; + } + cout << endl; + +//########################################################################## +// Timings and Performance + cout << endl; + cout.precision(2); + cout << "N = " << N << "\t M = " << M << "\t L = " << L << endl; + cout << "Time for Nloops: " << t_diff*Nloops << endl; + cout << "Timing in sec. : " << t_diff << endl; + cout << "GFLOPS : " << 2.0 * N * M * L / t_diff / 1024 / 1024 / 1024 << endl; + cout << "GiByte/s : " << (L*(N+M) + M*N) / t_diff / 1024 / 1024 / 1024 * sizeof(a[0]) << endl; + cout << endl << endl; + + return; +} + + +void benchmark_D(int const & p, int const & N, int const & Nloops) +{ + //########################################################################## + cout << "\nStart Benchmarking D: polynomial evaluation\n"; + + vector x(N,1), sol(N), a(p+1); + for(size_t i = 0; i < a.size(); ++i) + { + a[i] = pow(-1.0,i); // 1-x+x^2-x^3+x^4... + } + a[0] = 1; + + auto t1 = system_clock::now(); // start timer +// Do calculation + double ss(0.0); + for (int i = 0; i < Nloops; ++i) + { + sol = PolynomEval(a,x); + ss += sol[0]; // prevents the optimizer from removing unused calculation results. + } + + auto t2 = system_clock::now(); // stop timer + auto duration = duration_cast(t2 - t1); // duration in microseconds + double t_diff = static_cast(duration.count()) / 1e6; // overall duration in seconds + t_diff = t_diff/Nloops; // duration per loop seconds + + //assert(std::abs(ss/NLOOPS-sk)<1e-5); // avoids unsafe floating point comparison "==" + +//########################################################################## +// Check the correct result + cout << "\n p(x[0]) = " << sol[0] << endl; + if (static_cast(sol[0]) != (static_cast(a.size()) % 2)) + { + cout << " !! W R O N G result !!\n"; + } + cout << endl; + +//########################################################################## +// Timings and Performance + cout << endl; + cout.precision(2); + cout << "p = " << p << "\t N = " << N << endl; + cout << "Time for Nloops: " << t_diff*Nloops << endl; + cout << "Timing in sec. : " << t_diff << endl; + cout << "GFLOPS : " << 2.0*(p+1)*N / t_diff / 1024 / 1024 / 1024 << endl; + cout << "GiByte/s : " << N*(3+2*p) / t_diff / 1024 / 1024 / 1024 * sizeof(a[0]) << endl; + cout << endl << endl; + + return; +} diff --git a/BSP_3_2to5/bsp_3_lib_bench.h b/BSP_3_2to5/bsp_3_lib_bench.h new file mode 100644 index 0000000..ff83f58 --- /dev/null +++ b/BSP_3_2to5/bsp_3_lib_bench.h @@ -0,0 +1,136 @@ +#ifndef BSP_3_LIB_BENCH_H_INCLUDED +#define BSP_3_LIB_BENCH_H_INCLUDED + +#include + +/** Inner product + @param[in] x vector + @param[in] y vector + @return resulting Euclidean inner product +*/ +double scalar(std::vector const &x, std::vector const &y); + + +/** Inner product with Kahan summation + @param[in] x vector + @param[in] y vector + @return resulting Euclidean inner product +*/ +double scalar_kahan(std::vector const &x, std::vector const &y); + + +/** euclidean norm + @param[in] x vector + @return resulting Euclidean norm +*/ +double norm_eucl(std::vector const &x); + + +/** \brief Matrix-Vektor-Multiplikation (row-wise access) + * + * \param[in] a Matrix with row wise access + * \param[in] x vector which gets multiplied + * \return resulting product a*x (vector) + * + */ +std::vector MatVec(std::vector const & a, std::vector const & x); + + +/** \brief Matrix-Vektor-Multiplikation (column-wise access) + * + * \param[in] a Matrix with row wise access + * \param[in] x vector which gets multiplied + * \return resulting product a*x (vector) + * + */ +std::vector MatVec_column(std::vector const & a, std::vector const & x); + + +/** \brief Matrix-Matrix-Multiplikation (row-wise access) + * + * \param[in] a matrix with row wise access (M*L) + * \param[in] b matrix with row wise access (L*N) + * \param[in] L inner dimension of the matrix product + * \return resulting product a*b + * + */ +std::vector MatMatProd(std::vector const & a, std::vector const & b, int const & L); + + +/** \brief Polynomauswertung an Stelle x + * + * \param[in] a Vekor mit den Koeffizienten des Polynoms a=[a0,a1,a2,...] + * \param[in] x Vektor, für welchen das Polynom ausgewertet werden soll + * \return resulting vector p(x) + * + */ +std::vector PolynomEval(std::vector const & a, std::vector const & x); + + +/** \brief Benchmarking A - the scalar product + * + * \param N size of the vector + * \param Nloops number of iterations we want to do for the measuring + * + */ +void benchmark_A(int const & N, int const & Nloops); + + +/** \brief Benchmarking A - the scalar product with Kahan summation + * + * \param N size of the vector + * \param Nloops number of iterations we want to do for the measuring + * + */ +void benchmark_A_kahan(int const & N, int const & Nloops); + + +/** \brief Benchmarking A - norm + * + * \param N size of the vector + * \param Nloops number of iterations we want to do for the measuring + * + */ +void benchmark_A_norm(int const & N, int const & Nloops); + + +/** \brief Benchmarking B - matrix-vector product Ax=b (row wise access) + * + * \param N size of vector x + * \param M size of vector b (=> A: M*N) + * \param Nloops number of iterations we want to do for the measuring + * + */ +void benchmark_B(int const & N, int const & M, int const & Nloops); + + +/** \brief Benchmarking B - matrix-vector product Ax=b (column wise access) + * + * \param N size of vector x + * \param M size of vector b (=> A: M*N) + * \param Nloops number of iterations we want to do for the measuring + * + */ +void benchmark_B_column(int const & N, int const & M, int const & Nloops); + + +/** \brief Benchmarking C - Matrix-Matrix product C=A*B A_M*L, B_L*N + * + * \param N + * \param M + * \param L + * \param Nloops number of iterations we want to do for the measuring + * + */ +void benchmark_C(int const & N, int const & M, int const & L, int const & Nloops); + + +/** \brief Benchmarking D - polynomial evaluation + * + * \param p the degree of the polynomial + * \param N size of the input vector x where p(x) + * \param Nloops number of iterations we want to do for the measuring + * + */ +void benchmark_D(int const & p, int const & N, int const & Nloops); +#endif // BSP_3_LIB_BENCH_H_INCLUDED diff --git a/BSP_3_2to5/bsp_3_results_2-5.txt b/BSP_3_2to5/bsp_3_results_2-5.txt new file mode 100644 index 0000000..9c572bc --- /dev/null +++ b/BSP_3_2to5/bsp_3_results_2-5.txt @@ -0,0 +1,106 @@ +Aufgabe 2: Übersicht über memory, number of floating point operations and writing/reading operations + + memory (*8, in Bytes) FLOP writing/reading +A - scalar 2N 2N 2N +B - MatVec N*M+N 2*N*M 2*N*M+M +C - MatMatProd L*(M+N) 2*L*M*N L*N+L*M+M*N +D - PolyEval p+1+N 2*(p+1)*N 2*(p+1)*N + N + +Aufgabe 3-5: Ergebnisse saemtlicher benachmark tests + +Start Benchmarking A: scalar product + + = 2.5e+08 + + +N = 250000000 +Time for Nloops: 20 +Timing in sec. : 0.79 +GFLOPS : 0.59 +GiByte/s : 4.7 + + + +Start Benchmarking A: scalar product with Kahan summation + + = 2.5e+08 + + +N = 250000000 +Time for Nloops: 23 +Timing in sec. : 0.92 + + + +Start Benchmarking A_norm: euclidean norm + + ||x|| = 1.6e+04 + + +N = 250000000 +Time for Nloops: 22 +Timing in sec. : 0.88 +GFLOPS : 0.53 +GiByte/s : 2.1 + + + +Start Benchmarking B: Matrix-Vector Product (row wise access) + + = 8e+03 + + +N = 8000 M = 8000 +Time for Nloops: 17 +Timing in sec. : 0.23 +GFLOPS : 0.52 +GiByte/s : 4.1 + + + +Start Benchmarking B: Matrix-Vector Product (column wise access) + + = 8e+03 + + +N = 8000 M = 8000 +Time for Nloops: 18 +Timing in sec. : 0.24 +GFLOPS : 0.51 +GiByte/s : 4 + + + +Start Benchmarking C: Matrix-Matrix Product + + C[10,15] = 1e+03 + + +N = 1000 M = 1000 L = 1000 +Time for Nloops: 19 +Timing in sec. : 6.5 +GFLOPS : 0.29 +GiByte/s : 0.0034 + + + +Start Benchmarking D: polynomial evaluation + + p(x[0]) = 1 + + +p = 10000 N = 100000 +Time for Nloops: 19 +Timing in sec. : 3.8 +GFLOPS : 0.49 +GiByte/s : 4 + + +Anmerkungen zu Bsp 5: +A: die Berechnung der Norm benötigt länger, obwohl nur ein Vektor beteiligt ist; sqrt ist rechenintensiv im Verlgleich zu +,-,/,* +B: kahan summation benötigt etwas länger +C: keine gravierenden Unterschiede feststellbar (column wise aber bereits optimiert) + +Anmerkungen zu Bsp 4: +bei C) Matrix*Matrix wurde nicht das 10-100 fache des L3 Caches verwendet, da die Berechnung dann irsinnig lange gebraucht hätte; +dadurch etwas kleinere Dimensionen \ No newline at end of file diff --git a/BSP_3_2to5/bsp_3_x.cbp b/BSP_3_2to5/bsp_3_x.cbp new file mode 100644 index 0000000..a2217f0 --- /dev/null +++ b/BSP_3_2to5/bsp_3_x.cbp @@ -0,0 +1,42 @@ + + + + + + diff --git a/BSP_3_2to5/main.cpp b/BSP_3_2to5/main.cpp new file mode 100644 index 0000000..0db9823 --- /dev/null +++ b/BSP_3_2to5/main.cpp @@ -0,0 +1,17 @@ +#include "bsp_3_lib_bench.h" +#include + +using namespace std; + +int main() +{ + benchmark_A(25*1e7,25); + benchmark_A_kahan(25*1e7,25); + benchmark_A_norm(25*1e7,25); + benchmark_B(8000,8000,75); + benchmark_B_column(8000,8000,75); + benchmark_C(1000,1000,1000,3); + benchmark_D(1e4,1e5,5); + + return 0; +}