Upload files to "ex3_benchmarks"
This commit is contained in:
parent
90e669c6de
commit
1e81786622
5 changed files with 3246 additions and 0 deletions
2563
ex3_benchmarks/Doxyfile
Normal file
2563
ex3_benchmarks/Doxyfile
Normal file
File diff suppressed because it is too large
Load diff
333
ex3_benchmarks/benchmark_tests.cpp
Normal file
333
ex3_benchmarks/benchmark_tests.cpp
Normal file
|
|
@ -0,0 +1,333 @@
|
||||||
|
#include "benchmark_tests.h"
#include "benchmarks.h"
#include <chrono>
#include <cmath>
#include <iostream>
#include <math.h>
using namespace std::chrono;
|
||||||
|
|
||||||
|
vector<double> test_A(const size_t &NLOOPS, const size_t &N, const function<double(const vector<double>&, const vector<double>&)>& scalar_function)
|
||||||
|
{
|
||||||
|
cout << "#################### (A) ####################" << endl;
|
||||||
|
cout << "\nLOOPS = " << NLOOPS << endl;
|
||||||
|
cout << "\nN = " << N << endl;
|
||||||
|
|
||||||
|
|
||||||
|
// Memory allocation
|
||||||
|
cout << "Memory allocation\n";
|
||||||
|
|
||||||
|
vector<double> x(N), y(N);
|
||||||
|
|
||||||
|
cout.precision(2);
|
||||||
|
cout << 2.0*N *sizeof(x[0]) / 1024 / 1024 / 1024 << " GByte Memory allocated\n";
|
||||||
|
cout.precision(6);
|
||||||
|
|
||||||
|
|
||||||
|
// Data initialization
|
||||||
|
// Special: x_i = i+1; y_i = 1/x_i ==> <x,y> == N
|
||||||
|
|
||||||
|
for (size_t i = 0; i < N; ++i)
|
||||||
|
{
|
||||||
|
x[i] = i % 219 + 1;
|
||||||
|
y[i] = 1.0/x[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
cout << "\nStart Benchmarking scalar\n";
|
||||||
|
|
||||||
|
auto t1 = system_clock::now(); // start timer
|
||||||
|
// Do calculation
|
||||||
|
double check(0.0),ss(0.0);
|
||||||
|
for (size_t i = 0; i < NLOOPS; ++i)
|
||||||
|
{
|
||||||
|
check = scalar_function(x, y);
|
||||||
|
ss += check; // prevents the optimizer from removing unused calculation results.
|
||||||
|
}
|
||||||
|
|
||||||
|
auto t2 = system_clock::now(); // stop timer
|
||||||
|
auto duration = duration_cast<microseconds>(t2 - t1); // duration in microseconds
|
||||||
|
double t_diff = static_cast<double>(duration.count()) / 1e6; // overall duration in seconds
|
||||||
|
t_diff = t_diff/NLOOPS; // duration per loop seconds
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Check the correct result
|
||||||
|
cout << "\n <x,y> = " << check << endl;
|
||||||
|
if (static_cast<unsigned int>(check) != N)
|
||||||
|
cout << " !! W R O N G result !!\n";
|
||||||
|
cout << endl;
|
||||||
|
|
||||||
|
|
||||||
|
// Timings and Performance
|
||||||
|
cout << endl;
|
||||||
|
cout.precision(2);
|
||||||
|
|
||||||
|
|
||||||
|
double Gflops = 2.0*N / t_diff / 1024 / 1024 / 1024;
|
||||||
|
double MemBandwidth = 2.0*N / t_diff / 1024 / 1024 / 1024 * sizeof(x[0]);
|
||||||
|
|
||||||
|
cout << "Total duration : " << t_diff*NLOOPS << endl;
|
||||||
|
cout << "Timing in sec. : " << t_diff << endl;
|
||||||
|
cout << "GFLOPS : " << Gflops << endl;
|
||||||
|
cout << "GiByte/s : " << MemBandwidth << endl;
|
||||||
|
|
||||||
|
//##########################################################################
|
||||||
|
cout << "\nStart Benchmarking norm\n";
|
||||||
|
|
||||||
|
auto t3 = system_clock::now(); // start timer
|
||||||
|
// Do calculation
|
||||||
|
double ss2(0.0);
|
||||||
|
for (size_t i = 0; i < NLOOPS; ++i)
|
||||||
|
{
|
||||||
|
auto sk1 = sqrt(scalar(x, x));
|
||||||
|
ss2 += sk1; // prevents the optimizer from removing unused calculation results.
|
||||||
|
}
|
||||||
|
|
||||||
|
auto t4 = system_clock::now(); // stop timer
|
||||||
|
auto duration2 = duration_cast<microseconds>(t4 - t3); // duration in microseconds
|
||||||
|
double t_diff2 = static_cast<double>(duration2.count()) / 1e6; // overall duration in seconds
|
||||||
|
t_diff2 = t_diff2/NLOOPS; // duration per loop seconds
|
||||||
|
|
||||||
|
|
||||||
|
cout << "ss(norm): " << ss2 << endl;
|
||||||
|
cout << "Timing in sec. : " << t_diff2 << endl;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
return vector<double>{t_diff, Gflops, MemBandwidth};
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** (B) Benchmarks a dense matrix-vector product kernel.
 *
 *  A is an M x N row-major matrix with A[i,j] = ((i + j) mod 219) + 1. The
 *  vector x holds the reciprocals of one row of A (row 17, or the last row if
 *  the matrix has 17 rows or fewer), so the corresponding entry of A*x is
 *  expected to equal N. All diagnostics are written to std::cout.
 *
 * @param[in] NLOOPS           number of timed repetitions (expected to be > 0)
 * @param[in] N                number of columns of A / length of x
 * @param[in] M                number of rows of A
 * @param[in] MatVec_function  kernel under test, called as MatVec_function(A, x)
 * @return {seconds per call, GFLOPS, GiByte/s}
 */
std::vector<double> test_B(const size_t &NLOOPS, const size_t &N, const size_t &M, const std::function<std::vector<double>(const std::vector<double>&, const std::vector<double>&)>& MatVec_function)
{
    // steady_clock is monotonic; system_clock may jump while we are measuring.
    using clock_type = std::chrono::steady_clock;

    std::cout << "#################### (B) ####################" << std::endl;

    std::cout << "\nLOOPS = " << NLOOPS << std::endl;
    std::cout << "\nN = " << N << std::endl;
    std::cout << "\nM = " << M << std::endl;

    // Memory allocation
    std::cout << "Memory allocation\n";

    std::vector<double> A(M*N);
    std::vector<double> x(N);

    std::cout.precision(2);
    std::cout << (1.0*M*N + N) * sizeof(x[0]) / 1024 / 1024 / 1024 << " GByte Memory allocated\n";
    std::cout.precision(6);

    // Data initialization
    for (size_t i = 0; i < M; ++i)
        for (size_t j = 0; j < N; ++j)
            A[N*i + j] = static_cast<double>((i + j) % 219 + 1);

    // Row used for the plausibility check. Row 17 only exists for M > 17;
    // fall back to the last row so that small matrices stay in bounds.
    const size_t check_row = (M > 17) ? 17 : (M > 0 ? M - 1 : 0);

    for (size_t j = 0; j < N; ++j)
    {
        x[j] = (M > 0) ? 1.0/A[N*check_row + j] : 0.0;
    }

    std::cout << "\nStart Benchmarking MatVec\n";

    std::vector<double> b(M);                                // allocated outside the timed region

    const auto t1 = clock_type::now();                       // start timer
    // Do calculation
    for (size_t i = 0; i < NLOOPS; ++i)
    {
        b = MatVec_function(A, x);
    }
    const auto t2 = clock_type::now();                       // stop timer

    const double t_total = std::chrono::duration<double>(t2 - t1).count(); // overall duration in seconds
    const double t_diff  = t_total/NLOOPS;                                 // duration per loop in seconds

    // Check the correct result
    if (check_row < b.size())
    {
        std::cout << "\n <A[" << check_row << ",*],x> = " << b[check_row] << std::endl;
        // Tolerance-based comparison: truncation would reject N - epsilon.
        if (!(std::abs(b[check_row] - static_cast<double>(N)) < 0.5))
        {
            std::cout << " !! W R O N G result !!\n";
        }
    }
    else if (M > 0)
    {
        // The kernel returned fewer entries than A has rows.
        std::cout << " !! W R O N G result !!\n";
    }
    std::cout << std::endl;

    // Timings and Performance
    std::cout << std::endl;
    std::cout.precision(2);

    const double Gflops       = (2.0*N*M) / t_diff / 1024 / 1024 / 1024;
    const double MemBandwidth = (2.0*N*M + M)/ t_diff / 1024 / 1024 / 1024 * sizeof(x[0]);

    std::cout << "Total duration : " << t_total << std::endl;
    std::cout << "Timing in sec. : " << t_diff << std::endl;
    std::cout << "GFLOPS : " << Gflops << std::endl;
    std::cout << "GiByte/s : " << MemBandwidth << std::endl;

    return std::vector<double>{t_diff, Gflops, MemBandwidth};
}
|
||||||
|
|
||||||
|
|
||||||
|
/** (C) Benchmarks a dense matrix-matrix product kernel.
 *
 *  A is an M x L row-major matrix with A[i,k] = ((i + k) mod 219) + 1. Every
 *  column of the L x N matrix B holds the reciprocals of one row of A (row 17,
 *  or the last row if A has 17 rows or fewer), so the first entry of that row
 *  of C = A*B is expected to equal L. All diagnostics are written to std::cout.
 *
 * @param[in] NLOOPS           number of timed repetitions (expected to be > 0)
 * @param[in] L                shared dimension (columns of A, rows of B)
 * @param[in] M                number of rows of A
 * @param[in] N                number of columns of B
 * @param[in] MatMat_function  kernel under test, called as MatMat_function(A, B, L)
 * @return {seconds per call, GFLOPS, GiByte/s}
 */
std::vector<double> test_C(const size_t &NLOOPS, const size_t &L, const size_t &M, const size_t &N, const std::function<std::vector<double>(const std::vector<double>&, const std::vector<double>&, size_t const &shared_dim)>& MatMat_function)
{
    // steady_clock is monotonic; system_clock may jump while we are measuring.
    using clock_type = std::chrono::steady_clock;

    std::cout << "#################### (C) ####################" << std::endl;
    std::cout << "\nLOOPS = " << NLOOPS << std::endl;
    std::cout << "\nL = " << L << std::endl;
    std::cout << "\nM = " << M << std::endl;
    std::cout << "\nN = " << N << std::endl;

    // Memory allocation
    std::cout << "Memory allocation\n";

    std::vector<double> A(M*L);
    std::vector<double> B(L*N);

    std::cout.precision(2);
    std::cout << (1.0*M*L + L*N) *sizeof(A[0]) / 1024 / 1024 / 1024 << " GByte Memory allocated\n";
    std::cout.precision(6);

    // Data initialization
    for (size_t i = 0; i < M; ++i)
        for (size_t k = 0; k < L; ++k)
            A[L*i + k] = static_cast<double>((i + k) % 219 + 1);

    // Row used for the plausibility check. Row 17 only exists for M > 17;
    // fall back to the last row so that small matrices stay in bounds.
    const size_t check_row = (M > 17) ? 17 : (M > 0 ? M - 1 : 0);

    for (size_t k = 0; k < L; ++k)
    {
        const double b_k = (M > 0) ? 1.0/A[L*check_row + k] : 0.0;
        for (size_t j = 0; j < N; ++j)
            B[N*k + j] = b_k;
    }

    std::cout << "\nStart Benchmarking MatMat\n";

    std::vector<double> C(M*N);                              // allocated outside the timed region
    double check     = 0.0;                                  // C[check_row,0] of the last repetition
    double check_sum = 0.0;                                  // consumed below so no call is dead
    const size_t check_idx = N*check_row;

    const auto t1 = clock_type::now();                       // start timer
    // Do calculation
    for (size_t i = 0; i < NLOOPS; ++i)
    {
        C = MatMat_function(A, B, L);

        check = (check_idx < C.size()) ? C[check_idx] : 0.0;
        check_sum += check;
    }
    const auto t2 = clock_type::now();                       // stop timer

    // Printed after the timer has stopped so that console I/O is not measured.
    std::cout << check_sum;

    const double t_total = std::chrono::duration<double>(t2 - t1).count(); // overall duration in seconds
    const double t_diff  = t_total/NLOOPS;                                 // duration per loop in seconds

    // Check the correct result
    std::cout << "\n C[" << check_row << ",0] = " << check << std::endl;
    // Tolerance-based comparison: truncation would reject L - epsilon.
    if (!(std::abs(check - static_cast<double>(L)) < 0.5))
    {
        std::cout << " !! W R O N G result !!, should be " << L <<"\n";
    }
    std::cout << std::endl;

    // Timings and Performance
    std::cout << std::endl;
    std::cout.precision(2);

    const double Gflops       = (2.0*L*N*M) / t_diff / 1024 / 1024 / 1024;
    const double MemBandwidth = (2.0*L*N*M + M*N)/ t_diff / 1024 / 1024 / 1024 * sizeof(A[0]);

    std::cout << "Total duration : " << t_total << std::endl;
    std::cout << "Timing in sec. : " << t_diff << std::endl;
    std::cout << "GFLOPS : " << Gflops << std::endl;
    std::cout << "GiByte/s : " << MemBandwidth << std::endl;

    return std::vector<double>{t_diff, Gflops, MemBandwidth};
}
|
||||||
|
|
||||||
|
|
||||||
|
vector<double> test_D(const size_t &NLOOPS, const size_t &N, const size_t &p)
|
||||||
|
{
|
||||||
|
cout << "#################### (D) ####################" << endl;
|
||||||
|
cout << "\nLOOPS = " << NLOOPS << endl;
|
||||||
|
cout << "\nN = " << N << endl;
|
||||||
|
cout << "\np = " << p << endl;
|
||||||
|
|
||||||
|
// Memory allocation
|
||||||
|
cout << "Memory allocation\n";
|
||||||
|
|
||||||
|
vector<double> a(p + 1, 0);
|
||||||
|
vector<double> x(N);
|
||||||
|
|
||||||
|
cout.precision(2);
|
||||||
|
cout << (1.0*(p + 1) + N) *sizeof(x[0]) / 1024 / 1024 / 1024 << " GByte Memory allocated\n";
|
||||||
|
cout.precision(6);
|
||||||
|
|
||||||
|
// Data initialization
|
||||||
|
|
||||||
|
for (size_t j = 0; j < N; ++j)
|
||||||
|
x[j] = 1.0*j;
|
||||||
|
|
||||||
|
for (size_t k = 0; k < p + 1; ++k)
|
||||||
|
a[k] = pow(-1.0, k); // poly(x) = 1 - x + x^2 - x^3 + x^4 - ...
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
cout << "\nStart Benchmarking poly\n";
|
||||||
|
|
||||||
|
auto t1 = system_clock::now(); // start timer
|
||||||
|
// Do calculation
|
||||||
|
vector<double> y(N);
|
||||||
|
double check;
|
||||||
|
double check_sum;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < NLOOPS; ++i)
|
||||||
|
{
|
||||||
|
y = poly(a, x);
|
||||||
|
check = y[0];
|
||||||
|
|
||||||
|
check_sum += check; // prevents the optimizer from removing unused calculation results.
|
||||||
|
}
|
||||||
|
|
||||||
|
auto t2 = system_clock::now(); // stop timer
|
||||||
|
auto duration = duration_cast<microseconds>(t2 - t1); // duration in microseconds
|
||||||
|
double t_diff = static_cast<double>(duration.count()) / 1e6; // overall duration in seconds
|
||||||
|
t_diff = t_diff/NLOOPS; // duration per loop seconds
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Check the correct result
|
||||||
|
cout << "\n poly(" << x[0] << ") = " << check << endl;
|
||||||
|
if (abs(check - 1.0) > 1.0/1e6)
|
||||||
|
{
|
||||||
|
cout << " !! W R O N G result !!\n";
|
||||||
|
}
|
||||||
|
cout << endl;
|
||||||
|
|
||||||
|
|
||||||
|
// Timings and Performance
|
||||||
|
cout << endl;
|
||||||
|
cout.precision(2);
|
||||||
|
|
||||||
|
|
||||||
|
double Gflops = (N*(p + 1)*3.0) / t_diff / 1024 / 1024 / 1024;
|
||||||
|
double MemBandwidth = (N*(2.0 + 3.0*(p + 1)))/ t_diff / 1024 / 1024 / 1024 * sizeof(x[0]);
|
||||||
|
|
||||||
|
cout << "Total duration : " << t_diff*NLOOPS << endl;
|
||||||
|
cout << "Timing in sec. : " << t_diff << endl;
|
||||||
|
cout << "GFLOPS : " << Gflops << endl;
|
||||||
|
cout << "GiByte/s : " << MemBandwidth << endl;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
return vector<double>{t_diff, Gflops, MemBandwidth};
|
||||||
|
}
|
||||||
15
ex3_benchmarks/benchmark_tests.h
Normal file
15
ex3_benchmarks/benchmark_tests.h
Normal file
|
|
@ -0,0 +1,15 @@
|
||||||
|
#pragma once
|
||||||
|
#include <vector>
|
||||||
|
#include <functional>
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/** (A) Benchmark of an inner-product kernel on two vectors of length N.
 *  Progress and results are printed to standard output.
 *
 * @param[in] NLOOPS           number of timed repetitions
 * @param[in] N                vector length
 * @param[in] scalar_function  inner-product kernel under test
 * @return vector {time per call in seconds, GFLOPS, GiByte/s}
 */
vector<double> test_A(const size_t &NLOOPS, const size_t &N, const function<double(const vector<double>&, const vector<double>&)>& scalar_function);
|
||||||
|
|
||||||
|
/** (B) Benchmark of a matrix-vector kernel on an M x N dense matrix (1D storage).
 *  Progress and results are printed to standard output.
 *
 * @param[in] NLOOPS           number of timed repetitions
 * @param[in] N                number of matrix columns / vector length
 * @param[in] M                number of matrix rows
 * @param[in] MatVec_function  kernel under test, called with (matrix, vector)
 * @return vector {time per call in seconds, GFLOPS, GiByte/s}
 */
vector<double> test_B(const size_t &NLOOPS, const size_t &N, const size_t &M, const function<vector<double>(const vector<double>&, const vector<double>&)>& MatVec_function);
|
||||||
|
|
||||||
|
/** (C) Benchmark of a matrix-matrix kernel for an M x L times L x N product (1D storage).
 *  Progress and results are printed to standard output.
 *
 * @param[in] NLOOPS           number of timed repetitions
 * @param[in] L                shared dimension
 * @param[in] M                number of rows of the first matrix
 * @param[in] N                number of columns of the second matrix
 * @param[in] MatMat_function  kernel under test, called with (A, B, L)
 * @return vector {time per call in seconds, GFLOPS, GiByte/s}
 */
vector<double> test_C(const size_t &NLOOPS, const size_t &L, const size_t &M, const size_t &N, const function<vector<double>(const vector<double>&, const vector<double>&, size_t const &shared_dim)>& MatMat_function);
|
||||||
|
|
||||||
|
/** (D) Benchmark of the polynomial evaluation poly() with p + 1 coefficients at N points.
 *  Progress and results are printed to standard output.
 *
 * @param[in] NLOOPS  number of timed repetitions
 * @param[in] N       number of evaluation points
 * @param[in] p       polynomial degree
 * @return vector {time per call in seconds, GFLOPS, GiByte/s}
 */
vector<double> test_D(const size_t &NLOOPS, const size_t &N, const size_t &p);
|
||||||
246
ex3_benchmarks/benchmarks.cpp
Normal file
246
ex3_benchmarks/benchmarks.cpp
Normal file
|
|
@ -0,0 +1,246 @@
|
||||||
|
#include "benchmarks.h"
|
||||||
|
#include "vdop.h"
|
||||||
|
#include <iostream>
|
||||||
|
#include <vector>
|
||||||
|
#include <cmath>
|
||||||
|
#include <cassert> // assert()
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef __INTEL_CLANG_COMPILER
|
||||||
|
#pragma message(" ########## Use of MKL ###############")
|
||||||
|
#include <mkl.h>
|
||||||
|
#else
|
||||||
|
#pragma message(" ########## Use of CBLAS ###############")
|
||||||
|
|
||||||
|
#include <cblas.h> // cBLAS Library
|
||||||
|
#include <lapacke.h> // Lapack
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// (A) Inner product of two vectors (from skalar_stl).
// Accumulates x[i]*y[i] front to back; both vectors must have the same length.
double scalar(std::vector<double> const &x, std::vector<double> const &y)
{
    assert(x.size() == y.size());

    double acc = 0.0;
    auto rhs = y.begin();
    for (double const lhs : x)
    {
        acc += lhs * (*rhs);
        ++rhs;
    }
    return acc;
}
|
||||||
|
|
||||||
|
// (A) 5.(b) Kahan scalar product.
// Inner product <x,y> with compensated (Kahan) summation: the rounding error
// of every addition is carried along and fed back into the next term.
// Both vectors must have the same length (checked like in scalar()).
double Kahan_skalar(std::vector<double> const &x, std::vector<double> const &y)
{
    assert(x.size() == y.size());   // otherwise y would be read out of bounds

    double sum = 0.0;
    double c   = 0.0;               // running compensation for lost low-order bits
    size_t const n = x.size();
    for (size_t i = 0; i < n; ++i)
    {
        double const z = x[i]*y[i] - c;   // c is the part that got lost in the last iteration
        double const t = sum + z;         // when adding sum + z, the lower digits are lost if sum is large
        c   = (t - sum) - z;              // now we recover the lower digits to add in the next iteration
        sum = t;
    }
    return sum;
}
|
||||||
|
|
||||||
|
// (A) 6. cBLAS scalar product
|
||||||
|
double scalar_cBLAS(vector<double> const &x, vector<double> const &y)
|
||||||
|
{
|
||||||
|
return cblas_ddot(x.size(), x.data(), 1, y.data(), 1); // x.data() = &x[0]
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// (B) Matrix-vector product (from intro_vector_densematrix).
// A is a dense row-major matrix stored in 1D with x.size() columns; the number
// of rows is derived from A.size(). Returns b = A*x.
// An empty x (with an empty A) yields an empty result instead of dividing by zero.
std::vector<double> MatVec(std::vector<double> const &A, std::vector<double> const &x)
{
    size_t const nelem = A.size();
    size_t const N     = x.size();

    if (N == 0)
    {
        assert(nelem == 0);          // a non-empty matrix cannot have zero columns
        return std::vector<double>();
    }

    assert(nelem % N == 0);          // make sure multiplication is possible
    size_t const M = nelem/N;

    std::vector<double> b(M);

    for (size_t i = 0; i < M; ++i)
    {
        double tmp = 0.0;
        for (size_t j = 0; j < N; ++j)
            tmp += A[N*i + j] * x[j];
        b[i] = tmp;
    }

    return b;
}
|
||||||
|
|
||||||
|
// (B) cBLAS Matrix-vector product
|
||||||
|
vector<double> MatVec_cBLAS(vector<double> const &A, vector<double> const &x)
|
||||||
|
{
|
||||||
|
size_t const nelem = A.size();
|
||||||
|
size_t const N = x.size();
|
||||||
|
assert(nelem % N == 0); // make sure multiplication is possible
|
||||||
|
size_t const M = nelem/N;
|
||||||
|
|
||||||
|
|
||||||
|
vector<double> b(M);
|
||||||
|
|
||||||
|
cblas_dgemv(CblasRowMajor, CblasNoTrans, M, N, 1.0, A.data(), N, x.data(), 1, 0.0, b.data(), 1);
|
||||||
|
|
||||||
|
return b;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// (C) Matrix-matrix product.
// A is M x L and B is L x N, both dense row-major in 1D storage; M and N are
// derived from the vector sizes and the shared dimension L. Returns C = A*B
// as an M x N row-major matrix.
// L == 0 (with empty A and B) yields an empty result instead of dividing by zero.
std::vector<double> MatMat(std::vector<double> const &A, std::vector<double> const &B, size_t const &L)
{
    size_t const nelem_A = A.size();
    size_t const nelem_B = B.size();

    if (L == 0)
    {
        assert(nelem_A == 0 && nelem_B == 0);   // non-empty matrices need L > 0
        return std::vector<double>();
    }

    assert(nelem_A % L == 0 && nelem_B % L == 0);

    size_t const M = nelem_A/L;
    size_t const N = nelem_B/L;

    std::vector<double> C(M*N);

    for (size_t i = 0; i < M; ++i)
    {
        for (size_t j = 0; j < N; ++j)
        {
            double C_temp = 0;
            for (size_t k = 0; k < L; ++k)
            {
                C_temp += A[L*i + k]*B[N*k + j];
            }
            C[N*i + j] = C_temp;
        }
    }

    return C;
}
|
||||||
|
|
||||||
|
// (C) cBLAS matrix-matrix product
|
||||||
|
vector<double> MatMat_cBLAS(vector<double> const &A, vector<double> const &B, size_t const &L)
|
||||||
|
{
|
||||||
|
size_t const nelem_A = A.size();
|
||||||
|
size_t const nelem_B = B.size();
|
||||||
|
|
||||||
|
assert(nelem_A % L == 0 && nelem_B % L == 0);
|
||||||
|
|
||||||
|
size_t const M = nelem_A/L;
|
||||||
|
size_t const N = nelem_B/L;
|
||||||
|
|
||||||
|
|
||||||
|
vector<double> C(M*N);
|
||||||
|
|
||||||
|
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, L, 1.0, A.data(), L, B.data(), N, 0.0, C.data(), N);
|
||||||
|
|
||||||
|
return C;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// (D) Evaluation of a polynomial function using Horner's scheme.
// a holds the coefficients in ascending order (a[k] multiplies x^k); the
// polynomial is evaluated at every entry of x.
// Returns y with y[i] = a[0] + a[1]*x[i] + ... + a[p]*x[i]^p, p = a.size() - 1.
// An empty coefficient vector yields y[i] = 0.
std::vector<double> poly(std::vector<double> const &a, std::vector<double> const &x)
{
    size_t const N = x.size();
    std::vector<double> y(N, 0);

    for (size_t i = 0; i < N; ++i)
    {
        double const x_temp = x[i];
        double y_temp = 0;
        // Horner step, highest coefficient first: y := x*y + a_k.
        // This must be an assignment; accumulating with += would evaluate
        // the polynomial at (1 + x) instead of x.
        for (auto coeff = a.rbegin(); coeff != a.rend(); ++coeff)
        {
            y_temp = x_temp*y_temp + *coeff;
        }
        y[i] = y_temp;
    }

    return y;
}
|
||||||
|
|
||||||
|
|
||||||
|
// (E) Solves linear system of equations
|
||||||
|
void JacobiSolve(CRS_Matrix const &SK, vector<double> const &f, vector<double> &u)
|
||||||
|
{
|
||||||
|
const double omega = 1.0;
|
||||||
|
const int maxiter = 1000;
|
||||||
|
const double tol = 1e-5, // tolerance
|
||||||
|
tol2 = tol * tol; // tolerance^2
|
||||||
|
|
||||||
|
int nrows = SK.Nrows(); // number of rows == number of columns
|
||||||
|
assert( nrows == static_cast<int>(f.size()) && f.size() == u.size() );
|
||||||
|
|
||||||
|
cout << endl << " Start Jacobi solver for " << nrows << " d.o.f.s" << endl;
|
||||||
|
// Choose initial guess
|
||||||
|
for (int k = 0; k < nrows; ++k)
|
||||||
|
{
|
||||||
|
u[k] = 0.0; // u := 0
|
||||||
|
}
|
||||||
|
|
||||||
|
vector<double> dd(nrows); // matrix diagonal
|
||||||
|
vector<double> r(nrows); // residual
|
||||||
|
vector<double> w(nrows); // correction
|
||||||
|
|
||||||
|
SK.GetDiag(dd); // dd := diag(K)
|
||||||
|
////DebugVector(dd);{int ijk; cin >> ijk;}
|
||||||
|
|
||||||
|
// Initial sweep
|
||||||
|
SK.Defect(r, f, u); // r := f - K*u
|
||||||
|
|
||||||
|
vddiv(w, r, dd); // w := D^{-1}*r
|
||||||
|
double sigma0 = dscapr(w, r); // s0 := <w,r>
|
||||||
|
|
||||||
|
// Iteration sweeps
|
||||||
|
int iter = 0;
|
||||||
|
double sigma = sigma0;
|
||||||
|
while ( sigma > tol2 * sigma0 && maxiter > iter)
|
||||||
|
{
|
||||||
|
++iter;
|
||||||
|
vdaxpy(u, u, omega, w ); // u := u + om*w
|
||||||
|
SK.Defect(r, f, u); // r := f - K*u
|
||||||
|
vddiv(w, r, dd); // w := D^{-1}*r
|
||||||
|
sigma = dscapr(w, r); // s0 := <w,r>
|
||||||
|
// cout << "Iteration " << iter << " : " << sqrt(sigma/sigma0) << endl;
|
||||||
|
}
|
||||||
|
cout << "aver. Jacobi rate : " << exp(log(sqrt(sigma / sigma0)) / iter) << " (" << iter << " iter)" << endl;
|
||||||
|
cout << "final error: " << sqrt(sigma / sigma0) << " (rel) " << sqrt(sigma) << " (abs)\n";
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
89
ex3_benchmarks/benchmarks.h
Normal file
89
ex3_benchmarks/benchmarks.h
Normal file
|
|
@ -0,0 +1,89 @@
|
||||||
|
#pragma once
|
||||||
|
#include "getmatrix.h"
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
/** (A) Inner product of two vectors (from skalar_stl)
|
||||||
|
@param[in] x vector
|
||||||
|
@param[in] y vector
|
||||||
|
@return resulting Euclidean inner product <x,y>
|
||||||
|
*/
|
||||||
|
double scalar(vector<double> const &x, vector<double> const &y);
|
||||||
|
|
||||||
|
/** (A) 5.(b) Inner product of two vectors using the Kahan scalar product
|
||||||
|
@param[in] x vector
|
||||||
|
@param[in] y vector
|
||||||
|
@return resulting Euclidean inner product <x,y>
|
||||||
|
*/
|
||||||
|
double Kahan_skalar(vector<double> const &x, vector<double> const &y);
|
||||||
|
|
||||||
|
/** (A) 6. cBLAS scalar product of two vectors
|
||||||
|
@param[in] x vector
|
||||||
|
@param[in] y vector
|
||||||
|
@return resulting Euclidean inner product <x,y>
|
||||||
|
*/
|
||||||
|
double scalar_cBLAS(vector<double> const &x, vector<double> const &y);
|
||||||
|
|
||||||
|
|
||||||
|
/** (B) Matrix-vector product (from intro_vector_densematrix)
|
||||||
|
* @param[in] A dense matrix (1D access)
|
||||||
|
* @param[in] x vector
|
||||||
|
*
|
||||||
|
* @return resulting vector
|
||||||
|
*/
|
||||||
|
vector<double> MatVec(vector<double> const &A, vector<double> const &x);
|
||||||
|
|
||||||
|
/** (B) 6. cBLAS Matrix-vector product
|
||||||
|
* @param[in] A dense matrix (1D access)
|
||||||
|
* @param[in] x vector
|
||||||
|
*
|
||||||
|
* @return resulting vector
|
||||||
|
*/
|
||||||
|
vector<double> MatVec_cBLAS(vector<double> const &A, vector<double> const &x);
|
||||||
|
|
||||||
|
|
||||||
|
/** (C) Matrix-matrix product
|
||||||
|
* @param[in] A MxL dense matrix (1D access)
|
||||||
|
* @param[in] B LxN dense matrix (1D access)
|
||||||
|
* @param[in] shared_dim shared dimension L
|
||||||
|
*
|
||||||
|
* @return resulting MxN matrix
|
||||||
|
*/
|
||||||
|
vector<double> MatMat(vector<double> const &A, vector<double> const &B, size_t const &shared_dim);
|
||||||
|
|
||||||
|
/** (C) 6. cBLAS Matrix-matrix product
|
||||||
|
* @param[in] A MxL dense matrix (1D access)
|
||||||
|
* @param[in] B LxN dense matrix (1D access)
|
||||||
|
* @param[in] shared_dim shared dimension L
|
||||||
|
*
|
||||||
|
* @return resulting MxN matrix
|
||||||
|
*/
|
||||||
|
vector<double> MatMat_cBLAS(vector<double> const &A, vector<double> const &B, size_t const &shared_dim);
|
||||||
|
|
||||||
|
|
||||||
|
/** (D) Evaluation of a polynomial function using Horner's scheme
|
||||||
|
* @param[in] a coefficient vector
|
||||||
|
* @param[in] x vector with input values
|
||||||
|
*
|
||||||
|
* @return vector with output values
|
||||||
|
*/
|
||||||
|
vector<double> poly(vector<double> const &a, vector<double> const &x);
|
||||||
|
|
||||||
|
|
||||||
|
/** (E) Solves linear system of equations K @p u = @p f via the Jacobi iteration (from jacobi_oo_stl)
|
||||||
|
* We use a distributed symmetric CSR matrix @p SK and initial guess of the
|
||||||
|
* solution is set to 0.
|
||||||
|
* @param[in] SK CSR matrix
|
||||||
|
* @param[in] f distributed local vector storing the right hand side
|
||||||
|
* @param[out] u accumulated local vector storing the solution.
|
||||||
|
*/
|
||||||
|
void JacobiSolve(CRS_Matrix const &SK, vector<double> const &f, vector<double> &u);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue