exercises 2 to 5

This commit is contained in:
Georg Thomas Mandl 2025-11-13 02:29:23 +01:00
commit ce217b844f
5 changed files with 820 additions and 0 deletions

View file

@ -0,0 +1,519 @@
#include "bsp_3_lib_bench.h"
#include <cassert>
#include <chrono>
#include <cmath>
#include <iostream>
#include <ctime>
using namespace std;
using namespace std::chrono; // timing
/** Euclidean inner product of two equally sized vectors.
 *
 *  @param[in] x first vector
 *  @param[in] y second vector; the size equality is asserted
 *  @return    x[0]*y[0] + x[1]*y[1] + ..., accumulated from the first to the last element
 */
double scalar(std::vector<double> const &x, std::vector<double> const &y)
{
    assert(x.size() == y.size());   // compiled out with -DNDEBUG

    double acc = 0.0;
    std::size_t k = 0;
    for (double const xk : x)
    {
        acc += xk * y[k];
        ++k;
    }
    return acc;
}
/** Inner product of two equally sized vectors using compensated (Kahan) summation.
 *
 *  The low-order part lost when a product is added to the running total is
 *  kept in a correction term and subtracted from the next product.
 *
 *  @param[in] x first vector
 *  @param[in] y second vector; the size equality is asserted
 *  @return    compensated sum of all products x[k]*y[k]
 */
double scalar_kahan(std::vector<double> const &x, std::vector<double> const &y)
{
    assert(x.size() == y.size());   // compiled out with -DNDEBUG

    double total = 0.0;
    double comp  = 0.0;             // rounding error carried over from the previous step
    std::size_t k = 0;
    while (k < x.size())
    {
        double const term = x[k] * y[k] - comp;
        double const next = total + term;
        comp  = (next - total) - term;   // what was lost when forming 'next'
        total = next;
        ++k;
    }
    return total;
}
/** Euclidean norm of a vector.
 *
 *  @param[in] x vector
 *  @return    square root of the sum of the squared entries (0 for an empty vector)
 */
double norm_eucl(std::vector<double> const &x)
{
    double squares = 0.0;
    for (double const xi : x)
    {
        squares += xi * xi;
    }
    return std::sqrt(squares);
}
/** \brief Matrix-vector product b = A*x for a matrix stored row by row.
 *
 * Entry (i,j) of the matrix is read from a[i*x.size() + j]. The number of
 * rows is deduced as a.size() / x.size().
 *
 * \param[in] a  matrix entries, row after row; a.size() must be a multiple of x.size()
 * \param[in] x  vector to multiply with; its size is the number of matrix columns
 * \return       vector with one entry per matrix row; empty if x is empty
 */
std::vector<double> MatVec(std::vector<double> const & a, std::vector<double> const & x) // row wise access
{
    std::size_t const ncols = x.size();
    if (ncols == 0)
    {
        // No columns: the row count cannot be deduced, and dividing by zero must be avoided.
        assert(a.empty());
        return std::vector<double>();
    }
    assert(a.size() % ncols == 0);              // a.size() has to be a multiple of ncols
    std::size_t const nrows = a.size() / ncols; // integer division

    std::vector<double> b(nrows);
    for (std::size_t i = 0; i < nrows; ++i)
    {
        double tmp = 0.0;
        for (std::size_t j = 0; j < ncols; ++j)
        {
            tmp += a[i * ncols + j] * x[j];
        }
        b[i] = tmp;
    }
    return b;
}
/** \brief Matrix-vector product b = A*x for a matrix stored column by column.
 *
 * Entry (i,j) of the matrix is read from a[j*nrows + i], where
 * nrows = a.size() / x.size().
 *
 * \param[in] a  matrix entries, column after column; a.size() must be a multiple of x.size()
 * \param[in] x  vector to multiply with; its size is the number of matrix columns
 * \return       vector with one entry per matrix row; empty if x is empty
 */
std::vector<double> MatVec_column(std::vector<double> const & a, std::vector<double> const & x) // column wise access
{
    std::size_t const ncols = x.size();
    if (ncols == 0)
    {
        // No columns: the row count cannot be deduced, and dividing by zero must be avoided.
        assert(a.empty());
        return std::vector<double>();
    }
    assert(a.size() % ncols == 0);              // a.size() has to be a multiple of ncols
    std::size_t const nrows = a.size() / ncols; // integer division

    std::vector<double> b(nrows);               // value-initialised, i.e. all zeros

    // The column index is the outer loop so that 'a' is walked contiguously.
    // Each column adds its contribution to every b[i]; b is small compared to a,
    // so the extra writes to b matter little.
    for (std::size_t j = 0; j < ncols; ++j)
    {
        double const xj = x[j];
        for (std::size_t i = 0; i < nrows; ++i)
        {
            b[i] += a[j * nrows + i] * xj;
        }
    }
    return b;
}
/** \brief Matrix-matrix product C = A*B, all matrices stored row by row.
 *
 * A has M = a.size()/L rows and L columns, B has L rows and N = b.size()/L
 * columns. Entry (i,j) of the result is stored at index i*N + j.
 *
 * \param[in] a  entries of A, row after row; a.size() must be a multiple of L
 * \param[in] b  entries of B, row after row; b.size() must be a multiple of L
 * \param[in] L  inner dimension (columns of A, rows of B); must be positive
 * \return       the M*N entries of C, row after row; empty if L is not positive
 */
std::vector<double> MatMatProd(std::vector<double> const & a, std::vector<double> const & b, int const & L)
{
    if (L <= 0)
    {
        // Without an inner dimension the shapes are undefined; also avoids dividing by zero.
        assert(a.empty() && b.empty());
        return std::vector<double>();
    }
    std::size_t const inner = static_cast<std::size_t>(L);
    assert(a.size() % inner == 0 && b.size() % inner == 0);
    std::size_t const M = a.size() / inner;     // rows of A and C
    std::size_t const N = b.size() / inner;     // columns of B and C

    std::vector<double> c(M * N, 0.0);
    // Loop order i-k-j: the innermost loop walks a row of B and a row of C contiguously.
    for (std::size_t i = 0; i < M; ++i)
    {
        for (std::size_t k = 0; k < inner; ++k)
        {
            double const aik = a[i * inner + k];
            for (std::size_t j = 0; j < N; ++j)
            {
                c[i * N + j] += aik * b[k * N + j];   // a row of C has N entries, so the stride is N
            }
        }
    }
    return c;
}
/** \brief Evaluates the polynomial a[0] + a[1]*t + a[2]*t^2 + ... at every entry of x.
 *
 * Uses the Horner scheme, starting from the highest coefficient.
 *
 * \param[in] a  polynomial coefficients in ascending order of the power;
 *               an empty vector is treated as the zero polynomial
 * \param[in] x  evaluation points
 * \return       vector of the same size as x holding the polynomial values
 */
std::vector<double> PolynomEval(std::vector<double> const & a, std::vector<double> const & x)
{
    std::vector<double> sol(x.size(), 0.0);
    if (a.empty())
    {
        return sol;     // zero polynomial; avoids reading a.back() of an empty vector
    }

    for (std::size_t i = 0; i < x.size(); ++i)
    {
        double const xi = x[i];
        double tmp = a.back();
        // walk the remaining coefficients from the second highest down to a[0]
        for (auto coeff = a.rbegin() + 1; coeff != a.rend(); ++coeff)
        {
            tmp = tmp * xi + *coeff;
        }
        sol[i] = tmp;
    }
    return sol;
}
/** \brief Benchmark A: times the inner product scalar(x,y) and prints the figures.
 *
 * The data are x[k] = (k % 219) + 1 and y[k] = 1/x[k], so the expected inner
 * product is N. The result is rounded (not truncated) before it is compared
 * with N, so a value marginally below N is not reported as wrong.
 *
 * \param[in] N       length of both vectors; must be positive
 * \param[in] Nloops  number of timed repetitions; must be positive
 */
void benchmark_A(int const & N, int const & Nloops)
{
    assert(N > 0 && Nloops > 0);
    //##########################################################################
    std::cout << "\nStart Benchmarking A: scalar product\n";

    std::size_t const len = static_cast<std::size_t>(N);
    std::vector<double> x(len), y(len);
    for (std::size_t k = 0; k < len; ++k)
    {
        x[k] = static_cast<double>(k % 219) + 1;
        y[k] = 1.0 / x[k];
    }

    // steady_clock is monotonic, so the interval cannot be distorted by
    // adjustments of the wall clock while the benchmark runs.
    auto const t1 = std::chrono::steady_clock::now();   // start timer
    double sk(0.0), ss(0.0);
    for (int i = 0; i < Nloops; ++i)
    {
        sk = scalar(x, y);
        ss += sk;       // accumulate so the repeated results are not simply discarded
    }
    auto const t2 = std::chrono::steady_clock::now();   // stop timer
    auto const duration = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1);
    double t_diff = static_cast<double>(duration.count()) / 1e6;   // overall duration in seconds
    t_diff = t_diff / Nloops;                                      // duration per loop in seconds

    //##########################################################################
    // Check the result
    std::cout << "\n <x,y> = " << sk << std::endl;
    if (std::llround(sk) != static_cast<long long>(N))
    {
        std::cout << " !! W R O N G result !!\n";
    }
    std::cout << std::endl;

    //##########################################################################
    // Timings and performance
    std::cout << std::endl;
    std::cout.precision(2);
    std::cout << "N = " << N << std::endl;
    std::cout << "Time for Nloops: " << t_diff*Nloops << std::endl;
    std::cout << "Timing in sec. : " << t_diff << std::endl;
    std::cout << "GFLOPS : " << 2.0 * N / t_diff / 1024 / 1024 / 1024 << std::endl;
    std::cout << "GiByte/s : " << 2.0 * N / t_diff / 1024 / 1024 / 1024 * sizeof(x[0]) << std::endl;
    std::cout << std::endl << std::endl;
    return;
}
void benchmark_A_kahan(int const & N, int const & Nloops)
{
//##########################################################################
cout << "\nStart Benchmarking A: scalar product with Kahan summation\n";
vector<double> x(N), y(N);
for(size_t k = 0; k < x.size(); ++k)
{
x[k] = (k % 219) + 1;
y[k] = 1.0/x[k];
}
auto t1 = system_clock::now(); // start timer
// Do calculation
double sk(0.0), ss(0.0);
for (int i = 0; i < Nloops; ++i)
{
sk = scalar(x, y);
ss += sk; // prevents the optimizer from removing unused calculation results.
}
auto t2 = system_clock::now(); // stop timer
auto duration = duration_cast<microseconds>(t2 - t1); // duration in microseconds
double t_diff = static_cast<double>(duration.count()) / 1e6; // overall duration in seconds
t_diff = t_diff/Nloops; // duration per loop seconds
//assert(std::abs(ss/NLOOPS-sk)<1e-5); // avoids unsafe floating point comparison "=="
//##########################################################################
// Check the correct result
cout << "\n <x,y> = " << sk << endl;
if (static_cast<unsigned int>(sk) != N)
{
cout << " !! W R O N G result !!\n";
}
cout << endl;
//##########################################################################
// Timings and Performance
cout << endl;
cout.precision(2);
cout << "N = " << N << endl;
cout << "Time for Nloops: " << t_diff*Nloops << endl;
cout << "Timing in sec. : " << t_diff << endl;
//cout << "GFLOPS : " << 2.0 * N / t_diff / 1024 / 1024 / 1024 << endl;
//cout << "GiByte/s : " << 2.0 * N / t_diff / 1024 / 1024 / 1024 * sizeof(x[0]) << endl;
cout << endl << endl;
return;
}
/** \brief Benchmark A_norm: times norm_eucl(x) for a vector of ones and prints the figures.
 *
 * With all entries equal to 1 the expected norm is sqrt(N). The check is
 * two-sided, so a result that is too small is reported as well.
 *
 * \param[in] N       length of the vector; must be positive
 * \param[in] Nloops  number of timed repetitions; must be positive
 */
void benchmark_A_norm(int const & N, int const & Nloops)
{
    assert(N > 0 && Nloops > 0);
    //##########################################################################
    std::cout << "\nStart Benchmarking A_norm: euclidean norm\n";

    std::vector<double> x(static_cast<std::size_t>(N), 1.0);

    // steady_clock is monotonic, so the interval cannot be distorted by
    // adjustments of the wall clock while the benchmark runs.
    auto const t1 = std::chrono::steady_clock::now();   // start timer
    double sk(0.0), ss(0.0);
    for (int i = 0; i < Nloops; ++i)
    {
        sk = norm_eucl(x);
        ss += sk;       // accumulate so the repeated results are not simply discarded
    }
    auto const t2 = std::chrono::steady_clock::now();   // stop timer
    auto const duration = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1);
    double t_diff = static_cast<double>(duration.count()) / 1e6;   // overall duration in seconds
    t_diff = t_diff / Nloops;                                      // duration per loop in seconds

    //##########################################################################
    // Check the result
    std::cout << "\n ||x|| = " << sk << std::endl;
    if (std::abs(sk - std::sqrt(static_cast<double>(N))) > 1e-7)
    {
        std::cout << " !! W R O N G result !!\n";
    }
    std::cout << std::endl;

    //##########################################################################
    // Timings and performance
    std::cout << std::endl;
    std::cout.precision(2);
    std::cout << "N = " << N << std::endl;
    std::cout << "Time for Nloops: " << t_diff*Nloops << std::endl;
    std::cout << "Timing in sec. : " << t_diff << std::endl;
    std::cout << "GFLOPS : " << 2.0 * N / t_diff / 1024 / 1024 / 1024 << std::endl;
    std::cout << "GiByte/s : " << N / t_diff / 1024 / 1024 / 1024 * sizeof(x[0]) << std::endl;
    std::cout << std::endl << std::endl;
    return;
}
/** \brief Benchmark B: times the row-wise matrix-vector product MatVec(a,x).
 *
 * The M x N matrix is stored row by row with A(i,j) = (i+j) % 219 + 1. The
 * vector x holds the reciprocals of one matrix row (row 17, or the last row
 * if the matrix has fewer than 18 rows), so the matching entry of the result
 * is expected to be N.
 *
 * \param[in] N       size of vector x (number of matrix columns); must be positive
 * \param[in] M       size of vector b (number of matrix rows); must be positive
 * \param[in] Nloops  number of timed repetitions; must be positive
 */
void benchmark_B(int const & N, int const & M, int const & Nloops)
{
    assert(N > 0 && M > 0 && Nloops > 0);
    //##########################################################################
    std::cout << "\nStart Benchmarking B: Matrix-Vector Product (row wise access)\n";

    std::size_t const ncols = static_cast<std::size_t>(N);
    std::size_t const nrows = static_cast<std::size_t>(M);
    std::size_t const row   = (nrows > 17) ? 17 : nrows - 1;   // row used for the check; stays inside the matrix

    std::vector<double> x(ncols), b(nrows), a(ncols * nrows);  // size computed in size_t, not int
    // initialize data
    for (std::size_t i = 0; i < nrows; ++i)
    {
        for (std::size_t j = 0; j < ncols; ++j)
        {
            a[i * ncols + j] = static_cast<double>((i + j) % 219 + 1);
        }
    }
    for (std::size_t j = 0; j < ncols; ++j)
    {
        x[j] = 1.0 / a[row * ncols + j];
    }

    // steady_clock is monotonic, so the interval cannot be distorted by
    // adjustments of the wall clock while the benchmark runs.
    auto const t1 = std::chrono::steady_clock::now();   // start timer
    double ss(0.0);
    for (int i = 0; i < Nloops; ++i)
    {
        b = MatVec(a, x);
        ss += b[0];     // accumulate so the repeated results are not simply discarded
    }
    auto const t2 = std::chrono::steady_clock::now();   // stop timer
    auto const duration = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1);
    double t_diff = static_cast<double>(duration.count()) / 1e6;   // overall duration in seconds
    t_diff = t_diff / Nloops;                                      // duration per loop in seconds

    //##########################################################################
    // Check the result (rounded, so a value marginally below N is accepted)
    std::cout << "\n <A[" << row << ",.],x> = " << b[row] << std::endl;
    if (std::llround(b[row]) != static_cast<long long>(N))
    {
        std::cout << " !! W R O N G result !!\n";
    }
    std::cout << std::endl;

    //##########################################################################
    // Timings and performance
    std::cout << std::endl;
    std::cout.precision(2);
    std::cout << "N = " << N << "\t M = " << M << std::endl;
    std::cout << "Time for Nloops: " << t_diff*Nloops << std::endl;
    std::cout << "Timing in sec. : " << t_diff << std::endl;
    std::cout << "GFLOPS : " << 2.0 * N * M / t_diff / 1024 / 1024 / 1024 << std::endl;
    std::cout << "GiByte/s : " << (2.0 * N * M + M) / t_diff / 1024 / 1024 / 1024 * sizeof(x[0]) << std::endl;
    std::cout << std::endl << std::endl;
    return;
}
/** \brief Benchmark B (column): times the column-wise matrix-vector product MatVec_column(a,x).
 *
 * The M x N matrix A(i,j) = (i+j) % 219 + 1 is stored column by column, which
 * is the layout MatVec_column reads. The vector x holds the reciprocals of one
 * matrix row (row 17, or the last row if the matrix has fewer than 18 rows),
 * so the matching entry of the result is expected to be N.
 *
 * \param[in] N       size of vector x (number of matrix columns); must be positive
 * \param[in] M       size of vector b (number of matrix rows); must be positive
 * \param[in] Nloops  number of timed repetitions; must be positive
 */
void benchmark_B_column(int const & N, int const & M, int const & Nloops)
{
    assert(N > 0 && M > 0 && Nloops > 0);
    //##########################################################################
    std::cout << "\nStart Benchmarking B: Matrix-Vector Product (column wise access)\n";

    std::size_t const ncols = static_cast<std::size_t>(N);
    std::size_t const nrows = static_cast<std::size_t>(M);
    std::size_t const row   = (nrows > 17) ? 17 : nrows - 1;   // row used for the check; stays inside the matrix

    std::vector<double> x(ncols), b(nrows), a(ncols * nrows);  // size computed in size_t, not int
    // initialize data: entry (i,j) lives at a[j*nrows + i], matching the kernel,
    // so the check below also holds for non-square matrices
    for (std::size_t j = 0; j < ncols; ++j)
    {
        for (std::size_t i = 0; i < nrows; ++i)
        {
            a[j * nrows + i] = static_cast<double>((i + j) % 219 + 1);
        }
    }
    for (std::size_t j = 0; j < ncols; ++j)
    {
        x[j] = 1.0 / a[j * nrows + row];
    }

    // steady_clock is monotonic, so the interval cannot be distorted by
    // adjustments of the wall clock while the benchmark runs.
    auto const t1 = std::chrono::steady_clock::now();   // start timer
    double ss(0.0);
    for (int i = 0; i < Nloops; ++i)
    {
        b = MatVec_column(a, x);
        ss += b[0];     // accumulate so the repeated results are not simply discarded
    }
    auto const t2 = std::chrono::steady_clock::now();   // stop timer
    auto const duration = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1);
    double t_diff = static_cast<double>(duration.count()) / 1e6;   // overall duration in seconds
    t_diff = t_diff / Nloops;                                      // duration per loop in seconds

    //##########################################################################
    // Check the result (rounded, so a value marginally below N is accepted)
    std::cout << "\n <A[" << row << ",.],x> = " << b[row] << std::endl;
    if (std::llround(b[row]) != static_cast<long long>(N))
    {
        std::cout << " !! W R O N G result !!\n";
    }
    std::cout << std::endl;

    //##########################################################################
    // Timings and performance
    std::cout << std::endl;
    std::cout.precision(2);
    std::cout << "N = " << N << "\t M = " << M << std::endl;
    std::cout << "Time for Nloops: " << t_diff*Nloops << std::endl;
    std::cout << "Timing in sec. : " << t_diff << std::endl;
    std::cout << "GFLOPS : " << 2.0 * N * M / t_diff / 1024 / 1024 / 1024 << std::endl;
    std::cout << "GiByte/s : " << (2.0 * N * M + M) / t_diff / 1024 / 1024 / 1024 * sizeof(x[0]) << std::endl;
    std::cout << std::endl << std::endl;
    return;
}
/** \brief Benchmark C: times the matrix-matrix product MatMatProd(a,b,L).
 *
 * A (M x L) and B (L x N) are filled with ones, so every entry of the
 * product is expected to be L. One entry is checked: (10,15), clamped to the
 * last row/column if the result is smaller than that.
 *
 * \param[in] N       number of columns of B and of the result; must be positive
 * \param[in] M       number of rows of A and of the result; must be positive
 * \param[in] L       inner dimension; must be positive
 * \param[in] Nloops  number of timed repetitions; must be positive
 */
void benchmark_C(int const & N, int const & M, int const & L, int const & Nloops)
{
    assert(N > 0 && M > 0 && L > 0 && Nloops > 0);
    //##########################################################################
    std::cout << "\nStart Benchmarking C: Matrix-Matrix Product\n";

    std::size_t const nN = static_cast<std::size_t>(N);
    std::size_t const nM = static_cast<std::size_t>(M);
    std::size_t const nL = static_cast<std::size_t>(L);
    // sizes are multiplied in size_t so large dimensions do not overflow int
    std::vector<double> a(nM * nL, 1.0), b(nL * nN, 1.0), c(nN * nM);
    // with this data we get C[i,j] = L for all i and j

    std::size_t const row = (nM > 10) ? 10 : nM - 1;   // checked entry, kept inside the result
    std::size_t const col = (nN > 15) ? 15 : nN - 1;

    // steady_clock is monotonic, so the interval cannot be distorted by
    // adjustments of the wall clock while the benchmark runs.
    auto const t1 = std::chrono::steady_clock::now();   // start timer
    double ss(0.0);
    for (int i = 0; i < Nloops; ++i)
    {
        c = MatMatProd(a, b, L);
        ss += c[0];     // accumulate so the repeated results are not simply discarded
    }
    auto const t2 = std::chrono::steady_clock::now();   // stop timer
    auto const duration = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1);
    double t_diff = static_cast<double>(duration.count()) / 1e6;   // overall duration in seconds
    t_diff = t_diff / Nloops;                                      // duration per loop in seconds

    //##########################################################################
    // Check the result (rounded, so a value marginally below L is accepted)
    std::cout << "\n C[" << row << "," << col << "] = " << c[row * nN + col] << std::endl;
    if (std::llround(c[row * nN + col]) != static_cast<long long>(L))
    {
        std::cout << " !! W R O N G result !!\n";
    }
    std::cout << std::endl;

    //##########################################################################
    // Timings and performance
    double const n_values = static_cast<double>(L) * (static_cast<double>(N) + M)
                            + static_cast<double>(M) * N;   // in double: no int overflow
    std::cout << std::endl;
    std::cout.precision(2);
    std::cout << "N = " << N << "\t M = " << M << "\t L = " << L << std::endl;
    std::cout << "Time for Nloops: " << t_diff*Nloops << std::endl;
    std::cout << "Timing in sec. : " << t_diff << std::endl;
    std::cout << "GFLOPS : " << 2.0 * N * M * L / t_diff / 1024 / 1024 / 1024 << std::endl;
    std::cout << "GiByte/s : " << n_values / t_diff / 1024 / 1024 / 1024 * sizeof(a[0]) << std::endl;
    std::cout << std::endl << std::endl;
    return;
}
/** \brief Benchmark D: times the polynomial evaluation PolynomEval(a,x).
 *
 * The polynomial is 1 - t + t^2 - t^3 + ... up to degree p and is evaluated
 * at N points that are all equal to 1. There the value is 1 when the number
 * of coefficients (p+1) is odd and 0 when it is even.
 *
 * \param[in] p       degree of the polynomial; must not be negative
 * \param[in] N       number of evaluation points; must be positive
 * \param[in] Nloops  number of timed repetitions; must be positive
 */
void benchmark_D(int const & p, int const & N, int const & Nloops)
{
    assert(p >= 0 && N > 0 && Nloops > 0);
    //##########################################################################
    std::cout << "\nStart Benchmarking D: polynomial evaluation\n";

    std::vector<double> x(static_cast<std::size_t>(N), 1.0);
    std::vector<double> sol(static_cast<std::size_t>(N));
    std::vector<double> a(static_cast<std::size_t>(p) + 1);
    for (std::size_t i = 0; i < a.size(); ++i)
    {
        a[i] = (i % 2 == 0) ? 1.0 : -1.0;   // 1-x+x^2-x^3+x^4...
    }

    // steady_clock is monotonic, so the interval cannot be distorted by
    // adjustments of the wall clock while the benchmark runs.
    auto const t1 = std::chrono::steady_clock::now();   // start timer
    double ss(0.0);
    for (int i = 0; i < Nloops; ++i)
    {
        sol = PolynomEval(a, x);
        ss += sol[0];   // accumulate so the repeated results are not simply discarded
    }
    auto const t2 = std::chrono::steady_clock::now();   // stop timer
    auto const duration = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1);
    double t_diff = static_cast<double>(duration.count()) / 1e6;   // overall duration in seconds
    t_diff = t_diff / Nloops;                                      // duration per loop in seconds

    //##########################################################################
    // Check the result
    long long const expected = static_cast<long long>(a.size() % 2);
    std::cout << "\n p(x[0]) = " << sol[0] << std::endl;
    if (std::llround(sol[0]) != expected)
    {
        std::cout << " !! W R O N G result !!\n";
    }
    std::cout << std::endl;

    //##########################################################################
    // Timings and performance
    // computed in double: N*(3+2*p) in int is close to INT_MAX already for p = 1e4, N = 1e5
    double const n_values = static_cast<double>(N) * (3.0 + 2.0 * p);
    std::cout << std::endl;
    std::cout.precision(2);
    std::cout << "p = " << p << "\t N = " << N << std::endl;
    std::cout << "Time for Nloops: " << t_diff*Nloops << std::endl;
    std::cout << "Timing in sec. : " << t_diff << std::endl;
    std::cout << "GFLOPS : " << 2.0*(p+1)*N / t_diff / 1024 / 1024 / 1024 << std::endl;
    std::cout << "GiByte/s : " << n_values / t_diff / 1024 / 1024 / 1024 * sizeof(a[0]) << std::endl;
    std::cout << std::endl << std::endl;
    return;
}

View file

@ -0,0 +1,136 @@
#ifndef BSP_3_LIB_BENCH_H_INCLUDED
#define BSP_3_LIB_BENCH_H_INCLUDED
#include <vector>
/** Inner product
@param[in] x vector
@param[in] y vector of the same size as x (checked with an assert)
@return resulting Euclidean inner product <x,y>
*/
double scalar(std::vector<double> const &x, std::vector<double> const &y);
/** Inner product with Kahan (compensated) summation
@param[in] x vector
@param[in] y vector of the same size as x (checked with an assert)
@return resulting Euclidean inner product <x,y>
*/
double scalar_kahan(std::vector<double> const &x, std::vector<double> const &y);
/** Euclidean norm
@param[in] x vector
@return resulting Euclidean norm, i.e. the square root of the sum of the squared entries
*/
double norm_eucl(std::vector<double> const &x);
/** \brief Matrix-vector multiplication (row-wise access)
*
* \param[in] a matrix stored row by row; a.size() has to be a multiple of x.size()
* \param[in] x vector which gets multiplied; its size is the number of matrix columns
* \return resulting product a*x (vector with one entry per matrix row)
*
*/
std::vector<double> MatVec(std::vector<double> const & a, std::vector<double> const & x);
/** \brief Matrix-vector multiplication (column-wise access)
*
* \param[in] a matrix stored column by column; a.size() has to be a multiple of x.size()
* \param[in] x vector which gets multiplied; its size is the number of matrix columns
* \return resulting product a*x (vector with one entry per matrix row)
*
*/
std::vector<double> MatVec_column(std::vector<double> const & a, std::vector<double> const & x);
/** \brief Matrix-matrix multiplication (row-wise access)
*
* \param[in] a matrix with row wise access (M*L)
* \param[in] b matrix with row wise access (L*N)
* \param[in] L inner dimension of the matrix product; a.size() and b.size() have to be multiples of L
* \return resulting product a*b
*
*/
std::vector<double> MatMatProd(std::vector<double> const & a, std::vector<double> const & b, int const & L);
/** \brief Polynomial evaluation at the points x (Horner scheme)
*
* \param[in] a vector with the coefficients of the polynomial a=[a0,a1,a2,...]
* \param[in] x vector of points at which the polynomial is evaluated
* \return resulting vector p(x), one value per entry of x
*
*/
std::vector<double> PolynomEval(std::vector<double> const & a, std::vector<double> const & x);
/** \brief Benchmarking A - the scalar product
*
* \param N size of the vector
* \param Nloops number of iterations we want to do for the measuring
*
*/
void benchmark_A(int const & N, int const & Nloops);
/** \brief Benchmarking A - the scalar product with Kahan summation
*
* \param N size of the vector
* \param Nloops number of iterations we want to do for the measuring
*
*/
void benchmark_A_kahan(int const & N, int const & Nloops);
/** \brief Benchmarking A - Euclidean norm
*
* \param N size of the vector
* \param Nloops number of iterations we want to do for the measuring
*
*/
void benchmark_A_norm(int const & N, int const & Nloops);
/** \brief Benchmarking B - matrix-vector product Ax=b (row wise access)
*
* \param N size of vector x
* \param M size of vector b (=> A: M*N)
* \param Nloops number of iterations we want to do for the measuring
*
*/
void benchmark_B(int const & N, int const & M, int const & Nloops);
/** \brief Benchmarking B - matrix-vector product Ax=b (column wise access)
*
* \param N size of vector x
* \param M size of vector b (=> A: M*N)
* \param Nloops number of iterations we want to do for the measuring
*
*/
void benchmark_B_column(int const & N, int const & M, int const & Nloops);
/** \brief Benchmarking C - Matrix-Matrix product C=A*B A_M*L, B_L*N
*
* \param N number of columns of B (B holds L*N entries)
* \param M number of rows of A (A holds M*L entries)
* \param L inner dimension of the product
* \param Nloops number of iterations we want to do for the measuring
*
*/
void benchmark_C(int const & N, int const & M, int const & L, int const & Nloops);
/** \brief Benchmarking D - polynomial evaluation
*
* \param p the degree of the polynomial
* \param N size of the input vector x where p(x)
* \param Nloops number of iterations we want to do for the measuring
*
*/
void benchmark_D(int const & p, int const & N, int const & Nloops);
#endif // BSP_3_LIB_BENCH_H_INCLUDED

View file

@ -0,0 +1,106 @@
Aufgabe 2: Übersicht über memory, number of floating point operations and writing/reading operations
memory (*8, in Bytes) FLOP writing/reading
A - scalar 2N 2N 2N
B - MatVec N*M+N 2*N*M 2*N*M+M
C - MatMatProd L*(M+N) 2*L*M*N L*N+L*M+M*N
D - PolyEval p+1+N 2*(p+1)*N 2*(p+1)*N + N
Aufgabe 3-5: Ergebnisse sämtlicher Benchmark-Tests
Start Benchmarking A: scalar product
<x,y> = 2.5e+08
N = 250000000
Time for Nloops: 20
Timing in sec. : 0.79
GFLOPS : 0.59
GiByte/s : 4.7
Start Benchmarking A: scalar product with Kahan summation
<x,y> = 2.5e+08
N = 250000000
Time for Nloops: 23
Timing in sec. : 0.92
Start Benchmarking A_norm: euclidean norm
||x|| = 1.6e+04
N = 250000000
Time for Nloops: 22
Timing in sec. : 0.88
GFLOPS : 0.53
GiByte/s : 2.1
Start Benchmarking B: Matrix-Vector Product (row wise access)
<A[17,.],x> = 8e+03
N = 8000 M = 8000
Time for Nloops: 17
Timing in sec. : 0.23
GFLOPS : 0.52
GiByte/s : 4.1
Start Benchmarking B: Matrix-Vector Product (column wise access)
<A[17,.],x> = 8e+03
N = 8000 M = 8000
Time for Nloops: 18
Timing in sec. : 0.24
GFLOPS : 0.51
GiByte/s : 4
Start Benchmarking C: Matrix-Matrix Product
C[10,15] = 1e+03
N = 1000 M = 1000 L = 1000
Time for Nloops: 19
Timing in sec. : 6.5
GFLOPS : 0.29
GiByte/s : 0.0034
Start Benchmarking D: polynomial evaluation
p(x[0]) = 1
p = 10000 N = 100000
Time for Nloops: 19
Timing in sec. : 3.8
GFLOPS : 0.49
GiByte/s : 4
Anmerkungen zu Bsp 5:
A: die Berechnung der Norm benötigt länger, obwohl nur ein Vektor beteiligt ist; sqrt ist rechenintensiv im Vergleich zu +,-,/,*
B: kahan summation benötigt etwas länger
C: keine gravierenden Unterschiede feststellbar (column wise aber bereits optimiert)
Anmerkungen zu Bsp 4:
bei C) Matrix*Matrix wurde nicht das 10-100 fache des L3 Caches verwendet, da die Berechnung dann irrsinnig lange gebraucht hätte;
dadurch etwas kleinere Dimensionen

42
BSP_3_2to5/bsp_3_x.cbp Normal file
View file

@ -0,0 +1,42 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
<CodeBlocks_project_file>
<FileVersion major="1" minor="6" />
<Project>
<Option title="bsp_3_x" />
<Option pch_mode="2" />
<Option compiler="gcc" />
<Build>
<Target title="Debug">
<Option output="bin/Debug/bsp_3_x" prefix_auto="1" extension_auto="1" />
<Option object_output="obj/Debug/" />
<Option type="1" />
<Option compiler="gcc" />
<Compiler>
<Add option="-g" />
</Compiler>
</Target>
<Target title="Release">
<Option output="bin/Release/bsp_3_x" prefix_auto="1" extension_auto="1" />
<Option object_output="obj/Release/" />
<Option type="1" />
<Option compiler="gcc" />
<Compiler>
<Add option="-O2" />
</Compiler>
<Linker>
<Add option="-s" />
</Linker>
</Target>
</Build>
<Compiler>
<Add option="-Wall" />
<Add option="-fexceptions" />
</Compiler>
<Unit filename="bsp_3_lib_bench.cpp" />
<Unit filename="bsp_3_lib_bench.h" />
<Unit filename="main.cpp" />
<Extensions>
<lib_finder disable_auto="1" />
</Extensions>
</Project>
</CodeBlocks_project_file>

17
BSP_3_2to5/main.cpp Normal file
View file

@ -0,0 +1,17 @@
#include "bsp_3_lib_bench.h"
#include <iostream>
using namespace std;
/** Runs every benchmark once with fixed problem sizes. */
int main()
{
    // Problem sizes as integer constants; the values equal the former literals
    // 25*1e7, 1e4 and 1e5.
    int const vecLen    = 250000000;
    int const vecLoops  = 25;
    int const matDim    = 8000;
    int const matLoops  = 75;
    int const prodDim   = 1000;
    int const prodLoops = 3;
    int const polyDeg   = 10000;
    int const polyLen   = 100000;
    int const polyLoops = 5;

    benchmark_A(vecLen, vecLoops);
    benchmark_A_kahan(vecLen, vecLoops);
    benchmark_A_norm(vecLen, vecLoops);

    benchmark_B(matDim, matDim, matLoops);
    benchmark_B_column(matDim, matDim, matLoops);

    benchmark_C(prodDim, prodDim, prodDim, prodLoops);

    benchmark_D(polyDeg, polyLen, polyLoops);
    return 0;
}