124 lines
4.2 KiB
C++
124 lines
4.2 KiB
C++
#include "mylib.h"
|
|
#include <cassert>
|
|
#include <chrono> // timing
|
|
#include <cmath> // sqrt()
|
|
#include <cstdlib> // atoi()
|
|
#include <cstring> // strncmp()
|
|
#include <ctime>
|
|
#include <iostream>
|
|
#include <sstream>
|
|
using namespace std;
|
|
using namespace std::chrono; // timing
|
|
|
|
int main(int argc, char **argv)
|
|
{
|
|
int const NLOOPS = 50; // chose a value such that the benchmark runs at least 10 sec.
|
|
unsigned int N = 50000001;
|
|
//##########################################################################
|
|
// Read Paramater from command line (C++ style)
|
|
cout << "Checking command line parameters for: -n <number> " << endl;
|
|
for (int i = 1; i < argc; i++)
|
|
{
|
|
cout << " arg[" << i << "] = " << argv[i] << endl;
|
|
if (std::strncmp(argv[i], "-n", 2) == 0 && i + 1 < argc) // found "-n" followed by another parameter
|
|
{
|
|
N = static_cast<unsigned int>(atoi(argv[i + 1]));
|
|
}
|
|
else
|
|
{
|
|
cout << "Corect call: " << argv[0] << " -n <number>\n";
|
|
}
|
|
}
|
|
|
|
cout << "\nN = " << N << endl;
|
|
|
|
//##########################################################################
|
|
// Memory allocation
|
|
cout << "Memory allocation\n";
|
|
|
|
vector<double> x(N), y(N);
|
|
|
|
cout.precision(2);
|
|
cout << 2.0 * N *sizeof(x[0]) / 1024 / 1024 / 1024 << " GByte Memory allocated\n";
|
|
cout.precision(6);
|
|
|
|
//##########################################################################
|
|
// Data initialization
|
|
// Special: x_i = i+1; y_i = 1/x_i ==> <x,y> == N
|
|
for (unsigned int i = 0; i < N; ++i)
|
|
{
|
|
x[i] = i + 1;
|
|
y[i] = 1.0 / pow(x[i], 2);
|
|
}
|
|
|
|
//##########################################################################
|
|
cout << "\nStart Benchmarking Normal sum\n";
|
|
|
|
// Do calculation
|
|
auto t1 = system_clock::now(); // start timer
|
|
double sk1(0.0),ss(0.0);
|
|
for (int i = 0; i < NLOOPS; ++i)
|
|
{
|
|
sk1 = normal_sum(y);
|
|
ss += sk1; // prevents the optimizer from removing unused calculation results.
|
|
}
|
|
|
|
auto t2 = system_clock::now(); // stop timer
|
|
auto duration = duration_cast<microseconds>(t2 - t1); // duration in microseconds
|
|
double t_diff = static_cast<double>(duration.count()) / 1e6; // overall duration in seconds
|
|
t_diff = t_diff/NLOOPS;
|
|
|
|
|
|
// Print result
|
|
printf("\nSum = %.16f\n", sk1);
|
|
|
|
//##########################################################################
|
|
|
|
// Timings and Performance
|
|
cout << endl;
|
|
cout.precision(2);
|
|
cout << "Timing in sec. : " << t_diff << endl;
|
|
cout << "GFLOPS : " << 2.0 * N / t_diff / 1024 / 1024 / 1024 << endl;
|
|
cout << "GiByte/s : " << 2.0 * N / t_diff / 1024 / 1024 / 1024 * sizeof(x[0]) << endl;
|
|
|
|
//##########################################################################
|
|
|
|
cout << "\nStart Benchmarking Kahan summation\n";
|
|
|
|
// Do calculation
|
|
t1 = system_clock::now(); // start timer
|
|
double sk2(0.0),sss(0.0);
|
|
for (int i = 0; i < NLOOPS; ++i)
|
|
{
|
|
sk2 = Kahan_skalar(y);
|
|
sss += sk2; // prevents the optimizer from removing unused calculation results.
|
|
}
|
|
|
|
t2 = system_clock::now(); // stop timer
|
|
duration = duration_cast<microseconds>(t2 - t1); // duration in microseconds
|
|
t_diff = static_cast<double>(duration.count()) / 1e6; // overall duration in seconds
|
|
t_diff = t_diff/NLOOPS; // duration per loop seconds
|
|
// duration per loop seconds
|
|
|
|
// Print result
|
|
printf("\nSum = %.16f\n", sk2);
|
|
|
|
|
|
//##########################################################################
|
|
|
|
// Timings and Performance
|
|
cout << endl;
|
|
cout.precision(2);
|
|
cout << "Timing in sec. : " << t_diff << endl;
|
|
cout << "GFLOPS : " << 2.0 * N / t_diff / 1024 / 1024 / 1024 << endl;
|
|
cout << "GiByte/s : " << 2.0 * N / t_diff / 1024 / 1024 / 1024 * sizeof(x[0]) << endl;
|
|
|
|
//##########################################################################
|
|
|
|
// Print limit
|
|
printf("\nLimit = %.16f\n\n", pow(M_PI,2) / 6.0f);
|
|
|
|
//##########################################################################
|
|
|
|
return 0;
|
|
} // memory for x and y will be deallocated by their destructors
|