This commit is contained in:
dino.celebic 2025-11-11 15:50:51 +01:00
commit 3882aee07a
71 changed files with 160045 additions and 0 deletions

2877
ex3/seq/skalar/Doxyfile Normal file

File diff suppressed because it is too large Load diff

32
ex3/seq/skalar/Makefile Normal file
View file

@ -0,0 +1,32 @@
#
# Build description for the sequential inner-product benchmark.
# The compiler family is selected through COMPILER; the matching rule set is
# pulled in at the bottom from ../${COMPILER}default.mk.
#
# use GNU-Compiler tools
COMPILER=GCC_
# alternatively from the shell
# export COMPILER=GCC_
# or, alternatively from the shell
# make COMPILER=GCC_
# use Intel compilers
#COMPILER=ICC_
# use PGI compilers
# COMPILER=PGI_
# Translation units of the benchmark; c_main.cpp is not part of this list.
SOURCES = main.cpp mylib.cpp
# One object file per source file.
OBJECTS = $(SOURCES:.cpp=.o)
# Executable name carries the compiler tag, e.g. main.GCC_
PROGRAM = main.${COMPILER}
# The next two lines enable optimization and vectorization, and make GCC report
# which loops were vectorized and which were not (-fopt-info-vec-*).
CXXFLAGS += -O3 -ftree-vectorize -fopt-info-vec-missed -fopt-info-vec-optimized
CXXFLAGS += -mavx2 -mno-avx256-split-unaligned-load -mno-avx256-split-unaligned-store
# optional additional flag:
#-funroll-loops
# Link with debug information and against the TBB library
# (presumably needed for the <execution> policies used in main.cpp - confirm).
LINKFLAGS += -g -ltbb
# do not use -pg with PGI compilers
# Fallback if COMPILER is not set; only takes effect when the assignment
# near the top of this file is commented out.
ifndef COMPILER
COMPILER=GCC_
endif
# Compiler-specific default rules and targets.
include ../${COMPILER}default.mk

97
ex3/seq/skalar/c_main.cpp Normal file
View file

@ -0,0 +1,97 @@
#include "mylib.h"
#include <chrono> // timing
#include <cstdlib> // atoi()
#include <cstring> // strncmp()
#include <iostream>
#include <sstream>
using namespace std;
using namespace std::chrono; // timing
/** Benchmark driver for the sequential inner product scalar().
 *
 *  Allocates two vectors of length N (default 50000001, overridable with
 *  "-n <number>"), fills them such that <x,y> == N, calls scalar() NLOOPS
 *  times and reports the average time per call together with the derived
 *  floating point and memory throughput.
 *
 *  @param[in] argc number of command line arguments
 *  @param[in] argv command line arguments; "-n <number>" sets the vector length
 *  @return 0
 */
int main(int argc, char **argv)
{
    int const NLOOPS = 50;        // choose a value such that the benchmark runs at least 10 sec.
    unsigned int N = 50000001;
    //##########################################################################
    //   Read parameter from command line (C++ style)
    std::cout << "Checking command line parameters for: -n <number> " << std::endl;
    for (int i = 1; i < argc; i++)
    {
        std::cout << " arg[" << i << "] = " << argv[i] << std::endl;
        if (std::strncmp(argv[i], "-n", 2) == 0 && i + 1 < argc) // found "-n" followed by another parameter
        {
            int const requested = std::atoi(argv[i + 1]);
            // The value belongs to "-n": consume it here so that the next
            // iteration does not report it as an unknown argument.
            ++i;
            std::cout << " arg[" << i << "] = " << argv[i] << std::endl;
            if (requested > 0)
            {
                N = static_cast<unsigned int>(requested);
            }
            else
            {
                // Non-numeric, zero or negative length: keep the default N.
                // A negative value would otherwise wrap to a huge unsigned size.
                std::cout << "Corect call: " << argv[0] << " -n <number>\n";
            }
        }
        else
        {
            std::cout << "Corect call: " << argv[0] << " -n <number>\n";
        }
    }
    std::cout << "\nN = " << N << std::endl;
    //##########################################################################
    //   Memory allocation
    std::cout << "Memory allocation\n";
    double *x = new double [N];
    double *y = new double [N];
    std::cout.precision(2);
    std::cout << 2.0 * N * sizeof(x[0]) / 1024 / 1024 / 1024 << " GByte Memory allocated\n";
    std::cout.precision(6);
    //##########################################################################
    //   Data initialization
    //   Special: x_i = i+1; y_i = 1/x_i  ==> <x,y> == N
    for (unsigned int i = 0; i < N; ++i)
    {
        x[i] = i + 1;
        y[i] = 1.0 / x[i];
    }
    //##########################################################################
    std::cout << "\nStart Benchmarking\n";
    // steady_clock is monotonic, so the measured interval cannot be distorted
    // by adjustments of the system time.
    auto const tstart = std::chrono::steady_clock::now();   // start timer
    //   Do calculation
    double sk(0.0);
    for (int i = 0; i < NLOOPS; ++i)
    {
        sk = scalar(N, x, y);
        // sk = norm(N,x);
    }
    auto const tend = std::chrono::steady_clock::now();     // end timer
    auto const duration = std::chrono::duration_cast<std::chrono::microseconds>(tend - tstart);
    auto t1 = static_cast<double>(duration.count()) / 1e6;  // t1 in seconds
    t1 /= NLOOPS;                                           // divide by number of function calls
    //##########################################################################
    //   Check the correct result
    std::cout << "\n <x,y> = " << sk << std::endl;
    // Round to nearest before comparing: x_i * (1/x_i) is not always exactly 1
    // in floating point, so the sum may end up marginally below N and plain
    // truncation would then flag a correct result as wrong.
    if (static_cast<unsigned int>(sk + 0.5) != N)
    {
        std::cout << " !! W R O N G result !!\n";
    }
    std::cout << std::endl;
    //##########################################################################
    //   Timings and Performance
    //   2 floating point operations and 2 loaded doubles per vector element;
    //   both figures are scaled by 1024^3.
    std::cout << std::endl;
    std::cout.precision(2);
    std::cout << "Timing in sec. : " << t1 << std::endl;
    std::cout << "GFLOPS : " << 2.0 * N / t1 / 1024 / 1024 / 1024 << std::endl;
    std::cout << "GiByte/s : " << 2.0 * N / t1 / 1024 / 1024 / 1024 * sizeof(x[0]) << std::endl;
    //##########################################################################
    //   Free allocated memory
    delete [] y;
    delete [] x;
    return 0;
}

View file

@ -0,0 +1 @@

105
ex3/seq/skalar/main.cpp Normal file
View file

@ -0,0 +1,105 @@
#include "mylib.h"
#include <chrono> // timing
#include <cstdlib> // atoi()
#include <cstring> // strncmp()
#include <execution> // policy
#include <iostream>
#include <numeric> // transform_reduce
#include <sstream>
#include <vector>
using namespace std;
using namespace std::chrono; // timing
/** Benchmark driver for the unrolled inner product scalar_unroll().
 *
 *  Allocates two vectors of length N (default 50000001, overridable with
 *  "-n <number>"), fills them such that <x,y> == N, calls scalar_unroll()
 *  NLOOPS times and reports the average time per call together with the
 *  derived floating point and memory throughput.
 *
 *  @param[in] argc number of command line arguments
 *  @param[in] argv command line arguments; "-n <number>" sets the vector length
 *  @return 0
 */
int main(int argc, char **argv)
{
    int const NLOOPS = 50;        // choose a value such that the benchmark runs at least 10 sec.
    unsigned int N = 50000001;
    //##########################################################################
    //   Read parameter from command line (C++ style)
    std::cout << "Checking command line parameters for: -n <number> " << std::endl;
    for (int i = 1; i < argc; i++)
    {
        std::cout << " arg[" << i << "] = " << argv[i] << std::endl;
        if (std::strncmp(argv[i], "-n", 2) == 0 && i + 1 < argc) // found "-n" followed by another parameter
        {
            int const requested = std::atoi(argv[i + 1]);
            // The value belongs to "-n": consume it here so that the next
            // iteration does not report it as an unknown argument.
            ++i;
            std::cout << " arg[" << i << "] = " << argv[i] << std::endl;
            if (requested > 0)
            {
                N = static_cast<unsigned int>(requested);
            }
            else
            {
                // Non-numeric, zero or negative length: keep the default N.
                // A negative value would otherwise wrap to a huge unsigned size.
                std::cout << "Corect call: " << argv[0] << " -n <number>\n";
            }
        }
        else
        {
            std::cout << "Corect call: " << argv[0] << " -n <number>\n";
        }
    }
    std::cout << "\nN = " << N << std::endl;
    //##########################################################################
    //   Memory allocation
    std::cout << "Memory allocation\n";
    // std::vector owns the storage and releases it automatically at scope exit.
    // Alternatives that were tried:
    //double *x = new double [N];
    //double *y = new double [N];
    //alignas(16) double x[N], y[N];
    std::vector<double> x(N);
    std::vector<double> y(N);
    std::cout.precision(2);
    std::cout << 2.0 * N * sizeof(x[0]) / 1024 / 1024 / 1024 << " GiByte Memory allocated\n";
    std::cout.precision(6);
    //##########################################################################
    //   Data initialization
    //   Special: x_i = i+1; y_i = 1/x_i  ==> <x,y> == N
    for (unsigned int i = 0; i < N; ++i)
    {
        x[i] = i + 1;
        y[i] = 1.0 / x[i];
    }
    //##########################################################################
    std::cout << "\nStart Benchmarking\n";
    // steady_clock is monotonic, so the measured interval cannot be distorted
    // by adjustments of the system time.
    auto const tstart = std::chrono::steady_clock::now();   // start timer
    //   Do calculation
    //   The results of all calls are accumulated and averaged afterwards.
    double sk(0.0);
    for (int i = 0; i < NLOOPS; ++i)
    {
        //sk = scalar(N, x, y);
        sk += scalar_unroll(N, x.data(), y.data());
        //sk += transform_reduce(std::execution::par_unseq,cbegin(x),cend(x),cbegin(y),0.0);
        // sk = norm(N,x);
    }
    auto const tend = std::chrono::steady_clock::now();     // end timer
    auto const duration = std::chrono::duration_cast<std::chrono::microseconds>(tend - tstart);
    auto t1 = static_cast<double>(duration.count()) / 1e6;  // t1 in seconds
    t1 /= NLOOPS;                                           // divide by number of function calls
    //##########################################################################
    //   Check the correct result
    sk /= NLOOPS;                                           // average over all calls
    std::cout << "\n <x,y> = " << sk << std::endl;
    // Round to nearest before comparing: x_i * (1/x_i) is not always exactly 1
    // in floating point, so the sum may end up marginally below N and plain
    // truncation would then flag a correct result as wrong.
    if (static_cast<unsigned int>(sk + 0.5) != N)
    {
        std::cout << " !! W R O N G result !!\n";
    }
    std::cout << std::endl;
    //##########################################################################
    //   Timings and Performance
    //   2 floating point operations and 2 loaded doubles per vector element;
    //   both figures are scaled by 1024^3.
    std::cout << std::endl;
    std::cout.precision(2);
    std::cout << "Timing in sec. : " << t1 << std::endl;
    std::cout << "GFLOPS : " << 2.0 * N / t1 / 1024 / 1024 / 1024 << std::endl;
    std::cout << "GiByte/s : " << 2.0 * N / t1 / 1024 / 1024 / 1024 * sizeof(x[0]) << std::endl;
    //##########################################################################
    //   No explicit deallocation needed: the vectors free their memory themselves.
    return 0;
}

56
ex3/seq/skalar/mylib.cpp Normal file
View file

@ -0,0 +1,56 @@
#include "mylib.h"
#include <cassert> // assert()
#include <cmath>
#include <cstdlib> // alignof()
//#include <iostream>
#include <numeric>
#include <vector>
using namespace std;
/** Inner product of two vectors, summed strictly from the first to the last element.
 *
 *  @param[in] N number of vector elements
 *  @param[in] x first vector, at least N elements
 *  @param[in] y second vector, at least N elements
 *  @return sum of x[i]*y[i] for i = 0..N-1 (0.0 for N == 0)
 */
double scalar(unsigned int const N, double const x[], double const y[])
{
    double acc = 0.0;
    unsigned int idx = 0;
    while (idx < N)
    {
        double const product = x[idx] * y[idx];
        acc += product;
        ++idx;
    }
    return acc;
}
/** Inner product of two vectors, accumulated in Stride (= 8) independent partial sums.
 *
 *  Element i of the blocked part contributes to partial sum i % Stride; the
 *  partial sums are added up afterwards and the at most Stride-1 remaining
 *  elements are added one by one. The summation order therefore differs from
 *  a plain left-to-right loop, so the result may deviate from it by rounding.
 *
 *  @param[in] N number of vector elements
 *  @param[in] x first vector, at least N elements
 *  @param[in] y second vector, at least N elements
 *  @return sum of x[i]*y[i] for i = 0..N-1 (0.0 for N == 0)
 */
double scalar_unroll(unsigned int const N, double const x[], double const y[])
{
    constexpr unsigned int Stride{8};
    // alignas guarantees the 32 byte alignment of the accumulators; no run-time
    // check is needed. The former assertions on alignof(x)/alignof(y) tested the
    // alignment of the pointer type itself (not of the data) through a
    // non-standard alignof(expression) and failed wherever pointers are not
    // 8 bytes wide, so they have been removed.
    alignas(32) double sk[Stride] = {0.0};

    // Largest multiple of Stride not exceeding N, computed once for both loops.
    unsigned int const nBlocked = (N / Stride) * Stride;

    for (unsigned int i = 0; i < nBlocked; i += Stride)
    {
        for (unsigned int k = 0; k < Stride; ++k)
        {
            sk[k] += x[i + k] * y[i + k];
        }
    }

    double sum = std::accumulate(sk, sk + Stride, 0.0);

    // Remainder: the last N % Stride elements.
    for (unsigned int i = nBlocked; i < N; ++i)
    {
        sum += x[i] * y[i];
    }
    return sum;
}
/** Euclidean (L_2) norm of a vector.
 *
 *  @param[in] N number of vector elements
 *  @param[in] x vector, at least N elements
 *  @return square root of the sum of x[i]*x[i] for i = 0..N-1 (0.0 for N == 0)
 */
double norm(unsigned int const N, double const x[])
{
    double squares = 0.0;
    double const *const last = x + N;
    for (double const *entry = x; entry != last; ++entry)
    {
        squares += (*entry) * (*entry);
    }
    return std::sqrt(squares);
}

17
ex3/seq/skalar/mylib.h Normal file
View file

@ -0,0 +1,17 @@
#pragma once
/** Inner product
@param[in] N number of vector elements
@param[in] x vector
@param[in] y vector
@return resulting Euclidean inner product <x,y>
*/
double scalar(unsigned int N, double const x[], double const y[]);
/** Inner product, accumulated in 8 independent partial sums that are added up
at the end; elements beyond the last full block of 8 are added one by one.
The summation order differs from scalar(), so the result may differ by rounding.
@param[in] N number of vector elements
@param[in] x vector
@param[in] y vector
@return resulting Euclidean inner product <x,y>
*/
double scalar_unroll(unsigned int const N, double const x[], double const y[]);
/** L_2 Norm of a vector
@param[in] N number of vector elements
@param[in] x vector
@return resulting Euclidean norm ||x||_2, i.e. the square root of <x,x>
*/
double norm(unsigned int N, double const x[]);

File diff suppressed because it is too large Load diff