sheet 5 not tested

This commit is contained in:
Markus Schmidt 2025-12-02 20:28:11 +01:00
commit 64c7aed176
169 changed files with 225337 additions and 0 deletions

59
sheet5/1/.vscode/settings.json vendored Normal file
View file

@ -0,0 +1,59 @@
{
"files.associations": {
"iostream": "cpp",
"cmath": "cpp",
"array": "cpp",
"atomic": "cpp",
"bit": "cpp",
"cctype": "cpp",
"charconv": "cpp",
"chrono": "cpp",
"clocale": "cpp",
"compare": "cpp",
"concepts": "cpp",
"cstdarg": "cpp",
"cstddef": "cpp",
"cstdint": "cpp",
"cstdio": "cpp",
"cstdlib": "cpp",
"cstring": "cpp",
"ctime": "cpp",
"cwchar": "cpp",
"cwctype": "cpp",
"deque": "cpp",
"list": "cpp",
"string": "cpp",
"unordered_map": "cpp",
"vector": "cpp",
"exception": "cpp",
"algorithm": "cpp",
"functional": "cpp",
"iterator": "cpp",
"memory": "cpp",
"memory_resource": "cpp",
"numeric": "cpp",
"optional": "cpp",
"random": "cpp",
"ratio": "cpp",
"string_view": "cpp",
"system_error": "cpp",
"tuple": "cpp",
"type_traits": "cpp",
"utility": "cpp",
"format": "cpp",
"initializer_list": "cpp",
"iomanip": "cpp",
"iosfwd": "cpp",
"istream": "cpp",
"limits": "cpp",
"new": "cpp",
"numbers": "cpp",
"ostream": "cpp",
"span": "cpp",
"sstream": "cpp",
"stdexcept": "cpp",
"streambuf": "cpp",
"typeinfo": "cpp",
"variant": "cpp"
}
}

2877
sheet5/1/Doxyfile Normal file

File diff suppressed because it is too large Load diff

View file

30
sheet5/1/Makefile Normal file
View file

@ -0,0 +1,30 @@
#
# Compiler family; selects the shared rule file ../<COMPILER>default.mk (see the include below).
# GNU compiler tools (GCC_) are the default. '?=' only assigns when COMPILER is
# not set yet, so the value can also be chosen from the shell:
#     export COMPILER=GCC_
# or on the command line:
#     make COMPILER=GCC_
# use Intel compilers:  COMPILER=ICC_
# use PGI compilers:    COMPILER=PGI_
COMPILER ?= GCC_

SOURCES = main.cpp mylib.cpp
OBJECTS = $(SOURCES:.cpp=.o)

PROGRAM = main.${COMPILER}

# the next two lines add debug information (debugging and detailed performance analysis);
# comment them out if it is not needed
CXXFLAGS += -g
LINKFLAGS += -g
# do not use -pg with PGI compilers

include ../${COMPILER}default.mk

View file

99
sheet5/1/check_env.h Normal file
View file

@ -0,0 +1,99 @@
#pragma once
#include <iostream>
#ifdef _OPENMP
#include <omp.h>
#endif
#include <unordered_map>
//#####################################
// G.Haase
// See https://sourceforge.net/p/predef/wiki/Compilers/
// http://www.cplusplus.com/doc/tutorial/preprocessor/
// also: export OMP_DISPLAY_ENV=VERBOSE
//#####################################
/** Prints a banner describing the build and runtime environment to std::cout.
 *
 *  The banner contains the command line, the compiler and its version, the value of
 *  @c __cplusplus, the parallel programming models detected at compile time
 *  (MPI, OpenMP, OpenACC) and the compilation date/time.
 *  If compiled with OpenMP, a parallel region is opened to report the number of threads.
 *
 *  @param[in] argc   number of command line arguments
 *  @param[in] argv   command line arguments as array of C-strings
 */
template <class T>
void check_env(T argc, char const *argv[])
{
    std::cout << "\n#######################################################################\n";
    std::cout << "Code :";
    for (T k = 0; k < argc; ++k) std::cout << " " << argv[k];
    std::cout << std::endl;

    // compiler:    https://sourceforge.net/p/predef/wiki/Compilers/
    std::cout << "Compiler: ";
#if defined __INTEL_COMPILER
#pragma message(" ########## INTEL ###############")
    std::cout << "INTEL " << __INTEL_COMPILER;
    // Ignore warnings for unrecognized #pragma acc
#pragma warning disable 161
    // Ignore warnings for unrecognized #pragma omp
#pragma warning disable 3180
#elif defined __PGI
#pragma message(" ########## PGI ###############")
    std::cout << "PGI " << __PGIC__ << "." << __PGIC_MINOR__ << "." << __PGIC_PATCHLEVEL__;
#elif defined __clang__
#pragma message(" ########## CLANG ###############")
    // the patch level is printed as well, otherwise the version ends with a dangling "."
    std::cout << "CLANG " << __clang_major__ << "." << __clang_minor__ << "." << __clang_patchlevel__;
#elif defined __GNUC__
#pragma message(" ########## Gnu ###############")
    std::cout << "Gnu " << __GNUC__ << "." << __GNUC_MINOR__ << "." << __GNUC_PATCHLEVEL__;
#else
#pragma message(" ########## unknown Compiler ###############")
    std::cout << "unknown";
#endif
    std::cout << " C++ standard: " << __cplusplus << std::endl;

    // Parallel environments
    std::cout << "Parallel: ";
#if defined MPI_VERSION
#pragma message(" ########## MPI ###############")
#ifdef OPEN_MPI
    std::cout << "OpenMPI ";
#else
    std::cout << "MPI ";
#endif
    std::cout << MPI_VERSION << "." << MPI_SUBVERSION << " ";
#endif

#ifdef _OPENMP
    //https://www.openmp.org/specifications/
    //https://stackoverflow.com/questions/1304363/how-to-check-the-version-of-openmp-on-linux
    // _OPENMP holds the release date (yyyymm) of the supported specification.
    // C-strings as mapped type keep this header independent of <string>.
    std::unordered_map<unsigned, char const *> const omp_versions{
        {200505, "2.5"}, {200805, "3.0"}, {201107, "3.1"}, {201307, "4.0"}, {201511, "4.5"},
        {201611, "5.0"}, {201811, "5.0"}, {202011, "5.1"}, {202111, "5.2"}};
#pragma message(" ########## OPENMP ###############")
    std::cout << "OpenMP ";
    auto const known = omp_versions.find(_OPENMP);
    if (known != omp_versions.end()) {
        std::cout << known->second;
    }
    else {
        std::cout << _OPENMP;            // unknown release: print the raw date code
    }

    #pragma omp parallel
    {
        #pragma omp master
        {
            const int nn = omp_get_num_threads();          // OpenMP
            std::cout << " ---> " << nn << " Threads ";
        }
        #pragma omp barrier
    }
#endif

#ifdef _OPENACC
#pragma message(" ########## OPENACC ###############")
    std::cout << "OpenACC ";
#endif
    std::cout << std::endl;
    std::cout << "Date : " << __DATE__ << " " << __TIME__;
    std::cout << "\n#######################################################################\n";
}
// HG
// HG

View file

BIN
sheet5/1/main.GCC_ Executable file

Binary file not shown.

203
sheet5/1/main.cpp Normal file
View file

@ -0,0 +1,203 @@
#include "check_env.h"
#include "mylib.h"
#include <cstdlib> // atoi()
#include <cstring> // strncmp()
#include <ctime>
#include <iostream>
#include <omp.h> // OpenMP
#include <sstream>
#include <string>
#include <cmath>
using namespace std;
void benchmark(vector<double> &x, vector<double> &y, unsigned int N, unsigned int NLOOPS)
{
double sk = 0.0;
for (int i = 0; i < NLOOPS; ++i)
{
sk += scalar(x, y);
// or scalar_trans(x,y) / norm(x) if you want
}
}
/** Driver of the OpenMP experiments for the inner product.
 *
 *  -# reads the vector length from the command line (`-n <number>`),
 *  -# prints the environment and a greeting from every thread,
 *  -# times scalar() and scalar_trans() and checks the result,
 *  -# tries the user-defined reduction on an STL vector,
 *  -# times benchmark() for 1..omp_get_num_procs() threads with the
 *     static/dynamic/guided/auto schedules and chunk sizes 1,2,4,...,32,
 *  -# prints the results of scalar_parrallel_env() and reduction_vec_append().
 *
 *  @param[in] argc  number of command line arguments
 *  @param[in] argv  command line arguments as array of C-strings
 *  @return 0
 */
int main(int argc, char const *argv[])
{
    unsigned int N      = 5000001;
    int const    NLOOPS = 5;        // choose a value such that the benchmark runs at least 10 sec.
//##########################################################################
//   Read Parameter from command line  (C++ style)
    cout << "Checking command line parameters for: -n <number> " << endl;
    for (int i = 1; i < argc; i++)
    {
        cout << " arg[" << i << "] = " << argv[i] << endl;
        string const ss(argv[i]);
        // found "-n" followed by another parameter; only positive numbers are accepted
        int const value = ("-n" == ss && i + 1 < argc) ? atoi(argv[i + 1]) : 0;
        if (value > 0)
        {
            N = static_cast<unsigned int>(value);
            ++i;                    // the number belongs to "-n": consume it instead of parsing it as an option
            cout << " arg[" << i << "] = " << argv[i] << endl;
        }
        else
        {
            cout << "Corect call: " << argv[0] << " -n <number>\n";
        }
    }

    cout << "\nN = " << N << endl;

    check_env(argc, argv);
//########################################################################
    int nthreads = 1;                                  // OpenMP
    #pragma omp parallel default(none) shared(cout,nthreads)
    {
        int const th_id  = omp_get_thread_num();       // OpenMP
        int const nthrds = omp_get_num_threads();      // OpenMP
        stringstream ss;
        ss << "C++: Hello World from thread " << th_id << " / " << nthrds << endl;
        #pragma omp critical
        {
            cout << ss.str();                          // output to a shared resource
        }
        #pragma omp master
        nthreads = nthrds;                             // transfer the team size to the master thread
    }
    cout << " " << nthreads << " threads have been started." << endl;

//##########################################################################
//  Memory allocation
    cout << "Memory allocation\n";

    vector<double> x(N), y(N);

    cout.precision(2);
    cout << 2.0 * N *sizeof(x[0]) / 1024 / 1024 / 1024 << " GByte Memory allocated\n";
    cout.precision(6);

//##########################################################################
//  Data initialization
//  Special: x_i = i+1;  y_i = 1/x_i  ==> <x,y> == N
    for (unsigned int i = 0; i < N; ++i)
    {
        x[i] = i + 1;
        y[i] = 1.0 / x[i];
    }

//##########################################################################
    cout << "\nStart Benchmarking\n";

//  Do calculation
    double const tstart = omp_get_wtime();             // OpenMP

    double sk(0.0);
    for (int i = 0; i < NLOOPS; ++i)
    {
        sk = scalar(x, y);
        sk = scalar_trans(x, y);
        //sk = norm(x);
    }

    double t1 = omp_get_wtime() - tstart;              // OpenMP
    t1 /= NLOOPS;                                      // average time per loop iteration
    // NOTE(review): every iteration runs scalar() AND scalar_trans(), while the GFLOPS and
    //               GiByte/s figures below count one inner product per iteration - confirm.

//##########################################################################
//  Check the correct result
    cout << "\n <x,y> = " << sk << endl;
    if (static_cast<unsigned int>(sk) != N)
    {
        cout << " !! W R O N G result !!\n";
    }
    cout << endl;

//##########################################################################
//  Timings  and Performance
    cout << endl;
    cout.precision(2);
    cout << "Timing in sec. : " << t1 << endl;
    cout << "GFLOPS : " << 2.0 * N / t1 / 1024 / 1024 / 1024 << endl;
    cout << "GiByte/s : " << 2.0 * N / t1 / 1024 / 1024 / 1024 * sizeof(x[0]) << endl;

//#########################################################################

    cout << "\n Try the reduction with an STL-vektor \n";

    auto vr = reduction_vec(100);
    cout << "done\n";
    cout << vr << endl;

    N=2;
    // Data (re-)initialization
    for (unsigned int i = 0; i < N; ++i)
    {
        x[i] = i + 1;
        y[i] = 1.0 / x[i];
    }

    // Selects the runtime schedule, runs benchmark() once and returns the elapsed
    // wall clock time divided by NLOOPS in seconds.
    auto const timed_run = [&](omp_sched_t kind, int chunk)
    {
        omp_set_schedule(kind, chunk);
        double const t0 = omp_get_wtime();
        benchmark(x, y, N, NLOOPS);
        return (omp_get_wtime() - t0) / NLOOPS;        // difference first, then the average
    };

    int const proc_count = omp_get_num_procs();
    cout << "Number of available processors: " << proc_count << endl;

    for (int j = 1; j <= proc_count; j++)
    {
        omp_set_num_threads(j);
        cout << "used threads: "<< j << endl;

        cout << "static (chunk 0) "<< timed_run(omp_sched_static, 0) << endl;

        for (int i = 0; i <= 5; i++)
        {
            int const chunk = 1 << i;
            cout << "chunk size: "<< chunk << endl;

            // STATIC
            std::cout << "static: " << timed_run(omp_sched_static, chunk) << " s\n";
            // DYNAMIC
            std::cout << "dynamic: " << timed_run(omp_sched_dynamic, chunk) << " s\n";
            // GUIDED
            std::cout << "guided: " << timed_run(omp_sched_guided, chunk) << " s\n";
            // AUTO
            std::cout << "auto: " << timed_run(omp_sched_auto, chunk) << " s\n";

            cout << endl;
        }
        cout << endl;
    }

    cout << scalar_parrallel_env(x,y) << endl;

    vector<int> vec = reduction_vec_append(static_cast<int>(N));
    // print the first N entries; the index never leaves the vector
    for (std::size_t i = 0; i < N && i < vec.size(); ++i)
    {
        cout << vec[i] << ", ";
    }

    return 0;
}  // memory for x and y will be deallocated by their destructors

View file

BIN
sheet5/1/main.o Normal file

Binary file not shown.

136
sheet5/1/mylib.cpp Normal file
View file

@ -0,0 +1,136 @@
#include "mylib.h"
#include <cassert> // assert()
#include <cmath>
#include <iostream>
#include <functional> // multiplies<>{}
#include <list>
#include <numeric> // iota()
#ifdef _OPENMP
#include <omp.h>
#endif
#include <vector>
using namespace std;
/** Inner product <x,y> of two vectors of equal length.
 *
 *  The loop is work-shared with a sum reduction; its schedule is selected at
 *  run time (schedule(runtime)). If the function is called from inside an active
 *  parallel region, the if-clause prevents a nested team from being started.
 *  The OpenMP runtime is referenced only inside the pragma, so the function
 *  also builds and runs when OpenMP is disabled.
 *
 *  @param[in] x vector
 *  @param[in] y vector
 *  @return    resulting inner product
 */
double scalar(std::vector<double> const &x, std::vector<double> const &y)
{
    assert(x.size() == y.size());          // switch off via compile flag: -DNDEBUG
    std::size_t const N = x.size();
    double sum = 0.0;

    #pragma omp parallel for default(none) shared(x,y,N) reduction(+:sum) schedule(runtime) if(!omp_in_parallel())
    for (std::size_t i = 0; i < N; ++i)
    {
        sum += x[i] * y[i];
    }
    return sum;
}
/** Inner product <x,y> with a hand-written distribution of the index range.
 *
 *  Thread @c tid out of @c threadCount threads handles the indices
 *  [tid*N/threadCount, (tid+1)*N/threadCount); these ranges are disjoint and
 *  cover 0..N-1. The partial sums are combined by the reduction clause.
 *  Every thread prints the team size once. Without OpenMP one "thread" handles
 *  the whole range.
 *
 *  @param[in] x vector
 *  @param[in] y vector
 *  @return    resulting inner product
 */
double scalar_parrallel_env(std::vector<double> const &x, std::vector<double> const &y)
{
    assert(x.size() == y.size());          // switch off via compile flag: -DNDEBUG
    std::size_t const N = x.size();
    double sum = 0.0;

    #pragma omp parallel default(none) shared(x,y,N,cout) reduction(+:sum)
    {
#ifdef _OPENMP
        std::size_t const tid         = static_cast<std::size_t>(omp_get_thread_num());
        std::size_t const threadCount = static_cast<std::size_t>(omp_get_num_threads());
#else
        std::size_t const tid         = 0;
        std::size_t const threadCount = 1;
#endif
        #pragma omp critical
        {
            std::cout << threadCount << std::endl;     // one thread at a time: lines do not interleave
        }

        std::size_t const first = tid * N / threadCount;
        std::size_t const last  = (tid + 1) * N / threadCount;   // == first index of the next thread
        for (std::size_t i = first; i < last; ++i)
        {
            sum += x[i] * y[i];
        }
    }
    return sum;
}
/** Euclidean (l2) norm of a vector, i.e. the square root of the sum of squares.
 *
 *  The sum of squares is accumulated by a work-shared loop with a sum reduction.
 *
 *  @param[in] x vector
 *  @return    resulting Euclidean norm
 */
double norm(std::vector<double> const &x)
{
    std::size_t const N = x.size();
    double sum = 0.0;
    #pragma omp parallel for default(none) shared(x,N) reduction(+:sum)
    for (std::size_t i = 0; i < N; ++i)
    {
        sum += x[i]*x[i];
    }
    return std::sqrt(sum);                 // the sum of squares alone is the squared norm
}
/** Test of the user-defined reduction VecAdd on an STL vector.
 *
 *  Each thread prints its id together with the size of its private vector,
 *  fills the private vector with tid, tid+1, tid+2, ... and the private
 *  vectors are combined by the VecAdd reduction.
 *
 *  @param[in] n size of the global/private vector
 *  @return    resulting global vector
 */
vector<int> reduction_vec(int n)
{
    vector<int> vec(n);
    #pragma omp parallel default(none) shared(cout) reduction(VecAdd:vec)
    {
        #pragma omp barrier
        #pragma omp critical
        {
            std::cout << omp_get_thread_num() << " : " << vec.size() << std::endl;
        }
        #pragma omp barrier
        int const first_value = omp_get_thread_num();
        std::iota(vec.begin(), vec.end(), first_value);
        #pragma omp barrier
    }
    return vec;
}
/** Test of the user-defined reduction VecAppend on an STL vector.
 *
 *  Each thread builds a local vector tid, tid+1, ..., tid+n-1, prints its id
 *  together with the local size and assigns the local vector to its private
 *  copy of the reduction variable.
 *
 *  NOTE(review): the reduction variable itself is created with n zeros; with an
 *                appending combiner the result presumably starts with these
 *                zeros - confirm that this is intended.
 *
 *  @param[in] n size of each thread-local vector
 *  @return    vector produced by the VecAppend reduction
 */
vector<int> reduction_vec_append(int n)
{
    vector<int> vec(n);
    #pragma omp parallel default(none) shared(cout,n) reduction(VecAppend:vec)
    {
        int const me = omp_get_thread_num();

        vector<int> mine(n);
        std::iota(mine.begin(), mine.end(), me);

        #pragma omp critical
        {
            std::cout << me << " : " << mine.size() << std::endl;
        }

        vec = mine;                        // becomes this thread's contribution
    }
    return vec;
}
/** Inner product <x,y> in two steps: component-wise products via std::transform,
 *  followed by a work-shared summation over iterators.
 *
 *  @param[in] x vector
 *  @param[in] y vector
 *  @return    resulting inner product
 */
double scalar_trans(std::vector<double> const &x, std::vector<double> const &y)
{
    assert(x.size() == y.size());          // switch off via compile flag: -DNDEBUG
    std::vector<double> z(x.size());
    //list<double> z(x.size());            // parallel for-loop on iterators not possible (missing 'operator-')

    std::transform(std::cbegin(x), std::cend(x), std::cbegin(y), std::begin(z), std::multiplies<>{});

    double sum = 0.0;
    // '<' instead of '!=': some compilers (CLANG_, ONEAPI_) demand a relational
    // comparison as loop condition; vector iterators are random access, so both
    // conditions stop at the same position.
    #pragma omp parallel for default(none) shared(z) reduction(+:sum)
    for (auto pi = std::cbegin(z); pi < std::cend(z); ++pi)
    {
        sum += *pi;
    }
    return sum;
}

View file

86
sheet5/1/mylib.h Normal file
View file

@ -0,0 +1,86 @@
#pragma once
#include <cassert>
#include <iomanip> // setw()
#include <iostream>
#include <omp.h>
#include <vector>
using namespace std;
/** Inner product
@param[in] x vector
@param[in] y vector
@return resulting Euclidean inner product <x,y>
*/
double scalar(std::vector<double> const &x, std::vector<double> const &y);
double scalar_trans(std::vector<double> const &x, std::vector<double> const &y);
double scalar_parrallel_env(std::vector<double> const &x, std::vector<double> const &y);
/** l2-norm
@param[in] x vector
@return resulting Euclidian norm
*/
double norm(std::vector<double> const &x);
/** Adds the elements of vector @p b component-wise to vector @p a.
 *
 *  Both vectors must have the same size (checked by assert).
 *
 *  @param[in,out] a vector, receives a[k] + b[k]
 *  @param[in]     b vector
 *  @return        reference to the modified vector @p a
 */
template<class T>
std::vector<T> &operator+=(std::vector<T> &a, std::vector<T> const &b)
{
    assert(a.size()==b.size());
    auto rhs = b.cbegin();                 // walks through b in step with a
    for (auto &lhs : a)
    {
        lhs += *rhs;
        ++rhs;
    }
    return a;
}
// Declare the reduction operations in OpenMP for an STL-vector
// omp_out += omp_in requires operator+=(vector<int> &, vector<int> const &) from above
// ------------------------------------------------------------
// https://scc.ustc.edu.cn/zlsc/tc4600/intel/2016.0.109/compiler_c/common/core/GUID-7312910C-D175-4544-99C5-29C12D980744.htm
// https://gist.github.com/eruffaldi/7180bdec4c8c9a11f019dd0ba9a2d68c
// https://stackoverflow.com/questions/29633531/user-defined-reduction-on-vector-of-varying-size
// see also p.74ff in https://www.fz-juelich.de/ias/jsc/EN/AboutUs/Staff/Hagemeier_A/docs-parallel-programming/OpenMP-Slides.pdf
// VecAdd: component-wise sum; every private copy is initialized with a copy of the original vector.
#pragma omp declare reduction(VecAdd : std::vector<int> : omp_out += omp_in) \
initializer (omp_priv=omp_orig)
// VecAppend: concatenation; every private copy is initialized with an empty vector and
// the elements of omp_in are inserted at the end of omp_out.
#pragma omp declare reduction(VecAppend: std::vector<int> : omp_out.insert(omp_out.end(), omp_in.begin(), omp_in.end())) \
initializer (omp_priv=vector<int>())
// Templates are not possible, i.e., the reduction has to be declared for a specific type.
//template <class T>
//#pragma omp declare reduction(VecAdd : std::vector<T> : omp_out += omp_in) initializer (omp_priv(omp_orig))
// MS: template after the #pragma !?
// ------------------------------------------------------------
/** Test for vector reduction.
*
* The thread-private vectors of size @p n are initialized via @f$v_k^{tID}=tID+k@f$.
* Afterwards these vectors are accumulated, i.e.,
* @f$v_k= \sum_{tID=0}^{numThreads-1} v_k^{tID}@f$.
*
* @param[in] n size of global/private vector
* @return resulting global vector.
*/
std::vector<int> reduction_vec(int n);
std::vector<int> reduction_vec_append(int n);
/** Writes all elements of a vector to an output stream.
 *
 *  Every element is printed with a field width of 4 and is followed by one blank.
 *
 *  @param[in,out] s output stream
 *  @param[in]     x vector
 *  @return        modified output stream
 */
template <class T>
std::ostream &operator<<(std::ostream &s, std::vector<T> const &x)
{
    for (auto it = x.begin(); it != x.end(); ++it)
    {
        s << std::setw(4) << *it << " ";
    }
    return s;
}

View file

BIN
sheet5/1/mylib.o Normal file

Binary file not shown.

70
sheet5/1/timing.h Normal file
View file

@ -0,0 +1,70 @@
#pragma once
#include <chrono> // timing
#include <stack>
using Clock = std::chrono::system_clock;   //!< The wall clock timer chosen
//using Clock = std::chrono::high_resolution_clock;
// NOTE(review): system_clock is not guaranteed to be monotonic; std::chrono::steady_clock
//               may be the better choice for a stopwatch - confirm before changing the alias.
using TPoint= std::chrono::time_point<Clock>;

// [Galowicz, C++17 STL Cookbook, p. 29]

inline
std::stack<TPoint> MyStopWatch;            //!< starting times of the running (nested) stopwatches

/** Starts stopwatch timer.
 *  Use as @code tic(); myfunction(...) ; double tsec = toc(); @endcode
 *
 *  The timing is allowed to be nested and the recent time is stored on top of the stack.
 *
 *  @return recent time
 *  @see toc
 */
inline auto tic()
{
    MyStopWatch.push(Clock::now());
    return MyStopWatch.top();
}

/** Returns the elapsed time from stopwatch and removes the most recent start time.
 *
 *  The time from top of the stack is used
 *  if time point @p t_b is not passed as input parameter.
 *  Use as @code tic(); myfunction(...) ; double tsec = toc(); @endcode
 *  or as  @code auto t_b = tic(); myfunction(...) ; double tsec = toc(t_b); @endcode
 *  The last option is to be used in the case of
 *  non-nested but overlapping time measurements.
 *
 *  @param[in] t_b start time of some stop watch
 *  @return elapsed time in seconds.
 *
 *  @warning Calling toc() without argument requires a preceding tic(),
 *           because the default argument reads the top of the stack.
 */
inline double toc(TPoint const &t_b = MyStopWatch.top())
{
    // https://en.cppreference.com/w/cpp/chrono/treat_as_floating_point
    using Unit      = std::chrono::seconds;
    using FpSeconds = std::chrono::duration<double, Unit::period>;

    auto const t_e = Clock::now();
    // Evaluate the difference BEFORE popping: with the default argument t_b refers
    // to the top element of the stack, which no longer exists after pop().
    double const elapsed = FpSeconds(t_e - t_b).count();
    if (!MyStopWatch.empty())              // an explicit t_b may be passed without a matching tic()
    {
        MyStopWatch.pop();
    }
    return elapsed;
}
#include <iostream>
#include <string>
/** Executes function @p f and measures/prints elapsed wall clock time in microseconds
 *
 * Call as
 * @code measure("Time for (b = b + 1)", [&]() {
              thrust::transform(b.begin(), b.end(), b.begin(), increment());
         }); @endcode
 *
 * The output has the form "<label>: <duration> microseconds".
 * The object is an inline variable such that this header may be included
 * by several translation units without multiple definitions.
 *
 * @param[in] label additional string to be printed with the measurement.
 * @param[in] f function to execute.
 * @author Therese Bösmüller, 2025
 *
 */
inline auto const measure = [](const std::string& label, auto&& f) {
    auto const start = std::chrono::high_resolution_clock::now();
    f();
    auto const stop = std::chrono::high_resolution_clock::now();
    auto const duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start).count();
    std::cout << label << ": " << duration << " microseconds" << std::endl;
}; // ';' is needed for a visible documentation of this lambda-function

View file