sheet 5 not tested

2025-12-02 20:28:11 +01:00 · 2025-12-02 20:28:11 +01:00 · 64c7aed176
commit 64c7aed176
parent 9c16d18f8f
169 changed files with 225337 additions and 0 deletions
--- a/sheet1/F/main.GCC_
+++ b/sheet1/F/main.GCC_
--- a/sheet1/F/main.o
+++ b/sheet1/F/main.o
--- a/sheet5/1/.vscode/settings.json
+++ b/sheet5/1/.vscode/settings.json
@ -0,0 +1,59 @@
+{
+    "files.associations": {
+        "iostream": "cpp",
+        "cmath": "cpp",
+        "array": "cpp",
+        "atomic": "cpp",
+        "bit": "cpp",
+        "cctype": "cpp",
+        "charconv": "cpp",
+        "chrono": "cpp",
+        "clocale": "cpp",
+        "compare": "cpp",
+        "concepts": "cpp",
+        "cstdarg": "cpp",
+        "cstddef": "cpp",
+        "cstdint": "cpp",
+        "cstdio": "cpp",
+        "cstdlib": "cpp",
+        "cstring": "cpp",
+        "ctime": "cpp",
+        "cwchar": "cpp",
+        "cwctype": "cpp",
+        "deque": "cpp",
+        "list": "cpp",
+        "string": "cpp",
+        "unordered_map": "cpp",
+        "vector": "cpp",
+        "exception": "cpp",
+        "algorithm": "cpp",
+        "functional": "cpp",
+        "iterator": "cpp",
+        "memory": "cpp",
+        "memory_resource": "cpp",
+        "numeric": "cpp",
+        "optional": "cpp",
+        "random": "cpp",
+        "ratio": "cpp",
+        "string_view": "cpp",
+        "system_error": "cpp",
+        "tuple": "cpp",
+        "type_traits": "cpp",
+        "utility": "cpp",
+        "format": "cpp",
+        "initializer_list": "cpp",
+        "iomanip": "cpp",
+        "iosfwd": "cpp",
+        "istream": "cpp",
+        "limits": "cpp",
+        "new": "cpp",
+        "numbers": "cpp",
+        "ostream": "cpp",
+        "span": "cpp",
+        "sstream": "cpp",
+        "stdexcept": "cpp",
+        "streambuf": "cpp",
+        "typeinfo": "cpp",
+        "variant": "cpp"
+    }
+}
--- a/sheet5/1/Doxyfile
+++ b/sheet5/1/Doxyfile
--- a/sheet5/1/Doxyfile:Zone.Identifier
+++ b/sheet5/1/Doxyfile:Zone.Identifier
--- a/sheet5/1/Makefile
+++ b/sheet5/1/Makefile
@ -0,0 +1,30 @@
+#
+# use GNU-Compiler tools
+COMPILER=GCC_
+# alternatively from the shell
+# export COMPILER=GCC_
+# or, alternatively from the shell
+# make COMPILER=GCC_
+
+# use Intel compilers
+#COMPILER=ICC_
+
+# use PGI compilers
+# COMPILER=PGI_
+
+
+SOURCES = main.cpp mylib.cpp
+OBJECTS = $(SOURCES:.cpp=.o)
+
+PROGRAM	= main.${COMPILER}
+
+# uncomment the next to lines for debugging and detailed performance analysis
+CXXFLAGS += -g
+LINKFLAGS += -g
+# do not use -pg with PGI compilers
+
+ifndef COMPILER
+  COMPILER=GCC_
+endif
+
+include ../${COMPILER}default.mk
--- a/sheet5/1/Makefile:Zone.Identifier
+++ b/sheet5/1/Makefile:Zone.Identifier
--- a/sheet5/1/check_env.h
+++ b/sheet5/1/check_env.h
@ -0,0 +1,99 @@
+#pragma once
+
+#include <iostream>
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+#include <unordered_map>
+
+//#####################################
+// G.Haase
+// See https://sourceforge.net/p/predef/wiki/Compilers/
+//     http://www.cplusplus.com/doc/tutorial/preprocessor/
+//  also:  export OMP_DISPLAY_ENV=VERBOSE
+//#####################################
+/** 	Checks for compilers, its versions, threads etc.
+ * 
+	@param[in] argc	number of command line arguemnts
+	@param[in] argv	command line arguments as array of C-strings
+*/
+template <class T>
+void check_env(T argc, char const *argv[])
+{
+    std::cout << "\n#######################################################################\n";
+    std::cout << "Code    :";
+    for (T k = 0; k < argc; ++k) std::cout << "  " << argv[k];
+    std::cout << std::endl;
+
+    // compiler:      https://sourceforge.net/p/predef/wiki/Compilers/
+    std::cout <<    "Compiler:  ";
+#if defined __INTEL_COMPILER
+#pragma message(" ##########  INTEL  ###############")
+    std::cout << "INTEL " << __INTEL_COMPILER;
+    // Ignore warnings for #pragma acc   unrecognice
+#pragma warning disable 161
+    // Ignore warnings for #pragma omp   unrecognice
+#pragma warning disable 3180
+
+#elif defined __PGI
+#pragma message(" ##########  PGI    ###############")
+    std::cout << "PGI " << __PGIC__ << "." << __PGIC_MINOR__ << "." << __PGIC_PATCHLEVEL__;
+#elif defined  __clang__
+#pragma message(" ##########  CLANG    ###############")
+    std::cout << "CLANG " << __clang_major__ << "." << __clang_minor__ << "."; // << __clang_patchlevel__;
+#elif defined __GNUC__
+#pragma message(" ##########  Gnu    ###############")
+    std::cout << "Gnu " <<  __GNUC__  << "." << __GNUC_MINOR__ << "." << __GNUC_PATCHLEVEL__;
+#else
+#pragma message(" ##########  unknown Compiler   ###############")
+    std::cout << "unknown";
+#endif
+    std::cout << "  C++ standard: " << __cplusplus << std::endl;
+
+    // Parallel environments
+    std::cout <<    "Parallel:  ";
+#if defined MPI_VERSION
+#pragma message(" ##########  MPI    ###############")
+#ifdef OPEN_MPI
+    std::cout << "OpenMPI ";
+#else
+    std::cout << "MPI ";
+#endif
+    std::cout << MPI_VERSION << "." << MPI_SUBVERSION << "   ";
+#endif
+
+#ifdef _OPENMP
+//https://www.openmp.org/specifications/
+//https://stackoverflow.com/questions/1304363/how-to-check-the-version-of-openmp-on-linux
+    std::unordered_map<unsigned, std::string> const map{
+        {200505, "2.5"}, {200805, "3.0"}, {201107, "3.1"}, {201307, "4.0"}, {201511, "4.5"}, {201611, "5.0"}, {201811, "5.0"}};
+#pragma message(" ##########  OPENMP    ###############")
+    //std::cout << _OPENMP;
+    std::cout << "OpenMP ";
+    try {
+        std::cout << map.at(_OPENMP);
+    }
+    catch (...) {
+        std::cout << _OPENMP;
+    }
+    #pragma omp parallel
+    {
+        #pragma omp master
+        {
+            const int nn = omp_get_num_threads();          // OpenMP
+            std::cout << " ---> " <<  nn << " Threads   ";
+        }
+        #pragma omp barrier
+    }
+
+#endif
+#ifdef _OPENACC
+#pragma message(" ##########  OPENACC    ###############")
+    std::cout << "OpenACC   ";
+#endif
+    std::cout << std::endl;
+    std::cout << "Date    :  " << __DATE__ << "  " << __TIME__;
+    std::cout << "\n#######################################################################\n";
+}
+// HG
+
--- a/sheet5/1/check_env.h:Zone.Identifier
+++ b/sheet5/1/check_env.h:Zone.Identifier
--- a/sheet5/1/main.GCC_
+++ b/sheet5/1/main.GCC_
--- a/sheet5/1/main.cpp
+++ b/sheet5/1/main.cpp
@ -0,0 +1,203 @@
+#include "check_env.h"
+#include "mylib.h"
+#include <cstdlib>          // atoi()
+#include <cstring>          // strncmp()
+#include <ctime>
+#include <iostream>
+#include <omp.h>            // OpenMP
+#include <sstream>
+#include <string>
+#include <cmath>
+using namespace std;
+
+void benchmark(vector<double> &x, vector<double> &y, unsigned int N, unsigned int NLOOPS)
+{
+    double sk = 0.0;
+    for (int i = 0; i < NLOOPS; ++i)
+    {
+        sk += scalar(x, y);
+        // or scalar_trans(x,y) / norm(x) if you want
+    }
+}
+
+int main(int argc, char const *argv[])
+{
+    //int const NLOOPS = 5;        // chose a value such that the benchmark runs at least 10 sec.
+    unsigned int N = 5000001;
+    int const NLOOPS = 5;        // chose a value such that the benchmark runs at least 10 sec.
+    //unsigned int N = 5000001;    
+//##########################################################################
+//   Read Parameter from command line  (C++ style)
+    cout << "Checking command line parameters for: -n <number> " << endl;
+    for (int i = 1; i < argc; i++)
+    {
+        cout << " arg[" << i << "] = " << argv[i] << endl;
+        string ss(argv[i]);
+        if ("-n"==ss && i + 1 < argc) // found "-n" followed by another parameter
+        {
+            N = static_cast<unsigned int>(atoi(argv[i + 1]));
+        }
+        else
+        {
+            cout << "Corect call: " << argv[0] << " -n  <number>\n";
+        }
+    }
+
+    cout << "\nN = " << N << endl;
+    
+    check_env(argc, argv);
+//########################################################################
+    int nthreads;                                  // OpenMP
+    #pragma omp parallel default(none) shared(cout,nthreads)
+    {
+        int const th_id  = omp_get_thread_num();   // OpenMP
+        int const nthrds = omp_get_num_threads();  // OpenMP
+        stringstream ss;
+        ss << "C++: Hello World from thread " << th_id << " / " << nthrds << endl;
+        #pragma omp critical
+        {
+            cout << ss.str();                      // output to a shared ressource
+        }
+        #pragma omp master
+        nthreads = nthrds;                         // transfer nn to to master thread
+    }
+    cout << "   " << nthreads << "   threads have been started." << endl;
+
+//##########################################################################
+//  Memory allocation
+    cout << "Memory allocation\n";
+
+    vector<double> x(N), y(N);
+
+    cout.precision(2);
+    cout << 2.0 * N *sizeof(x[0]) / 1024 / 1024 / 1024 << " GByte Memory allocated\n";
+    cout.precision(6);
+
+//##########################################################################
+//  Data initialization
+//  Special:  x_i = i+1;  y_i = 1/x_i  ==> <x,y> == N
+    for (unsigned int i = 0; i < N; ++i)
+    {
+        x[i] = i + 1;
+        y[i] = 1.0 / x[i];
+    }
+
+//##########################################################################
+    cout << "\nStart Benchmarking\n";
+
+// Do calculation
+    double tstart = omp_get_wtime();                  // OpenMP
+
+    double sk(0.0);
+    for (int i = 0; i < NLOOPS; ++i)
+    {
+        sk = scalar(x, y);
+        sk = scalar_trans(x, y);
+        //sk = norm(x);
+    }
+
+    double t1 = omp_get_wtime() - tstart;             // OpenMP
+    t1 /= NLOOPS;           // divide by number of function calls
+
+//##########################################################################
+// Check the correct result
+    cout << "\n <x,y> = " << sk << endl;
+    if (static_cast<unsigned int>(sk) != N)
+    {
+        cout << "  !!   W R O N G  result   !!\n";
+    }
+    cout << endl;
+
+//##########################################################################
+// Timings  and Performance
+    cout << endl;
+    cout.precision(2);
+    cout << "Timing in sec. : " << t1 << endl;
+    cout << "GFLOPS         : " << 2.0 * N / t1 / 1024 / 1024 / 1024 << endl;
+    cout << "GiByte/s        : " << 2.0 * N / t1 / 1024 / 1024 / 1024 * sizeof(x[0]) << endl;
+
+//#########################################################################
+
+    cout << "\n  Try the reduction with an STL-vektor \n";
+    
+    auto vr = reduction_vec(100);
+    cout << "done\n";
+    cout << vr << endl;
+
+    N=2;
+    //Data (re-)inizialiion
+    for (unsigned int i = 0; i < N; ++i)
+    {
+        x[i] = i + 1;
+        y[i] = 1.0 / x[i];
+    }
+   
+
+
+   
+
+    int proc_count = omp_get_num_procs();
+    cout << "Number of available processors: " << proc_count << endl;
+
+    for(int j=1; j<=proc_count; j++)
+    {
+        omp_set_num_threads(j);
+        cout << "used threads: "<< j << endl;
+
+
+         omp_set_schedule(omp_sched_static, 0);
+        tstart = omp_get_wtime();
+        benchmark(x, y, N, NLOOPS);
+        t1 = omp_get_wtime()/NLOOPS;
+        cout << "static (chunk 0) "<< (t1-tstart) << endl;
+        for(int i=0; i<= 5; i++)
+        {
+            
+            int chunk = 1 << i;
+            cout << "chunk size: "<< chunk << endl;
+            
+            // STATIC
+            omp_set_schedule(omp_sched_static, chunk);
+            tstart = omp_get_wtime();
+            benchmark(x, y, N, NLOOPS);
+            t1 = omp_get_wtime()/NLOOPS;
+            std::cout << "static:  " << (t1 - tstart) << " s\n";
+
+            // DYNAMIC
+            omp_set_schedule(omp_sched_dynamic, chunk);
+            tstart = omp_get_wtime();
+            benchmark(x, y, N, NLOOPS);
+            t1 = omp_get_wtime()/NLOOPS;
+            std::cout << "dynamic: " << (t1 - tstart) << " s\n";
+
+            // GUIDED
+            omp_set_schedule(omp_sched_guided, chunk);
+            tstart = omp_get_wtime();
+            benchmark(x, y, N, NLOOPS);
+            t1 = omp_get_wtime()/NLOOPS;
+            std::cout << "guided:  " << (t1 - tstart) << " s\n";
+
+            // AUTO
+            omp_set_schedule(omp_sched_auto, chunk);
+            tstart = omp_get_wtime();
+            benchmark(x, y, N, NLOOPS);
+            t1 = omp_get_wtime()/NLOOPS;
+            std::cout << "auto:    " << (t1 - tstart) << " s\n";
+            cout << endl;
+        }
+        cout << endl;
+    }
+
+    
+    cout << scalar_parrallel_env(x,y) << endl;
+
+     
+    
+    vector<int> vec = reduction_vec_append(N);
+    for(int i=0; i< N; i++)
+    {
+        cout << vec[i] << ", ";
+    }
+    return 0;
+}  // memory for x and y will be deallocated their destructors
+
--- a/sheet5/1/main.cpp:Zone.Identifier
+++ b/sheet5/1/main.cpp:Zone.Identifier
--- a/sheet5/1/main.o
+++ b/sheet5/1/main.o
--- a/sheet5/1/mylib.cpp
+++ b/sheet5/1/mylib.cpp
@ -0,0 +1,136 @@
+#include "mylib.h"
+#include <cassert>       // assert()
+#include <cmath>
+#include <iostream>
+#include <functional>    // multiplies<>{}
+#include <list>
+#include <numeric>       // iota()
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+#include <vector>
+using namespace std;
+
+double scalar(vector<double> const &x, vector<double> const &y)
+{
+    assert(x.size() == y.size()); // switch off via compile flag: -DNDEBUG
+    size_t const N = x.size();
+    double sum = 0.0;
+    if (!omp_in_parallel())
+    {
+        // Safe to start a parallel region
+        #pragma omp parallel for default(none) shared(x,y,N) reduction(+:sum) schedule(runtime)
+        for (size_t i = 0; i < N; ++i)
+            sum += x[i] * y[i];
+    }
+    else
+    {
+        // Already inside parallel region: do it sequentially to avoid nested parallelism
+        for (size_t i = 0; i < N; ++i)
+            sum += x[i] * y[i];
+    }
+    return sum;
+}
+
+
+double scalar_parrallel_env(vector<double> const &x, vector<double> const &y)
+{
+    assert(x.size() == y.size()); // switch off via compile flag: -DNDEBUG
+    size_t const N = x.size();
+    double sum = 0.0;
+    
+    
+    // Safe to start a parallel region
+    #pragma omp parallel  default(none) shared(x,y,N,cout) reduction(+:sum)
+    {
+        int tid = omp_get_thread_num();
+        int threadCount = omp_get_num_threads();
+        cout << threadCount << endl;
+
+        
+
+
+        for (size_t i = tid*N/threadCount; i < tid*(N + 1)/threadCount; ++i)
+        {
+            sum += x[i] * y[i];
+        }
+    }
+    
+    return sum;
+}
+
+double norm(vector<double> const &x)
+{
+    size_t const N = x.size();
+    double sum = 0.0;
+#pragma omp parallel for default(none) shared(x,N) reduction(+:sum)
+    for (size_t i = 0; i < N; ++i)
+    {
+        sum += x[i]*x[i];
+    }
+    return sum;
+}
+
+
+
+vector<int> reduction_vec(int n)
+{ 
+    vector<int> vec(n);
+#pragma omp parallel default(none) shared(cout) reduction(VecAdd:vec)
+    {
+        #pragma omp barrier
+        #pragma omp critical
+        cout << omp_get_thread_num() << " : " << vec.size() << endl;
+        #pragma omp barrier
+        iota( vec.begin(),vec.end(), omp_get_thread_num() );
+        #pragma omp barrier
+
+    }
+    return vec;
+}
+
+vector<int> reduction_vec_append(int n)
+{
+    vector<int> vec(n);
+    #pragma omp parallel default(none) shared(cout,n) reduction(VecAppend:vec)
+    {
+        int tid = omp_get_thread_num();
+
+        vector<int> local(n);
+        iota(local.begin(), local.end(), tid);
+
+        #pragma omp critical
+        cout << tid << " : " << local.size() << endl;
+
+        vec = local;  
+    }
+
+    return vec;
+}
+
+
+
+double scalar_trans(vector<double> const &x, vector<double> const &y)
+{
+    assert(x.size() == y.size()); // switch off via compile flag: -DNDEBUG
+    vector<double> z(x.size());
+    //list<double> z(x.size());  // parallel for-loop on iterators not possible (missing 'operator-')
+                                 // c++-20 CLANG_, ONEAPI_:condition of OpenMP for loop must be a relational comparison 
+    
+    transform(cbegin(x),cend(x),cbegin(y),begin(z),std::multiplies<>{});
+    
+    double sum = 0.0;
+#pragma omp parallel for default(none) shared(z) reduction(+:sum)
+    for (auto pi = cbegin(z); pi!=cend(z); ++pi)
+    {
+        sum += *pi;
+    }
+    //for (auto val: z)
+    //{
+        //sum += val;
+    //}    
+    return sum;
+}
+
+
+
--- a/sheet5/1/mylib.cpp:Zone.Identifier
+++ b/sheet5/1/mylib.cpp:Zone.Identifier
--- a/sheet5/1/mylib.h
+++ b/sheet5/1/mylib.h
@ -0,0 +1,86 @@
+#pragma once
+#include <cassert>
+#include <iomanip>             // setw()
+#include <iostream>
+#include <omp.h>
+#include <vector>
+using namespace std;
+
+/** 	Inner product
+	@param[in] x	vector
+	@param[in] y	vector
+	@return 	    resulting Euclidian inner product <x,y>
+*/
+double scalar(std::vector<double> const &x, std::vector<double> const &y);
+double scalar_trans(std::vector<double> const &x, std::vector<double> const &y);
+double scalar_parrallel_env(std::vector<double> const &x, std::vector<double> const &y);
+
+
+/** 	l2-norm
+	@param[in] x	vector
+	@return 	    resulting Euclidian norm
+*/
+double norm(std::vector<double> const &x);
+
+/** 	 Vector @p b adds its elements to vector @p a .
+	@param[in] a	vector
+	@param[in] b	vector
+	@return 	     a+=b componentwise
+*/
+template<class T>
+std::vector<T> &operator+=(std::vector<T> &a, std::vector<T> const &b)
+{
+    assert(a.size()==b.size());
+    for (size_t k = 0; k < a.size(); ++k) {
+        a[k] += b[k];
+    }
+    return a;
+}
+
+// Declare the reduction operation in OpenMP for an STL-vector
+//   omp_out += omp_in  requires operator+=(vector<int> &, vector<int> const &) from above
+// ------------------------------------------------------------
+// https://scc.ustc.edu.cn/zlsc/tc4600/intel/2016.0.109/compiler_c/common/core/GUID-7312910C-D175-4544-99C5-29C12D980744.htm
+// https://gist.github.com/eruffaldi/7180bdec4c8c9a11f019dd0ba9a2d68c
+// https://stackoverflow.com/questions/29633531/user-defined-reduction-on-vector-of-varying-size
+//  see also p.74ff in  https://www.fz-juelich.de/ias/jsc/EN/AboutUs/Staff/Hagemeier_A/docs-parallel-programming/OpenMP-Slides.pdf
+#pragma omp declare reduction(VecAdd : std::vector<int>  : omp_out += omp_in) \
+ initializer (omp_priv=omp_orig)
+
+
+#pragma omp declare reduction(VecAppend: std::vector<int> : omp_out.insert(omp_out.end(), omp_in.begin(), omp_in.end())) \
+ initializer (omp_priv=vector<int>())
+
+//   Templates are  n o t  possible, i.e. the reduction has to be declared fore a specified type.
+//template <class T>
+//#pragma omp declare reduction(VecAdd : std::vector<T>  : omp_out += omp_in) initializer (omp_priv(omp_orig))
+// MS: template nach #pragma  !?
+
+// ------------------------------------------------------------
+
+
+/** 	Test for vector reduction.
+ * 
+ * The thread-private vectors of size @p n are initialized via @f$v_k^{tID}=tID+k@f$.
+ * Afterwards these vectors are accumulated, i.e., 
+ * @f$v_k= \sum_{tID=0}^{numThreads} v_k^{tID}@f$.
+ * 
+ * 	@param[in] n  size of global/private vector
+ * 	@return  resulting global vector.
+*/
+std::vector<int> reduction_vec(int n);
+std::vector<int> reduction_vec_append(int n);
+
+
+/** 	Output of a vector.
+	@param[in,out] s	output stream
+	@param[in]     x	vector
+	@return 	   modified output stream
+*/
+template <class T>
+std::ostream &operator<<(std::ostream &s, std::vector<T> const &x)
+{
+    for (auto const &v : x)  s << std::setw(4) << v << "  ";
+    return s;
+}
+
--- a/sheet5/1/mylib.h:Zone.Identifier
+++ b/sheet5/1/mylib.h:Zone.Identifier
--- a/sheet5/1/mylib.o
+++ b/sheet5/1/mylib.o
--- a/sheet5/1/timing.h
+++ b/sheet5/1/timing.h
@ -0,0 +1,70 @@
+#pragma once
+#include <chrono>                  // timing
+#include <stack>
+
+using Clock = std::chrono::system_clock;   //!< The wall clock timer chosen
+//using Clock = std::chrono::high_resolution_clock;
+using TPoint= std::chrono::time_point<Clock>;
+
+// [Galowicz, C++17 STL Cookbook, p. 29]
+inline 
+std::stack<TPoint> MyStopWatch; //!< starting time of stopwatch
+
+/** Starts stopwatch timer.
+ *  Use as @code tic(); myfunction(...) ; double tsec = toc();  @endcode
+ * 
+ *  The timining is allowed to be nested and the recent time is stored on top of the stack.
+ * 
+ *  @return recent time
+ *  @see toc
+ */
+inline auto tic()
+{
+    MyStopWatch.push(Clock::now());
+    return MyStopWatch.top();
+}
+ 
+/** Returns the elapsed time from stopwatch. 
+ * 
+ * The time from top of the stack is used
+ * if time point @p t_b is not passed as input parameter.
+ * Use as @code tic(); myfunction(...) ; double tsec = toc();  @endcode
+ * or as @code auto t_b = tic(); myfunction(...) ; double tsec = toc(t_b);  @endcode
+ * The last option is to be used in the case of 
+ * non-nested but overlapping time measurements.
+ * 
+ * @param[in]  t_b start time of some stop watch
+ * @return elapsed time in seconds.
+ *
+*/ 
+inline double toc(TPoint const &t_b = MyStopWatch.top())
+{
+    // https://en.cppreference.com/w/cpp/chrono/treat_as_floating_point
+    using Unit      = std::chrono::seconds;
+    using FpSeconds = std::chrono::duration<double, Unit::period>;        
+    auto t_e = Clock::now();
+    MyStopWatch.pop();
+    return FpSeconds(t_e-t_b).count();
+}
+
+#include <iostream>
+#include <string>
+/** Executes function @p f and measures/prints elapsed wall clock time in seconds
+ * 
+ * Call as
+ * @code measure("Time for (b = b + 1)", [&]() {
+        thrust::transform(b.begin(), b.end(), b.begin(), increment());
+    }); @endcode
+ * 
+ * @param[in]  label  additional string to be printed with the measurement.
+ * @param[in]  f      function to execute.
+ * @author Therese Bösmüller, 2025
+ *
+*/ 
+auto measure = [](const std::string& label, auto&& f) {
+        auto start    = std::chrono::high_resolution_clock::now();
+        f();
+        auto stop     = std::chrono::high_resolution_clock::now();
+        auto duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start).count();
+        std::cout << label << ": " << duration << " microseconds" << std::endl;
+};          // ';' is needed for a visible documentation of this lambda-function
--- a/sheet5/1/timing.h:Zone.Identifier
+++ b/sheet5/1/timing.h:Zone.Identifier
--- a/sheet5/2/.vscode/c_cpp_properties.json
+++ b/sheet5/2/.vscode/c_cpp_properties.json
@ -0,0 +1,16 @@
+{
+    "configurations": [
+        {
+            "name": "Linux",
+            "includePath": [
+                "${workspaceFolder}/**"
+            ],
+            "defines": [],
+            "compilerPath": "/usr/bin/gcc",
+            "cStandard": "c17",
+            "cppStandard": "gnu++17",
+            "intelliSenseMode": "linux-gcc-x64"
+        }
+    ],
+    "version": 4
+}
--- a/sheet5/2/.vscode/settings.json
+++ b/sheet5/2/.vscode/settings.json
@ -0,0 +1,59 @@
+{
+    "files.associations": {
+        "iostream": "cpp",
+        "cmath": "cpp",
+        "array": "cpp",
+        "atomic": "cpp",
+        "bit": "cpp",
+        "cctype": "cpp",
+        "charconv": "cpp",
+        "chrono": "cpp",
+        "clocale": "cpp",
+        "compare": "cpp",
+        "concepts": "cpp",
+        "cstdarg": "cpp",
+        "cstddef": "cpp",
+        "cstdint": "cpp",
+        "cstdio": "cpp",
+        "cstdlib": "cpp",
+        "cstring": "cpp",
+        "ctime": "cpp",
+        "cwchar": "cpp",
+        "cwctype": "cpp",
+        "deque": "cpp",
+        "list": "cpp",
+        "string": "cpp",
+        "unordered_map": "cpp",
+        "vector": "cpp",
+        "exception": "cpp",
+        "algorithm": "cpp",
+        "functional": "cpp",
+        "iterator": "cpp",
+        "memory": "cpp",
+        "memory_resource": "cpp",
+        "numeric": "cpp",
+        "optional": "cpp",
+        "random": "cpp",
+        "ratio": "cpp",
+        "string_view": "cpp",
+        "system_error": "cpp",
+        "tuple": "cpp",
+        "type_traits": "cpp",
+        "utility": "cpp",
+        "format": "cpp",
+        "initializer_list": "cpp",
+        "iomanip": "cpp",
+        "iosfwd": "cpp",
+        "istream": "cpp",
+        "limits": "cpp",
+        "new": "cpp",
+        "numbers": "cpp",
+        "ostream": "cpp",
+        "span": "cpp",
+        "sstream": "cpp",
+        "stdexcept": "cpp",
+        "streambuf": "cpp",
+        "typeinfo": "cpp",
+        "variant": "cpp"
+    }
+}
--- a/sheet5/2/.vscode/settings.json:Zone.Identifier
+++ b/sheet5/2/.vscode/settings.json:Zone.Identifier
--- a/sheet5/2/Makefile
+++ b/sheet5/2/Makefile
@ -0,0 +1,39 @@
+#
+# use GNU-Compiler tools
+COMPILER=GCC_
+# alternatively from the shell
+# export COMPILER=GCC_
+# or, alternatively from the shell
+# make COMPILER=GCC_
+
+# use Intel compilers
+#COMPILER=ICC_
+
+# use PGI compilers
+# COMPILER=PGI_
+
+# use CLANG compilers
+# COMPILER=CLANG_
+
+
+SOURCES = main.cpp file_io.cpp means.cpp
+OBJECTS = $(SOURCES:.cpp=.o)
+
+PROGRAM	= main.${COMPILER}
+
+# uncomment the next to lines for debugging and detailed performance analysis
+CXXFLAGS += -g
+LINKFLAGS += -g
+# do not use -pg with PGI compilers
+
+ifndef COMPILER
+  COMPILER=GCC_
+endif
+
+include ../${COMPILER}default.mk
+
+
+task:
+	@pdflatex task
+	@pdflatex task
+
--- a/sheet5/2/Makefile:Zone.Identifier
+++ b/sheet5/2/Makefile:Zone.Identifier
--- a/sheet5/2/data_1.txt
+++ b/sheet5/2/data_1.txt
@ -0,0 +1,500 @@
+141
+261
+87
+430
+258
+298
+425
+120
+496
+707
+244
+786
+75
+394
+4
+221
+2
+190
+143
+269
+175
+139
+599
+902
+940
+222
+483
+377
+524
+265
+69
+437
+174
+27
+955
+431
+962
+763
+8
+681
+706
+646
+553
+219
+773
+229
+371
+891
+857
+403
+319
+609
+911
+910
+592
+333
+854
+443
+905
+34
+533
+717
+180
+337
+188
+322
+404
+549
+49
+553
+275
+242
+244
+155
+957
+936
+819
+729
+176
+361
+189
+2
+317
+700
+626
+544
+440
+288
+502
+762
+763
+577
+748
+646
+124
+505
+348
+93
+148
+199
+673
+432
+695
+257
+10
+533
+280
+947
+907
+393
+25
+672
+838
+972
+57
+451
+583
+687
+720
+651
+727
+374
+582
+117
+58
+980
+285
+595
+963
+186
+194
+342
+933
+391
+274
+152
+398
+375
+132
+436
+92
+615
+11
+574
+790
+236
+449
+570
+62
+497
+643
+222
+838
+972
+847
+506
+279
+747
+237
+958
+621
+601
+173
+91
+256
+859
+912
+700
+726
+230
+577
+811
+404
+989
+90
+321
+512
+61
+726
+557
+530
+830
+859
+790
+318
+453
+753
+110
+110
+270
+525
+973
+711
+312
+292
+851
+912
+640
+256
+89
+839
+585
+949
+62
+585
+286
+828
+191
+443
+394
+827
+677
+208
+319
+134
+672
+571
+170
+148
+477
+909
+553
+33
+54
+806
+452
+383
+790
+365
+533
+712
+872
+329
+651
+975
+76
+588
+414
+310
+264
+759
+996
+187
+782
+196
+993
+803
+425
+729
+499
+809
+357
+74
+591
+911
+194
+433
+750
+40
+947
+764
+559
+184
+498
+518
+995
+855
+963
+679
+404
+935
+480
+232
+397
+706
+559
+757
+996
+963
+536
+964
+116
+52
+305
+581
+531
+902
+541
+432
+543
+713
+17
+801
+143
+479
+257
+370
+662
+170
+279
+199
+196
+327
+881
+472
+404
+180
+969
+408
+845
+616
+377
+878
+785
+465
+814
+899
+430
+335
+597
+902
+703
+378
+735
+955
+543
+541
+312
+72
+182
+93
+464
+10
+916
+643
+2
+31
+209
+455
+128
+9
+728
+355
+781
+437
+437
+50
+50
+92
+595
+242
+842
+858
+964
+489
+221
+227
+537
+763
+348
+462
+640
+918
+162
+716
+578
+434
+885
+394
+179
+634
+625
+328
+803
+1000
+981
+128
+233
+24
+608
+111
+408
+885
+549
+370
+209
+441
+957
+125
+471
+857
+44
+692
+979
+284
+134
+686
+910
+611
+900
+194
+755
+347
+419
+156
+820
+625
+739
+806
+68
+951
+498
+756
+743
+832
+157
+458
+619
+933
+836
+896
+583
+583
+855
+35
+886
+408
+37
+747
+155
+144
+606
+255
+325
+402
+407
+387
+610
+167
+189
+95
+324
+770
+235
+741
+693
+825
+828
+294
+310
+524
+326
+832
+811
+557
+263
+681
+234
+457
+385
+539
+992
+756
+981
+235
+529
+52
+757
+602
+858
+989
+930
+410
+1
+541
+208
+220
+326
+96
+748
+749
+544
+339
+833
+553
+958
+893
+357
+547
+347
+623
+797
+746
+126
+823
+26
+415
+732
+782
+368
--- a/sheet5/2/data_1.txt:Zone.Identifier
+++ b/sheet5/2/data_1.txt:Zone.Identifier
--- a/sheet5/2/file_io.cbp
+++ b/sheet5/2/file_io.cbp
@ -0,0 +1,60 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
+<CodeBlocks_project_file>
+	<FileVersion major="1" minor="6" />
+	<Project>
+		<Option title="file_io" />
+		<Option pch_mode="2" />
+		<Option compiler="gcc" />
+		<Build>
+			<Target title="Debug">
+				<Option output="bin/Debug/file_io" prefix_auto="1" extension_auto="1" />
+				<Option object_output="obj/Debug/" />
+				<Option type="1" />
+				<Option compiler="gcc" />
+				<Compiler>
+					<Add option="-g" />
+				</Compiler>
+			</Target>
+			<Target title="Release">
+				<Option output="bin/Release/file_io" prefix_auto="1" extension_auto="1" />
+				<Option object_output="obj/Release/" />
+				<Option type="1" />
+				<Option compiler="gcc" />
+				<Compiler>
+					<Add option="-O2" />
+				</Compiler>
+				<Linker>
+					<Add option="-s" />
+				</Linker>
+			</Target>
+		</Build>
+		<Compiler>
+			<Add option="-Wshadow" />
+			<Add option="-Winit-self" />
+			<Add option="-Wunreachable-code" />
+			<Add option="-pedantic" />
+			<Add option="-std=c++11" />
+			<Add option="-Wextra" />
+			<Add option="-Wall" />
+			<Add option="-fexceptions" />
+		</Compiler>
+		<Unit filename="file_io.cpp" />
+		<Unit filename="file_io.h" />
+		<Unit filename="main.cpp" />
+		<Extensions>
+			<envvars />
+			<code_completion />
+			<lib_finder disable_auto="1" />
+			<debugger />
+			<DoxyBlocks>
+				<comment_style block="0" line="0" />
+				<doxyfile_project />
+				<doxyfile_build extract_all="1" />
+				<doxyfile_warnings />
+				<doxyfile_output />
+				<doxyfile_dot />
+				<general use_at_in_tags="1" />
+			</DoxyBlocks>
+		</Extensions>
+	</Project>
+</CodeBlocks_project_file>
--- a/sheet5/2/file_io.cbp:Zone.Identifier
+++ b/sheet5/2/file_io.cbp:Zone.Identifier
--- a/sheet5/2/file_io.cpp
+++ b/sheet5/2/file_io.cpp
@ -0,0 +1,65 @@
+#include "file_io.h"
+#include <cassert>
+#include <fstream>
+#include <iostream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+using namespace std;
+
+// [Str10, p.364]
+void fill_vector(istream& istr, vector<short>& v)
+{
+    double d=0;
+    while ( istr >> d) v.push_back(d); // Einlesen
+    if (!istr.eof())
+    { // Fehlerbehandlung
+        cout << " Error handling \n";
+        if ( istr.bad() )  throw runtime_error("Schwerer Fehler in istr");
+        if ( istr.fail() )   // Versuch des Aufraeumens
+        {
+            cout << " Failed in reading all data.\n";
+            istr.clear();
+        }
+    }
+    v.shrink_to_fit();                 // C++11
+    return;
+}
+
+
+void read_vector_from_file(const string& file_name, vector<short>& v)
+{
+    ifstream fin(file_name);           // Oeffne das File im ASCII-Modus
+    if( fin.is_open() )                // File gefunden:
+    {
+       v.clear();                      // Vektor leeren
+       fill_vector(fin, v);
+    }
+    else                               // File nicht gefunden:
+    {
+        cout << "\nFile " << file_name << " has not been found.\n\n" ;
+        assert( fin.is_open() && "File not found." );       // exeption handling for the poor programmer
+    }
+
+ return;
+}
+
+void write_vector_to_file(const string& file_name, const vector<double>& v)
+{
+    ofstream fout(file_name);          // Oeffne das File im ASCII-Modus
+    if( fout.is_open() )
+    {
+       for (unsigned int k=0; k<v.size(); ++k)
+       {
+          fout << v.at(k) << endl;
+       }
+    }
+    else
+    {
+        cout << "\nFile " << file_name << " has not been opened.\n\n" ;
+        assert( fout.is_open() && "File not opened."  );         // exeption handling for the poor programmer
+    }
+
+ return;
+}
+
--- a/sheet5/2/file_io.cpp:Zone.Identifier
+++ b/sheet5/2/file_io.cpp:Zone.Identifier
--- a/sheet5/2/file_io.h
+++ b/sheet5/2/file_io.h
@ -0,0 +1,40 @@
+#ifndef FILE_IO_H_INCLUDED
+#define FILE_IO_H_INCLUDED
+
+#include <string>
+#include <vector>
+//using namespace std;
+
+
+/**
+  This function opens the ASCII-file named @p file_name and reads the
+  double data into the C++ vector @p v.
+  If the file  @p file_name does not exist then the code stops with an appropriate message.
+  @param[in]    file_name   name of the ASCII-file
+  @param[out]   v           C++ vector with double values
+*/
+
+void read_vector_from_file(const std::string& file_name, std::vector<short>& v);
+
+
+/**
+  This function opens the ASCII-file named @p file_name and rewrites its with the
+  double data from the C++ vector @p v.
+  If there are problems in opening/generating file  @p file_name
+  then the code stops with an appropriate message.
+  @param[in]    file_name   name of the ASCII-file
+  @param[in]    v           C++ vector with double values
+*/
+
+void write_vector_to_file(const std::string& file_name, const std::vector<double>& v);
+
+/**
+  Fills the double-vector @p v with data from an input stream @p istr until this input stream
+  ends regularily. The vector is cleared and its memory is automatically allocated.
+  @param[in]    istr        input stream
+  @param[out]    v           C++ vector with double values
+  @warning     An exception is thrown in case of wrong data format or corrupted data.
+*/
+void fill_vector(std::istream& istr, std::vector<double>& v);
+
+#endif // FILE_IO_H_INCLUDED
--- a/sheet5/2/file_io.h:Zone.Identifier
+++ b/sheet5/2/file_io.h:Zone.Identifier
--- a/sheet5/2/file_io.o
+++ b/sheet5/2/file_io.o
--- a/sheet5/2/main.GCC_
+++ b/sheet5/2/main.GCC_
--- a/sheet5/2/main.cpp
+++ b/sheet5/2/main.cpp
@ -0,0 +1,129 @@
+#include "file_io.h"
+#include "means.h"
+#include <iostream>
+#include <string>
+#include <vector>
+#include <algorithm>
+#include <cmath>
+#include <execution>
+#include <omp.h>
+using namespace std;
+
+int main()
+{
+    cout << "File einlesen." << endl;
+    
+    const string name("data_1.txt");              // name of input file
+    const string name2("out_1.txt");               // name of output file
+    vector<short> a;    //-2^15 to 2^15-1 fits the values from the file 
+    double  min, max, ar, ge, ha, std;
+
+
+    read_vector_from_file(name, a);
+    const unsigned size = a.size();
+double t_omp_start = omp_get_wtime();
+    min = a[0];
+    max = a[0];
+
+#pragma omp parallel for reduction(min:min) reduction(max:max)
+    for (int i = 0; i < static_cast<int>(size); ++i) {
+        if (a[i] < min)
+        {
+            min = a[i];
+        } 
+        if (a[i] > max) {
+            max = a[i];
+        }
+        
+    }
+
+   
+
+    means_vector(a, ar, ge, ha);
+
+    std = 0.0;
+    #pragma omp parallel for reduction(+:std)
+    for(unsigned int i = 0; i < size; i++)
+    {
+        std += pow(a.at(i)-ar,2);
+    }
+    std = sqrt(std/size);
+
+     double t_omp_end = omp_get_wtime();
+    double time_omp = t_omp_end - t_omp_start;
+
+    cout << "min: " << min << ", max: " << max << endl;
+    cout << "Arithmetic mean: " << ar
+         << ", geometric mean: " << ge
+         << ", harmonic mean: " << ha  << endl;
+    cout << "std: " << std << endl;
+
+    vector<double> results = {min, max, ar, ge, ha, std};
+    write_vector_to_file(name2, results);
+
+    
+
+    //C++17 execution policies
+
+    double min_ep = 0.0, max_ep = 0.0, ar_ep = 0.0, ge_ep = 0.0, ha_ep = 0.0, std_ep = 0.0;
+
+    double t_ep_start = omp_get_wtime();
+
+    
+    auto p = minmax_element(execution::par, a.begin(), a.end());
+    min_ep = *p.first;
+    max_ep = *p.second;
+
+    double sum_ep = reduce(
+        execution::par,
+        a.begin(), a.end(),
+        0.0
+    );
+    ar_ep = sum_ep / size;
+
+    ge_ep = transform_reduce(
+        execution::par,
+        a.begin(), a.end(),
+        1.0,
+        multiplies<>(),
+        [size](short x){
+            return pow((double)x, 1.0 / size);
+        }
+    );
+
+    double sum_inv_ep = transform_reduce(
+        execution::par,
+        a.begin(), a.end(),
+        0.0,
+        plus<>(),
+        [](short x){ return 1.0 / (double)x; }
+    );
+    ha_ep = size / sum_inv_ep;
+
+    double sum_sq_ep = transform_reduce(
+        execution::par,
+        a.begin(), a.end(),
+        0.0,
+        plus<>(),
+        [ar_ep](short x){
+            double d = x - ar_ep;
+            return d * d;
+        }
+    );
+    std_ep = sqrt(sum_sq_ep / size);
+    
+
+    double t_ep_end = omp_get_wtime();
+    double time_ep = t_ep_end - t_ep_start;
+
+    cout << "min: " << min_ep << ", max: " << max_ep << endl;
+    cout << "Arithmetic mean: " << ar_ep
+         << ", geometric mean: " << ge_ep
+         << ", harmonic mean: " << ha_ep  << endl;
+    cout << "std: " << std_ep << endl;
+
+
+    cout << "OpenMP per run:             " << (time_omp) << endl;
+    cout << "Execution policy per run:   " << (time_ep) << endl;
+    return 0;
+}
--- a/sheet5/2/main.cpp:Zone.Identifier
+++ b/sheet5/2/main.cpp:Zone.Identifier
--- a/sheet5/2/main.o
+++ b/sheet5/2/main.o
--- a/sheet5/2/means.cpp
+++ b/sheet5/2/means.cpp
@ -0,0 +1,37 @@
+#include <iostream>
+#include <cmath>
+#include <vector>
+#include <omp.h>
+
+using namespace std;
+
+void means(int a, int b, int c, double &ar, double &ge, double &ha) {
+    ar = (a+b+c) / 3.0;
+
+
+    ge = pow(a,1.0/3.0) * pow(b,1.0/3.0) * pow(c,1.0/3.0); //do it instead of pow(a*b*c,1.0/3.0) to prevent integer overflow
+
+    ha = 3.0 / (1.0/a +1.0/b +1.0/c); 
+}
+
+void means_vector(const vector<short> &input, double &ar, double &ge, double &ha) {
+    int size = input.size();
+
+    if (size == 0) {
+        cout << "Empty input" << endl;
+        return;
+    }
+
+    ar = 0;
+    ge = 1;
+    ha = 0;
+    #pragma omp parallel for reduction(+:ar,ha) reduction(*:ge)
+    for (int i = 0; i < size; i++) {
+        ar += input.at(i);
+        ge *=  pow(input.at(i), 1.0 / size);
+        ha += 1.0 / input.at(i);
+    }
+
+    ar /= size;
+    ha = size / ha;
+}
--- a/sheet5/2/means.cpp:Zone.Identifier
+++ b/sheet5/2/means.cpp:Zone.Identifier
--- a/sheet5/2/means.h
+++ b/sheet5/2/means.h
@ -0,0 +1,9 @@
+#ifndef MEANS_H_INCLUDED
+#define MEANS_H_INCLUDED
+
+#include <vector>
+
+void means(int a, int b, int c, double &ar, double &ge, double &ha);
+void means_vector(const std::vector<short> &input, double &ar, double &ge, double &ha);
+
+#endif // MEANS_H_INCLUDED
--- a/sheet5/2/means.h:Zone.Identifier
+++ b/sheet5/2/means.h:Zone.Identifier
--- a/sheet5/2/means.o
+++ b/sheet5/2/means.o
--- a/sheet5/2/out_1.txt
+++ b/sheet5/2/out_1.txt
@ -0,0 +1,6 @@
+1
+1000
+498.184
+364.412
+95.6857
+287.905
--- a/sheet5/2/out_1.txt:Zone.Identifier
+++ b/sheet5/2/out_1.txt:Zone.Identifier
--- a/sheet5/3/.vscode/c_cpp_properties.json
+++ b/sheet5/3/.vscode/c_cpp_properties.json
@ -0,0 +1,16 @@
+{
+    "configurations": [
+        {
+            "name": "Linux",
+            "includePath": [
+                "${workspaceFolder}/**"
+            ],
+            "defines": [],
+            "compilerPath": "/usr/bin/gcc",
+            "cStandard": "c17",
+            "cppStandard": "gnu++17",
+            "intelliSenseMode": "linux-gcc-x64"
+        }
+    ],
+    "version": 4
+}
--- a/sheet5/3/.vscode/settings.json
+++ b/sheet5/3/.vscode/settings.json
@ -0,0 +1,5 @@
+{
+    "files.associations": {
+        "ostream": "cpp"
+    }
+}
--- a/sheet5/3/.vscode/settings.json:Zone.Identifier
+++ b/sheet5/3/.vscode/settings.json:Zone.Identifier
--- a/sheet5/3/Makefile
+++ b/sheet5/3/Makefile
@ -0,0 +1,30 @@
+#
+# use GNU-Compiler tools
+COMPILER=GCC_
+# alternatively from the shell
+# export COMPILER=GCC_
+# or, alternatively from the shell
+# make COMPILER=GCC_
+
+# use Intel compilers
+#COMPILER=ICC_
+
+# use PGI compilers
+# COMPILER=PGI_
+
+
+SOURCES = main.cpp 
+OBJECTS = $(SOURCES:.cpp=.o)
+
+PROGRAM	= main.${COMPILER}
+
+# uncomment the next to lines for debugging and detailed performance analysis
+CXXFLAGS += -g
+LINKFLAGS += -g
+# do not use -pg with PGI compilers
+
+ifndef COMPILER
+  COMPILER=GCC_
+endif
+
+include ../${COMPILER}default.mk
--- a/sheet5/3/Makefile:Zone.Identifier
+++ b/sheet5/3/Makefile:Zone.Identifier
--- a/sheet5/3/Makefile:Zone.Identifier:Zone.Identifier
+++ b/sheet5/3/Makefile:Zone.Identifier:Zone.Identifier
--- a/sheet5/3/main.GCC_
+++ b/sheet5/3/main.GCC_
--- a/sheet5/3/main.cpp
+++ b/sheet5/3/main.cpp
@ -0,0 +1,77 @@
+#include <iostream>
+#include "mayer_primes.h"
+#include <vector>
+#include <algorithm>
+#include "timing.h"
+#include "omp.h"
+using namespace std;
+
+
+
+vector<unsigned int> count_goldbach(unsigned int n)
+{
+    vector<unsigned int> primes = get_primes(n);
+    size_t npr = primes.size();
+    int nthreads = omp_get_max_threads();
+
+    vector<vector<unsigned int>> local_counts(nthreads, vector<unsigned int>(n+1, 0));
+
+    #pragma omp parallel
+    {
+        int tid = omp_get_thread_num();
+        auto &lc = local_counts[tid];
+
+    #pragma omp for schedule(static)
+        for (int j = 0; j < (int)npr; ++j)
+        {
+            unsigned int p = primes[j];
+            for (unsigned int i = j;i < primes.size() && p + primes[i] <= n; ++i)
+            {
+                unsigned int q = primes[i];
+                unsigned int s = p + q;
+
+               lc[s] +=1;
+            }
+        }
+    }
+
+    // combine
+    vector<unsigned int> counts(n+1, 0);
+    for (int t = 0; t < nthreads; ++t)
+        for (unsigned int k = 0; k <= n; ++k)
+            counts[k] += local_counts[t][k];
+
+    return counts;
+}
+
+
+
+
+int main()
+{   
+
+    
+
+    //3
+    vector<unsigned int> counts = count_goldbach(100000);
+    cout << (max_element(counts.begin(), counts.end()) - counts.begin()) << endl; 
+
+    //4
+    vector<unsigned int> nvalues = {10000, 100000, 400000, 1000000, 2000000};
+    double time;
+    unsigned int n;
+    for(unsigned int i = 0; i< nvalues.size(); i++)
+    {
+        n = nvalues.at(i);
+        tic();
+        count_goldbach(n);
+        time = toc();
+        cout << "Time for n=" << n << ": " << time << endl;
+    }
+ 
+
+
+
+
+    return 0;
+}
--- a/sheet5/3/main.cpp:Zone.Identifier
+++ b/sheet5/3/main.cpp:Zone.Identifier
--- a/sheet5/3/main.o
+++ b/sheet5/3/main.o
--- a/sheet5/3/mayer_primes.h
+++ b/sheet5/3/mayer_primes.h
@ -0,0 +1,73 @@
+#pragma once
+
+#include <cstring> //memset
+#include <vector>
+//using namespace std;
+
+/** \brief Determines all prime numbers in interval [2, @p max].
+ *
+ *  The sieve of Eratosthenes is used.
+ *
+ *  The implementation originates from <a href="http://code.activestate.com/recipes/576559-fast-prime-generator/">Florian Mayer</a>.
+ *
+ * \param[in]   max end of interval for the prime number search.
+ * \return  vector of prime numbers @f$2,3,5, ..., p<=max @f$.
+ *
+ * \copyright
+ * Copyright (c) 2008 Florian Mayer          (adapted by Gundolf Haase 2018)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+template <class T>
+std::vector<T> get_primes(T max)
+{
+    std::vector<T> primes;
+    char *sieve;
+    sieve = new char[max / 8 + 1];
+    // Fill sieve with 1
+    memset(sieve, 0xFF, (max / 8 + 1) * sizeof(char));
+    for (T x = 2; x <= max; x++)
+    {
+        if (sieve[x / 8] & (0x01 << (x % 8))) {
+            primes.push_back(x);
+            // Is prime. Mark multiplicates.
+            for (T j = 2 * x; j <= max; j += x)
+            {
+                sieve[j / 8] &= ~(0x01 << (j % 8));
+			}
+        }
+	}
+    delete[] sieve;
+    return primes;
+}
+
+//---------------------------------------------------------------
+//int main()        // by  Florian Mayer
+//{g++ -O3  -std=c++14 -fopenmp main.cpp && ./a.out
+//    vector<unsigned long> primes;
+//    primes = get_primes(10000000);
+//    // return 0;
+//    // Print out result.
+//    vector<unsigned long>::iterator it;
+//    for(it=primes.begin(); it < primes.end(); it++)
+//        cout << *it << " ";
+//
+//    cout << endl;
+//    return 0;
+//}
--- a/sheet5/3/mayer_primes.h:Zone.Identifier
+++ b/sheet5/3/mayer_primes.h:Zone.Identifier
@ -0,0 +1,3 @@
+[ZoneTransfer]
+ZoneId=3
+HostUrl=https://imsc.uni-graz.at/haasegu/Lectures/Math2CPP/Examples/goldbach/mayer_primes.h
--- a/sheet5/3/mayer_primes.h:Zone.Identifier:Zone.Identifier
+++ b/sheet5/3/mayer_primes.h:Zone.Identifier:Zone.Identifier
--- a/sheet5/3/timing.h
+++ b/sheet5/3/timing.h
@ -0,0 +1,51 @@
+//
+//    Gundolf Haase, Oct 18 2024
+//
+#pragma once
+#include <chrono>                  // timing
+#include <stack>
+
+//using Clock = std::chrono::system_clock;   //!< The wall clock timer chosen
+using Clock = std::chrono::high_resolution_clock;
+using TPoint= std::chrono::time_point<Clock>;
+
+// [Galowicz, C++17 STL Cookbook, p. 29]
+
+std::stack<TPoint> MyStopWatch; //!< starting time of stopwatch
+
+/** Starts stopwatch timer.
+ *  Use as @code tic(); myfunction(...) ; double tsec = toc();  @endcode
+ * 
+ *  The timining can be nested and the recent time point is stored on top of the stack.
+ * 
+ *  @return recent time point
+ *  @see toc
+ */
+ auto tic()
+{
+    MyStopWatch.push(Clock::now());
+    return MyStopWatch.top();
+}
+ 
+/** Returns the elapsed time from stopwatch. 
+ * 
+ * The time point from top of the stack is used
+ * if time point @p t_b is not passed as input parameter.
+ * Use as @code tic(); myfunction(...) ; double tsec = toc();  @endcode
+ * or as @code auto t_b = tic(); myfunction(...) ; double tsec = toc(t_b);  @endcode
+ * The last option is to be used in the case of 
+ * non-nested but overlapping time measurements.
+ * 
+ * @param[in]  t_b start time of some stop watch
+ * @return elapsed time in seconds.
+ *
+*/ 
+ double toc(TPoint const &t_b = MyStopWatch.top())
+{
+    // https://en.cppreference.com/w/cpp/chrono/treat_as_floating_point
+    using Unit      = std::chrono::seconds;
+    using FpSeconds = std::chrono::duration<double, Unit::period>;        
+    auto t_e = Clock::now();
+    MyStopWatch.pop();
+    return FpSeconds(t_e-t_b).count();
+}
--- a/sheet5/3/timing.h:Zone.Identifier
+++ b/sheet5/3/timing.h:Zone.Identifier
--- a/sheet5/3/timing.h:Zone.Identifier:Zone.Identifier
+++ b/sheet5/3/timing.h:Zone.Identifier:Zone.Identifier
--- a/sheet5/4/.vscode/settings.json
+++ b/sheet5/4/.vscode/settings.json
@ -0,0 +1,6 @@
+{
+    "files.associations": {
+        "ostream": "cpp",
+        "iostream": "cpp"
+    }
+}
--- a/sheet5/4/.vscode/settings.json:Zone.Identifier
+++ b/sheet5/4/.vscode/settings.json:Zone.Identifier
--- a/sheet5/4/Doxyfile
+++ b/sheet5/4/Doxyfile
--- a/sheet5/4/Doxyfile:Zone.Identifier
+++ b/sheet5/4/Doxyfile:Zone.Identifier
--- a/sheet5/4/Makefile
+++ b/sheet5/4/Makefile
@ -0,0 +1,30 @@
+#
+# use GNU-Compiler tools
+COMPILER=GCC_
+# alternatively from the shell
+# export COMPILER=GCC_
+# or, alternatively from the shell
+# make COMPILER=GCC_
+
+# use Intel compilers
+#COMPILER=ICC_
+
+# use PGI compilers
+# COMPILER=PGI_
+
+
+SOURCES = main.cpp mylib.cpp benchmark.cpp
+OBJECTS = $(SOURCES:.cpp=.o)
+
+PROGRAM	= main.${COMPILER}
+
+# uncomment the next to lines for debugging and detailed performance analysis
+CXXFLAGS += -g
+LINKFLAGS += -g
+# do not use -pg with PGI compilers
+
+ifndef COMPILER
+  COMPILER=GCC_
+endif
+
+include ../${COMPILER}default.mk
--- a/sheet5/4/Makefile:Zone.Identifier
+++ b/sheet5/4/Makefile:Zone.Identifier
--- a/sheet5/4/bench
+++ b/sheet5/4/bench
--- a/sheet5/4/benchmark.cpp
+++ b/sheet5/4/benchmark.cpp
@ -0,0 +1,115 @@
+#include <iostream>
+#include <vector>   
+#include <cmath>
+using namespace std;
+
+
+// Inner product
+double benchmark_A(const vector<double> &x, const vector<double> &y)
+{
+    double sum = 0.0;
+    #pragma omp parallel for reduction(+:sum)
+     for (unsigned int i = 0; i < x.size(); i++)
+    {
+        sum += x[i]*y[i];
+    }
+    return sum;
+}
+
+// Inner product
+double benchmark_A_sum(const vector<double> &x)
+{
+    double sum = 0.0;
+    #pragma omp parallel for reduction(+:sum)
+     for (unsigned int i = 0; i < x.size(); i++)
+    {
+        sum += x[i];
+    }
+    return sum;
+}
+
+//Matrix-vector product
+vector<double> benchmark_B(const vector<double> &A, const vector<double> &x)
+{
+    unsigned int N = x.size();
+    unsigned int M = A.size() / N;
+    vector<double> b(M, 0.0);
+
+    #pragma omp parallel for
+    for (unsigned int i = 0; i < M; i++)
+    {
+        double bi = 0.0;
+        for (unsigned int j = 0; j < N; j++)
+        {
+            bi += A[i*N+j]*x[j];
+        }
+        b[i] = bi;
+    }
+
+    return b;
+}
+
+
+//Matrix-Matrix product
+vector<double> benchmark_C(const vector<double> &A, const vector<double> &B, unsigned int M)
+{
+    unsigned int L = A.size()/M;
+    unsigned int N = B.size()/L;
+    vector<double> C(M*N,0.0);
+    #pragma omp parallel for collapse(2)
+     for (unsigned int i = 0; i < M; i++)
+    {
+        for (unsigned int j = 0; j < N; j++)
+        {
+            double sum = 0.0;
+            for (unsigned int k = 0; k < L; k++)
+            {
+                sum += A[i*L+k]*B[k*N+j];
+            }
+            C[i*N+j] = sum;
+        }
+    }
+
+    return C;
+
+}
+
+//polynomial evaluation
+vector<double> benchmark_D(const vector<double>& coeff, const vector<double>& x)
+{
+    unsigned int p  = coeff.size();   // p coefficients, degree p-1
+    unsigned int N  = x.size();
+    vector<double> y(N);
+
+#pragma omp parallel for
+    for (unsigned int i = 0; i < N; i++){
+        double yi = coeff[p-1];
+        double xi = x[i];
+        for(int j=p-2; j>=0; --j)
+        {
+            yi = yi*xi+coeff[j];
+        }
+        y[i] = yi;
+    }
+    return y;
+}
+
+
+double benchmark_A_old(const vector<double> &x, const vector<double> &y)
+{
+    double sum = 0.0;
+     for (unsigned int i = 0; i < x.size(); i++)
+    {
+        sum += x[i]*y[i];
+    }
+    return sum;
+}
+double benchmark_A_sum_old(const vector<double> &x)
+{
+    double sum = 0.0;
+     for (unsigned int i = 0; i < x.size(); i++)
+    {
+        sum += x[i];
+    }
+    return sum;
+}
--- a/sheet5/4/benchmark.cpp:Zone.Identifier
+++ b/sheet5/4/benchmark.cpp:Zone.Identifier
--- a/sheet5/4/benchmark.h
+++ b/sheet5/4/benchmark.h
@ -0,0 +1,27 @@
+#ifndef BENCHMARK_H
+#define BENCHMARK_H
+
+#include <vector>
+using namespace std;
+
+double benchmark_A(const vector<double> &x,
+                   const vector<double> &y);
+    double benchmark_A_sum(const vector<double> &x);
+
+vector<double> benchmark_B(const vector<double> &A,
+                                const vector<double> &x);
+
+vector<double> benchmark_C(const vector<double> &A,
+                                const vector<double> &B,
+                                unsigned int M);
+
+vector<double> benchmark_D(const vector<double> &coefficients,
+                                const vector<double> &x);
+double benchmark_A_old(const vector<double> &x,
+                   const vector<double> &y);
+    double benchmark_A_sum_old(const vector<double> &x);
+                                
+
+
+
+#endif 
--- a/sheet5/4/benchmark.h:Zone.Identifier
+++ b/sheet5/4/benchmark.h:Zone.Identifier
--- a/sheet5/4/benchmark.o
+++ b/sheet5/4/benchmark.o
--- a/sheet5/4/main.GCC_
+++ b/sheet5/4/main.GCC_
--- a/sheet5/4/main.cpp
+++ b/sheet5/4/main.cpp
@ -0,0 +1,234 @@
+#include "mylib.h"
+#include <cassert>
+#include <chrono>           // timing
+#include <cmath>            // sqrt()
+#include <cstdlib>          // atoi()
+#include <cstring>          // strncmp()
+#include <ctime>
+#include <iostream>
+#include <sstream>
+#include "benchmark.h"
+#include "omp.h"
+using namespace std;
+using namespace std::chrono;  // timing
+
+int main(int argc, char **argv)
+{
+    const unsigned int NA = 1400000;     
+    const unsigned int NLOOPSA = 2000;     
+    //const unsigned int NLOOPS = 10;  
+    
+    const unsigned int MC = 1000;
+    int const NLOOPSC = 5;
+// ---------- Benchmark A ----------
+
+{
+    
+    
+    vector<double> xA(NA), yA(NA);
+    for (unsigned int i = 0; i < NA; ++i)
+    {
+        double xi= (i % 219) + 1;
+        xA[i] = xi;
+        yA[i] = 1.0 / xi;
+    }
+
+    auto tA1 = system_clock::now();
+    double sA = 0.0, sumA = 0.0;
+    for (unsigned int loop = 0; loop < NLOOPSA; ++loop)
+    {
+        sA = benchmark_A(xA, yA);
+        sumA += sA;
+    }
+    auto tA2 = system_clock::now();
+
+    auto durA = duration_cast<microseconds>(tA2 - tA1);
+    double tA = static_cast<double>(durA.count()) / 1e6 / NLOOPSA; //duration per loop seconds
+
+    cout << "\n===== Benchmark A =====\n";
+    cout << "<xA,yA> = "  << sA << endl;
+    cout << "Timing in sec. : " << tA << endl;
+    cout << "GFLOPS         : " << 2.0 * NA / tA / 1024 / 1024 / 1024 << endl;
+    cout << "GiByte/s       : "
+         << 2.0 * NA * sizeof(xA[0]) / tA / 1024 / 1024 / 1024 << endl;
+}
+
+// ---------- Benchmark B----------
+
+{
+    const unsigned int MB = 1700;
+    const unsigned int NB = MB;
+    const unsigned int NLOOPSB = 200;//50;
+
+    vector<double> AB(MB * NB);
+    vector<double> xB(NB);
+
+    for (unsigned int i = 0; i < MB; ++i)
+        for (unsigned int j = 0; j < NB; ++j)
+            AB[i * NB + j] = (i+j) %219 +1;
+
+    for (unsigned int j = 0; j < NB; ++j)
+    {
+        
+        xB[j] = 1.0 / AB[17*NB+j];
+    }
+
+    vector<double> bB;
+    auto tB1 = system_clock::now();
+    double guardB = 0.0;
+    for (unsigned int loop = 0; loop < NLOOPSB; ++loop)
+    {
+        bB = benchmark_B(AB, xB);
+        guardB += bB[17];
+    }
+    auto tB2 = system_clock::now();
+
+    auto durB = duration_cast<microseconds>(tB2 - tB1);
+    double tB = static_cast<double>(durB.count()) / 1e6 / NLOOPSB;
+
+    double flopsB = 2.0 * MB * NB;  
+    double bytesB = (MB * NB + NB  + MB)  * sizeof(double);
+
+    cout << "\n===== Benchmark B =====\n";
+    cout << guardB << endl;
+    cout << "bytes: " << bytesB << endl;
+    cout << "Timing in sec. : " << tB << endl;
+    cout << "GFLOPS         : " << flopsB / tB / 1024 / 1024 / 1024 << endl;
+    cout << "GiByte/s       : " << bytesB / tB / 1024 / 1024 / 1024 << endl;
+}
+
+// ---------- Benchmark C ----------
+
+{
+    
+    const unsigned int LC = MC;
+    const unsigned int NC = MC;
+
+   
+    vector<double> AC(MC * LC), BC(LC * NC);
+
+    for (unsigned int i = 0; i < MC; ++i)
+        for (unsigned int j = 0; j < LC; ++j)
+            AC[i * LC + j] = (i+j) %219 +1;
+
+    for (unsigned int i = 0; i < LC; ++i)
+        for (unsigned int j = 0; j < NC; ++j)
+            BC[i * NC + j] = (i+j) %219 +1;
+
+    vector<double> CC;
+    auto tC1 = system_clock::now();
+    double guardC = 0.0;
+    for (unsigned int loop = 0; loop < NLOOPSC; ++loop)
+    {
+        CC = benchmark_C(AC, BC, MC);
+        guardC += CC[0];
+    }
+    auto tC2 = system_clock::now();
+
+    auto durC = duration_cast<microseconds>(tC2 - tC1);
+    double tC = static_cast<double>(durC.count()) / 1e6 / NLOOPSC;
+
+    double flopsC = 2.0 * MC * LC * NC;
+    double bytesC = (MC * LC  + LC * NC  + MC * NC)* sizeof(double);
+
+    cout << "\n===== Benchmark C =====\n";
+    cout << guardC << endl;
+    cout << "bytes: " << bytesC << endl;
+    cout << "Timing in sec. : " << tC << endl;
+    cout << "GFLOPS         : " << flopsC / tC / 1024 / 1024 / 1024 << endl;
+    cout << "GiByte/s       : " << bytesC / tC / 1024 / 1024 / 1024 << endl;
+}
+
+// ---------- Benchmark D----------
+
+
+{
+    const unsigned int ND = 2000000;
+    const unsigned int p  = 14;         // degree p-1 = 15
+    const unsigned int NLOOPSD = 100;
+    vector<double> coeff(p, 0.0);
+    vector<double> xD(ND);
+
+    for (unsigned int k = 0; k < p; ++k)
+        coeff[k] = k%219+1;
+
+    for (unsigned int i = 0; i < ND; ++i)
+        xD[i] = i%219+1;
+
+    vector<double> yD; 
+    auto tD1 = system_clock::now();
+    double guardD = 0.0;
+    for (unsigned int loop = 0; loop < NLOOPSD; ++loop)
+    {
+        
+        yD = benchmark_D(coeff, xD);
+        guardD += yD[0];
+    }
+    auto tD2 = system_clock::now();
+
+    auto durD = duration_cast<microseconds>(tD2 - tD1);
+    double tD = static_cast<double>(durD.count()) / 1e6 / NLOOPSD;
+
+   
+    double flopsD = ND * 2 * p;
+    double bytesD = (p + 2 * ND)*sizeof(double);
+
+    cout << "\n===== Benchmark D =====\n";
+    cout << guardD << endl;
+    cout << "bytes: " << bytesD << endl;
+    cout << "Timing in sec. : " << tD << endl;
+    cout << "GFLOPS         : " << flopsD / tD / 1024 / 1024 / 1024 << endl;
+    cout << "GiByte/s       : " << bytesD / tD / 1024 / 1024 / 1024 << endl;
+}
+
+
+
+for (int k = 3; k <= 8; ++k)
+    {
+        size_t n = (size_t)pow(10, k);
+
+        vector<double> x(n), y(n);
+        for (unsigned int i = 0; i < n; ++i)
+        {
+            double xi= (i % 219) + 1;
+            x[i] = xi;
+            y[i] = 1.0 / xi;
+        }
+
+        // ---- SUM benchmark (sequential) ----
+        double t0 = omp_get_wtime();
+        double s1 = benchmark_A_sum_old(x);
+        double t_sum_seq = omp_get_wtime() - t0;
+
+        // ---- SUM benchmark (parallel) ----
+        t0 = omp_get_wtime();
+        double s2 = benchmark_A_sum(x);
+        double t_sum_omp = omp_get_wtime() - t0;
+
+        double sum_speedup = t_sum_seq / t_sum_omp;
+
+
+        // ---- INNER PRODUCT benchmark (sequential) ----
+        t0 = omp_get_wtime();
+        double ip1 = benchmark_A_old(x, y);
+        double t_inner_seq = omp_get_wtime() - t0;
+
+        // ---- INNER PRODUCT benchmark (parallel) ----
+        t0 = omp_get_wtime();
+        double ip2 = benchmark_A(x, y);
+        double t_inner_omp = omp_get_wtime() - t0;
+
+        double inner_speedup = t_inner_seq / t_inner_omp;
+
+        // ---- Print results ----
+        cout << k << endl;
+            cout << t_sum_seq << ", " << t_sum_omp << ", " << sum_speedup << endl;
+           cout << t_inner_seq << ", " << t_inner_omp << ", " << inner_speedup << endl;
+        cout << endl;
+    }
+
+
+
+return 0;
+}
+
--- a/sheet5/4/main.cpp:Zone.Identifier
+++ b/sheet5/4/main.cpp:Zone.Identifier
--- a/sheet5/4/main.o
+++ b/sheet5/4/main.o
--- a/sheet5/4/mylib.cpp
+++ b/sheet5/4/mylib.cpp
@ -0,0 +1,65 @@
+#include "mylib.h"
+#include <cassert>       // assert()
+#include <cmath>
+#include <vector>
+
+#ifdef __INTEL_CLANG_COMPILER
+#pragma message(" ##########  Use of MKL  ###############")
+#include <mkl.h>
+#else
+#pragma message(" ##########  Use of CBLAS  ###############")
+//extern "C"
+//{
+#include <cblas.h>               // cBLAS Library
+#include <lapacke.h>             // Lapack
+//}
+#endif
+
+using namespace std;
+
+double scalar(vector<double> const &x, vector<double> const &y)
+{
+    assert(x.size() == y.size()); // switch off via compile flag: -DNDEBUG
+    size_t const N = x.size();
+    double sum = 0.0;
+    for (size_t i = 0; i < N; ++i)
+    {
+        sum += x[i] * y[i];
+        //sum += exp(x[i])*log(y[i]);
+    }
+    return sum;
+}
+
+
+double scalar_cblas(vector<double> const &x, vector<double> const &y)
+{
+    int const asize = static_cast<int>(size(x));
+    int const bsize = static_cast<int>(size(y));
+    assert(asize == bsize); // switch off via compile flag: -DNDEBUG
+	return cblas_ddot(asize,x.data(),1,y.data(),1);    
+    //assert(x.size() == y.size()); // switch off via compile flag: -DNDEBUG
+	//return cblas_ddot(x.size(),x.data(),1,y.data(),1);
+}
+
+float scalar_cblas(vector<float> const &x, vector<float> const &y)
+{
+    int const asize = static_cast<int>(size(x));
+    int const bsize = static_cast<int>(size(y));
+    assert(asize == bsize); // switch off via compile flag: -DNDEBUG
+	return cblas_sdot(asize,x.data(),1,y.data(),1);    
+    //assert(x.size() == y.size()); // switch off via compile flag: -DNDEBUG
+	//return cblas_ddot(x.size(),x.data(),1,y.data(),1);
+}
+
+
+double norm(vector<double> const &x)
+{
+    size_t const N = x.size();
+    double sum = 0.0;
+    for (size_t i = 0; i < N; ++i)
+    {
+        sum += x[i] * x[i];
+    }
+    return std::sqrt(sum);
+}
+
--- a/sheet5/4/mylib.cpp:Zone.Identifier
+++ b/sheet5/4/mylib.cpp:Zone.Identifier
--- a/sheet5/4/mylib.h
+++ b/sheet5/4/mylib.h
@ -0,0 +1,30 @@
+#ifndef FILE_MYLIB
+#define FILE_MYLIB
+#include <vector>
+
+/** 	Inner product
+	@param[in] x	vector
+	@param[in] y	vector
+	@return 	    resulting Euclidian inner product <x,y>
+*/
+double scalar(std::vector<double> const &x, std::vector<double> const &y);
+
+/** 	Inner product using BLAS routines
+	@param[in] x	vector
+	@param[in] y	vector
+	@return 	    resulting Euclidian inner product <x,y>
+*/
+double scalar_cblas(std::vector<double> const &x, std::vector<double> const &y);
+float scalar_cblas(std::vector<float> const &x, std::vector<float> const &y);
+
+
+/** 	L_2 Norm of a vector
+	@param[in] x	vector
+	@return 	    resulting Euclidian norm <x,y>
+*/
+double norm(std::vector<double> const &x);
+
+
+
+
+#endif
--- a/sheet5/4/mylib.h:Zone.Identifier
+++ b/sheet5/4/mylib.h:Zone.Identifier
--- a/sheet5/4/mylib.o
+++ b/sheet5/4/mylib.o
--- a/sheet5/4/output.txt
+++ b/sheet5/4/output.txt
@ -0,0 +1,51 @@
+g++ -c -g -O0 -funroll-all-loops -std=c++17 -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -Wredundant-decls -Winline -fmax-errors=1  -flto -o main.o main.cpp
+g++ -c -g -O0 -funroll-all-loops -std=c++17 -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -Wredundant-decls -Winline -fmax-errors=1  -flto -o mylib.o mylib.cpp
+g++ -c -g -O0 -funroll-all-loops -std=c++17 -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -Wredundant-decls -Winline -fmax-errors=1  -flto -o benchmark.o benchmark.cpp
+g++  main.o mylib.o benchmark.o  -g -O0  -llapack -lblas -flto -o main.GCC_
+./main.GCC_ 
+
+===== Benchmark A =====
+<xA,yA> = 1.4e+06
+Timing in sec. : 0.00893637
+GFLOPS         : 0.291808
+GiByte/s       : 2.33446
+
+===== Benchmark B =====
+340000
+bytes: 2.31472e+07
+Timing in sec. : 0.0133897
+GFLOPS         : 0.402029
+GiByte/s       : 1.61001
+
+===== Benchmark C =====
+7.37196e+07
+bytes: 2.4e+07
+Timing in sec. : 8.67235
+GFLOPS         : 0.21478
+GiByte/s       : 0.00257736
+
+===== Benchmark D =====
+10500
+bytes: 3.20001e+07
+Timing in sec. : 0.101087
+GFLOPS         : 0.515935
+GiByte/s       : 0.294821
+
+===== Benchmark 5A =====
+NORM = 150114
+Timing in sec. : 0.00703533
+GFLOPS         : 0.370658
+GiByte/s       : 1.48263
+
+===== Benchmark 5B =====
+<xA,yA> = 1.4e+06
+Timing in sec. : 0.0108377
+GFLOPS         : 0.601533
+GiByte/s       : 1.92491
+
+===== Benchmark 5C =====
+7.37196e+07
+bytes: 2.4e+07
+Timing in sec. : 15.2407
+GFLOPS         : 0.122215
+GiByte/s       : 0.00146658
--- a/sheet5/4/output.txt:Zone.Identifier
+++ b/sheet5/4/output.txt:Zone.Identifier
--- a/sheet5/4/small_Doxyfile
+++ b/sheet5/4/small_Doxyfile
--- a/sheet5/4/small_Doxyfile:Zone.Identifier
+++ b/sheet5/4/small_Doxyfile:Zone.Identifier
--- a/sheet5/5/Doxyfile
+++ b/sheet5/5/Doxyfile
--- a/sheet5/5/Doxyfile:Zone.Identifier
+++ b/sheet5/5/Doxyfile:Zone.Identifier
--- a/sheet5/5/Makefile
+++ b/sheet5/5/Makefile
@ -0,0 +1,54 @@
+#
+# use GNU-Compiler tools
+COMPILER=GCC_
+# COMPILER=GCC_SEQ_
+# alternatively from the shell
+# export COMPILER=GCC_
+# or, alternatively from the shell
+# make COMPILER=GCC_
+
+MAIN = main
+SOURCES = ${MAIN}.cpp vdop.cpp geom.cpp\
+	getmatrix.cpp jacsolve.cpp userset.cpp
+# 	dexx.cpp debugd.cpp skalar.cpp  vecaccu.cpp accudiag.cpp
+
+OBJECTS = $(SOURCES:.cpp=.o)
+
+PROGRAM	= ${MAIN}.${COMPILER}
+
+# uncomment the next to lines for debugging and detailed performance analysis
+CXXFLAGS += -g
+# -pg slows down the code on my laptop when using CLANG_
+#LINKFLAGS += -pg
+#CXXFLAGS += -Q --help=optimizers
+#CXXFLAGS += -fopt-info
+
+include ../${COMPILER}default.mk
+
+#############################################################################
+# additional specific cleaning in this directory
+clean_all::
+	@rm -f t.dat*
+
+
+#############################################################################
+# special testing
+# NPROCS	= 4
+#
+TFILE	= t.dat
+# TTMP	= t.tmp
+#
+graph: $(PROGRAM)
+# 	@rm -f $(TFILE).*
+	# next two lines only sequentially
+	./$(PROGRAM)
+	@mv  $(TFILE).000 $(TFILE)
+# 	$(MPIRUN) $(MPIFLAGS) -np $(NPROCS) $(PROGRAM)
+# 	@echo " "; echo "Manipulate data for graphics."; echo " "
+# 	@cat $(TFILE).* > $(TTMP)
+# 	@sort -b -k 2    $(TTMP)   -o $(TTMP).1
+# 	@sort -b -k 1    $(TTMP).1 -o $(TTMP).2
+# 	@awk  -f nl.awk  $(TTMP).2  > $(TFILE)
+# 	@rm -f $(TTMP).* $(TTMP) $(TFILE).*
+#
+	-gnuplot jac.dem
--- a/sheet5/5/Makefile:Zone.Identifier
+++ b/sheet5/5/Makefile:Zone.Identifier
--- a/sheet5/5/ToDo.txt
+++ b/sheet5/5/ToDo.txt
@ -0,0 +1,5 @@
+// Jan 15, 2019
+
+geom.h:75        void SetValues(std::vector<double> &v) const;  // GH: TODO with functor
+Set vector values using a functor ff(x,y).
+ See solution in Progs/cds
--- a/sheet5/5/ToDo.txt:Zone.Identifier
+++ b/sheet5/5/ToDo.txt:Zone.Identifier
--- a/sheet5/5/ascii_read_meshvector.m
+++ b/sheet5/5/ascii_read_meshvector.m
@ -0,0 +1,43 @@
+function [ xc, ia, v ] = ascii_read_meshvector( fname )
+%
+% Loads the 2D triangular mesh (coordinates, vertex connectivity) 
+%  together with values on its vertices from an ASCII file.
+%   Matlab indexing is stored  (starts with 1).
+% 
+% The input file format is compatible 
+%  with Mesh_2d_3_matlab:Write_ascii_matlab(..) in jacobi_oo_stl/geom.h
+%
+%
+%  IN: fname - filename
+% OUT: xc    - coordinates
+%      ia    - mesh connectivity
+%      v     - solution vector
+
+DELIMETER = ' ';
+
+fprintf('Read file  %s\n',fname)
+
+% Read mesh constants
+nn = dlmread(fname,DELIMETER,[0 0 0 3]);  %% row_1, col_1, row_2, col_2  in C indexing!!!
+nnode = nn(1);
+ndim  = nn(2);
+nelem = nn(3);
+nvert = nn(4);
+
+% Read coordinates
+row_start = 0+1;
+row_end   = 0+nnode;
+xc = dlmread(fname,DELIMETER,[row_start 0 row_end ndim-1]);
+
+% Read connectivity
+row_start = row_end+1;
+row_end   = row_end+nelem;
+ia = dlmread(fname,DELIMETER,[row_start 0 row_end nvert-1]);
+
+% Read solution
+row_start = row_end+1;
+row_end   = row_end+nnode;
+v = dlmread(fname,DELIMETER,[row_start 0 row_end 0]);
+end
+
+
--- a/sheet5/5/ascii_read_meshvector.m:Zone.Identifier
+++ b/sheet5/5/ascii_read_meshvector.m:Zone.Identifier
--- a/sheet5/5/ascii_write_mesh.m
+++ b/sheet5/5/ascii_write_mesh.m
@ -0,0 +1,49 @@
+function ascii_write_mesh( xc, ia, e, basename)
+%
+% Saves the 2D triangular mesh in the minimal way (only coordinates, vertex connectivity, minimal boundary edge info)
+%  in an ASCII file.
+%  Matlab indexing is stored  (starts with 1).
+% 
+% The output file format is compatible with Mesh_2d_3_matlab:Mesh_2d_3_matlab(std::string const &fname) in jacobi_oo_stl/geom.h
+%
+% IN:
+% coordinates  xc: [2][nnode]
+% connectivity ia: [4][nelem]   with  t(4,:) are the subdomain numbers
+% edges         e: [7][nedges]  boundary edges
+%                              e([1,2],:) - start/end vertex of edge
+%                              e([3,4],:) - start/end values
+%                              e(5,:)     - segment number
+%                              e([6,7],:) - left/right subdomain
+%        basename: file name without extension
+% 
+% Data have been generated via <https://de.mathworks.com/help/pde/ug/initmesh.html initmesh>.
+%
+fname = [basename, '.txt'];
+
+nnode = int32(size(xc,2));
+ndim  = int32(size(xc,1));
+nelem = int32(size(ia,2));
+nvert_e = int32(3);
+
+
+dlmwrite(fname,nnode,'delimiter','\t','precision',16)                 % number of nodes
+dlmwrite(fname,ndim,'-append','delimiter','\t','precision',16)        % space dimension
+dlmwrite(fname,nelem,'-append','delimiter','\t','precision',16)       % number of elements
+dlmwrite(fname,nvert_e,'-append','delimiter','\t','precision',16)     % number of vertices per element
+
+% dlmwrite(fname,xc(:),'-append','delimiter','\t','precision',16)       % coordinates
+dlmwrite(fname,xc([1,2],:).','-append','delimiter','\t','precision',16) % coordinates
+
+% no subdomain info transferred
+tmp=int32(ia(1:3,:));
+% dlmwrite(fname,tmp(:),'-append','delimiter','\t','precision',16)      % connectivity in Matlab indexing
+dlmwrite(fname,tmp(:,:).','-append','delimiter','\t','precision',16)    % connectivity in Matlab indexing
+
+% store only start and end point of boundary edges,
+nbedges = size(e,2);
+dlmwrite(fname,nbedges,'-append','delimiter','\t','precision',16)     % number boundary edges
+tmp=int32(e(1:2,:));
+% dlmwrite(fname,tmp(:),'-append','delimiter','\t','precision',16)    % boundary edges in Matlab indexing
+dlmwrite(fname,tmp(:,:).','-append','delimiter','\t','precision',16)  % boundary edges in Matlab indexing
+
+end
--- a/sheet5/5/ascii_write_mesh.m:Zone.Identifier
+++ b/sheet5/5/ascii_write_mesh.m:Zone.Identifier
--- a/sheet5/5/geom.cpp
+++ b/sheet5/5/geom.cpp
@ -0,0 +1,522 @@
+// see:   http://llvm.org/docs/CodingStandards.html#include-style
+#include "geom.h"
+
+#include <algorithm>
+#include <cassert>
+#include <fstream>
+#include <iostream>
+#include <list>
+#include <string>
+#include <vector>
+using namespace std;
+
+Mesh::Mesh(int ndim, int nvert_e, int ndof_e)
+    : _nelem(0), _nvert_e(nvert_e), _ndof_e(ndof_e), _nnode(0), _ndim(ndim), _ia(0), _xc(0)
+{
+}
+
+Mesh::~Mesh()
+{}
+
+void Mesh::SetValues(std::vector<double> &v, const std::function<double(double, double)> &func) const
+{
+    int const nnode = Nnodes();            // number of vertices in mesh
+    assert( nnode == static_cast<int>(v.size()) );
+    for (int k = 0; k < nnode; ++k)
+    {
+        v[k] = func( _xc[2 * k], _xc[2 * k + 1] );
+    }
+}
+
+
+void Mesh::Debug() const
+{
+    cout << "\n ############### Debug  M E S H  ###################\n";
+    cout << "\n ...............    Coordinates       ...................\n";
+    for (int k = 0; k < _nnode; ++k)
+    {
+        cout << k << " : " ;
+        for (int i = 0; i < _ndof_e; ++i )
+        {
+            cout << _xc[2*k+i] << "  ";
+        }
+        cout << endl;
+    }
+    cout << "\n ...............    Elements        ...................\n";
+    for (int k = 0; k < _nelem; ++k)
+    {
+        cout << k << " : ";
+        for (int i = 0; i < _ndof_e; ++i )
+            cout << _ia[_ndof_e * k + i] << "  ";
+        cout << endl;
+    }
+    return;
+}
+
+void Mesh::Write_ascii_matlab(std::string const &fname, std::vector<double> const &v) const
+{
+    assert(Nnodes() ==  static_cast<int>(v.size()));  // fits vector length to mesh information?
+
+    ofstream fout(fname);                             // open file ASCII mode
+    if ( !fout.is_open() )
+    {
+        cout << "\nFile " << fname << " has not been opened.\n\n" ;
+        assert( fout.is_open() && "File not opened."  );
+    }
+
+    string const DELIMETER(" ");    // define the same delimeter as in matlab/ascii_read*.m
+    int const    OFFSET(1);         // convert C-indexing to matlab
+
+    // Write data: #nodes, #space dimensions, #elements, #vertices per element
+    fout << Nnodes() << DELIMETER << Ndims() << DELIMETER << Nelems() << DELIMETER << NverticesElements() << endl;
+
+    // Write cordinates: x_k, y_k   in seperate lines
+    assert( Nnodes()*Ndims() ==  static_cast<int>(_xc.size()));
+    for (int k = 0, kj = 0; k < Nnodes(); ++k)
+    {
+        for (int j = 0; j < Ndims(); ++j, ++kj)
+        {
+            fout << _xc[kj] << DELIMETER;
+        }
+        fout << endl;
+    }
+
+    // Write connectivity: ia_k,0, ia_k,1 etc  in seperate lines
+    assert( Nelems()*NverticesElements() ==  static_cast<int>(_ia.size()));
+    for (int k = 0, kj = 0; k < Nelems(); ++k)
+    {
+        for (int j = 0; j < NverticesElements(); ++j, ++kj)
+        {
+            fout << _ia[kj] + OFFSET << DELIMETER;     // C to matlab
+        }
+        fout << endl;
+    }
+
+    // Write vector
+    for (int k = 0; k < Nnodes(); ++k)
+    {
+        fout << v[k] << endl;
+    }
+
+    fout.close();
+    return;
+}
+
+
+void Mesh::Visualize(std::vector<double> const &v) const
+{
+    // define external command
+    const string exec_m("matlab -nosplash < visualize_results.m");                 // Matlab
+    //const string exec_m("octave --no-window-system --no-gui visualize_results.m"); // Octave
+    //const string exec_m("flatpak run org.octave.Octave visualize_results.m");      // Octave (flatpak): desktop GH
+
+    const string fname("uv.txt");
+    Write_ascii_matlab(fname, v);
+
+    int ierror = system(exec_m.c_str());                                 // call external command
+
+    if (ierror != 0)
+    {
+        cout << endl << "Check path to Matlab/octave on your system" << endl;
+    }
+    cout << endl;
+    return;
+}
+
+
+
+
+// #####################################################################
+Mesh_2d_3_square::Mesh_2d_3_square(int nx, int ny, int myid, int procx, int procy)
+    : Mesh(2, 3, 3), // two dimensions, 3 vertices, 3 dofs
+      _myid(myid), _procx(procx), _procy(procy), _neigh{{-1, -1, -1, -1}}, _color(0),
+_xl(0.0), _xr(1.0), _yb(0.0), _yt(1.0), _nx(nx), _ny(ny)
+{
+    //void IniGeom(int const myid, int const procx, int const procy, int neigh[], int &color)
+    int const ky = _myid / _procx;
+    int const kx = _myid % _procy;  //    MOD(myid,procx)
+    // Determine the neighbors of domain/rank myid
+    _neigh[0] = (ky == 0)       ?  -1 : _myid - _procx;    //   South
+    _neigh[1] = (kx == _procx - 1) ?  -1 : _myid + 1;      //   East
+    _neigh[2] = (ky == _procy - 1) ?  -1 : _myid + _procx;  //   North
+    _neigh[3] = (kx == 0)       ?  -1 : _myid - 1;        //   West
+
+    _color = (kx + ky) & 1 ;
+
+    // subdomain is part of unit square
+    double const hx = 1. / _procx;
+    double const hy = 1. / _procy;
+    _xl = kx * hx;                      //  left
+    _xr = (kx + 1) * hx;                //  right
+    _yb = ky * hy;                      //  bottom
+    _yt = (ky + 1) * hy;                //  top
+
+    // Calculate coordinates
+    int const nnode = (_nx + 1) * (_ny + 1); // number of nodes
+    Resize_Coords(nnode, 2);                 // coordinates in 2D [nnode][ndim]
+    GetCoordsInRectangle(_nx, _ny, _xl, _xr, _yb, _yt, GetCoords().data());
+
+    // Calculate element connectivity (linear triangles)
+    int const nelem = 2 * _nx * _ny;         // number of elements
+    Resize_Connectivity(nelem, 3);           // connectivity matrix [nelem][3]
+    GetConnectivityInRectangle(_nx, _ny, GetConnectivity().data());
+
+    return;
+}
+
+
+void Mesh_2d_3_square::SetU(std::vector<double> &u) const
+{
+    int dx    = _nx + 1;
+    for (int j = 0; j <= _ny; ++j)
+    {
+        int k = j * dx;
+        for (int i = 0; i <= _nx; ++i, ++k)
+        {
+            u[k] = 0.0;
+        }
+    }
+
+}
+
+void Mesh_2d_3_square::SetF(std::vector<double> &f) const
+{
+    int dx    = _nx + 1;
+    for (int j = 0; j <= _ny; ++j)
+    {
+        int k = j * dx;
+        for (int i = 0; i <= _nx; ++i, ++k)
+        {
+            f[k] = 1.0;
+        }
+    }
+
+}
+
+
+std::vector<int> Mesh_2d_3_square::Index_DirichletNodes() const
+{
+    int const dx = 1,
+              dy = _nx + 1,
+              bl  = 0,
+              br  = _nx,
+              tl  = _ny * (_nx + 1),
+              tr  = (_ny + 1) * (_nx + 1) - 1;
+    int const start[4] = { bl, br, tl, bl},
+                         end[4] = { br, tr, tr, tl},
+                                  step[4] = { dx, dy, dx, dy};
+
+    vector<int> idx(0);
+    for (int j = 0; j < 4; j++)
+    {
+        if (_neigh[j] < 0)
+        {
+            for (int i = start[j]; i <= end[j]; i += step[j])
+            {
+                idx.push_back(i);        // node i is Dirichlet node
+            }
+        }
+    }
+    //    remove multiple elements
+    sort(idx.begin(), idx.end());                          // sort
+    idx.erase( unique(idx.begin(), idx.end()), idx.end() ); // remove duplicate data
+
+    return idx;
+}
+
+void Mesh_2d_3_square::SaveVectorP(std::string const &name, vector<double> const &u) const
+{
+//  construct the file name for subdomain myid
+    const string tmp( std::to_string(_myid / 100) + to_string((_myid % 100) / 10) + to_string(_myid % 10) );
+
+    const string namep = name + "." + tmp;
+    ofstream ff(namep.c_str());
+    ff.precision(6);
+    ff.setf(ios::fixed, ios::floatfield);
+
+    // assumes tensor product grid in unit square; rowise numbered (as generated in class constructor)
+    // output is provided for tensor product grid visualization ( similar to Matlab-surf() )
+    auto const &xc = GetCoords();
+    int k = 0;
+    for (int j = 0; j <= _ny; ++j)
+    {
+        for (int i = 0; i <= _nx; ++i, ++k)
+            ff << xc[2 * k + 0] << "   " << xc[2 * k + 1] << "   " << u[k] << endl;
+        ff << endl;
+    }
+
+    ff.close();
+    return;
+}
+
+void Mesh_2d_3_square::GetCoordsInRectangle(int const nx, int const ny,
+        double const xl, double const xr, double const yb, double const yt,
+        double xc[])
+{
+    const double hx = (xr - xl) / nx,
+                 hy = (yt - yb) / ny;
+
+    int k = 0;
+    for (int j = 0; j <= ny; ++j)
+    {
+        const double y0 = yb + j * hy;
+        for (int i = 0; i <= nx; ++i, k += 2)
+        {
+            xc[k  ] = xl + i * hx;
+            xc[k + 1] = y0;
+        }
+    }
+
+    return;
+}
+
+void Mesh_2d_3_square::GetConnectivityInRectangle(int const nx, int const ny, int ia[])
+{
+    const int dx = nx + 1;
+    int k  = 0;
+    int l  = 0;
+    for (int j = 0; j < ny; ++j, ++k)
+    {
+        for (int i = 0; i < nx; ++i, ++k)
+        {
+            ia[l  ] = k;
+            ia[l + 1] = k + 1;
+            ia[l + 2] = k + dx + 1;
+            l += 3;
+            ia[l  ] = k;
+            ia[l + 1] = k + dx;
+            ia[l + 2] = k + dx + 1;
+            l += 3;
+        }
+    }
+    return;
+}
+
+// #################### still some old code (--> MPI) ############################
+
+
+//  Copies the values of w corresponding to the boundary
+//  South (ib==1), East (ib==2), North (ib==3), West (ib==4)
+
+void GetBound(int const ib, int const nx, int const ny, double const w[], double s[])
+{
+    const int //dx = 1,
+    dy = nx + 1,
+    bl  = 0,
+    br  = nx,
+    tl  = ny * (nx + 1),
+    tr  = (ny + 1) * (nx + 1) - 1;
+    switch (ib)
+    {
+        case 1:
+        {
+            for (int i = bl, j = 0; i <= br; ++i, ++j)
+                s[j] = w[i];
+            break;
+        }
+        case 3:
+        {
+            for (int i = tl, j = 0; i <= tr; ++i, ++j)
+                s[j] = w[i];
+            break;
+        }
+        case 4:
+        {
+            for (int i = bl, j = 0; i <= tl; i += dy, ++j)
+                s[j] = w[i];
+            break;
+        }
+        case 2:
+        {
+            for (int i = br, j = 0; i <= tr; i += dy, ++j)
+                s[j] = w[i];
+            break;
+        }
+        default:
+        {
+            cout << endl << "Wrong parameter  ib in " << __FILE__ << ":" << __LINE__ << endl;
+        }
+    }
+    return;
+}
+
+// ----------------------------------------------------------------------------------------------------------
+// Computes w:  = w + s at nodes on  the boundary
+// South (ib == 1), East (ib == 2), North (ib == 3), West (ib == 4)
+
+void AddBound(int const ib, int const nx, int const ny, double w[], double const s[])
+{
+    int const dy = nx + 1,
+              bl  = 0,
+              br  = nx,
+              tl  = ny * (nx + 1),
+              tr  = (ny + 1) * (nx + 1) - 1;
+    switch (ib)
+    {
+        case 1:
+        {
+            for (int i = bl, j = 0; i <= br; ++i, ++j)
+                w[i] += s[j];
+            break;
+        }
+        case 3:
+        {
+            for (int i = tl, j = 0; i <= tr; ++i, ++j)
+                w[i] += s[j];
+            break;
+        }
+        case 4:
+        {
+            for (int i = bl, j = 0; i <= tl; i += dy, ++j)
+                w[i] += s[j];
+            break;
+        }
+        case 2:
+        {
+            for (int i = br, j = 0; i <= tr; i += dy, ++j)
+                w[i] += s[j];
+            break;
+        }
+        default:
+        {
+            cout << endl << "Wrong parameter  ib in " << __FILE__ << ":" << __LINE__ << endl;
+        }
+    }
+    return;
+}
+
+
+// ####################################################################
+
+Mesh_2d_3_matlab::Mesh_2d_3_matlab(string const &fname)
+    : Mesh(2, 3, 3),  // two dimensions, 3 vertices, 3 dofs
+      bedges(0)
+{
+    ifstream ifs(fname);
+    if (!(ifs.is_open() && ifs.good()))
+    {
+        cerr << "Mesh_2d_3_matlab: Error cannot open file " << fname << endl;
+        assert(ifs.is_open());
+    }
+
+    int const OFFSET(1);             // Matlab to C indexing
+    cout << "ASCI file  " << fname << "  opened" << endl;
+
+    // Read some mesh constants
+    int nnode, ndim, nelem, nvert_e;
+    ifs >> nnode >> ndim >> nelem >> nvert_e;
+    cout << nnode << "  " << ndim << "  " << nelem << "  " << nvert_e << endl;
+    assert(ndim == 2 && nvert_e == 3);
+
+    // Allocate memory
+    Resize_Coords(nnode, ndim);                 // coordinates in 2D [nnode][ndim]
+    Resize_Connectivity(nelem, nvert_e);        // connectivity matrix [nelem][nvert]
+
+    // Read ccordinates
+    auto &xc = GetCoords();
+    for (int k = 0; k < nnode * ndim; ++k)
+    {
+        ifs >> xc[k];
+    }
+
+    // Read connectivity
+    auto &ia = GetConnectivity();
+    for (int k = 0; k < nelem * nvert_e; ++k)
+    {
+        ifs >> ia[k];
+        ia[k] -= OFFSET;                // Matlab to C indexing
+    }
+
+    // additional read of boundary information (only start/end point)
+    int nbedges;
+    ifs >> nbedges;
+
+    bedges.resize(nbedges * 2);
+    for (int k = 0; k < nbedges * 2; ++k)
+    {
+        ifs >> bedges[k];
+        bedges[k] -= OFFSET;            // Matlab to C indexing
+    }
+
+    return;
+}
+
+// binary
+//{
+//ifstream ifs(fname, ios_base::in | ios_base::binary);
+//if(!(ifs.is_open() && ifs.good()))
+//{
+//cerr << "ReadBinMatrix: Error cannot open file " << file << endl;
+//assert(ifs.is_open());
+//}
+//cout << "ReadBinMatrix: file opened" << file << endl;
+
+
+//}
+
+// binaryIO.cpp
+//void read_binMatrix(const string& file, vector<int> &cnt, vector<int> &col, vector<double> &ele)
+//{
+
+//ifstream ifs(file, ios_base::in | ios_base::binary);
+
+//if(!(ifs.is_open() && ifs.good()))
+//{
+//cerr << "ReadBinMatrix: Error cannot open file " << file << endl;
+//assert(ifs.is_open());
+//}
+//cout << "ReadBinMatrix: Opened file " << file << endl;
+
+//int _size;
+
+//ifs.read(reinterpret_cast<char*>(&_size), sizeof(int));   // old: ifs.read((char*)&_size, sizeof(int));
+//cnt.resize(_size);
+//cout << "ReadBinMatrix: cnt size: " << _size << endl;
+
+//ifs.read((char*)&_size, sizeof(int));
+//col.resize(_size);
+//cout << "ReadBinMatrix: col size: " << _size << endl;
+
+//ifs.read((char*)&_size, sizeof(int));
+//ele.resize(_size);
+//cout << "ReadBinMatrix: ele size: " << _size << endl;
+
+
+//ifs.read((char*)cnt.data(), cnt.size() * sizeof(int));
+//ifs.read((char*)col.data(), col.size() * sizeof(int));
+//ifs.read((char*)ele.data(), ele.size() * sizeof(double));
+
+//ifs.close();
+//cout << "ReadBinMatrix: Finished reading matrix.." << endl;
+
+//}
+
+
+std::vector<int> Mesh_2d_3_matlab::Index_DirichletNodes() const
+{
+    vector<int> idx(bedges);                              // copy
+
+    sort(idx.begin(), idx.end());                         // sort
+    idx.erase( unique(idx.begin(), idx.end()), idx.end() ); // remove duplicate data
+
+    return idx;
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- a/sheet5/5/geom.cpp:Zone.Identifier
+++ b/sheet5/5/geom.cpp:Zone.Identifier
--- a/sheet5/5/geom.h
+++ b/sheet5/5/geom.h
@ -0,0 +1,381 @@
+#ifndef GEOM_FILE
+#define GEOM_FILE
+#include <array>
+#include <functional>             // function; C++11
+#include <string>
+#include <vector>
+
+/**
+ * Basis class for finite element meshes.
+ */
+class Mesh
+{
+public:
+    /**
+     * Constructor initializing the members with default values.
+      *
+     * @param[in] ndim  space dimensions (dimension for coordinates)
+      * @param[in] nvert_e  number of vertices per element (dimension for connectivity)
+      * @param[in] ndof_e   degrees of freedom per element (= @p nvert_e for linear elements)
+     */
+    explicit Mesh(int ndim, int nvert_e = 0, int ndof_e = 0);
+
+    /**
+     * Destructor.
+     *
+     * See clang warning on
+     * <a href="https://stackoverflow.com/questions/28786473/clang-no-out-of-line-virtual-method-definitions-pure-abstract-c-class/40550578">weak-vtables</a>.
+     */
+    virtual ~Mesh();
+
+    /**
+     * Number of finite elements in (sub)domain.
+     * @return number of elements.
+     */
+    int Nelems() const
+    {
+        return _nelem;
+    }
+
+    /**
+     * Global number of vertices for each finite element.
+     * @return number of vertices per element.
+     */
+    int NverticesElements() const
+    {
+        return _nvert_e;
+    }
+
+    /**
+     * Global number of degrees of freedom (dof) for each finite element.
+     * @return degrees of freedom per element.
+     */
+    int NdofsElement() const
+    {
+        return _ndof_e;
+    }
+
+    /**
+     * Number of vertices in mesh.
+     * @return number of vertices.
+     */
+    int Nnodes() const
+    {
+        return _nnode;
+    }
+
+    /**
+     * Space dimension.
+     * @return number of dimensions.
+     */
+    int Ndims() const
+    {
+        return _ndim;
+    }
+
+    /**
+     * (Re-)Allocates memory for the element connectivity and redefines the appropriate dimensions.
+      *
+     * @param[in] nelem    number of elements
+      * @param[in] nvert_e  number of vertices per element
+     */
+    void Resize_Connectivity(int nelem, int nvert_e)
+    {
+        SetNelem(nelem);               // number of elements
+        SetNverticesElement(nvert_e);  // vertices per element
+        _ia.resize(nelem * nvert_e);
+    }
+
+    /**
+     * Read connectivity information (g1,g2,g3)_i.
+     * @return convectivity vector [nelems*ndofs].
+     */
+    const std::vector<int>  &GetConnectivity() const
+    {
+        return _ia;
+    }
+
+    /**
+     * Access/Change connectivity information (g1,g2,g3)_i.
+     * @return convectivity vector [nelems*ndofs].
+     */
+    std::vector<int>  &GetConnectivity()
+    {
+        return _ia;
+    }
+
+    /**
+     * (Re-)Allocates memory for the element connectivity and redefines the appropriate dimensions.
+      *
+     * @param[in] nnodes    number of nodes
+      * @param[in] ndim      space dimension
+     */
+    void Resize_Coords(int nnodes, int ndim)
+    {
+        SetNnode(nnodes);       // number of nodes
+        SetNdim(ndim);          // space dimension
+        _xc.resize(nnodes * ndim);
+    }
+
+    /**
+     * Read coordinates of vertices (x,y)_i.
+     * @return coordinates vector [nnodes*2].
+     */
+    const std::vector<double> &GetCoords() const
+    {
+        return _xc;
+    }
+
+    /**
+     * Access/Change coordinates of vertices (x,y)_i.
+     * @return coordinates vector [nnodes*2].
+     */
+    std::vector<double> &GetCoords()
+    {
+        return _xc;
+    }
+
+    /**
+     * Calculate values in vector @p v via function @p func(x,y)
+     * @param[in] v     vector
+      * @param[in] func  function of (x,y) returning a double value.
+     */
+    void SetValues(std::vector<double> &v, const std::function<double(double, double)> &func) const;
+
+    /**
+     * Prints the information for a finite element mesh
+     */
+    void Debug() const;
+
+    /**
+     * Determines the indices of those vertices with Dirichlet boundary conditions
+     * @return index vector.
+     */
+    virtual std::vector<int> Index_DirichletNodes() const = 0;
+
+    /**
+     * Write vector @p v toghether with its mesh information to an ASCii file @p fname.
+      *
+      * The data are written in C-style.
+      *
+      * @param[in] fname  file name
+      * @param[in] v      vector
+     */
+    void Write_ascii_matlab(std::string const &fname, std::vector<double> const &v) const;
+
+    /**
+     * Visualize @p v together with its mesh information via matlab or octave.
+      *
+      * Comment/uncomment those code lines in method Mesh:Visualize (geom.cpp)
+      * that are supported on your system.
+      *
+      * @param[in] v    vector
+      *
+      * @warning matlab files ascii_read_meshvector.m  visualize_results.m
+      *          must be in the executing directory.
+     */
+    void Visualize(std::vector<double> const &v) const;
+
+
+protected:
+    void SetNelem(int nelem)
+    {
+        _nelem = nelem;
+    }
+
+    void SetNverticesElement(int nvert)
+    {
+        _nvert_e = nvert;
+    }
+
+    void SetNdofsElement(int ndof)
+    {
+        _ndof_e = ndof;
+    }
+
+    void SetNnode(int nnode)
+    {
+        _nnode = nnode;
+    }
+
+    void SetNdim(int ndim)
+    {
+        _ndim = ndim;
+    }
+
+private:
+    int _nelem;         //!< number elements
+    int _nvert_e;       //!< number of vertices per element
+    int _ndof_e;        //!< degrees of freedom (d.o.f.) per element
+    int _nnode;         //!< number nodes/vertices
+    int _ndim;          //!< space dimension of the problem (1, 2, or 3)
+    std::vector<int> _ia;    //!< element connectivity
+    std::vector<double> _xc; //!< coordinates
+};
+
+/**
+ * 2D finite element mesh of the square consiting of linear triangular elements.
+ */
+class Mesh_2d_3_square: public Mesh
+{
+public:
+    /**
+     * Generates the f.e. mesh for the unit square.
+     *
+     * @param[in] nx    number of discretization intervals in x-direction
+     * @param[in] ny    number of discretization intervals in y-direction
+     * @param[in] myid  my MPI-rank / subdomain
+     * @param[in] procx number of ranks/subdomains in x-direction
+     * @param[in] procy number of processes in y-direction
+    */
+    Mesh_2d_3_square(int nx, int ny, int myid = 0, int procx = 1, int procy = 1);
+
+    /**
+     * Destructor
+     */
+    ~Mesh_2d_3_square() override
+    {}
+
+    /**
+     * Set solution vector based on a tensor product grid in the rectangle.
+     * @param[in] u solution vector
+     */
+    void SetU(std::vector<double> &u) const;
+
+    /**
+     * Set right hand side (rhs) vector on a tensor product grid in the rectangle.
+     * @param[in] f rhs vector
+     */
+    void SetF(std::vector<double> &f) const;
+
+    /**
+     * Determines the indices of those vertices with Dirichlet boundary conditions
+     * @return index vector.
+     */
+    std::vector<int> Index_DirichletNodes() const override;
+
+    /**
+      * Stores the values of vector @p u of (sub)domain into a file @p name for further processing in gnuplot.
+      * The file stores rowise the x- and y- coordinates together with the value from  @p u .
+      * The domain [@p xl, @p xr] x [@p yb, @p yt] is discretized into @p nx x @p ny intervals.
+      *
+      * @param[in] name basename of file name (file name will be extended by the rank number)
+      * @param[in] u    local vector
+      *
+      * @warning   Assumes tensor product grid in unit square; rowise numbered
+      *            (as generated in class constructor).
+      *            The output is provided for tensor product grid visualization
+      *            ( similar to Matlab-surf() ).
+      *
+      * @see Mesh_2d_3_square
+      */
+    void SaveVectorP(std::string const &name, std::vector<double> const &u) const;
+
+    // here will still need to implement in the class
+    //  GetBound(), AddBound()
+    //  or better a generalized way with indices and their appropriate ranks for MPI communication
+
+private:
+    /**
+      * Determines the coordinates of the dicretization nodes of the domain [@p xl, @p xr] x [@p yb, @p yt]
+      * which is discretized into @p nx x @p ny intervals.
+      *
+      * @param[in] ny   number of discretization intervals in y-direction
+      * @param[in] xl   x-coordinate of left boundary
+      * @param[in] xr   x-coordinate of right boundary
+      * @param[in] yb   y-coordinate of lower boundary
+      * @param[in] yt   y-coordinate of upper boundary
+      * @param[out] xc  coordinate vector of length 2n with x(2*k,2*k+1) as coodinates of node k
+      */
+
+    void GetCoordsInRectangle(int nx, int ny, double xl, double xr, double yb, double yt,
+                              double xc[]);
+    /**
+      * Determines the element connectivity of linear triangular elements of a FEM discretization
+      * of a rectangle using @p nx x @p ny equidistant intervals for discretization.
+      * @param[in] nx   number of discretization intervals in x-direction
+      * @param[in] ny   number of discretization intervals in y-direction
+      * @param[out] ia  element connectivity matrix with ia(3*s,3*s+1,3*s+2) as node numbers od element s
+      */
+    void GetConnectivityInRectangle(int nx, int ny, int ia[]);
+
+private:
+    int _myid;          //!< my MPI rank
+    int _procx;         //!< number of MPI ranks in x-direction
+    int _procy;         //!< number of MPI ranks in y-direction
+    std::array<int, 4> _neigh; //!< MPI ranks of neighbors (negative: no neighbor but b.c.)
+    int _color;         //!< red/black coloring (checker board) of subdomains
+
+    double _xl;         //!< x coordinate of lower left  corner of square
+    double _xr;         //!< x coordinate of lower right corner of square
+    double _yb;         //!< y coordinate or lower left  corner of square
+    double _yt;         //!< y coordinate of upper right corner of square
+    int    _nx;         //!< number of intervals in x-direction
+    int    _ny;         //!< number of intervals in y-direction
+};
+
+// #################### still some old code (--> MPI) ############################
+/**
+ * Copies the values of @p w corresponding to boundary @p ib
+ * onto vector s.  South (ib==1), East (ib==2), North (ib==3), West (ib==4).
+ * The vector @p s has to be long enough!!
+ * @param[in] ib    my local boundary
+ * @param[in] nx    number of discretization intervals in x-direction
+ * @param[in] ny    number of discretization intervals in y-direction
+ * @param[in] w     vector for all nodes of local discretization
+ * @param[out] s    short vector with values on boundary @p ib
+*/
+// GH_NOTE: Absicherung bei s !!
+void GetBound(int ib, int nx, int ny, double const w[], double s[]);
+
+
+/**
+ * Computes @p w := @p w + @p s  at the interface/boundary nodes on the
+ * boundary @p ib .  South (ib==1), East (ib==2), North (ib==3), West (ib==4)
+ * @param[in] ib    my local boundary
+ * @param[in] nx    number of discretization intervals in x-direction
+ * @param[in] ny    number of discretization intervals in y-direction
+ * @param[in,out] w vector for all nodes of local discretization
+ * @param[in] s     short vector with values on boundary @p ib
+*/
+void AddBound(int ib, int nx, int ny, double w[], double const s[]);
+
+// #################### Mesh from Matlab ############################
+/**
+ * 2D finite element mesh of the square consiting of linear triangular elements.
+ */
+class Mesh_2d_3_matlab: public Mesh
+{
+public:
+    /**
+     * Reads mesh data from a binary file.
+     *
+     * File format, see ascii_write_mesh.m
+     *
+     * @param[in] fname file name
+    */
+    explicit Mesh_2d_3_matlab(std::string const &fname);
+
+    /**
+     * Determines the indices of those vertices with Dirichlet boundary conditions.
+     * @return index vector.
+      *
+      * @warning All boundary nodes are considered as Dirchlet nodes.
+     */
+    std::vector<int> Index_DirichletNodes() const override;
+
+private:
+    /**
+     * Determines the indices of those vertices with Dirichlet boundary conditions
+     * @return index vector.
+     */
+    int Nnbedges() const
+    {
+        return static_cast<int>(bedges.size());
+    }
+
+    std::vector<int> bedges;     //!< boundary edges [nbedges][2] storing start/end vertex
+
+};
+
+#endif
--- a/sheet5/5/geom.h:Zone.Identifier
+++ b/sheet5/5/geom.h:Zone.Identifier
--- a/Show more
+++ b/Show more