Supporting files for Ex1

2025-12-02 09:36:03 +01:00 · 2025-12-02 09:36:03 +01:00 · d245f837d9
commit d245f837d9
parent f6f8485979
3 changed files with 285 additions and 0 deletions
--- a/Sheet5/check_env.h
+++ b/Sheet5/check_env.h
@ -0,0 +1,99 @@
+#pragma once
+
+#include <iostream>
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+#include <unordered_map>
+
+//#####################################
+// G.Haase
+// See https://sourceforge.net/p/predef/wiki/Compilers/
+//     http://www.cplusplus.com/doc/tutorial/preprocessor/
+//  also:  export OMP_DISPLAY_ENV=VERBOSE
+//#####################################
+/** 	Checks for compilers, its versions, threads etc.
+ * 
+	@param[in] argc	number of command line arguemnts
+	@param[in] argv	command line arguments as array of C-strings
+*/
+template <class T>
+void check_env(T argc, char const *argv[])
+{
+    std::cout << "\n#######################################################################\n";
+    std::cout << "Code    :";
+    for (T k = 0; k < argc; ++k) std::cout << "  " << argv[k];
+    std::cout << std::endl;
+
+    // compiler:      https://sourceforge.net/p/predef/wiki/Compilers/
+    std::cout <<    "Compiler:  ";
+#if defined __INTEL_COMPILER
+#pragma message(" ##########  INTEL  ###############")
+    std::cout << "INTEL " << __INTEL_COMPILER;
+    // Ignore warnings for #pragma acc   unrecognice
+#pragma warning disable 161
+    // Ignore warnings for #pragma omp   unrecognice
+#pragma warning disable 3180
+
+#elif defined __PGI
+#pragma message(" ##########  PGI    ###############")
+    std::cout << "PGI " << __PGIC__ << "." << __PGIC_MINOR__ << "." << __PGIC_PATCHLEVEL__;
+#elif defined  __clang__
+#pragma message(" ##########  CLANG    ###############")
+    std::cout << "CLANG " << __clang_major__ << "." << __clang_minor__ << "."; // << __clang_patchlevel__;
+#elif defined __GNUC__
+#pragma message(" ##########  Gnu    ###############")
+    std::cout << "Gnu " <<  __GNUC__  << "." << __GNUC_MINOR__ << "." << __GNUC_PATCHLEVEL__;
+#else
+#pragma message(" ##########  unknown Compiler   ###############")
+    std::cout << "unknown";
+#endif
+    std::cout << "  C++ standard: " << __cplusplus << std::endl;
+
+    // Parallel environments
+    std::cout <<    "Parallel:  ";
+#if defined MPI_VERSION
+#pragma message(" ##########  MPI    ###############")
+#ifdef OPEN_MPI
+    std::cout << "OpenMPI ";
+#else
+    std::cout << "MPI ";
+#endif
+    std::cout << MPI_VERSION << "." << MPI_SUBVERSION << "   ";
+#endif
+
+#ifdef _OPENMP
+//https://www.openmp.org/specifications/
+//https://stackoverflow.com/questions/1304363/how-to-check-the-version-of-openmp-on-linux
+    std::unordered_map<unsigned, std::string> const map{
+        {200505, "2.5"}, {200805, "3.0"}, {201107, "3.1"}, {201307, "4.0"}, {201511, "4.5"}, {201611, "5.0"}, {201811, "5.0"}};
+#pragma message(" ##########  OPENMP    ###############")
+    //std::cout << _OPENMP;
+    std::cout << "OpenMP ";
+    try {
+        std::cout << map.at(_OPENMP);
+    }
+    catch (...) {
+        std::cout << _OPENMP;
+    }
+    #pragma omp parallel
+    {
+        #pragma omp master
+        {
+            const int nn = omp_get_num_threads();          // OpenMP
+            std::cout << " ---> " <<  nn << " Threads   ";
+        }
+        #pragma omp barrier
+    }
+
+#endif
+#ifdef _OPENACC
+#pragma message(" ##########  OPENACC    ###############")
+    std::cout << "OpenACC   ";
+#endif
+    std::cout << std::endl;
+    std::cout << "Date    :  " << __DATE__ << "  " << __TIME__;
+    std::cout << "\n#######################################################################\n";
+}
+// HG
+
--- a/Sheet5/mylib.cpp
+++ b/Sheet5/mylib.cpp
@ -0,0 +1,118 @@
+#include "mylib.h"
+#include <cassert>       // assert()
+#include <cmath>
+#include <iostream>
+#include <functional>    // multiplies<>{}
+#include <list>
+#include <numeric>       // iota()
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+#include <vector>
+using namespace std;
+
+double scalar(vector<double> const &x, vector<double> const &y)
+{
+    assert(x.size() == y.size()); // switch off via compile flag: -DNDEBUG
+    size_t const N = x.size();
+    double sum = 0.0;
+#pragma omp parallel for default(none) shared(x,y,N) reduction(+:sum)
+    for (size_t i = 0; i < N; ++i)
+    {
+        sum += x[i] * y[i];
+        //sum += exp(x[i])*log(y[i]);
+    }
+    return sum;
+}
+
+double scalar_trans(vector<double> const &x, vector<double> const &y)
+{
+    assert(x.size() == y.size()); // switch off via compile flag: -DNDEBUG
+    vector<double> z(x.size());
+    transform(cbegin(x),cend(x),cbegin(y),begin(z),std::multiplies<>{});
+    double sum = 0.0;
+#pragma omp parallel for default(none) shared(z) reduction(+:sum)
+    for (auto pi = cbegin(z); pi!=cend(z); ++pi)
+    {
+        sum += *pi;
+    }
+    return sum;
+}
+
+double norm(vector<double> const &x)
+{
+    size_t const N = x.size();
+    double sum = 0.0;
+#pragma omp parallel for default(none) shared(x,N) reduction(+:sum)
+    for (size_t i = 0; i < N; ++i)
+    {
+        sum += x[i]*x[i];
+    }
+    return sum;
+}
+
+// ------------------------------------------------------------------
+double scalar_manual(vector<double> const &x, vector<double> const &y)
+{
+    assert(x.size() == y.size());
+    size_t const N = x.size();
+    double sum = 0.0;
+
+#pragma omp parallel default(none) shared(x,y,N) reduction(+:sum)
+    {
+        int tid = omp_get_thread_num();
+        int nth = omp_get_num_threads();
+        // manual cyclic distribution
+        for (size_t i = static_cast<size_t>(tid); i < N; i += static_cast<size_t>(nth)) {
+            sum += x[i] * y[i];
+        }
+    }
+    return sum;
+}
+
+// ------------------------------------------------------------------
+vector<int> reduction_vec(int n)
+{ 
+    vector<int> vec(n);
+#pragma omp parallel default(none) shared(cout) reduction(VecAdd:vec)
+    {
+        #pragma omp barrier
+        #pragma omp critical
+        cout << omp_get_thread_num() << " : " << vec.size() << endl;
+        #pragma omp barrier
+        iota( vec.begin(),vec.end(), omp_get_thread_num() );
+        #pragma omp barrier
+
+    }
+    return vec;
+}
+
+// ------------------------------------------------------------------
+vector<int> reduction_vec_append(int n)
+{
+    // determine number of threads that will be used
+    int nth = 1;
+#pragma omp parallel
+    {
+        #pragma omp master
+        nth = omp_get_num_threads();
+    }
+
+    vector<int> result;
+    result.resize(static_cast<size_t>(n) * static_cast<size_t>(nth));
+
+    // Each thread will fill its own contiguous block [tid*n, tid*n + n)
+#pragma omp parallel default(none) shared(result,n)
+    {
+        int tid = omp_get_thread_num();
+        // create local vector and initialize with a pattern (e.g., tid + k)
+        vector<int> local(n);
+        iota(local.begin(), local.end(), tid); // local[k] = tid + k
+
+        size_t offset = static_cast<size_t>(tid) * static_cast<size_t>(n);
+        for (int k = 0; k < n; ++k) {
+            result[offset + static_cast<size_t>(k)] = local[static_cast<size_t>(k)];
+        }
+    }
+    return result;
+}
--- a/Sheet5/mylib.h
+++ b/Sheet5/mylib.h
@ -0,0 +1,68 @@
+#pragma once
+#include <cassert>
+#include <iomanip>             // setw()
+#include <iostream>
+#include <omp.h>
+#include <vector>
+
+/**     Inner product
+    @param[in] x    vector
+    @param[in] y    vector
+    @return         resulting Euclidian inner product <x,y>
+*/
+double scalar(std::vector<double> const &x, std::vector<double> const &y);
+double scalar_trans(std::vector<double> const &x, std::vector<double> const &y);
+
+/**  Second inner product: use #pragma omp parallel (no "for" in pragma)
+    The work is split manually inside the parallel region (stride or chunk).
+*/
+double scalar_manual(std::vector<double> const &x, std::vector<double> const &y);
+
+/**     l2-norm
+    @param[in] x    vector
+    @return         resulting Euclidian norm
+*/
+double norm(std::vector<double> const &x);
+
+/**      Vector @p b adds its elements to vector @p a .
+    @param[in] a    vector
+    @param[in] b    vector
+    @return          a+=b componentwise
+*/
+template<class T>
+std::vector<T> &operator+=(std::vector<T> &a, std::vector<T> const &b)
+{
+    assert(a.size()==b.size());
+    for (size_t k = 0; k < a.size(); ++k) {
+        a[k] += b[k];
+    }
+    return a;
+}
+
+// Declare the reduction operation in OpenMP for an STL-vector (existing)
+#pragma omp declare reduction(VecAdd : std::vector<int>  : omp_out += omp_in) \
+ initializer (omp_priv=omp_orig)
+
+/**     Test for vector reduction.
+ *  existing: converts thread-private vectors into a componentwise sum
+ *  @param[in] n  size of global/private vector
+ *  @return  resulting global vector.
+*/
+std::vector<int> reduction_vec(int n);
+
+/**     New: append per-thread vectors into a single big vector.
+ *  The result will have size n * numThreads, where each thread contributes a contiguous block.
+ */
+std::vector<int> reduction_vec_append(int n);
+
+/**     Output of a vector.
+    @param[in,out] s    output stream
+    @param[in]     x    vector
+    @return        modified output stream
+*/
+template <class T>
+std::ostream &operator<<(std::ostream &s, std::vector<T> const &x)
+{
+    for (auto const &v : x)  s << std::setw(4) << v << "  ";
+    return s;
+}