diff --git a/Sheet5/check_env.h b/Sheet5/check_env.h new file mode 100644 index 0000000..41bd99d --- /dev/null +++ b/Sheet5/check_env.h @@ -0,0 +1,99 @@ +#pragma once + +#include +#ifdef _OPENMP +#include +#endif +#include + +//##################################### +// G.Haase +// See https://sourceforge.net/p/predef/wiki/Compilers/ +// http://www.cplusplus.com/doc/tutorial/preprocessor/ +// also: export OMP_DISPLAY_ENV=VERBOSE +//##################################### +/** Checks for compilers, its versions, threads etc. + * + @param[in] argc number of command line arguemnts + @param[in] argv command line arguments as array of C-strings +*/ +template +void check_env(T argc, char const *argv[]) +{ + std::cout << "\n#######################################################################\n"; + std::cout << "Code :"; + for (T k = 0; k < argc; ++k) std::cout << " " << argv[k]; + std::cout << std::endl; + + // compiler: https://sourceforge.net/p/predef/wiki/Compilers/ + std::cout << "Compiler: "; +#if defined __INTEL_COMPILER +#pragma message(" ########## INTEL ###############") + std::cout << "INTEL " << __INTEL_COMPILER; + // Ignore warnings for #pragma acc unrecognice +#pragma warning disable 161 + // Ignore warnings for #pragma omp unrecognice +#pragma warning disable 3180 + +#elif defined __PGI +#pragma message(" ########## PGI ###############") + std::cout << "PGI " << __PGIC__ << "." << __PGIC_MINOR__ << "." << __PGIC_PATCHLEVEL__; +#elif defined __clang__ +#pragma message(" ########## CLANG ###############") + std::cout << "CLANG " << __clang_major__ << "." << __clang_minor__ << "."; // << __clang_patchlevel__; +#elif defined __GNUC__ +#pragma message(" ########## Gnu ###############") + std::cout << "Gnu " << __GNUC__ << "." << __GNUC_MINOR__ << "." << __GNUC_PATCHLEVEL__; +#else +#pragma message(" ########## unknown Compiler ###############") + std::cout << "unknown"; +#endif + std::cout << " C++ standard: " << __cplusplus << std::endl; + + // Parallel environments + std::cout << "Parallel: "; +#if defined MPI_VERSION +#pragma message(" ########## MPI ###############") +#ifdef OPEN_MPI + std::cout << "OpenMPI "; +#else + std::cout << "MPI "; +#endif + std::cout << MPI_VERSION << "." << MPI_SUBVERSION << " "; +#endif + +#ifdef _OPENMP +//https://www.openmp.org/specifications/ +//https://stackoverflow.com/questions/1304363/how-to-check-the-version-of-openmp-on-linux + std::unordered_map const map{ + {200505, "2.5"}, {200805, "3.0"}, {201107, "3.1"}, {201307, "4.0"}, {201511, "4.5"}, {201611, "5.0"}, {201811, "5.0"}}; +#pragma message(" ########## OPENMP ###############") + //std::cout << _OPENMP; + std::cout << "OpenMP "; + try { + std::cout << map.at(_OPENMP); + } + catch (...) { + std::cout << _OPENMP; + } + #pragma omp parallel + { + #pragma omp master + { + const int nn = omp_get_num_threads(); // OpenMP + std::cout << " ---> " << nn << " Threads "; + } + #pragma omp barrier + } + +#endif +#ifdef _OPENACC +#pragma message(" ########## OPENACC ###############") + std::cout << "OpenACC "; +#endif + std::cout << std::endl; + std::cout << "Date : " << __DATE__ << " " << __TIME__; + std::cout << "\n#######################################################################\n"; +} +// HG + diff --git a/Sheet5/mylib.cpp b/Sheet5/mylib.cpp new file mode 100644 index 0000000..412d4a7 --- /dev/null +++ b/Sheet5/mylib.cpp @@ -0,0 +1,118 @@ +#include "mylib.h" +#include // assert() +#include +#include +#include // multiplies<>{} +#include +#include // iota() +#ifdef _OPENMP +#include +#endif +#include +using namespace std; + +double scalar(vector const &x, vector const &y) +{ + assert(x.size() == y.size()); // switch off via compile flag: -DNDEBUG + size_t const N = x.size(); + double sum = 0.0; +#pragma omp parallel for default(none) shared(x,y,N) reduction(+:sum) + for (size_t i = 0; i < N; ++i) + { + sum += x[i] * y[i]; + //sum += exp(x[i])*log(y[i]); + } + return sum; +} + +double scalar_trans(vector const &x, vector const &y) +{ + assert(x.size() == y.size()); // switch off via compile flag: -DNDEBUG + vector z(x.size()); + transform(cbegin(x),cend(x),cbegin(y),begin(z),std::multiplies<>{}); + double sum = 0.0; +#pragma omp parallel for default(none) shared(z) reduction(+:sum) + for (auto pi = cbegin(z); pi!=cend(z); ++pi) + { + sum += *pi; + } + return sum; +} + +double norm(vector const &x) +{ + size_t const N = x.size(); + double sum = 0.0; +#pragma omp parallel for default(none) shared(x,N) reduction(+:sum) + for (size_t i = 0; i < N; ++i) + { + sum += x[i]*x[i]; + } + return sum; +} + +// ------------------------------------------------------------------ +double scalar_manual(vector const &x, vector const &y) +{ + assert(x.size() == y.size()); + size_t const N = x.size(); + double sum = 0.0; + +#pragma omp parallel default(none) shared(x,y,N) reduction(+:sum) + { + int tid = omp_get_thread_num(); + int nth = omp_get_num_threads(); + // manual cyclic distribution + for (size_t i = static_cast(tid); i < N; i += static_cast(nth)) { + sum += x[i] * y[i]; + } + } + return sum; +} + +// ------------------------------------------------------------------ +vector reduction_vec(int n) +{ + vector vec(n); +#pragma omp parallel default(none) shared(cout) reduction(VecAdd:vec) + { + #pragma omp barrier + #pragma omp critical + cout << omp_get_thread_num() << " : " << vec.size() << endl; + #pragma omp barrier + iota( vec.begin(),vec.end(), omp_get_thread_num() ); + #pragma omp barrier + + } + return vec; +} + +// ------------------------------------------------------------------ +vector reduction_vec_append(int n) +{ + // determine number of threads that will be used + int nth = 1; +#pragma omp parallel + { + #pragma omp master + nth = omp_get_num_threads(); + } + + vector result; + result.resize(static_cast(n) * static_cast(nth)); + + // Each thread will fill its own contiguous block [tid*n, tid*n + n) +#pragma omp parallel default(none) shared(result,n) + { + int tid = omp_get_thread_num(); + // create local vector and initialize with a pattern (e.g., tid + k) + vector local(n); + iota(local.begin(), local.end(), tid); // local[k] = tid + k + + size_t offset = static_cast(tid) * static_cast(n); + for (int k = 0; k < n; ++k) { + result[offset + static_cast(k)] = local[static_cast(k)]; + } + } + return result; +} diff --git a/Sheet5/mylib.h b/Sheet5/mylib.h new file mode 100644 index 0000000..0a5ddd9 --- /dev/null +++ b/Sheet5/mylib.h @@ -0,0 +1,68 @@ +#pragma once +#include +#include // setw() +#include +#include +#include + +/** Inner product + @param[in] x vector + @param[in] y vector + @return resulting Euclidian inner product +*/ +double scalar(std::vector const &x, std::vector const &y); +double scalar_trans(std::vector const &x, std::vector const &y); + +/** Second inner product: use #pragma omp parallel (no "for" in pragma) + The work is split manually inside the parallel region (stride or chunk). +*/ +double scalar_manual(std::vector const &x, std::vector const &y); + +/** l2-norm + @param[in] x vector + @return resulting Euclidian norm +*/ +double norm(std::vector const &x); + +/** Vector @p b adds its elements to vector @p a . + @param[in] a vector + @param[in] b vector + @return a+=b componentwise +*/ +template +std::vector &operator+=(std::vector &a, std::vector const &b) +{ + assert(a.size()==b.size()); + for (size_t k = 0; k < a.size(); ++k) { + a[k] += b[k]; + } + return a; +} + +// Declare the reduction operation in OpenMP for an STL-vector (existing) +#pragma omp declare reduction(VecAdd : std::vector : omp_out += omp_in) \ + initializer (omp_priv=omp_orig) + +/** Test for vector reduction. + * existing: converts thread-private vectors into a componentwise sum + * @param[in] n size of global/private vector + * @return resulting global vector. +*/ +std::vector reduction_vec(int n); + +/** New: append per-thread vectors into a single big vector. + * The result will have size n * numThreads, where each thread contributes a contiguous block. + */ +std::vector reduction_vec_append(int n); + +/** Output of a vector. + @param[in,out] s output stream + @param[in] x vector + @return modified output stream +*/ +template +std::ostream &operator<<(std::ostream &s, std::vector const &x) +{ + for (auto const &v : x) s << std::setw(4) << v << " "; + return s; +}