Upload files to "Sheet5"
Supporting files for Ex1
This commit is contained in:
parent
305e9d8b8c
commit
49aa5da807
3 changed files with 285 additions and 0 deletions
99
Sheet5/check_env.h
Normal file
99
Sheet5/check_env.h
Normal file
|
|
@ -0,0 +1,99 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#include <omp.h>
|
||||||
|
#endif
|
||||||
|
#include <unordered_map>
|
||||||
|
|
||||||
|
//#####################################
|
||||||
|
// G.Haase
|
||||||
|
// See https://sourceforge.net/p/predef/wiki/Compilers/
|
||||||
|
// http://www.cplusplus.com/doc/tutorial/preprocessor/
|
||||||
|
// also: export OMP_DISPLAY_ENV=VERBOSE
|
||||||
|
//#####################################
|
||||||
|
/** Reports the build environment on standard output: command line, compiler
 *  and its version, value of __cplusplus, the parallel programming models
 *  enabled at compile time (MPI, OpenMP incl. thread count, OpenACC) and the
 *  compilation date/time.
 *
 *  The compiler is identified via predefined macros, see
 *  https://sourceforge.net/p/predef/wiki/Compilers/
 *
 *  @tparam    T    integer type of the argument counter
 *  @param[in] argc number of command line arguments
 *  @param[in] argv command line arguments as array of C-strings
 */
template <class T>
void check_env(T argc, char const *argv[])
{
    std::cout << "\n#######################################################################\n";
    std::cout << "Code :";
    for (T k = 0; k < argc; ++k) std::cout << " " << argv[k];
    std::cout << std::endl;

    // compiler: https://sourceforge.net/p/predef/wiki/Compilers/
    std::cout << "Compiler: ";
#if defined __INTEL_COMPILER
#pragma message(" ########## INTEL ###############")
    std::cout << "INTEL " << __INTEL_COMPILER;
    // Ignore warnings for unrecognized #pragma acc
#pragma warning disable 161
    // Ignore warnings for unrecognized #pragma omp
#pragma warning disable 3180

#elif defined __PGI
#pragma message(" ########## PGI ###############")
    std::cout << "PGI " << __PGIC__ << "." << __PGIC_MINOR__ << "." << __PGIC_PATCHLEVEL__;
#elif defined __clang__
#pragma message(" ########## CLANG ###############")
    std::cout << "CLANG " << __clang_major__ << "." << __clang_minor__;
    // Append the patch level only if the compiler provides it; this avoids
    // the dangling "." that was printed after the minor version before.
#if defined __clang_patchlevel__
    std::cout << "." << __clang_patchlevel__;
#endif
#elif defined __GNUC__
#pragma message(" ########## Gnu ###############")
    std::cout << "Gnu " << __GNUC__ << "." << __GNUC_MINOR__ << "." << __GNUC_PATCHLEVEL__;
#else
#pragma message(" ########## unknown Compiler ###############")
    std::cout << "unknown";
#endif
    std::cout << " C++ standard: " << __cplusplus << std::endl;

    // Parallel environments
    std::cout << "Parallel: ";
#if defined MPI_VERSION
#pragma message(" ########## MPI ###############")
#ifdef OPEN_MPI
    std::cout << "OpenMPI ";
#else
    std::cout << "MPI ";
#endif
    std::cout << MPI_VERSION << "." << MPI_SUBVERSION << " ";
#endif

#ifdef _OPENMP
#pragma message(" ########## OPENMP ###############")
    // _OPENMP holds the release date (yyyymm) of the supported specification.
    // https://www.openmp.org/specifications/
    // https://stackoverflow.com/questions/1304363/how-to-check-the-version-of-openmp-on-linux
    std::unordered_map<unsigned, char const *> const omp_versions{
        {200505, "2.5"}, {200805, "3.0"}, {201107, "3.1"}, {201307, "4.0"},
        {201511, "4.5"}, {201611, "5.0"}, {201811, "5.0"}, {202011, "5.1"},
        {202111, "5.2"}};
    std::cout << "OpenMP ";
    auto const known = omp_versions.find(_OPENMP);
    if (known != omp_versions.end()) {
        std::cout << known->second;
    }
    else {
        std::cout << _OPENMP;              // release not in the table: print the raw date
    }
#pragma omp parallel
    {
        // Only one thread reports the size of the team.
#pragma omp master
        {
            const int nn = omp_get_num_threads();
            std::cout << " ---> " << nn << " Threads ";
        }
    }
#endif
#ifdef _OPENACC
#pragma message(" ########## OPENACC ###############")
    std::cout << "OpenACC ";
#endif
    std::cout << std::endl;
    std::cout << "Date : " << __DATE__ << " " << __TIME__;
    std::cout << "\n#######################################################################\n";
}
|
||||||
|
// HG
|
||||||
|
|
||||||
118
Sheet5/mylib.cpp
Normal file
118
Sheet5/mylib.cpp
Normal file
|
|
@ -0,0 +1,118 @@
|
||||||
|
#include "mylib.h"
|
||||||
|
#include <cassert> // assert()
|
||||||
|
#include <cmath>
|
||||||
|
#include <iostream>
|
||||||
|
#include <functional> // multiplies<>{}
|
||||||
|
#include <list>
|
||||||
|
#include <numeric> // iota()
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#include <omp.h>
|
||||||
|
#endif
|
||||||
|
#include <vector>
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
/** Euclidean inner product <x,y>.
 *
 *  The products x[k]*y[k] are summed by an OpenMP reduction when the code is
 *  compiled with OpenMP; otherwise the loop runs sequentially.
 *
 *  @param[in] x vector
 *  @param[in] y vector of the same size as @p x (checked by assert only)
 *  @return sum over all k of x[k]*y[k]
 */
double scalar(vector<double> const &x, vector<double> const &y)
{
    assert(x.size() == y.size());          // disabled by compile flag -DNDEBUG
    size_t const len = x.size();
    double acc = 0.0;
#pragma omp parallel for default(none) shared(x,y,len) reduction(+:acc)
    for (size_t k = 0; k < len; ++k)
    {
        acc += x[k] * y[k];
    }
    return acc;
}
|
||||||
|
|
||||||
|
/** Euclidean inner product <x,y> in two steps: the componentwise products
 *  are stored in a temporary vector, which is then summed up by an
 *  iterator-based loop (OpenMP reduction when compiled with OpenMP).
 *
 *  @param[in] x vector
 *  @param[in] y vector of the same size as @p x (checked by assert only)
 *  @return sum over all k of x[k]*y[k]
 */
double scalar_trans(vector<double> const &x, vector<double> const &y)
{
    assert(x.size() == y.size());          // disabled by compile flag -DNDEBUG

    // Step 1: prod[k] = x[k]*y[k]
    vector<double> prod(x.size());
    transform(x.cbegin(), x.cend(), y.cbegin(), prod.begin(),
              [](double a, double b) { return a * b; });

    // Step 2: sum of all products
    double total = 0.0;
#pragma omp parallel for default(none) shared(prod) reduction(+:total)
    for (auto it = prod.cbegin(); it < prod.cend(); ++it)
    {
        total += *it;
    }
    return total;
}
|
||||||
|
|
||||||
|
/** Euclidean (l2) norm of a vector.
 *
 *  The squares are summed by an OpenMP reduction when the code is compiled
 *  with OpenMP; otherwise the loop runs sequentially.
 *
 *  @param[in] x vector
 *  @return square root of the sum of all x[k]*x[k]; 0 for an empty vector
 */
double norm(vector<double> const &x)
{
    size_t const N = x.size();
    double sum = 0.0;                      // accumulates the squares
#pragma omp parallel for default(none) shared(x,N) reduction(+:sum)
    for (size_t i = 0; i < N; ++i)
    {
        sum += x[i]*x[i];
    }
    // Without the root the function would return the squared norm.
    return std::sqrt(sum);
}
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------
|
||||||
|
/** Euclidean inner product <x,y> with manual work sharing: inside a plain
 *  parallel region every thread handles the indices tid, tid+nth, tid+2*nth, ...
 *  and the partial sums are combined by the reduction clause.
 *
 *  Without OpenMP the function falls back to one "thread" that visits all
 *  indices, so the file also builds and links in a serial configuration.
 *
 *  @param[in] x vector
 *  @param[in] y vector of the same size as @p x (checked by assert only)
 *  @return sum over all k of x[k]*y[k]
 */
double scalar_manual(vector<double> const &x, vector<double> const &y)
{
    assert(x.size() == y.size());
    size_t const N = x.size();
    double sum = 0.0;

#pragma omp parallel default(none) shared(x,y,N) reduction(+:sum)
    {
#ifdef _OPENMP
        size_t const tid = static_cast<size_t>(omp_get_thread_num());
        size_t const nth = static_cast<size_t>(omp_get_num_threads());
#else
        size_t const tid = 0;              // serial build: single worker ...
        size_t const nth = 1;              // ... with stride one
#endif
        // manual cyclic distribution
        for (size_t i = tid; i < N; i += nth) {
            sum += x[i] * y[i];
        }
    }
    return sum;
}
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------
|
||||||
|
/** Test of the user-defined vector reduction VecAdd.
 *
 *  Every thread prints its id together with the size of its private vector,
 *  fills the private vector with tid, tid+1, tid+2, ... and the reduction
 *  adds all private vectors componentwise.
 *
 *  Without OpenMP the function behaves like a team of one thread (tid = 0).
 *
 *  @param[in] n size of the global/private vector, must not be negative
 *  @return componentwise sum of all thread-private vectors
 */
vector<int> reduction_vec(int n)
{
    assert(n >= 0);                        // a negative size cannot be allocated
    vector<int> vec(n);
#pragma omp parallel default(none) shared(cout) reduction(VecAdd:vec)
    {
#ifdef _OPENMP
        int const tid = omp_get_thread_num();
#else
        int const tid = 0;                 // serial build: the only "thread"
#endif
#pragma omp barrier
        // one thread at a time, otherwise the output lines get interleaved
#pragma omp critical
        cout << tid << " : " << vec.size() << endl;
#pragma omp barrier
        iota( vec.begin(),vec.end(), tid );
#pragma omp barrier
    }
    return vec;
}
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------
|
||||||
|
/** Appends per-thread data into one big vector.
 *
 *  Thread tid fills the contiguous block [tid*n, tid*n + n) of the result
 *  with the values tid, tid+1, ..., tid+n-1.
 *
 *  The result is allocated for the largest team the runtime may create and
 *  shrunk to the team that actually ran. A separate parallel region that
 *  only measures the team size is therefore not needed, and a differing team
 *  size in the working region cannot cause an out-of-bounds write.
 *  Without OpenMP the function behaves like a team of one thread.
 *
 *  @param[in] n number of entries contributed by each thread, must not be negative
 *  @return vector of size n * numThreads
 */
vector<int> reduction_vec_append(int n)
{
    assert(n >= 0);
    size_t const block = static_cast<size_t>(n);

#ifdef _OPENMP
    // upper bound for the team size of the parallel region below
    size_t const max_threads = static_cast<size_t>(omp_get_max_threads());
#else
    size_t const max_threads = 1;
#endif
    // allocated outside the parallel region so that an allocation failure
    // is reported by an ordinary exception
    vector<int> result(block * max_threads);
    size_t team_size = 1;

#pragma omp parallel default(none) shared(result,block,team_size)
    {
#ifdef _OPENMP
        int const tid = omp_get_thread_num();
#pragma omp master
        team_size = static_cast<size_t>(omp_get_num_threads());
#else
        int const tid = 0;                 // serial build: the only "thread"
#endif
        // each thread writes only its own block: result[tid*n + k] = tid + k
        size_t const offset = static_cast<size_t>(tid) * block;
        for (size_t k = 0; k < block; ++k) {
            result[offset + k] = tid + static_cast<int>(k);
        }
    }

    // drop the blocks of threads that were not part of the team
    result.resize(block * team_size);
    return result;
}
|
||||||
68
Sheet5/mylib.h
Normal file
68
Sheet5/mylib.h
Normal file
|
|
@ -0,0 +1,68 @@
|
||||||
|
#pragma once
|
||||||
|
#include <cassert>
|
||||||
|
#include <iomanip> // setw()
|
||||||
|
#include <iostream>
|
||||||
|
#include <omp.h>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
/** Inner product
|
||||||
|
@param[in] x vector
|
||||||
|
@param[in] y vector
|
||||||
|
@return resulting Euclidean inner product <x,y>
|
||||||
|
*/
|
||||||
|
double scalar(std::vector<double> const &x, std::vector<double> const &y);
|
||||||
|
double scalar_trans(std::vector<double> const &x, std::vector<double> const &y);
|
||||||
|
|
||||||
|
/** Second inner product: use #pragma omp parallel (no "for" in pragma)
|
||||||
|
The work is split manually inside the parallel region (stride or chunk).
|
||||||
|
*/
|
||||||
|
double scalar_manual(std::vector<double> const &x, std::vector<double> const &y);
|
||||||
|
|
||||||
|
/** l2-norm
|
||||||
|
@param[in] x vector
|
||||||
|
@return resulting Euclidean norm
|
||||||
|
*/
|
||||||
|
double norm(std::vector<double> const &x);
|
||||||
|
|
||||||
|
/** Componentwise in-place addition: every element of @p b is added to the
 *  element of @p a at the same position.
 *
 *  @param[in,out] a vector receiving the sum
 *  @param[in]     b vector to add; same size as @p a (checked by assert only)
 *  @return reference to the updated vector @p a
 */
template<class T>
std::vector<T> &operator+=(std::vector<T> &a, std::vector<T> const &b)
{
    assert(a.size()==b.size());
    auto src = b.cbegin();                 // walks through b in step with a
    for (auto &dst : a) {
        dst += *src;
        ++src;
    }
    return a;
}
|
||||||
|
|
||||||
|
// Declare the reduction operation VecAdd in OpenMP for an STL-vector of int:
// partial results are combined componentwise by operator+= for vectors.
// Every thread-private copy starts as a zero vector with the size of the
// original vector, i.e. with the neutral element of the addition. Starting
// from a copy of the original instead would add the original's values once
// per thread in addition to the final combination.
#pragma omp declare reduction(VecAdd : std::vector<int> : omp_out += omp_in) \
    initializer (omp_priv = std::vector<int>(omp_orig.size()))
|
||||||
|
|
||||||
|
/** Test for vector reduction.
|
||||||
|
* existing: converts thread-private vectors into a componentwise sum
|
||||||
|
* @param[in] n size of global/private vector
|
||||||
|
* @return resulting global vector.
|
||||||
|
*/
|
||||||
|
std::vector<int> reduction_vec(int n);
|
||||||
|
|
||||||
|
/** New: append per-thread vectors into a single big vector.
|
||||||
|
* The result will have size n * numThreads, where each thread contributes a contiguous block.
|
||||||
|
*/
|
||||||
|
std::vector<int> reduction_vec_append(int n);
|
||||||
|
|
||||||
|
/** Writes all elements of a vector to an output stream; each element is
 *  printed with a field width of 4 and followed by one blank.
 *
 *  @param[in,out] s output stream
 *  @param[in]     x vector
 *  @return modified output stream
 */
template <class T>
std::ostream &operator<<(std::ostream &s, std::vector<T> const &x)
{
    for (auto it = x.cbegin(); it != x.cend(); ++it) {
        s << std::setw(4) << *it << " ";
    }
    return s;
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue