133 lines
3.2 KiB
C++
133 lines
3.2 KiB
C++
#include "mylib.h"

#include <algorithm>  // transform()
#include <cassert>    // assert()
#include <cmath>
#include <cstddef>    // size_t
#include <functional> // multiplies<>{}
#include <iostream>
#include <list>
#include <numeric>    // iota()
#include <vector>

#ifdef _OPENMP
#include <omp.h>
#endif
|
|
using namespace std;
|
|
|
|
/// @brief Inner product of two vectors: sum over i of x[i]*y[i].
///
/// The loop is an OpenMP work-sharing loop with a '+' reduction on the result.
/// When the function is called from inside an active parallel region, the
/// `if` clause evaluates to false and the loop is executed by the calling
/// thread alone, so no nested thread team is started.
/// Without OpenMP the pragma (including the call in its `if` clause) is
/// ignored and the loop runs sequentially.
///
/// @param x first vector
/// @param y second vector; must have the same size as @p x (checked by assert)
/// @return  the accumulated sum of the element-wise products (0.0 for empty input)
double scalar(std::vector<double> const &x, std::vector<double> const &y)
{
    assert(x.size() == y.size()); // switch off via compile flag: -DNDEBUG
    std::size_t const N = x.size();
    double sum = 0.0;

    // omp_in_parallel() appears only inside the pragma, so a build without
    // OpenMP (where <omp.h> is not included) never sees that name.
#pragma omp parallel for if(!omp_in_parallel()) default(none) shared(x,y,N) reduction(+:sum) schedule(runtime)
    for (std::size_t i = 0; i < N; ++i)
    {
        sum += x[i] * y[i];
    }
    return sum;
}
|
|
|
|
|
|
/// @brief Inner product of two vectors with a hand-written work distribution.
///
/// Opens an OpenMP parallel region and lets every thread sum one contiguous
/// chunk of the index range; the per-thread partial sums are combined by the
/// '+' reduction. Each thread also prints the team size to std::cout.
/// Without OpenMP the function behaves like a team of exactly one thread.
///
/// @param x first vector
/// @param y second vector; must have the same size as @p x (checked by assert)
/// @return  the accumulated sum of the element-wise products (0.0 for empty input)
double scalar_parrallel_env(std::vector<double> const &x, std::vector<double> const &y)
{
    using std::cout; // lets the data-sharing clause below name the stream as a plain identifier

    assert(x.size() == y.size()); // switch off via compile flag: -DNDEBUG
    std::size_t const N = x.size();
    double sum = 0.0;

#pragma omp parallel default(none) shared(x,y,N,cout) reduction(+:sum)
    {
#ifdef _OPENMP
        std::size_t const tid = static_cast<std::size_t>(omp_get_thread_num());
        std::size_t const threadCount = static_cast<std::size_t>(omp_get_num_threads());
#else
        // Serial build: <omp.h> is not available, act as a team of one.
        std::size_t const tid = 0;
        std::size_t const threadCount = 1;
#endif

        // Serialize the output so lines of different threads do not interleave.
#pragma omp critical
        cout << threadCount << std::endl;

        // Thread tid handles [tid*N/T, (tid+1)*N/T). Consecutive chunks share
        // their boundary, so the chunks tile [0, N) without gaps or overlap.
        std::size_t const first = tid * N / threadCount;
        std::size_t const last = (tid + 1) * N / threadCount;
        for (std::size_t i = first; i < last; ++i)
        {
            sum += x[i] * y[i];
        }
    }

    return sum;
}
|
|
|
|
/// @brief Sum of the squared entries of a vector.
///
/// The accumulation is an OpenMP work-sharing loop with a '+' reduction.
/// NOTE(review): the value returned is the sum of squares itself; no square
/// root is applied here. Confirm against the callers whether the squared
/// norm is what they expect.
///
/// @param x input vector
/// @return  sum over i of x[i]*x[i] (0.0 for an empty vector)
double norm(std::vector<double> const &x)
{
    auto const count = x.size();
    double squares = 0.0;

#pragma omp parallel for default(none) shared(x,count) reduction(+:squares)
    for (std::size_t k = 0; k < count; ++k)
    {
        double const entry = x[k];
        squares += entry * entry;
    }

    return squares;
}
|
|
|
|
|
|
|
|
/// @brief Demonstrates an OpenMP reduction on a whole std::vector<int>.
///
/// A vector of @p n elements is created and handed to a parallel region as a
/// reduction variable using the user-declared reduction `VecAdd`. Inside the
/// region each thread prints its thread number and the size of its private
/// copy, then fills that copy with consecutive integers starting at its
/// thread number.
/// NOTE(review): `VecAdd` is not declared in this file (presumably in
/// mylib.h); how the private copies are initialized and combined depends on
/// that declaration.
/// Without OpenMP the pragmas are ignored and the function returns
/// {0, 1, ..., n-1}.
///
/// @param n number of elements of the vector
/// @return  the reduced vector
std::vector<int> reduction_vec(int n)
{
    using std::cout; // lets the data-sharing clause below name the stream as a plain identifier

    std::vector<int> vec(n);
#pragma omp parallel default(none) shared(cout) reduction(VecAdd:vec)
    {
#ifdef _OPENMP
        int const tid = omp_get_thread_num();
#else
        int const tid = 0; // serial build: <omp.h> is not available
#endif

#pragma omp barrier
        // One thread at a time, so the output lines do not interleave.
#pragma omp critical
        cout << tid << " : " << vec.size() << std::endl;
#pragma omp barrier
        std::iota(vec.begin(), vec.end(), tid);
#pragma omp barrier
    }
    return vec;
}
|
|
|
|
/// @brief Demonstrates an OpenMP reduction that collects per-thread vectors.
///
/// An initially empty vector is handed to a parallel region as a reduction
/// variable using the user-declared reduction `VecAppend`. Each thread builds
/// a vector of @p n consecutive integers starting at its thread number and
/// stores it in its private copy of the reduction variable.
/// NOTE(review): `VecAppend` is not declared in this file (presumably in
/// mylib.h); the order and manner in which the private copies are merged
/// depend on that declaration.
/// Without OpenMP the pragma is ignored and the function returns
/// {0, 1, ..., n-1}.
///
/// @param n number of elements generated per thread
/// @return  the reduced vector
std::vector<int> reduction_vec_append(int n)
{
    std::vector<int> vec;
#pragma omp parallel default(none) shared(n) reduction(VecAppend:vec)
    {
#ifdef _OPENMP
        int const tid = omp_get_thread_num();
#else
        int const tid = 0; // serial build: <omp.h> is not available
#endif

        std::vector<int> local(n);
        std::iota(local.begin(), local.end(), tid);

        // Hand the buffer over instead of copying it; `local` is discarded
        // at the end of the region anyway.
        vec.swap(local);
    }

    return vec;
}
|
|
|
|
|
|
|
|
/// @brief Inner product of two vectors via std::transform and an iterator loop.
///
/// The element-wise products are first stored in a temporary vector, which
/// is then summed by an OpenMP work-sharing loop over iterators with a '+'
/// reduction.
///
/// @param x first vector
/// @param y second vector; must have the same size as @p x (checked by assert)
/// @return  the accumulated sum of the element-wise products (0.0 for empty input)
double scalar_trans(std::vector<double> const &x, std::vector<double> const &y)
{
    assert(x.size() == y.size()); // switch off via compile flag: -DNDEBUG
    std::vector<double> z(x.size());
    // A std::list<double> would not work here: a parallel for-loop on its
    // iterators is not possible (missing 'operator-').

    std::transform(std::cbegin(x), std::cend(x), std::cbegin(y), std::begin(z), std::multiplies<>{});

    double sum = 0.0;
    // The loop test has to be a relational comparison ('<', not '!='):
    // CLANG_ and ONEAPI_ reject '!=' with "condition of OpenMP for loop must
    // be a relational comparison".
#pragma omp parallel for default(none) shared(z) reduction(+:sum)
    for (auto pi = std::cbegin(z); pi < std::cend(z); ++pi)
    {
        sum += *pi;
    }
    return sum;
}
|
|
|
|
|
|
|