LisaPizzoExercises/Sheet5/mylib.cpp
2025-12-02 09:36:03 +01:00

118 lines
3.3 KiB
C++

#include "mylib.h"
#include <algorithm> // transform()
#include <cassert> // assert()
#include <cmath>
#include <functional> // multiplies<>{}
#include <iostream>
#include <list>
#include <numeric> // iota()
#ifdef _OPENMP
#include <omp.h>
#endif
#include <vector>
using namespace std;
// Inner product of two equally sized vectors.
//
// x, y : input vectors; equal length is checked with assert(), which is
//        compiled out when NDEBUG is defined.
// Returns the sum over all k of x[k]*y[k] (0.0 for empty vectors).
//
// The loop carries an OpenMP "parallel for" directive with a '+' reduction on
// the accumulator; in a build without OpenMP the pragma has no effect and the
// loop runs serially.
double scalar(std::vector<double> const &x, std::vector<double> const &y)
{
    assert(x.size() == y.size());
    std::size_t const len = x.size();
    double acc = 0.0;
#pragma omp parallel for default(none) shared(x,y,len) reduction(+:acc)
    for (std::size_t k = 0; k < len; ++k)
    {
        acc += x[k] * y[k];
        // disabled alternative kernel: acc += exp(x[k])*log(y[k]);
    }
    return acc;
}
// Inner product of two equally sized vectors, computed in two passes.
//
// x, y : input vectors; equal length is checked with assert(), which is
//        compiled out when NDEBUG is defined.
// Returns the sum over all k of x[k]*y[k] (0.0 for empty vectors).
//
// Pass 1 stores the element-wise products in a temporary vector via
// std::transform (declared in <algorithm>); pass 2 sums that vector in an
// OpenMP "parallel for" loop with a '+' reduction.
double scalar_trans(std::vector<double> const &x, std::vector<double> const &y)
{
    assert(x.size() == y.size());

    // Pass 1: products[k] = x[k] * y[k]
    std::vector<double> products(x.size());
    std::transform(x.cbegin(), x.cend(), y.cbegin(), products.begin(), std::multiplies<>{});

    // Pass 2: reduction over the products.
    // The loop test uses '<' instead of '!=': a '!=' test is only accepted as
    // canonical loop form from OpenMP 5.0 on, whereas '<' on random access
    // iterators is valid since OpenMP 3.0.
    double sum = 0.0;
#pragma omp parallel for default(none) shared(products) reduction(+:sum)
    for (auto it = products.cbegin(); it < products.cend(); ++it)
    {
        sum += *it;
    }
    return sum;
}
// Sum of the squared entries of a vector.
//
// x : input vector.
// Returns x[0]*x[0] + ... + x[n-1]*x[n-1]; note that NO square root is taken,
// so the value is the squared Euclidean norm (0.0 for an empty vector).
//
// The loop carries an OpenMP "parallel for" directive with a '+' reduction on
// the accumulator.
double norm(std::vector<double> const &x)
{
    std::size_t const len = x.size();
    double acc = 0.0;
#pragma omp parallel for default(none) shared(x,len) reduction(+:acc)
    for (std::size_t k = 0; k < len; ++k)
    {
        double const entry = x[k];
        acc += entry * entry;
    }
    return acc;
}
// ------------------------------------------------------------------
// Inner product of two equally sized vectors with a hand-written cyclic
// distribution of the loop iterations over the OpenMP threads.
//
// x, y : input vectors; equal length is checked with assert(), which is
//        compiled out when NDEBUG is defined.
// Returns the sum over all i of x[i]*y[i] (0.0 for empty vectors).
//
// Thread t of a team of T threads handles the indices t, t+T, t+2T, ...;
// the per-thread partial sums are combined by the '+' reduction clause.
// <omp.h> is only included when _OPENMP is defined, so the runtime calls are
// guarded the same way: without OpenMP the function degrades to a plain
// serial loop (thread 0 of a team of 1).
double scalar_manual(std::vector<double> const &x, std::vector<double> const &y)
{
    assert(x.size() == y.size());
    std::size_t const N = x.size();
    double sum = 0.0;
#pragma omp parallel default(none) shared(x,y,N) reduction(+:sum)
    {
#ifdef _OPENMP
        std::size_t const tid = static_cast<std::size_t>(omp_get_thread_num());
        std::size_t const nth = static_cast<std::size_t>(omp_get_num_threads());
#else
        std::size_t const tid = 0;
        std::size_t const nth = 1;
#endif
        // manual cyclic distribution: start at own thread id, stride by team size
        for (std::size_t i = tid; i < N; i += nth)
        {
            sum += x[i] * y[i];
        }
    }
    return sum;
}
// ------------------------------------------------------------------
// Demonstrates an OpenMP reduction on a whole std::vector<int>.
//
// n : length of the vector.
// Returns the vector after the parallel region has finished.
//
// Inside the region every thread prints "<thread id> : <vec.size()>" (one
// thread at a time, via the critical section) and then fills vec with
// tid, tid+1, ..., tid+n-1 using iota().
// NOTE(review): the reduction identifier VecAdd is not declared in this file;
// presumably mylib.h provides a matching "#pragma omp declare reduction".
// How the per-thread vectors are initialised and combined depends on that
// declaration - confirm there.
//
// <omp.h> is only included when _OPENMP is defined, so the runtime call is
// guarded the same way. Without OpenMP the pragmas have no effect, the block
// runs once with thread id 0 and the result is 0, 1, ..., n-1.
std::vector<int> reduction_vec(int n)
{
    std::vector<int> vec(n);
#pragma omp parallel default(none) shared(cout) reduction(VecAdd:vec)
    {
#ifdef _OPENMP
        int const tid = omp_get_thread_num();
#else
        int const tid = 0;
#endif
#pragma omp barrier
#pragma omp critical
        std::cout << tid << " : " << vec.size() << std::endl;
#pragma omp barrier
        std::iota(vec.begin(), vec.end(), tid);
#pragma omp barrier
    }
    return vec;
}
// ------------------------------------------------------------------
// Builds a vector that holds one block of n entries per OpenMP thread.
//
// n : block length per thread; for n <= 0 an empty vector is returned.
// Returns a vector of size n*T, where T is the size of the thread team that
// executed the parallel region (T = 1 without OpenMP). Block t occupies the
// index range [t*n, t*n + n) and contains t, t+1, ..., t+n-1.
//
// The result is sized inside the same parallel region that fills it, so the
// team size used for the allocation is always the team size doing the writes.
// <omp.h> is only included when _OPENMP is defined, so the runtime calls are
// guarded the same way; without OpenMP the block runs once as thread 0 of 1.
std::vector<int> reduction_vec_append(int n)
{
    std::vector<int> result;
    if (n <= 0)
    {
        return result;              // nothing to fill
    }
#pragma omp parallel default(none) shared(result,n)
    {
#ifdef _OPENMP
        int const tid = omp_get_thread_num();
        int const nth = omp_get_num_threads();
#else
        int const tid = 0;
        int const nth = 1;
#endif
        // Exactly one thread allocates; the implicit barrier at the end of
        // the single construct keeps the others from writing too early.
#pragma omp single
        result.resize(static_cast<std::size_t>(n) * static_cast<std::size_t>(nth));

        // Each thread fills its own contiguous block directly: block[k] = tid + k
        int *const first = result.data() + static_cast<std::size_t>(tid) * static_cast<std::size_t>(n);
        std::iota(first, first + n, tid);
    }
    return result;
}