141 lines
2.6 KiB
C++
141 lines
2.6 KiB
C++
#include "benchmarks.h"
|
|
#include <cassert> // assert()
|
|
#include <cmath>
|
|
#include <iostream>
|
|
#include <vector>
|
|
#include <omp.h>
|
|
|
|
// (A) Inner product of two vectors (from skalar_stl)
//
// Computes sum_i x[i] * y[i] using an OpenMP parallel-for with a
// '+' reduction on the accumulator.
//
// x, y : input vectors; they must have the same length (checked by assert,
//        i.e. only in builds without NDEBUG).
// Returns the inner product; 0.0 for empty vectors.
double scalar_parallel(std::vector<double> const &x, std::vector<double> const &y)
{
    assert(x.size() == y.size());
    std::size_t const N = x.size();
    double sum = 0.0;

    // Alternative kept for experiments with a runtime-selected schedule:
    //#pragma omp parallel for default(none) shared(x, y, N) reduction(+:sum) schedule(runtime)
    #pragma omp parallel for shared(x, y, N) reduction(+:sum)
    for (std::size_t i = 0; i < N; ++i)
    {
        sum += x[i] * y[i];
    }

    return sum;
}
|
|
|
|
// (A) Vector entry sum
//
// Adds up all entries of x using an OpenMP parallel-for with a '+'
// reduction on the accumulator.
//
// x : input vector.
// Returns the sum of all entries; 0.0 for an empty vector.
double sum(std::vector<double> const &x)
{
    // The accumulator is deliberately not called 'sum', so that it does not
    // shadow this function's own name.
    double total = 0.0;
    std::size_t const N = x.size();   // loop bound evaluated once

    #pragma omp parallel for shared(x) reduction(+:total)
    for (std::size_t i = 0; i < N; ++i)
    {
        total += x[i];
    }

    return total;
}
|
|
|
|
|
|
// (B) Matrix-vector product (from intro_vector_densematrix)
//
// Computes b = A * x, where A is a dense matrix stored row by row in a flat
// vector (entry (i,j) is A[N*i + j]) and N = x.size() is the number of
// columns. The number of rows M is derived as A.size() / N.
//
// A : matrix entries, A.size() must be a multiple of x.size() (assert).
// x : input vector of length N.
// Returns the vector b of length M. If x is empty, an empty vector is
// returned, because the row count cannot be derived.
std::vector<double> MatVec_parallel(std::vector<double> const &A, std::vector<double> const &x)
{
    std::size_t const nelem = A.size();
    std::size_t const N = x.size();

    if (N == 0)
    {
        // Guard against modulo/division by zero below.
        assert(nelem == 0);
        return std::vector<double>();
    }

    assert(nelem % N == 0);           // make sure multiplication is possible
    std::size_t const M = nelem / N;

    std::vector<double> b(M);

    // Each iteration writes only its own b[i], so rows are independent.
    #pragma omp parallel for shared(A, x, N, M, b)
    for (std::size_t i = 0; i < M; ++i)
    {
        double tmp = 0.0;
        for (std::size_t j = 0; j < N; ++j)
        {
            tmp += A[N*i + j] * x[j];
        }
        b[i] = tmp;
    }

    return b;
}
|
|
|
|
|
|
// (C) Matrix-matrix product
//
// Computes C = A * B for dense matrices stored row by row in flat vectors:
//   A is M x L  (entry (i,k) is A[L*i + k]),
//   B is L x N  (entry (k,j) is B[N*k + j]),
//   C is M x N  (entry (i,j) is C[N*i + j]).
// M and N are derived as A.size() / L and B.size() / L.
//
// A, B : matrix entries; both sizes must be multiples of L (assert).
// L    : shared inner dimension (columns of A, rows of B).
// Returns C as a flat vector of size M*N. If L is zero, an empty vector is
// returned, because M and N cannot be derived.
std::vector<double> MatMat_parallel(std::vector<double> const &A, std::vector<double> const &B, std::size_t const &L)
{
    std::size_t const nelem_A = A.size();
    std::size_t const nelem_B = B.size();

    if (L == 0)
    {
        // Guard against modulo/division by zero below.
        assert(nelem_A == 0 && nelem_B == 0);
        return std::vector<double>();
    }

    assert(nelem_A % L == 0 && nelem_B % L == 0);

    std::size_t const M = nelem_A / L;
    std::size_t const N = nelem_B / L;

    std::vector<double> C(M*N);       // value-initialized to 0.0

    // Each iteration of the i-loop updates only row i of C.
    #pragma omp parallel for shared(A, B, M, N, L, C)
    for (std::size_t i = 0; i < M; ++i)
    {
        for (std::size_t k = 0; k < L; ++k)
        {
            double const a_ik = A[L*i + k];   // invariant in the j-loop
            for (std::size_t j = 0; j < N; ++j)
            {
                C[N*i + j] += a_ik * B[N*k + j];
            }
        }
    }

    return C;
}
|
|
|
|
|
|
// (D) Evaluation of a polynomial function
//
// Evaluates p(t) = a[0] + a[1]*t + ... + a[p]*t^p at every point of x using
// Horner's scheme, starting from the highest-order coefficient.
//
// a : polynomial coefficients, a[k] belongs to t^k.
// x : evaluation points.
// Returns y with y[i] = p(x[i]); y has the same length as x. For an empty
// coefficient vector all results are 0.
std::vector<double> poly_parallel(std::vector<double> const &a, std::vector<double> const &x)
{
    std::size_t const N = x.size();
    std::size_t const ncoef = a.size();   // degree + 1; avoids size()-1 underflow
    std::vector<double> y(N, 0.0);

    // Each iteration writes only its own y[i], so points are independent.
    #pragma omp parallel for shared(a, x, N, ncoef, y)
    for (std::size_t i = 0; i < N; ++i)
    {
        double const x_temp = x[i];
        double y_temp = 0.0;
        for (std::size_t k = ncoef; k-- > 0; )
        {
            // Horner step: plain assignment. Using '+=' here would compute
            // y*(1+x) + a[k], i.e. evaluate the polynomial at x+1.
            y_temp = x_temp*y_temp + a[k];
        }
        y[i] = y_temp;
    }

    return y;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|