#include "mylib.h"

// NOTE(review): the header names and template arguments below were missing in
// the reviewed text and have been reconstructed from how the names are used
// (vector<double> for the numeric kernels, vector<int> for the reduction
// demos) -- confirm against the declarations in mylib.h.
#include <algorithm>        // transform()
#include <cassert>          // assert()
#include <cmath>
#include <cstddef>          // size_t
#include <functional>       // multiplies<>{}
#include <iostream>
#include <list>
#include <numeric>          // iota()
#ifdef _OPENMP
#include <omp.h>
#endif
#include <vector>
using namespace std;

// NOTE(review): <omp.h> is only included when _OPENMP is defined, but the
// omp_*() runtime functions below are called unconditionally; this file
// therefore only builds with OpenMP enabled unless mylib.h provides fallbacks.

/// Inner product of @p x and @p y.
///
/// Starts its own parallel region only when it is not already called from
/// inside one; otherwise the sum is accumulated sequentially by the caller.
///
/// @param[in] x first vector
/// @param[in] y second vector, must have the same size as @p x
/// @return sum over i of x[i]*y[i]
double scalar(vector<double> const &x, vector<double> const &y)
{
    assert(x.size() == y.size()); // switch off via compile flag: -DNDEBUG
    size_t const N = x.size();
    double sum = 0.0;

    if (!omp_in_parallel())
    {
        // Safe to start a parallel region
        #pragma omp parallel for default(none) shared(x,y,N) reduction(+:sum) schedule(runtime)
        for (size_t i = 0; i < N; ++i)
        {
            sum += x[i] * y[i];
        }
    }
    else
    {
        // Already inside a parallel region: do it sequentially to avoid nested parallelism
        for (size_t i = 0; i < N; ++i)
        {
            sum += x[i] * y[i];
        }
    }
    return sum;
}

/// Inner product of @p x and @p y with a hand-written work distribution.
///
/// Each thread of the team handles one contiguous chunk
/// [tid*N/T, (tid+1)*N/T) of the index range; the chunks are disjoint and
/// together cover [0, N). Every thread also prints the team size to cout.
///
/// @param[in] x first vector
/// @param[in] y second vector, must have the same size as @p x
/// @return sum over i of x[i]*y[i]
double scalar_parrallel_env(vector<double> const &x, vector<double> const &y)
{
    assert(x.size() == y.size()); // switch off via compile flag: -DNDEBUG
    size_t const N = x.size();
    double sum = 0.0;

    #pragma omp parallel default(none) shared(x,y,N,cout) reduction(+:sum)
    {
        int const tid         = omp_get_thread_num();
        int const threadCount = omp_get_num_threads();

        // One thread at a time, so that the output lines are not interleaved.
        #pragma omp critical
        cout << threadCount << endl;

        // The upper bound has to use (tid+1)*N; the former tid*(N+1) left
        // thread 0 without any work and skipped most of the index range.
        size_t const first = static_cast<size_t>(tid) * N / static_cast<size_t>(threadCount);
        size_t const last  = (static_cast<size_t>(tid) + 1) * N / static_cast<size_t>(threadCount);
        for (size_t i = first; i < last; ++i)
        {
            sum += x[i] * y[i];
        }
    }
    return sum;
}

/// Sum of the squared entries of @p x, computed in parallel.
///
/// NOTE(review): no square root is taken, i.e. the value returned is the
/// squared Euclidean norm -- confirm that callers expect this.
///
/// @param[in] x vector
/// @return sum over i of x[i]*x[i]
double norm(vector<double> const &x)
{
    size_t const N = x.size();
    double sum = 0.0;
    #pragma omp parallel for default(none) shared(x,N) reduction(+:sum)
    for (size_t i = 0; i < N; ++i)
    {
        sum += x[i] * x[i];
    }
    return sum;
}

/// Demonstrates the user-defined vector reduction VecAdd.
///
/// Every thread reports its id and the size of its private copy of the
/// vector, then fills that copy with tid, tid+1, tid+2, ...  The private
/// copies are combined by the VecAdd reduction, which is declared outside
/// this file (presumably in mylib.h as a component-wise addition -- verify).
///
/// @param[in] n size of the vector, must not be negative
/// @return the vector after the reduction
vector<int> reduction_vec(int n)
{
    assert(n >= 0);
    vector<int> vec(n);
    #pragma omp parallel default(none) shared(cout) reduction(VecAdd:vec)
    {
        #pragma omp barrier
        #pragma omp critical
        cout << omp_get_thread_num() << " : " << vec.size() << endl;
        #pragma omp barrier
        iota( vec.begin(), vec.end(), omp_get_thread_num() );
        #pragma omp barrier
    }
    return vec;
}

/// Demonstrates the user-defined vector reduction VecAppend.
///
/// Every thread builds a local vector tid, tid+1, ..., tid+n-1, reports its
/// id and the local size, and assigns the local vector to its private copy
/// of the result. The private copies are combined by the VecAppend
/// reduction, which is declared outside this file (presumably a
/// concatenation -- verify against mylib.h).
///
/// @param[in] n number of entries contributed by each thread, must not be negative
/// @return the vector after the reduction
vector<int> reduction_vec_append(int n)
{
    assert(n >= 0);
    vector<int> vec(n);
    #pragma omp parallel default(none) shared(cout,n) reduction(VecAppend:vec)
    {
        int const tid = omp_get_thread_num();
        vector<int> local(n);
        iota(local.begin(), local.end(), tid);

        #pragma omp critical
        cout << tid << " : " << local.size() << endl;

        vec = local;
    }
    return vec;
}

/// Inner product of @p x and @p y via std::transform and an iterator loop.
///
/// The component-wise products are stored in a temporary vector first and
/// summed up afterwards in a parallel loop over iterators.
///
/// @param[in] x first vector
/// @param[in] y second vector, must have the same size as @p x
/// @return sum over i of x[i]*y[i]
double scalar_trans(vector<double> const &x, vector<double> const &y)
{
    assert(x.size() == y.size()); // switch off via compile flag: -DNDEBUG
    vector<double> z(x.size());
    //list<double> z(x.size());   // parallel for-loop on iterators not possible (missing 'operator-')

    transform(cbegin(x), cend(x), cbegin(y), begin(z), std::multiplies<>{});

    double sum = 0.0;
    // Some compilers (c++-20 CLANG_, ONEAPI_) require a relational comparison
    // in the condition of an OpenMP for loop, hence '<' instead of '!='.
    #pragma omp parallel for default(none) shared(z) reduction(+:sum)
    for (auto pi = cbegin(z); pi < cend(z); ++pi)
    {
        sum += *pi;
    }
    return sum;
}