ex5 task5

This commit is contained in:
dino.celebic 2025-12-09 23:23:51 +01:00
commit 3d054f8ae7
94 changed files with 159884 additions and 6 deletions

View file

@ -1,215 +0,0 @@
// clang-tidy *.cpp -checks=llvm-*,-llvm-header-guard -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp" -- *.cpp
#include "task_2.h"
#include "task_3.h"
#include "task_4.h"
#include "timing.h"
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <execution>
#include <iomanip>
#include <iostream>
#include <omp.h>
#include <sstream>
#include <vector>
/// Task 2: parallel descriptive statistics of the values read from "data_1.txt".
///
/// Times the computation of the minimum, the maximum, the three values
/// returned by means_par() (printed as arithmetic, geometric and harmonic
/// mean) and the standard deviation around the arithmetic mean (normalised by
/// the number of samples). The six results are printed and written to
/// "out_1.txt" in the order min, max, arithmetic, geometric, harmonic,
/// deviation.
///
/// If the input vector is empty, a message is written to stderr and the
/// function returns without computing or writing anything.
void task_2() {
    printf("\n\n-------------- Task 2 --------------\n\n");
    int threads = 4;
    omp_set_num_threads(threads);
    cout << omp_get_max_threads() << " threads have been started." << endl;

    // Read vector
    vector<double> a;
    read_vector_from_file("data_1.txt", a);

    // Without data, min/max would dereference end() and the deviation would
    // divide by zero, so bail out before the timed section.
    if (a.empty()) {
        std::cerr << "task_2: no values read from data_1.txt, nothing to compute." << std::endl;
        return;
    }

    tic();

    // min and max
    // auto [min, max] = min_max_par(a);
    const double min_value = *min_element(std::execution::par, a.begin(), a.end());
    const double max_value = *max_element(std::execution::par, a.begin(), a.end());

    // means
    auto [x,y,z] = means_par(a);

    // deviation: sum of squared distances to the arithmetic mean, accumulated
    // per thread and combined by the OpenMP reduction.
    double deviation(0.0);
    #pragma omp parallel for shared(x,a) reduction(+:deviation)
    for (size_t i = 0; i < a.size(); ++i) {
        const auto diff = x - a[i];   // i < a.size(), so no bounds check is needed
        deviation += diff * diff;     // plain multiply instead of pow(diff, 2)
    }
    deviation = sqrt(deviation / static_cast<double>(a.size()));

    double t = toc();

    printf("Minimum: %f\n", min_value);
    printf("Maximum: %f\n", max_value);
    printf("Arithmetic: %f\n", x);
    printf("Geometric: %f\n", y);
    printf("Harmonic: %f\n", z);
    printf("Deviation: %f\n", deviation);
    printf("Execution time: %f\n", t);

    // write results to file
    vector<double> b = {min_value, max_value, x, y, z, deviation};
    write_vector_to_file("out_1.txt", b);
}
/// Task 3: Goldbach decomposition counts with the parallel implementations.
///
/// Prints the result of single_goldbach_par() for one fixed k, prints that
/// k's decompositions via print_decomps(), and then, for several upper bounds
/// n, times count_goldbach_par(n) and reports the index holding the largest
/// count together with that count and the elapsed time in milliseconds.
void task_3() {
    printf("\n\n-------------- Task 3 --------------\n\n");
    int threads = 4;
    omp_set_num_threads(threads);
    cout << omp_get_max_threads() << " threads have been started." << endl;

    // #####################################
    // single_goldbach(k)
    int k = 694;
    printf("single_goldbach(k = %d) = %d\n", k, single_goldbach_par(k));

    // Prints decompositions
    print_decomps(k);

    // count_goldbach(n)
    // printf("\nNOTE: For n=2'000'000 it will take ~30 seconds.\n");
    for (int n : {10'000, 100'000, 400'000, 1'000'000, 2'000'000/*, 10'000'000*/}) {
        tic();
        vector<int> counts = count_goldbach_par(n);
        double sec = toc();

        // Position of the largest entry; its index is reported as k.
        auto best = max_element(counts.begin(), counts.end());

        // An iterator difference is a ptrdiff_t, whose printf length modifier
        // is 't' ("%td"); "%ld" is only correct where ptrdiff_t happens to be long.
        printf("count_goldbach(n = %d): k = %td, decompositions = %d, time elapsed: %f milliseconds\n",
               n, best - counts.begin(), *best, sec*1000);
    }

    // Reference values. The last column belongs to n = 10'000'000, which is
    // commented out in the loop above, so only the first five are reproduced.
    printf("Should be: k = 9240, 99330, 390390, 990990, 1981980, 9699690\n");
    printf(" decompositions = 329, 2168, 7094, 15594, 27988, 124180\n\n");
}
void task_4() {
printf("\n\n-------------- Task 4 --------------\n\n");
int threads = 32;
omp_set_num_threads(threads);
cout << omp_get_max_threads() << " threads have been started." << endl;
size_t M, N, L, p, NLOOPS;
{ // Matrix-Vector product
printf("----- Benchmark (B) -----\n");
// Initialization
M = 8'000;
N = 12'000;
NLOOPS = 30;
auto [A,x] = init_B(M,N);
// Benchmark
tic();
benchmark_B(A, x, NLOOPS, false);
double sec = toc() / NLOOPS;
// Timings and Performance
size_t memory = M*N + M + N;
size_t flops = 2 * M * N;
print_performance(sec, memory, flops, sizeof(A[0]));
printf("-------------------------\n");
}
{ // Matrix-Matrix product
printf("----- Benchmark (C) -----\n");
// Initialization
M = 1'000;
N = 2'000;
L = 500;
NLOOPS = 20;
auto [A,B] = init_C(M,N,L);
// Benchmark
tic();
benchmark_C(A, B, L, NLOOPS, false);
double sec = toc() / NLOOPS;
// Timings and Performance
size_t memory = M*L + L*N + M*N;
size_t flops = M * 2*L * N;
print_performance(sec, memory, flops, sizeof(A[0]));
printf("-------------------------\n");
}
{ // Polynomial evaluation
printf("----- Benchmark (D) -----\n");
// Initialization
N = 1'000'000;
p = 200;
NLOOPS = 20;
auto [x,a] = init_D(N,p);
// Benchmark
tic();
benchmark_D(x, a, NLOOPS);
double sec = toc() / NLOOPS;
// Timings and Performance
size_t memory = 2.0 * N;
size_t flops = 2.0 * N * p;
print_performance(sec, memory, flops, sizeof(x[0]));
printf("-------------------------\n");
}
// Timing
NLOOPS = 50;
int K=9, T=16;
vector<double> speedup_sum((K-3+1)*T), speedup_scalar((K-3+1)*T);
for (int k=0; k<(K-3+1); ++k) {
N = pow(10,k);
auto [x,y] = init_A(N);
for (int t=0; t<T; t++) {
omp_set_num_threads(t+1);
tic();
benchmark_summation(x, NLOOPS);
speedup_sum[k*T+t] = toc() / NLOOPS;
tic();
benchmark_A(x, y, NLOOPS, false);
speedup_scalar[k*T+t] = toc() / NLOOPS;
}
}
// Calculating speedup
for (int k=0; k<(K-3+1); ++k) {
double t0 = speedup_sum[k*T];
double t00 = speedup_scalar[k*T];
for (int t=0; t<T; t++){
speedup_sum[k*T+t] = t0/speedup_sum[k*T+t];
speedup_scalar[k*T+t] = t00/speedup_scalar[k*T+t];
}
}
// Printing tables
cout << fixed << setprecision(4);
cout << "\n\nSpeedup: summation" << endl;
cout << "k \\ threads | ";
for (int t=0; t<T; t++) {cout << setw(2) << t+1 << " | ";}
cout << endl;
for (int k=3; k<K+1; ++k) {
cout << " " << k << " |";
for (int t=0; t<T; t++) {
cout << speedup_sum[(k-3)*T+t] << "|";
}
cout << endl;
}
cout << "\n\nSpeedup: scalar" << endl;
cout << "k \\ threads | ";
for (int t=0; t<T; t++) {cout << setw(2) << t+1 << " | ";}
cout << endl;
for (int k=3; k<K+1; ++k) {
cout << " " << k << " |";
for (int t=0; t<T; t++) {
cout << speedup_scalar[(k-3)*T+t] << "|";
}
cout << endl;
}
}
int main() {
task_2();
task_3();
task_4();
return 0;
}