ex5 task5
This commit is contained in:
parent
95b3017475
commit
3d054f8ae7
94 changed files with 159884 additions and 6 deletions
|
|
@ -1,215 +0,0 @@
|
|||
// clang-tidy *.cpp -checks=llvm-*,-llvm-header-guard -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp" -- *.cpp
|
||||
|
||||
#include "task_2.h"
|
||||
#include "task_3.h"
|
||||
#include "task_4.h"
|
||||
#include "timing.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <execution>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <omp.h>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
|
||||
void task_2() {
|
||||
printf("\n\n-------------- Task 2 --------------\n\n");
|
||||
|
||||
int threads = 4;
|
||||
omp_set_num_threads(threads);
|
||||
cout << omp_get_max_threads() << " threads have been started." << endl;
|
||||
|
||||
// Read vector
|
||||
vector<double> a;
|
||||
read_vector_from_file("data_1.txt", a);
|
||||
|
||||
tic();
|
||||
// min and max
|
||||
// auto [min, max] = min_max_par(a);
|
||||
auto min = *min_element(std::execution::par, a.begin(), a.end());
|
||||
auto max = *max_element(std::execution::par, a.begin(), a.end());
|
||||
// means
|
||||
auto [x,y,z] = means_par(a);
|
||||
// deviation
|
||||
double deviation(0.0);
|
||||
#pragma omp parallel for shared(x,a) reduction(+:deviation)
|
||||
for (long unsigned int i=0; i<a.size(); i++){
|
||||
deviation += pow(x - a.at(i),2);
|
||||
}
|
||||
deviation = sqrt(deviation/static_cast<double>(a.size()));
|
||||
double t = toc();
|
||||
|
||||
printf("Minimum: %f\n", min);
|
||||
printf("Maximum: %f\n", max);
|
||||
printf("Arithmetic: %f\n", x);
|
||||
printf("Geometric: %f\n", y);
|
||||
printf("Harmonic: %f\n", z);
|
||||
printf("Deviation: %f\n", deviation);
|
||||
printf("Execution time: %f\n", t);
|
||||
|
||||
// write results to file
|
||||
vector<double> b = {min,max,x,y,z,deviation};
|
||||
write_vector_to_file("out_1.txt", b);
|
||||
}
|
||||
|
||||
void task_3() {
|
||||
printf("\n\n-------------- Task 3 --------------\n\n");
|
||||
|
||||
int threads = 4;
|
||||
omp_set_num_threads(threads);
|
||||
cout << omp_get_max_threads() << " threads have been started." << endl;
|
||||
|
||||
// #####################################
|
||||
// single_goldbach(k)
|
||||
int k = 694;
|
||||
printf("single_goldbach(k = %d) = %d\n", k, single_goldbach_par(k));
|
||||
|
||||
// Prints decompositions
|
||||
print_decomps(k);
|
||||
|
||||
// count_goldbach(n)
|
||||
// printf("\nNOTE: For n=2'000'000 it will take ~30 seconds.\n");
|
||||
for (int n : {10'000, 100'000, 400'000, 1'000'000, 2'000'000/*, 10'000'000*/}) {
|
||||
tic();
|
||||
vector<int> counts = count_goldbach_par(n);
|
||||
double sec = toc();
|
||||
|
||||
auto max = max_element(counts.begin(), counts.end());
|
||||
printf("count_goldbach(n = %d): k = %ld, decompositions = %d, time elapsed: %f milliseconds\n", n, max-counts.begin(), *max, sec*1000);
|
||||
}
|
||||
printf("Should be: k = 9240, 99330, 390390, 990990, 1981980, 9699690\n");
|
||||
printf(" decompositions = 329, 2168, 7094, 15594, 27988, 124180\n\n");
|
||||
}
|
||||
|
||||
void task_4() {
|
||||
printf("\n\n-------------- Task 4 --------------\n\n");
|
||||
|
||||
int threads = 32;
|
||||
omp_set_num_threads(threads);
|
||||
cout << omp_get_max_threads() << " threads have been started." << endl;
|
||||
|
||||
size_t M, N, L, p, NLOOPS;
|
||||
|
||||
{ // Matrix-Vector product
|
||||
printf("----- Benchmark (B) -----\n");
|
||||
// Initialization
|
||||
M = 8'000;
|
||||
N = 12'000;
|
||||
NLOOPS = 30;
|
||||
auto [A,x] = init_B(M,N);
|
||||
// Benchmark
|
||||
tic();
|
||||
benchmark_B(A, x, NLOOPS, false);
|
||||
double sec = toc() / NLOOPS;
|
||||
// Timings and Performance
|
||||
size_t memory = M*N + M + N;
|
||||
size_t flops = 2 * M * N;
|
||||
print_performance(sec, memory, flops, sizeof(A[0]));
|
||||
printf("-------------------------\n");
|
||||
}
|
||||
|
||||
{ // Matrix-Matrix product
|
||||
printf("----- Benchmark (C) -----\n");
|
||||
// Initialization
|
||||
M = 1'000;
|
||||
N = 2'000;
|
||||
L = 500;
|
||||
NLOOPS = 20;
|
||||
auto [A,B] = init_C(M,N,L);
|
||||
// Benchmark
|
||||
tic();
|
||||
benchmark_C(A, B, L, NLOOPS, false);
|
||||
double sec = toc() / NLOOPS;
|
||||
// Timings and Performance
|
||||
size_t memory = M*L + L*N + M*N;
|
||||
size_t flops = M * 2*L * N;
|
||||
print_performance(sec, memory, flops, sizeof(A[0]));
|
||||
printf("-------------------------\n");
|
||||
}
|
||||
|
||||
{ // Polynomial evaluation
|
||||
printf("----- Benchmark (D) -----\n");
|
||||
// Initialization
|
||||
N = 1'000'000;
|
||||
p = 200;
|
||||
NLOOPS = 20;
|
||||
auto [x,a] = init_D(N,p);
|
||||
// Benchmark
|
||||
tic();
|
||||
benchmark_D(x, a, NLOOPS);
|
||||
double sec = toc() / NLOOPS;
|
||||
// Timings and Performance
|
||||
size_t memory = 2.0 * N;
|
||||
size_t flops = 2.0 * N * p;
|
||||
print_performance(sec, memory, flops, sizeof(x[0]));
|
||||
printf("-------------------------\n");
|
||||
}
|
||||
|
||||
|
||||
// Timing
|
||||
NLOOPS = 50;
|
||||
int K=9, T=16;
|
||||
vector<double> speedup_sum((K-3+1)*T), speedup_scalar((K-3+1)*T);
|
||||
for (int k=0; k<(K-3+1); ++k) {
|
||||
N = pow(10,k);
|
||||
auto [x,y] = init_A(N);
|
||||
for (int t=0; t<T; t++) {
|
||||
omp_set_num_threads(t+1);
|
||||
|
||||
tic();
|
||||
benchmark_summation(x, NLOOPS);
|
||||
speedup_sum[k*T+t] = toc() / NLOOPS;
|
||||
|
||||
tic();
|
||||
benchmark_A(x, y, NLOOPS, false);
|
||||
speedup_scalar[k*T+t] = toc() / NLOOPS;
|
||||
}
|
||||
}
|
||||
|
||||
// Calculating speedup
|
||||
for (int k=0; k<(K-3+1); ++k) {
|
||||
double t0 = speedup_sum[k*T];
|
||||
double t00 = speedup_scalar[k*T];
|
||||
for (int t=0; t<T; t++){
|
||||
speedup_sum[k*T+t] = t0/speedup_sum[k*T+t];
|
||||
speedup_scalar[k*T+t] = t00/speedup_scalar[k*T+t];
|
||||
}
|
||||
}
|
||||
|
||||
// Printing tables
|
||||
cout << fixed << setprecision(4);
|
||||
cout << "\n\nSpeedup: summation" << endl;
|
||||
cout << "k \\ threads | ";
|
||||
for (int t=0; t<T; t++) {cout << setw(2) << t+1 << " | ";}
|
||||
cout << endl;
|
||||
for (int k=3; k<K+1; ++k) {
|
||||
cout << " " << k << " |";
|
||||
for (int t=0; t<T; t++) {
|
||||
cout << speedup_sum[(k-3)*T+t] << "|";
|
||||
}
|
||||
cout << endl;
|
||||
}
|
||||
|
||||
cout << "\n\nSpeedup: scalar" << endl;
|
||||
cout << "k \\ threads | ";
|
||||
for (int t=0; t<T; t++) {cout << setw(2) << t+1 << " | ";}
|
||||
cout << endl;
|
||||
for (int k=3; k<K+1; ++k) {
|
||||
cout << " " << k << " |";
|
||||
for (int t=0; t<T; t++) {
|
||||
cout << speedup_scalar[(k-3)*T+t] << "|";
|
||||
}
|
||||
cout << endl;
|
||||
}
|
||||
}
|
||||
|
||||
/// Program entry point: runs the drivers for tasks 2, 3 and 4 in that order.
int main() {
    using TaskFn = void (*)();
    const TaskFn schedule[] = {task_2, task_3, task_4};

    for (const TaskFn run : schedule) {
        run();
    }

    return 0;
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue