// NOTE(review): the text below appears to be whitespace-collapsed: three whole
// functions sit on three physical lines, so each `//` comment swallows the code
// that follows it on the same line. It also looks as if everything between a
// '<' and the next '>' was dropped: nine `#include` directives have no header
// name, `vector a;` has no element type, and the deviation loop in task_2 reads
// `i(a.size()))`. Restore the original line breaks and angle-bracket contents
// from version control before building -- TODO confirm against the repository.
//
// Visible contents, in order:
//
//   task_2(): requests 4 OpenMP threads, calls
//             read_vector_from_file("data_1.txt", a), then between tic()/toc()
//             takes min_element/max_element with std::execution::par, calls
//             means_par(a) (bound to x, y, z) and accumulates `deviation` in an
//             OpenMP `reduction(+:deviation)` loop whose body is missing here.
//             Prints all values and passes them to
//             write_vector_to_file("out_1.txt", b).
//
//   task_3(): prints single_goldbach_par(694) and calls print_decomps(694);
//             then, for n in {10'000, 100'000, 400'000, 1'000'000, 2'000'000},
//             times count_goldbach_par(n) and prints the position and value of
//             the largest element of the returned counts, followed by a line of
//             expected reference values.
//             NOTE(review): `max-counts.begin()` is printed with %ld; an
//             iterator difference is normally std::ptrdiff_t, for which %td is
//             the portable conversion -- confirm and adjust.
//
//   task_4(): requests 32 OpenMP threads and runs benchmark (B) matrix-vector,
//             (C) matrix-matrix and (D) polynomial evaluation; each is timed
//             with tic()/toc(), divided by NLOOPS and reported through
//             print_performance. A "Timing" section follows but is cut off in
//             this view.
//             NOTE(review): `size_t memory = 2.0 * N;`, `size_t flops = 2.0 * N * p;`
//             and `N = pow(10,k);` assign double results to size_t -- presumably
//             intentional truncation; verify.
// clang-tidy *.cpp -checks=llvm-*,-llvm-header-guard -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp" -- *.cpp #include "task_2.h" #include "task_3.h" #include "task_4.h" #include "timing.h" #include #include #include #include #include #include #include #include #include void task_2() { printf("\n\n-------------- Task 2 --------------\n\n"); int threads = 4; omp_set_num_threads(threads); cout << omp_get_max_threads() << " threads have been started." << endl; // Read vector vector a; read_vector_from_file("data_1.txt", a); tic(); // min and max // auto [min, max] = min_max_par(a); auto min = *min_element(std::execution::par, a.begin(), a.end()); auto max = *max_element(std::execution::par, a.begin(), a.end()); // means auto [x,y,z] = means_par(a); // deviation double deviation(0.0); #pragma omp parallel for shared(x,a) reduction(+:deviation) for (long unsigned int i=0; i(a.size())); double t = toc(); printf("Minimum: %f\n", min); printf("Maximum: %f\n", max); printf("Arithmetic: %f\n", x); printf("Geometric: %f\n", y); printf("Harmonic: %f\n", z); printf("Deviation: %f\n", deviation); printf("Execution time: %f\n", t); // write results to file vector b = {min,max,x,y,z,deviation}; write_vector_to_file("out_1.txt", b); } void task_3() { printf("\n\n-------------- Task 3 --------------\n\n"); //int threads = 4; //omp_set_num_threads(threads); cout << omp_get_max_threads() << " threads have been started." 
<< endl; // ##################################### // single_goldbach(k) int k = 694; printf("single_goldbach(k = %d) = %d\n", k, single_goldbach_par(k)); // Prints decompositions print_decomps(k); // count_goldbach(n) // printf("\nNOTE: For n=2'000'000 it will take ~30 seconds.\n"); for (int n : {10'000, 100'000, 400'000, 1'000'000, 2'000'000/*, 10'000'000*/}) { tic(); vector counts = count_goldbach_par(n); double sec = toc(); auto max = max_element(counts.begin(), counts.end()); printf("count_goldbach(n = %d): k = %ld, decompositions = %d, time elapsed: %f milliseconds\n", n, max-counts.begin(), *max, sec*1000); } printf("Should be: k = 9240, 99330, 390390, 990990, 1981980, 9699690\n"); printf(" decompositions = 329, 2168, 7094, 15594, 27988, 124180\n\n"); } void task_4() { printf("\n\n-------------- Task 4 --------------\n\n"); int threads = 32; omp_set_num_threads(threads); cout << omp_get_max_threads() << " threads have been started." << endl; size_t M, N, L, p, NLOOPS; { // Matrix-Vector product printf("----- Benchmark (B) -----\n"); // Initialization M = 8'000; N = 12'000; NLOOPS = 30; auto [A,x] = init_B(M,N); // Benchmark tic(); benchmark_B(A, x, NLOOPS, false); double sec = toc() / NLOOPS; // Timings and Performance size_t memory = M*N + M + N; size_t flops = 2 * M * N; print_performance(sec, memory, flops, sizeof(A[0])); printf("-------------------------\n"); } { // Matrix-Matrix product printf("----- Benchmark (C) -----\n"); // Initialization M = 1'000; N = 2'000; L = 500; NLOOPS = 20; auto [A,B] = init_C(M,N,L); // Benchmark tic(); benchmark_C(A, B, L, NLOOPS, false); double sec = toc() / NLOOPS; // Timings and Performance size_t memory = M*L + L*N + M*N; size_t flops = M * 2*L * N; print_performance(sec, memory, flops, sizeof(A[0])); printf("-------------------------\n"); } { // Polynomial evaluation printf("----- Benchmark (D) -----\n"); // Initialization N = 1'000'000; p = 200; NLOOPS = 20; auto [x,a] = init_D(N,p); // Benchmark tic(); benchmark_D(x, 
a, NLOOPS); double sec = toc() / NLOOPS; // Timings and Performance size_t memory = 2.0 * N; size_t flops = 2.0 * N * p; print_performance(sec, memory, flops, sizeof(x[0])); printf("-------------------------\n"); } // Timing NLOOPS = 50; int K=9, T=16; vector speedup_sum((K-3+1)*T), speedup_scalar((K-3+1)*T); for (int k=0; k<(K-3+1); ++k) { N = pow(10,k); auto [x,y] = init_A(N); for (int t=0; t