ex5
This commit is contained in:
parent
0386d99307
commit
95b3017475
28 changed files with 5800 additions and 0 deletions
31
ex5/code/Makefile
Normal file
31
ex5/code/Makefile
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
# Name of the executable; it is linked from all *.cpp files of this directory.
PROGRAM = main

SOURCES = $(wildcard *.cpp)
OBJECTS = ${SOURCES:.cpp=.o}

CXX      = g++
LINKER   = g++
WARNINGS = -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow \
           -Wredundant-decls -fmax-errors=1

CXXFLAGS  = -g -flto -O3 -ffast-math -march=native -fopenmp ${WARNINGS}
LINKFLAGS = -g -flto -O3 -lopenblas -llapacke -fopenmp

# These targets are commands, not files; never treat a file of that name as up to date.
.PHONY: all clean run

all: ${PROGRAM}

# No explicit compile rule: the object files are built by make's implicit
# %.o: %.cpp rule, which uses ${CXX} and ${CXXFLAGS}.
# %.o: %.cpp
# 	${CXX} ${CXXFLAGS} -c $< -o $@

${PROGRAM}: ${OBJECTS}
	$(LINKER) ${OBJECTS} ${LINKFLAGS} -o ${PROGRAM}

# -f keeps 'make clean' from failing when out_1.txt has not been generated yet.
clean:
	rm -f ${OBJECTS} ${PROGRAM}
	rm -f out_1.txt

run: ${PROGRAM}
# run: clean ${PROGRAM}
	./${PROGRAM}
|
||||
500
ex5/code/data_1.txt
Normal file
500
ex5/code/data_1.txt
Normal file
|
|
@ -0,0 +1,500 @@
|
|||
141
|
||||
261
|
||||
87
|
||||
430
|
||||
258
|
||||
298
|
||||
425
|
||||
120
|
||||
496
|
||||
707
|
||||
244
|
||||
786
|
||||
75
|
||||
394
|
||||
4
|
||||
221
|
||||
2
|
||||
190
|
||||
143
|
||||
269
|
||||
175
|
||||
139
|
||||
599
|
||||
902
|
||||
940
|
||||
222
|
||||
483
|
||||
377
|
||||
524
|
||||
265
|
||||
69
|
||||
437
|
||||
174
|
||||
27
|
||||
955
|
||||
431
|
||||
962
|
||||
763
|
||||
8
|
||||
681
|
||||
706
|
||||
646
|
||||
553
|
||||
219
|
||||
773
|
||||
229
|
||||
371
|
||||
891
|
||||
857
|
||||
403
|
||||
319
|
||||
609
|
||||
911
|
||||
910
|
||||
592
|
||||
333
|
||||
854
|
||||
443
|
||||
905
|
||||
34
|
||||
533
|
||||
717
|
||||
180
|
||||
337
|
||||
188
|
||||
322
|
||||
404
|
||||
549
|
||||
49
|
||||
553
|
||||
275
|
||||
242
|
||||
244
|
||||
155
|
||||
957
|
||||
936
|
||||
819
|
||||
729
|
||||
176
|
||||
361
|
||||
189
|
||||
2
|
||||
317
|
||||
700
|
||||
626
|
||||
544
|
||||
440
|
||||
288
|
||||
502
|
||||
762
|
||||
763
|
||||
577
|
||||
748
|
||||
646
|
||||
124
|
||||
505
|
||||
348
|
||||
93
|
||||
148
|
||||
199
|
||||
673
|
||||
432
|
||||
695
|
||||
257
|
||||
10
|
||||
533
|
||||
280
|
||||
947
|
||||
907
|
||||
393
|
||||
25
|
||||
672
|
||||
838
|
||||
972
|
||||
57
|
||||
451
|
||||
583
|
||||
687
|
||||
720
|
||||
651
|
||||
727
|
||||
374
|
||||
582
|
||||
117
|
||||
58
|
||||
980
|
||||
285
|
||||
595
|
||||
963
|
||||
186
|
||||
194
|
||||
342
|
||||
933
|
||||
391
|
||||
274
|
||||
152
|
||||
398
|
||||
375
|
||||
132
|
||||
436
|
||||
92
|
||||
615
|
||||
11
|
||||
574
|
||||
790
|
||||
236
|
||||
449
|
||||
570
|
||||
62
|
||||
497
|
||||
643
|
||||
222
|
||||
838
|
||||
972
|
||||
847
|
||||
506
|
||||
279
|
||||
747
|
||||
237
|
||||
958
|
||||
621
|
||||
601
|
||||
173
|
||||
91
|
||||
256
|
||||
859
|
||||
912
|
||||
700
|
||||
726
|
||||
230
|
||||
577
|
||||
811
|
||||
404
|
||||
989
|
||||
90
|
||||
321
|
||||
512
|
||||
61
|
||||
726
|
||||
557
|
||||
530
|
||||
830
|
||||
859
|
||||
790
|
||||
318
|
||||
453
|
||||
753
|
||||
110
|
||||
110
|
||||
270
|
||||
525
|
||||
973
|
||||
711
|
||||
312
|
||||
292
|
||||
851
|
||||
912
|
||||
640
|
||||
256
|
||||
89
|
||||
839
|
||||
585
|
||||
949
|
||||
62
|
||||
585
|
||||
286
|
||||
828
|
||||
191
|
||||
443
|
||||
394
|
||||
827
|
||||
677
|
||||
208
|
||||
319
|
||||
134
|
||||
672
|
||||
571
|
||||
170
|
||||
148
|
||||
477
|
||||
909
|
||||
553
|
||||
33
|
||||
54
|
||||
806
|
||||
452
|
||||
383
|
||||
790
|
||||
365
|
||||
533
|
||||
712
|
||||
872
|
||||
329
|
||||
651
|
||||
975
|
||||
76
|
||||
588
|
||||
414
|
||||
310
|
||||
264
|
||||
759
|
||||
996
|
||||
187
|
||||
782
|
||||
196
|
||||
993
|
||||
803
|
||||
425
|
||||
729
|
||||
499
|
||||
809
|
||||
357
|
||||
74
|
||||
591
|
||||
911
|
||||
194
|
||||
433
|
||||
750
|
||||
40
|
||||
947
|
||||
764
|
||||
559
|
||||
184
|
||||
498
|
||||
518
|
||||
995
|
||||
855
|
||||
963
|
||||
679
|
||||
404
|
||||
935
|
||||
480
|
||||
232
|
||||
397
|
||||
706
|
||||
559
|
||||
757
|
||||
996
|
||||
963
|
||||
536
|
||||
964
|
||||
116
|
||||
52
|
||||
305
|
||||
581
|
||||
531
|
||||
902
|
||||
541
|
||||
432
|
||||
543
|
||||
713
|
||||
17
|
||||
801
|
||||
143
|
||||
479
|
||||
257
|
||||
370
|
||||
662
|
||||
170
|
||||
279
|
||||
199
|
||||
196
|
||||
327
|
||||
881
|
||||
472
|
||||
404
|
||||
180
|
||||
969
|
||||
408
|
||||
845
|
||||
616
|
||||
377
|
||||
878
|
||||
785
|
||||
465
|
||||
814
|
||||
899
|
||||
430
|
||||
335
|
||||
597
|
||||
902
|
||||
703
|
||||
378
|
||||
735
|
||||
955
|
||||
543
|
||||
541
|
||||
312
|
||||
72
|
||||
182
|
||||
93
|
||||
464
|
||||
10
|
||||
916
|
||||
643
|
||||
2
|
||||
31
|
||||
209
|
||||
455
|
||||
128
|
||||
9
|
||||
728
|
||||
355
|
||||
781
|
||||
437
|
||||
437
|
||||
50
|
||||
50
|
||||
92
|
||||
595
|
||||
242
|
||||
842
|
||||
858
|
||||
964
|
||||
489
|
||||
221
|
||||
227
|
||||
537
|
||||
763
|
||||
348
|
||||
462
|
||||
640
|
||||
918
|
||||
162
|
||||
716
|
||||
578
|
||||
434
|
||||
885
|
||||
394
|
||||
179
|
||||
634
|
||||
625
|
||||
328
|
||||
803
|
||||
1000
|
||||
981
|
||||
128
|
||||
233
|
||||
24
|
||||
608
|
||||
111
|
||||
408
|
||||
885
|
||||
549
|
||||
370
|
||||
209
|
||||
441
|
||||
957
|
||||
125
|
||||
471
|
||||
857
|
||||
44
|
||||
692
|
||||
979
|
||||
284
|
||||
134
|
||||
686
|
||||
910
|
||||
611
|
||||
900
|
||||
194
|
||||
755
|
||||
347
|
||||
419
|
||||
156
|
||||
820
|
||||
625
|
||||
739
|
||||
806
|
||||
68
|
||||
951
|
||||
498
|
||||
756
|
||||
743
|
||||
832
|
||||
157
|
||||
458
|
||||
619
|
||||
933
|
||||
836
|
||||
896
|
||||
583
|
||||
583
|
||||
855
|
||||
35
|
||||
886
|
||||
408
|
||||
37
|
||||
747
|
||||
155
|
||||
144
|
||||
606
|
||||
255
|
||||
325
|
||||
402
|
||||
407
|
||||
387
|
||||
610
|
||||
167
|
||||
189
|
||||
95
|
||||
324
|
||||
770
|
||||
235
|
||||
741
|
||||
693
|
||||
825
|
||||
828
|
||||
294
|
||||
310
|
||||
524
|
||||
326
|
||||
832
|
||||
811
|
||||
557
|
||||
263
|
||||
681
|
||||
234
|
||||
457
|
||||
385
|
||||
539
|
||||
992
|
||||
756
|
||||
981
|
||||
235
|
||||
529
|
||||
52
|
||||
757
|
||||
602
|
||||
858
|
||||
989
|
||||
930
|
||||
410
|
||||
1
|
||||
541
|
||||
208
|
||||
220
|
||||
326
|
||||
96
|
||||
748
|
||||
749
|
||||
544
|
||||
339
|
||||
833
|
||||
553
|
||||
958
|
||||
893
|
||||
357
|
||||
547
|
||||
347
|
||||
623
|
||||
797
|
||||
746
|
||||
126
|
||||
823
|
||||
26
|
||||
415
|
||||
732
|
||||
782
|
||||
368
|
||||
215
ex5/code/main.cpp
Normal file
215
ex5/code/main.cpp
Normal file
|
|
@ -0,0 +1,215 @@
|
|||
// clang-tidy *.cpp -checks=llvm-*,-llvm-header-guard -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp" -- *.cpp
|
||||
|
||||
#include "task_2.h"
|
||||
#include "task_3.h"
|
||||
#include "task_4.h"
|
||||
#include "timing.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <execution>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <omp.h>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
|
||||
void task_2() {
|
||||
printf("\n\n-------------- Task 2 --------------\n\n");
|
||||
|
||||
int threads = 4;
|
||||
omp_set_num_threads(threads);
|
||||
cout << omp_get_max_threads() << " threads have been started." << endl;
|
||||
|
||||
// Read vector
|
||||
vector<double> a;
|
||||
read_vector_from_file("data_1.txt", a);
|
||||
|
||||
tic();
|
||||
// min and max
|
||||
// auto [min, max] = min_max_par(a);
|
||||
auto min = *min_element(std::execution::par, a.begin(), a.end());
|
||||
auto max = *max_element(std::execution::par, a.begin(), a.end());
|
||||
// means
|
||||
auto [x,y,z] = means_par(a);
|
||||
// deviation
|
||||
double deviation(0.0);
|
||||
#pragma omp parallel for shared(x,a) reduction(+:deviation)
|
||||
for (long unsigned int i=0; i<a.size(); i++){
|
||||
deviation += pow(x - a.at(i),2);
|
||||
}
|
||||
deviation = sqrt(deviation/static_cast<double>(a.size()));
|
||||
double t = toc();
|
||||
|
||||
printf("Minimum: %f\n", min);
|
||||
printf("Maximum: %f\n", max);
|
||||
printf("Arithmetic: %f\n", x);
|
||||
printf("Geometric: %f\n", y);
|
||||
printf("Harmonic: %f\n", z);
|
||||
printf("Deviation: %f\n", deviation);
|
||||
printf("Execution time: %f\n", t);
|
||||
|
||||
// write results to file
|
||||
vector<double> b = {min,max,x,y,z,deviation};
|
||||
write_vector_to_file("out_1.txt", b);
|
||||
}
|
||||
|
||||
void task_3() {
|
||||
printf("\n\n-------------- Task 3 --------------\n\n");
|
||||
|
||||
int threads = 4;
|
||||
omp_set_num_threads(threads);
|
||||
cout << omp_get_max_threads() << " threads have been started." << endl;
|
||||
|
||||
// #####################################
|
||||
// single_goldbach(k)
|
||||
int k = 694;
|
||||
printf("single_goldbach(k = %d) = %d\n", k, single_goldbach_par(k));
|
||||
|
||||
// Prints decompositions
|
||||
print_decomps(k);
|
||||
|
||||
// count_goldbach(n)
|
||||
// printf("\nNOTE: For n=2'000'000 it will take ~30 seconds.\n");
|
||||
for (int n : {10'000, 100'000, 400'000, 1'000'000, 2'000'000/*, 10'000'000*/}) {
|
||||
tic();
|
||||
vector<int> counts = count_goldbach_par(n);
|
||||
double sec = toc();
|
||||
|
||||
auto max = max_element(counts.begin(), counts.end());
|
||||
printf("count_goldbach(n = %d): k = %ld, decompositions = %d, time elapsed: %f milliseconds\n", n, max-counts.begin(), *max, sec*1000);
|
||||
}
|
||||
printf("Should be: k = 9240, 99330, 390390, 990990, 1981980, 9699690\n");
|
||||
printf(" decompositions = 329, 2168, 7094, 15594, 27988, 124180\n\n");
|
||||
}
|
||||
|
||||
void task_4() {
|
||||
printf("\n\n-------------- Task 4 --------------\n\n");
|
||||
|
||||
int threads = 32;
|
||||
omp_set_num_threads(threads);
|
||||
cout << omp_get_max_threads() << " threads have been started." << endl;
|
||||
|
||||
size_t M, N, L, p, NLOOPS;
|
||||
|
||||
{ // Matrix-Vector product
|
||||
printf("----- Benchmark (B) -----\n");
|
||||
// Initialization
|
||||
M = 8'000;
|
||||
N = 12'000;
|
||||
NLOOPS = 30;
|
||||
auto [A,x] = init_B(M,N);
|
||||
// Benchmark
|
||||
tic();
|
||||
benchmark_B(A, x, NLOOPS, false);
|
||||
double sec = toc() / NLOOPS;
|
||||
// Timings and Performance
|
||||
size_t memory = M*N + M + N;
|
||||
size_t flops = 2 * M * N;
|
||||
print_performance(sec, memory, flops, sizeof(A[0]));
|
||||
printf("-------------------------\n");
|
||||
}
|
||||
|
||||
{ // Matrix-Matrix product
|
||||
printf("----- Benchmark (C) -----\n");
|
||||
// Initialization
|
||||
M = 1'000;
|
||||
N = 2'000;
|
||||
L = 500;
|
||||
NLOOPS = 20;
|
||||
auto [A,B] = init_C(M,N,L);
|
||||
// Benchmark
|
||||
tic();
|
||||
benchmark_C(A, B, L, NLOOPS, false);
|
||||
double sec = toc() / NLOOPS;
|
||||
// Timings and Performance
|
||||
size_t memory = M*L + L*N + M*N;
|
||||
size_t flops = M * 2*L * N;
|
||||
print_performance(sec, memory, flops, sizeof(A[0]));
|
||||
printf("-------------------------\n");
|
||||
}
|
||||
|
||||
{ // Polynomial evaluation
|
||||
printf("----- Benchmark (D) -----\n");
|
||||
// Initialization
|
||||
N = 1'000'000;
|
||||
p = 200;
|
||||
NLOOPS = 20;
|
||||
auto [x,a] = init_D(N,p);
|
||||
// Benchmark
|
||||
tic();
|
||||
benchmark_D(x, a, NLOOPS);
|
||||
double sec = toc() / NLOOPS;
|
||||
// Timings and Performance
|
||||
size_t memory = 2.0 * N;
|
||||
size_t flops = 2.0 * N * p;
|
||||
print_performance(sec, memory, flops, sizeof(x[0]));
|
||||
printf("-------------------------\n");
|
||||
}
|
||||
|
||||
|
||||
// Timing
|
||||
NLOOPS = 50;
|
||||
int K=9, T=16;
|
||||
vector<double> speedup_sum((K-3+1)*T), speedup_scalar((K-3+1)*T);
|
||||
for (int k=0; k<(K-3+1); ++k) {
|
||||
N = pow(10,k);
|
||||
auto [x,y] = init_A(N);
|
||||
for (int t=0; t<T; t++) {
|
||||
omp_set_num_threads(t+1);
|
||||
|
||||
tic();
|
||||
benchmark_summation(x, NLOOPS);
|
||||
speedup_sum[k*T+t] = toc() / NLOOPS;
|
||||
|
||||
tic();
|
||||
benchmark_A(x, y, NLOOPS, false);
|
||||
speedup_scalar[k*T+t] = toc() / NLOOPS;
|
||||
}
|
||||
}
|
||||
|
||||
// Calculating speedup
|
||||
for (int k=0; k<(K-3+1); ++k) {
|
||||
double t0 = speedup_sum[k*T];
|
||||
double t00 = speedup_scalar[k*T];
|
||||
for (int t=0; t<T; t++){
|
||||
speedup_sum[k*T+t] = t0/speedup_sum[k*T+t];
|
||||
speedup_scalar[k*T+t] = t00/speedup_scalar[k*T+t];
|
||||
}
|
||||
}
|
||||
|
||||
// Printing tables
|
||||
cout << fixed << setprecision(4);
|
||||
cout << "\n\nSpeedup: summation" << endl;
|
||||
cout << "k \\ threads | ";
|
||||
for (int t=0; t<T; t++) {cout << setw(2) << t+1 << " | ";}
|
||||
cout << endl;
|
||||
for (int k=3; k<K+1; ++k) {
|
||||
cout << " " << k << " |";
|
||||
for (int t=0; t<T; t++) {
|
||||
cout << speedup_sum[(k-3)*T+t] << "|";
|
||||
}
|
||||
cout << endl;
|
||||
}
|
||||
|
||||
cout << "\n\nSpeedup: scalar" << endl;
|
||||
cout << "k \\ threads | ";
|
||||
for (int t=0; t<T; t++) {cout << setw(2) << t+1 << " | ";}
|
||||
cout << endl;
|
||||
for (int k=3; k<K+1; ++k) {
|
||||
cout << " " << k << " |";
|
||||
for (int t=0; t<T; t++) {
|
||||
cout << speedup_scalar[(k-3)*T+t] << "|";
|
||||
}
|
||||
cout << endl;
|
||||
}
|
||||
}
|
||||
|
||||
int main() {
|
||||
task_2();
|
||||
task_3();
|
||||
task_4();
|
||||
|
||||
return 0;
|
||||
}
|
||||
73
ex5/code/mayer_primes.h
Normal file
73
ex5/code/mayer_primes.h
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
#pragma once
|
||||
|
||||
#include <cstring> //memset
|
||||
#include <vector>
|
||||
//using namespace std;
|
||||
|
||||
/** \brief Determines all prime numbers in interval [2, @p max].
|
||||
*
|
||||
* The sieve of Eratosthenes is used.
|
||||
*
|
||||
* The implementation originates from <a href="http://code.activestate.com/recipes/576559-fast-prime-generator/">Florian Mayer</a>.
|
||||
*
|
||||
* \param[in] max end of interval for the prime number search.
|
||||
* \return vector of prime numbers @f$2,3,5, ..., p<=max @f$.
|
||||
*
|
||||
* \copyright
|
||||
* Copyright (c) 2008 Florian Mayer (adapted by Gundolf Haase 2018)
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||
* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
template <class T>
std::vector<T> get_primes(T max)
{
    std::vector<T> primes;
    // There is no prime below 2. Returning early also keeps a negative max
    // from being turned into a (negative) sieve size.
    if (max < 2) {
        return primes;
    }

    // One bit per number in [0, max]; a set bit means "still considered prime".
    // The vector owns the memory, so it is released even if push_back throws.
    std::vector<unsigned char> sieve(static_cast<std::size_t>(max / 8) + 1, 0xFF);

    for (T x = 2; ; ++x)
    {
        if (sieve[x / 8] & (0x01 << (x % 8))) {
            primes.push_back(x);
            // Is prime. Mark its multiples, starting at x*x: every smaller multiple
            // has a smaller prime factor and has been marked already.
            // The guard x <= max / x guarantees that x*x <= max, i.e. no overflow.
            if (x <= max / x) {
                for (T j = x * x; ; j += x)
                {
                    sieve[j / 8] &= static_cast<unsigned char>(~(0x01 << (j % 8)));
                    // Stop before j += x could pass max (and possibly overflow T).
                    if (j > max - x) {
                        break;
                    }
                }
            }
        }
        // Leave before ++x: incrementing past max could overflow T.
        if (x == max) {
            break;
        }
    }
    return primes;
}
|
||||
|
||||
//---------------------------------------------------------------
|
||||
//int main() // by Florian Mayer
|
||||
//{g++ -O3 -std=c++14 -fopenmp main.cpp && ./a.out
|
||||
// vector<unsigned long> primes;
|
||||
// primes = get_primes(10000000);
|
||||
// // return 0;
|
||||
// // Print out result.
|
||||
// vector<unsigned long>::iterator it;
|
||||
// for(it=primes.begin(); it < primes.end(); it++)
|
||||
// cout << *it << " ";
|
||||
//
|
||||
// cout << endl;
|
||||
// return 0;
|
||||
//}
|
||||
6
ex5/code/out_1.txt
Normal file
6
ex5/code/out_1.txt
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
1
|
||||
1000
|
||||
498.184
|
||||
364.412
|
||||
95.6857
|
||||
287.905
|
||||
93
ex5/code/task_2.cpp
Normal file
93
ex5/code/task_2.cpp
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
#include "task_2.h"
|
||||
#include <cassert> // assert
|
||||
#include <cmath>
|
||||
#include <fstream>
|
||||
#include <omp.h>
|
||||
|
||||
/** Determines the smallest and the largest element of @p v in parallel.
 *
 * @param[in] v non-empty vector of values
 * @return tuple (minimum, maximum)
 */
std::tuple<double, double> min_max_par(const std::vector<double> &v) {
    assert(!v.empty() && "min_max_par requires a non-empty vector");

    // Both running extremes are doubles; integer variables would truncate the values.
    double min_val = v[0];
    double max_val = v[0];

    #pragma omp parallel for reduction(min:min_val) reduction(max:max_val)
    for (size_t i = 0; i < v.size(); ++i) {
        if (v[i] < min_val) min_val = v[i];
        if (v[i] > max_val) max_val = v[i];
    }

    return std::make_tuple(min_val, max_val);
}
|
||||
|
||||
|
||||
/** Computes the arithmetic, geometric and harmonic mean of @p v in parallel.
 *
 * The geometric mean is evaluated as exp of the averaged logarithms.
 *
 * @param[in] v vector of values
 * @return tuple (arithmetic mean, geometric mean, harmonic mean)
 */
std::tuple<double, double, double> means_par(const std::vector<double>& v){
    size_t count = v.size();
    double total = 0;
    double log_total = 0;
    double reciprocal_total = 0;

    // One pass accumulates the three sums the means are built from.
    #pragma omp parallel for shared(v,count) reduction(+:total, log_total, reciprocal_total)
    for (size_t idx = 0; idx < count; ++idx){
        total            += v[idx];
        log_total        += std::log(v[idx]);
        reciprocal_total += 1.0/v[idx];
    }

    const double len = static_cast<double>(count);
    return std::make_tuple(total / len,
                           std::exp(1.0/len * log_total),
                           len / reciprocal_total);
}
|
||||
|
||||
// Appends all doubles that can be extracted from istr to v.
// If the extraction stops before the end of the stream a message is printed;
// a corrupted stream raises an exception, a format error is cleared.
void fill_vector(std::istream& istr, std::vector<double>& v)
{
    // read the data
    for (double value = 0; istr >> value; ) {
        v.push_back(value);
    }

    const bool stopped_early = !istr.eof();
    if (stopped_early) {
        // error handling
        std::cout << " Error handling \n";
        if (istr.bad()) {
            throw std::runtime_error("Schwerer Fehler in istr");
        }
        if (istr.fail()) {
            // try to clean up
            std::cout << " Failed in reading all data.\n";
            istr.clear();
        }
    }

    v.shrink_to_fit();                       // C++11
}
|
||||
|
||||
|
||||
void read_vector_from_file(const string& file_name, vector<double>& v)
|
||||
{
|
||||
ifstream fin(file_name); // Oeffne das File im ASCII-Modus
|
||||
if( fin.is_open() ) // File gefunden:
|
||||
{
|
||||
v.clear(); // Vektor leeren
|
||||
fill_vector(fin, v);
|
||||
}
|
||||
else // File nicht gefunden:
|
||||
{
|
||||
cout << "\nFile " << file_name << " has not been found.\n\n" ;
|
||||
assert( fin.is_open() && "File not found." ); // exeption handling for the poor programmer
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// Writes the elements of v to the ASCII file file_name, one value per line.
// If the file cannot be opened a message is printed and the assertion fails.
void write_vector_to_file(const std::string& file_name, const std::vector<double>& v)
{
    std::ofstream fout(file_name);           // open the file in ASCII mode
    if (!fout.is_open()) {
        std::cout << "\nFile " << file_name << " has not been opened.\n\n" ;
        assert( fout.is_open() && "File not opened." );  // exception handling for the poor programmer
        return;
    }

    for (const double value : v) {
        fout << value << std::endl;
    }
}
|
||||
38
ex5/code/task_2.h
Normal file
38
ex5/code/task_2.h
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
#pragma once
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
using namespace std;
|
||||
|
||||
tuple<double, double> min_max_par(const vector<double> &v);
|
||||
tuple<double, double, double> means_par(const vector<double>& v);
|
||||
|
||||
/**
|
||||
This function opens the ASCII-file named @p file_name and reads the
|
||||
double data into the C++ vector @p v.
|
||||
If the file @p file_name does not exist then the code stops with an appropriate message.
|
||||
@param[in] file_name name of the ASCII-file
|
||||
@param[out] v C++ vector with double values
|
||||
*/
|
||||
|
||||
void read_vector_from_file(const string& file_name, vector<double>& v);
|
||||
|
||||
|
||||
/**
|
||||
This function opens the ASCII-file named @p file_name and overwrites it with the
|
||||
double data from the C++ vector @p v.
|
||||
If there are problems in opening/generating file @p file_name
|
||||
then the code stops with an appropriate message.
|
||||
@param[in] file_name name of the ASCII-file
|
||||
@param[in] v C++ vector with double values
|
||||
*/
|
||||
|
||||
void write_vector_to_file(const string& file_name, const vector<double>& v);
|
||||
|
||||
/**
|
||||
Fills the double-vector @p v with data from an input stream @p istr until this input stream
|
||||
ends regularly. The values read are appended to @p v and its memory is automatically allocated.
|
||||
@param[in] istr input stream
|
||||
@param[out] v C++ vector with double values
|
||||
@warning An exception is thrown only if the stream is corrupted; in case of a wrong data format a message is printed and reading stops.
|
||||
*/
|
||||
void fill_vector(istream& istr, vector<double>& v);
|
||||
73
ex5/code/task_3.cpp
Normal file
73
ex5/code/task_3.cpp
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
#include "task_3.h"
|
||||
#include "mayer_primes.h"
|
||||
#include "timing.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
#include <omp.h>
|
||||
#include <vector>
|
||||
using namespace std;
|
||||
|
||||
|
||||
int single_goldbach_par(int k) {
|
||||
const vector<int> primes = get_primes(k);
|
||||
int count = 0;
|
||||
|
||||
#pragma omp parallel for reduction(+:count)
|
||||
for (size_t i = 0; i < primes.size(); i++) {
|
||||
for (size_t j = i; j < primes.size(); j++) {
|
||||
if (primes[i] + primes[j] == k) {
|
||||
count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
|
||||
vector<int> count_goldbach_par(int n) {
|
||||
const vector<int> primes = get_primes(n);
|
||||
vector<int> counts(n+1);
|
||||
|
||||
#pragma omp parallel reduction(VecAdd:counts)
|
||||
// #pragma omp parallel
|
||||
{
|
||||
vector<int> local_counts(n+1, 0);
|
||||
|
||||
#pragma omp for
|
||||
for (size_t i = 1; i < primes.size(); i++) {
|
||||
for (size_t j = i; j < primes.size(); j++) {
|
||||
int sum = primes[i] + primes[j];
|
||||
if (sum <= n) {
|
||||
local_counts[sum]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
counts += local_counts;
|
||||
// #pragma omp critical
|
||||
// {
|
||||
// for(int k=0; k<n+1; k++){
|
||||
// counts[k] += local_counts[k];
|
||||
// }
|
||||
// }
|
||||
}
|
||||
return counts;
|
||||
}
|
||||
|
||||
|
||||
void print_decomps(int k) {
|
||||
const vector<int> primes = get_primes(k);
|
||||
cout << "\nDecompositions for k = " << k << ": ";
|
||||
|
||||
for (size_t i = 0; i < primes.size(); i++) {
|
||||
for (size_t j = i; j < primes.size(); j++) {
|
||||
if (primes[i] + primes[j] == k) {
|
||||
cout << primes[i] << " + " << primes[j] << ", ";
|
||||
}
|
||||
}
|
||||
}
|
||||
cout << endl;
|
||||
}
|
||||
33
ex5/code/task_3.h
Normal file
33
ex5/code/task_3.h
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
#pragma once
|
||||
#include <cassert>
|
||||
#include <vector>
|
||||
using namespace std;
|
||||
|
||||
|
||||
// Counts number of possible decompositions with 2 primes that sum up to k.
|
||||
int single_goldbach_par(int k);
|
||||
|
||||
// Counts number of possible decompositions with 2 primes that sum up to k for all even numbers k \in {4,...,n}.
|
||||
vector<int> count_goldbach_par(int n);
|
||||
|
||||
// Prints all decompositions of k.
|
||||
void print_decomps(int k);
|
||||
|
||||
|
||||
/** Vector @p b adds its elements to vector @p a .
|
||||
@param[in,out] a vector, updated in place
|
||||
@param[in] b vector
|
||||
@return a+=b componentwise
|
||||
*/
|
||||
template<class T>
std::vector<T> &operator+=(std::vector<T> &a, std::vector<T> const &b)
{
    assert(a.size()==b.size());
    // Walk through both vectors simultaneously and accumulate into a.
    auto source = b.begin();
    for (auto &target : a) {
        target += *source;
        ++source;
    }
    return a;
}
|
||||
|
||||
#pragma omp declare reduction(VecAdd : std::vector<int> : omp_out += omp_in) \
|
||||
initializer (omp_priv=omp_orig)
|
||||
232
ex5/code/task_4.cpp
Normal file
232
ex5/code/task_4.cpp
Normal file
|
|
@ -0,0 +1,232 @@
|
|||
#include "task_4.h"
|
||||
#include "timing.h"
|
||||
#include <cassert>
|
||||
#include <cblas.h> // cBLAS Library
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
using namespace std;
|
||||
|
||||
// Matrix-vector product b = A*x.
// A is stored row-wise in one vector; its number of columns is x.size(),
// its number of rows follows from A.size(). The rows are distributed among the threads.
std::vector<double> matrix_vec(std::vector<double> const &A, std::vector<double> const &x) {
    size_t const N = x.size();
    size_t const M = A.size() / N;
    std::vector<double> b(M);

    #pragma omp parallel for shared(A,x,N,M,b)
    for (size_t row = 0; row < M; ++row) {
        double const *a_row = A.data() + row*N;
        double acc = 0.0;
        for (size_t col = 0; col < N; ++col) {
            acc += a_row[col] * x[col];
        }
        b[row] = acc;
    }
    return b;
}
|
||||
|
||||
|
||||
// Matrix-matrix product C = A*B for row-wise stored matrices.
// A has M rows; the inner dimension L and the number of columns N of B
// follow from the vector lengths. The rows of C are distributed among the threads.
std::vector<double> matrix_matrix(std::vector<double> const &A, std::vector<double> const &B, size_t const &M) {
    size_t const L = A.size() / M;
    size_t const N = B.size() / L;
    std::vector<double> C(M*N,0);

    #pragma omp parallel for shared(A,B,M,L,N,C)
    for (size_t row = 0; row < M; ++row) {
        double *c_row = C.data() + row*N;
        for (size_t inner = 0; inner < L; ++inner) {
            // A(row,inner) is constant for the whole row of B it is combined with
            double const a_val = A[row*L + inner];
            double const *b_row = B.data() + inner*N;
            for (size_t col = 0; col < N; ++col) {
                c_row[col] += a_val * b_row[col];
            }
        }
    }
    return C;
}
|
||||
|
||||
|
||||
// Evaluates the polynomial a[0] + a[1]*t + ... + a[p-1]*t^(p-1), p = a.size(),
// at every point t = x[i] with Horner's scheme.
// Returns the vector of the values; it contains zeros if a is empty.
std::vector<double> poly(std::vector<double> const &x, std::vector<double> const &a) {
    size_t N = x.size();
    size_t p = a.size();
    std::vector<double> y(N);
    if (p == 0) {
        return y;           // empty polynomial: all values are zero
    }

    #pragma omp parallel for shared(x,a,N,p,y)
    for (size_t i = 0; i < N; ++i) {
        // Start with the leading coefficient a[p-1] (a[p] would be out of bounds)
        // and work down to a[0].
        double value = a[p-1];
        for (size_t k = 2; k <= p; ++k) {
            value = value*x[i] + a[p-k];
        }
        y[i] = value;
    }
    return y;
}
|
||||
|
||||
// Scalar product of the equally sized vectors x and y, computed as a parallel reduction.
double scalar(std::vector<double> const &x, std::vector<double> const &y) {
    assert(x.size() == y.size());
    size_t const N = x.size();
    double result = 0.0;

    #pragma omp parallel for shared(x,y,N) reduction(+:result)
    for (size_t idx = 0; idx < N; ++idx) {
        result += x[idx] * y[idx];
    }
    return result;
}
|
||||
|
||||
// Sum of all elements of x, computed as a parallel reduction.
double summation(std::vector<double> const &x){
    size_t N = x.size();
    double total = 0.0;

    #pragma omp parallel for shared(x,N) reduction(+:total)
    for (size_t idx = 0; idx < N; ++idx) {
        total += x[idx];
    }

    return total;
}
|
||||
|
||||
|
||||
|
||||
// ##########################################################################
|
||||
|
||||
|
||||
// Prints memory footprint, time per loop, floating point rate and memory bandwidth.
//   sec    - duration of one loop in seconds
//   memory - number of data items accessed
//   flops  - number of floating point operations of one loop
//   size   - size of one data item in bytes
// All scalings use the factor 1024^3.
void print_performance(double sec, size_t memory, size_t flops, unsigned int size) {
    double const mem_total    = 1.0 * memory / 1024 / 1024 / 1024 * size;
    double const flop_rate    = 1.0 * flops / sec / 1024 / 1024 / 1024;
    double const mem_per_sec  = 1.0 * memory / sec / 1024 / 1024 / 1024 * size;

    printf("Memory allocated : %.3f GByte\n", mem_total);
    printf("Duration per loop : %.3f sec\n", sec);
    printf("GFLOPS : %.3f\n", flop_rate);
    printf("GiByte/s : %.3f\n", mem_per_sec);
}
|
||||
|
||||
// Test data for the scalar product: x[i] = i%219 + 1 and y[i] = 1/x[i],
// so every product x[i]*y[i] is (up to rounding) 1.
// Returns the tuple (x, y) of two vectors of length N.
std::tuple<std::vector<double>, std::vector<double>> init_A(size_t N) {
    std::vector<double> x(N), y(N);
    size_t idx = 0;
    for (double &x_val : x) {
        x_val = idx%219 + 1.0;
        y[idx] = 1.0 / x_val;
        ++idx;
    }
    return std::make_tuple(x, y);
}
|
||||
|
||||
void benchmark_A(vector<double> const &x, vector<double> const &y, size_t NLOOPS, bool cblas) {
|
||||
size_t N = x.size();
|
||||
|
||||
double s(0.0), sum(0.0);
|
||||
if (cblas == false) {
|
||||
for (size_t i = 0; i < NLOOPS; ++i) {
|
||||
s = scalar(x, y);
|
||||
sum += s;
|
||||
}
|
||||
} else if (cblas == true) {
|
||||
for (size_t i = 0; i < NLOOPS; ++i) {
|
||||
s = cblas_ddot(N, x.data(), 1, y.data(), 1);
|
||||
sum += s;
|
||||
}
|
||||
}
|
||||
|
||||
// Check correctness
|
||||
if (static_cast<size_t>(sum) != N*NLOOPS) {printf(" !! W R O N G result !!\n");}
|
||||
}
|
||||
|
||||
// Test data for the matrix-vector product:
// A(i,j) = (i+j)%219 + 1 (M x N, stored row-wise) and x[j] = 1/A(17,j),
// so row 17 of A times x gives (up to rounding) N.
// Requires M > 17. Returns the tuple (A, x).
std::tuple<std::vector<double>, std::vector<double>> init_B(size_t M, size_t N) {
    assert(M > 17 && "init_B reads row 17 of A");

    // Both vectors are created inside the returned tuple, so the matrix is not copied.
    std::tuple<std::vector<double>, std::vector<double>> data{std::vector<double>(M*N),
                                                              std::vector<double>(N)};
    std::vector<double> &A = std::get<0>(data);
    std::vector<double> &x = std::get<1>(data);

    for (size_t i = 0; i < M; ++i) {
        for (size_t j = 0; j < N; ++j) {
            A[i*N + j] = (i+j)%219 + 1.0;
        }
    }
    for (size_t j = 0; j < N; ++j) {
        x[j] = 1.0/A[17*N + j];
    }
    return data;
}
|
||||
|
||||
void benchmark_B(vector<double> const &A, vector<double> const &x, size_t NLOOPS, bool cblas) {
|
||||
size_t N = x.size();
|
||||
size_t M = A.size() / N;
|
||||
vector<double> b(M);
|
||||
double sum(0.0);
|
||||
|
||||
if (cblas == false) {
|
||||
for (size_t i = 0; i < NLOOPS; ++i) {
|
||||
b = matrix_vec(A,x);
|
||||
sum += b[17];
|
||||
}
|
||||
} else if (cblas == true) {
|
||||
for (size_t i = 0; i < NLOOPS; ++i) {
|
||||
cblas_dgemv(CblasRowMajor, CblasNoTrans, M, N, 1.0, A.data(), N, x.data(), 1, 0, b.data(), 1);
|
||||
sum += b[17];
|
||||
}
|
||||
}
|
||||
|
||||
// Check correctness
|
||||
if (static_cast<size_t>(sum) != N*NLOOPS) {printf(" !! W R O N G result !!\n");}
|
||||
}
|
||||
|
||||
// Test data for the matrix-matrix product:
// A(i,j) = (i+j)%219 + 1 (M x L) and B(i,j) = (i+j)%219 + 1 (L x N), both stored
// row-wise, except for column 17 of B which is set to B(i,17) = 1/A(0,i);
// so C(0,17) = L (up to rounding).
// Returns the tuple (A, B).
std::tuple<std::vector<double>, std::vector<double>> init_C(size_t M, size_t N, size_t L) {
    std::vector<double> A(M*L), B(L*N);

    for (size_t row = 0; row < M; ++row) {
        for (size_t col = 0; col < L; ++col) {
            A[row*L + col] = (row+col)%219 + 1.0;
        }
    }

    for (size_t row = 0; row < L; ++row) {
        for (size_t col = 0; col < N; ++col) {
            // A[row] is the entry A(0,row) of the first row of A
            B[row*N + col] = (col == 17) ? 1.0/A[row]
                                         : (row+col)%219 + 1.0;
        }
    }
    return std::make_tuple(A, B);
}
|
||||
|
||||
void benchmark_C(vector<double> const &A, vector<double> const &B, size_t L, size_t NLOOPS, bool cblas) {
|
||||
size_t M = A.size() / L;
|
||||
size_t N = B.size() / L;
|
||||
vector<double> C(M*N);
|
||||
double sum(0.0);
|
||||
|
||||
if (cblas == false) {
|
||||
for (size_t i = 0; i < NLOOPS; ++i) {
|
||||
C = matrix_matrix(A,B,M);
|
||||
sum += C[17];
|
||||
}
|
||||
} else if (cblas == true) {
|
||||
for (size_t i = 0; i < NLOOPS; ++i) {
|
||||
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, L, 1.0, A.data(), L, B.data(), N, 0.0, C.data(), N);
|
||||
sum += C[17];
|
||||
}
|
||||
}
|
||||
|
||||
// Check correctness
|
||||
if (static_cast<size_t>(sum) != L*NLOOPS) {printf(" !! W R O N G result !!\n");}
|
||||
}
|
||||
|
||||
// Test data for the polynomial evaluation:
//   x_i = i/N  for i=0,...,N-1
//   a_j = 1    for j=0,...,p-1
// Returns the tuple (x, a).
std::tuple<std::vector<double>, std::vector<double>> init_D(size_t N, size_t p) {
    std::vector<double> x(N);
    std::vector<double> a(p, 1.0);

    size_t idx = 0;
    for (double &x_val : x) {
        x_val = static_cast<double>(idx) / N;
        ++idx;
    }
    return std::make_tuple(x, a);
}
|
||||
|
||||
void benchmark_D(vector<double> const &x, vector<double> const &a, size_t NLOOPS) {
|
||||
size_t N = x.size();
|
||||
vector<double> y(N);
|
||||
double sum(0.0);
|
||||
|
||||
for (size_t i = 0; i < NLOOPS; ++i) {
|
||||
y = poly(x,a);
|
||||
sum += y[0];
|
||||
}
|
||||
|
||||
// Check correctness
|
||||
if (static_cast<size_t>(sum) != NLOOPS) {printf(" !! W R O N G result sum = %f !!\n", sum);}
|
||||
}
|
||||
|
||||
void benchmark_summation(vector<double> const &x, size_t NLOOPS) {
|
||||
double s(0.0), sum(0.0);
|
||||
for (size_t i = 0; i < NLOOPS; ++i) {
|
||||
s = summation(x);
|
||||
sum += s;
|
||||
}
|
||||
}
|
||||
22
ex5/code/task_4.h
Normal file
22
ex5/code/task_4.h
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
#pragma once
|
||||
#include <vector>
|
||||
using namespace std;
|
||||
|
||||
vector<double> matrix_vec(vector<double> const &A, vector<double> const &x);
|
||||
vector<double> matrix_matrix(vector<double> const &A, vector<double> const &B, size_t const &M);
|
||||
vector<double> poly(vector<double> const &x, vector<double> const &a);
|
||||
double scalar(vector<double> const &x, vector<double> const &y);
|
||||
double summation(vector<double> const &x);
|
||||
|
||||
void print_performance(double sec, size_t memory, size_t flops, unsigned int size);
|
||||
tuple<vector<double>, vector<double>> init_A(size_t N);
|
||||
tuple<vector<double>, vector<double>> init_B(size_t M, size_t N);
|
||||
tuple<vector<double>, vector<double>> init_C(size_t M, size_t N, size_t L);
|
||||
tuple<vector<double>, vector<double>> init_D(size_t N, size_t p);
|
||||
|
||||
|
||||
void benchmark_A(vector<double> const &x, vector<double> const &y, size_t NLOOPS, bool cblas);
|
||||
void benchmark_B(vector<double> const &A, vector<double> const &x, size_t NLOOPS, bool cblas);
|
||||
void benchmark_C(vector<double> const &A, vector<double> const &B, size_t L, size_t NLOOPS, bool cblas);
|
||||
void benchmark_D(vector<double> const &x, vector<double> const &a, size_t NLOOPS);
|
||||
void benchmark_summation(vector<double> const &x, size_t NLOOPS);
|
||||
51
ex5/code/timing.h
Normal file
51
ex5/code/timing.h
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
//
|
||||
// Gundolf Haase, Oct 18 2024
|
||||
//
|
||||
#pragma once
|
||||
#include <chrono> // timing
|
||||
#include <stack>
|
||||
|
||||
//using Clock = std::chrono::system_clock; //!< The wall clock timer chosen
|
||||
using Clock = std::chrono::high_resolution_clock;
using TPoint= std::chrono::time_point<Clock>;

// [Galowicz, C++17 STL Cookbook, p. 29]
inline
std::stack<TPoint> MyStopWatch;         //!< starting times of the running stopwatches

/** Starts stopwatch timer.
 *  Use as @code tic(); myfunction(...) ; double tsec = toc(); @endcode
 *
 *  The timing can be nested and the recent time point is stored on top of the stack.
 *
 *  @return recent time point
 *  @see toc
 */
inline auto tic()
{
    MyStopWatch.push(Clock::now());
    return MyStopWatch.top();
}

/** Returns the elapsed time from stopwatch and removes the top time point from the stack.
 *
 *  The time point from top of the stack is used
 *  if time point @p t_b is not passed as input parameter.
 *  Use as @code tic(); myfunction(...) ; double tsec = toc(); @endcode
 *  or as @code auto t_b = tic(); myfunction(...) ; double tsec = toc(t_b); @endcode
 *  The last option is to be used in the case of
 *  non-nested but overlapping time measurements.
 *
 *  @param[in] t_b start time of some stop watch
 *  @return elapsed time in seconds.
 *
 *  @warning Calling toc() without argument requires a preceding tic().
 */
inline double toc(TPoint const &t_b = MyStopWatch.top())
{
    // https://en.cppreference.com/w/cpp/chrono/treat_as_floating_point
    using Unit      = std::chrono::seconds;
    using FpSeconds = std::chrono::duration<double, Unit::period>;
    auto const t_e = Clock::now();
    // Evaluate the duration first: by default t_b refers to the top element of
    // the stack, which no longer exists after pop().
    double const elapsed = FpSeconds(t_e - t_b).count();
    if (!MyStopWatch.empty()) {
        MyStopWatch.pop();
    }
    return elapsed;
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue