sheet 5 not tested

This commit is contained in:
Markus Schmidt 2025-12-02 20:28:11 +01:00
commit 64c7aed176
169 changed files with 225337 additions and 0 deletions

BIN
sheet1/F/main.GCC_ Executable file

Binary file not shown.

BIN
sheet1/F/main.o Normal file

Binary file not shown.

59
sheet5/1/.vscode/settings.json vendored Normal file
View file

@ -0,0 +1,59 @@
{
"files.associations": {
"iostream": "cpp",
"cmath": "cpp",
"array": "cpp",
"atomic": "cpp",
"bit": "cpp",
"cctype": "cpp",
"charconv": "cpp",
"chrono": "cpp",
"clocale": "cpp",
"compare": "cpp",
"concepts": "cpp",
"cstdarg": "cpp",
"cstddef": "cpp",
"cstdint": "cpp",
"cstdio": "cpp",
"cstdlib": "cpp",
"cstring": "cpp",
"ctime": "cpp",
"cwchar": "cpp",
"cwctype": "cpp",
"deque": "cpp",
"list": "cpp",
"string": "cpp",
"unordered_map": "cpp",
"vector": "cpp",
"exception": "cpp",
"algorithm": "cpp",
"functional": "cpp",
"iterator": "cpp",
"memory": "cpp",
"memory_resource": "cpp",
"numeric": "cpp",
"optional": "cpp",
"random": "cpp",
"ratio": "cpp",
"string_view": "cpp",
"system_error": "cpp",
"tuple": "cpp",
"type_traits": "cpp",
"utility": "cpp",
"format": "cpp",
"initializer_list": "cpp",
"iomanip": "cpp",
"iosfwd": "cpp",
"istream": "cpp",
"limits": "cpp",
"new": "cpp",
"numbers": "cpp",
"ostream": "cpp",
"span": "cpp",
"sstream": "cpp",
"stdexcept": "cpp",
"streambuf": "cpp",
"typeinfo": "cpp",
"variant": "cpp"
}
}

2877
sheet5/1/Doxyfile Normal file

File diff suppressed because it is too large Load diff

View file

30
sheet5/1/Makefile Normal file
View file

@ -0,0 +1,30 @@
#
# use GNU-Compiler tools
COMPILER=GCC_
# alternatively from the shell
# export COMPILER=GCC_
# or, alternatively from the shell
# make COMPILER=GCC_
# use Intel compilers
#COMPILER=ICC_
# use PGI compilers
# COMPILER=PGI_
SOURCES = main.cpp mylib.cpp
OBJECTS = $(SOURCES:.cpp=.o)
PROGRAM = main.${COMPILER}
# uncomment the next to lines for debugging and detailed performance analysis
CXXFLAGS += -g
LINKFLAGS += -g
# do not use -pg with PGI compilers
ifndef COMPILER
COMPILER=GCC_
endif
include ../${COMPILER}default.mk

View file

99
sheet5/1/check_env.h Normal file
View file

@ -0,0 +1,99 @@
#pragma once
#include <iostream>
#ifdef _OPENMP
#include <omp.h>
#endif
#include <unordered_map>
//#####################################
// G.Haase
// See https://sourceforge.net/p/predef/wiki/Compilers/
// http://www.cplusplus.com/doc/tutorial/preprocessor/
// also: export OMP_DISPLAY_ENV=VERBOSE
//#####################################
/** Checks for compilers, its versions, threads etc.
*
@param[in] argc number of command line arguemnts
@param[in] argv command line arguments as array of C-strings
*/
template <class T>
void check_env(T argc, char const *argv[])
{
std::cout << "\n#######################################################################\n";
std::cout << "Code :";
for (T k = 0; k < argc; ++k) std::cout << " " << argv[k];
std::cout << std::endl;
// compiler: https://sourceforge.net/p/predef/wiki/Compilers/
std::cout << "Compiler: ";
#if defined __INTEL_COMPILER
#pragma message(" ########## INTEL ###############")
std::cout << "INTEL " << __INTEL_COMPILER;
// Ignore warnings for #pragma acc unrecognice
#pragma warning disable 161
// Ignore warnings for #pragma omp unrecognice
#pragma warning disable 3180
#elif defined __PGI
#pragma message(" ########## PGI ###############")
std::cout << "PGI " << __PGIC__ << "." << __PGIC_MINOR__ << "." << __PGIC_PATCHLEVEL__;
#elif defined __clang__
#pragma message(" ########## CLANG ###############")
std::cout << "CLANG " << __clang_major__ << "." << __clang_minor__ << "."; // << __clang_patchlevel__;
#elif defined __GNUC__
#pragma message(" ########## Gnu ###############")
std::cout << "Gnu " << __GNUC__ << "." << __GNUC_MINOR__ << "." << __GNUC_PATCHLEVEL__;
#else
#pragma message(" ########## unknown Compiler ###############")
std::cout << "unknown";
#endif
std::cout << " C++ standard: " << __cplusplus << std::endl;
// Parallel environments
std::cout << "Parallel: ";
#if defined MPI_VERSION
#pragma message(" ########## MPI ###############")
#ifdef OPEN_MPI
std::cout << "OpenMPI ";
#else
std::cout << "MPI ";
#endif
std::cout << MPI_VERSION << "." << MPI_SUBVERSION << " ";
#endif
#ifdef _OPENMP
//https://www.openmp.org/specifications/
//https://stackoverflow.com/questions/1304363/how-to-check-the-version-of-openmp-on-linux
std::unordered_map<unsigned, std::string> const map{
{200505, "2.5"}, {200805, "3.0"}, {201107, "3.1"}, {201307, "4.0"}, {201511, "4.5"}, {201611, "5.0"}, {201811, "5.0"}};
#pragma message(" ########## OPENMP ###############")
//std::cout << _OPENMP;
std::cout << "OpenMP ";
try {
std::cout << map.at(_OPENMP);
}
catch (...) {
std::cout << _OPENMP;
}
#pragma omp parallel
{
#pragma omp master
{
const int nn = omp_get_num_threads(); // OpenMP
std::cout << " ---> " << nn << " Threads ";
}
#pragma omp barrier
}
#endif
#ifdef _OPENACC
#pragma message(" ########## OPENACC ###############")
std::cout << "OpenACC ";
#endif
std::cout << std::endl;
std::cout << "Date : " << __DATE__ << " " << __TIME__;
std::cout << "\n#######################################################################\n";
}
// HG

View file

BIN
sheet5/1/main.GCC_ Executable file

Binary file not shown.

203
sheet5/1/main.cpp Normal file
View file

@ -0,0 +1,203 @@
#include "check_env.h"
#include "mylib.h"
#include <cstdlib> // atoi()
#include <cstring> // strncmp()
#include <ctime>
#include <iostream>
#include <omp.h> // OpenMP
#include <sstream>
#include <string>
#include <cmath>
using namespace std;
void benchmark(vector<double> &x, vector<double> &y, unsigned int N, unsigned int NLOOPS)
{
double sk = 0.0;
for (int i = 0; i < NLOOPS; ++i)
{
sk += scalar(x, y);
// or scalar_trans(x,y) / norm(x) if you want
}
}
int main(int argc, char const *argv[])
{
//int const NLOOPS = 5; // chose a value such that the benchmark runs at least 10 sec.
unsigned int N = 5000001;
int const NLOOPS = 5; // chose a value such that the benchmark runs at least 10 sec.
//unsigned int N = 5000001;
//##########################################################################
// Read Parameter from command line (C++ style)
cout << "Checking command line parameters for: -n <number> " << endl;
for (int i = 1; i < argc; i++)
{
cout << " arg[" << i << "] = " << argv[i] << endl;
string ss(argv[i]);
if ("-n"==ss && i + 1 < argc) // found "-n" followed by another parameter
{
N = static_cast<unsigned int>(atoi(argv[i + 1]));
}
else
{
cout << "Corect call: " << argv[0] << " -n <number>\n";
}
}
cout << "\nN = " << N << endl;
check_env(argc, argv);
//########################################################################
int nthreads; // OpenMP
#pragma omp parallel default(none) shared(cout,nthreads)
{
int const th_id = omp_get_thread_num(); // OpenMP
int const nthrds = omp_get_num_threads(); // OpenMP
stringstream ss;
ss << "C++: Hello World from thread " << th_id << " / " << nthrds << endl;
#pragma omp critical
{
cout << ss.str(); // output to a shared ressource
}
#pragma omp master
nthreads = nthrds; // transfer nn to to master thread
}
cout << " " << nthreads << " threads have been started." << endl;
//##########################################################################
// Memory allocation
cout << "Memory allocation\n";
vector<double> x(N), y(N);
cout.precision(2);
cout << 2.0 * N *sizeof(x[0]) / 1024 / 1024 / 1024 << " GByte Memory allocated\n";
cout.precision(6);
//##########################################################################
// Data initialization
// Special: x_i = i+1; y_i = 1/x_i ==> <x,y> == N
for (unsigned int i = 0; i < N; ++i)
{
x[i] = i + 1;
y[i] = 1.0 / x[i];
}
//##########################################################################
cout << "\nStart Benchmarking\n";
// Do calculation
double tstart = omp_get_wtime(); // OpenMP
double sk(0.0);
for (int i = 0; i < NLOOPS; ++i)
{
sk = scalar(x, y);
sk = scalar_trans(x, y);
//sk = norm(x);
}
double t1 = omp_get_wtime() - tstart; // OpenMP
t1 /= NLOOPS; // divide by number of function calls
//##########################################################################
// Check the correct result
cout << "\n <x,y> = " << sk << endl;
if (static_cast<unsigned int>(sk) != N)
{
cout << " !! W R O N G result !!\n";
}
cout << endl;
//##########################################################################
// Timings and Performance
cout << endl;
cout.precision(2);
cout << "Timing in sec. : " << t1 << endl;
cout << "GFLOPS : " << 2.0 * N / t1 / 1024 / 1024 / 1024 << endl;
cout << "GiByte/s : " << 2.0 * N / t1 / 1024 / 1024 / 1024 * sizeof(x[0]) << endl;
//#########################################################################
cout << "\n Try the reduction with an STL-vektor \n";
auto vr = reduction_vec(100);
cout << "done\n";
cout << vr << endl;
N=2;
//Data (re-)inizialiion
for (unsigned int i = 0; i < N; ++i)
{
x[i] = i + 1;
y[i] = 1.0 / x[i];
}
int proc_count = omp_get_num_procs();
cout << "Number of available processors: " << proc_count << endl;
for(int j=1; j<=proc_count; j++)
{
omp_set_num_threads(j);
cout << "used threads: "<< j << endl;
omp_set_schedule(omp_sched_static, 0);
tstart = omp_get_wtime();
benchmark(x, y, N, NLOOPS);
t1 = omp_get_wtime()/NLOOPS;
cout << "static (chunk 0) "<< (t1-tstart) << endl;
for(int i=0; i<= 5; i++)
{
int chunk = 1 << i;
cout << "chunk size: "<< chunk << endl;
// STATIC
omp_set_schedule(omp_sched_static, chunk);
tstart = omp_get_wtime();
benchmark(x, y, N, NLOOPS);
t1 = omp_get_wtime()/NLOOPS;
std::cout << "static: " << (t1 - tstart) << " s\n";
// DYNAMIC
omp_set_schedule(omp_sched_dynamic, chunk);
tstart = omp_get_wtime();
benchmark(x, y, N, NLOOPS);
t1 = omp_get_wtime()/NLOOPS;
std::cout << "dynamic: " << (t1 - tstart) << " s\n";
// GUIDED
omp_set_schedule(omp_sched_guided, chunk);
tstart = omp_get_wtime();
benchmark(x, y, N, NLOOPS);
t1 = omp_get_wtime()/NLOOPS;
std::cout << "guided: " << (t1 - tstart) << " s\n";
// AUTO
omp_set_schedule(omp_sched_auto, chunk);
tstart = omp_get_wtime();
benchmark(x, y, N, NLOOPS);
t1 = omp_get_wtime()/NLOOPS;
std::cout << "auto: " << (t1 - tstart) << " s\n";
cout << endl;
}
cout << endl;
}
cout << scalar_parrallel_env(x,y) << endl;
vector<int> vec = reduction_vec_append(N);
for(int i=0; i< N; i++)
{
cout << vec[i] << ", ";
}
return 0;
} // memory for x and y will be deallocated their destructors

View file

BIN
sheet5/1/main.o Normal file

Binary file not shown.

136
sheet5/1/mylib.cpp Normal file
View file

@ -0,0 +1,136 @@
#include "mylib.h"
#include <cassert> // assert()
#include <cmath>
#include <iostream>
#include <functional> // multiplies<>{}
#include <list>
#include <numeric> // iota()
#ifdef _OPENMP
#include <omp.h>
#endif
#include <vector>
using namespace std;
double scalar(vector<double> const &x, vector<double> const &y)
{
assert(x.size() == y.size()); // switch off via compile flag: -DNDEBUG
size_t const N = x.size();
double sum = 0.0;
if (!omp_in_parallel())
{
// Safe to start a parallel region
#pragma omp parallel for default(none) shared(x,y,N) reduction(+:sum) schedule(runtime)
for (size_t i = 0; i < N; ++i)
sum += x[i] * y[i];
}
else
{
// Already inside parallel region: do it sequentially to avoid nested parallelism
for (size_t i = 0; i < N; ++i)
sum += x[i] * y[i];
}
return sum;
}
double scalar_parrallel_env(vector<double> const &x, vector<double> const &y)
{
assert(x.size() == y.size()); // switch off via compile flag: -DNDEBUG
size_t const N = x.size();
double sum = 0.0;
// Safe to start a parallel region
#pragma omp parallel default(none) shared(x,y,N,cout) reduction(+:sum)
{
int tid = omp_get_thread_num();
int threadCount = omp_get_num_threads();
cout << threadCount << endl;
for (size_t i = tid*N/threadCount; i < tid*(N + 1)/threadCount; ++i)
{
sum += x[i] * y[i];
}
}
return sum;
}
double norm(vector<double> const &x)
{
size_t const N = x.size();
double sum = 0.0;
#pragma omp parallel for default(none) shared(x,N) reduction(+:sum)
for (size_t i = 0; i < N; ++i)
{
sum += x[i]*x[i];
}
return sum;
}
vector<int> reduction_vec(int n)
{
vector<int> vec(n);
#pragma omp parallel default(none) shared(cout) reduction(VecAdd:vec)
{
#pragma omp barrier
#pragma omp critical
cout << omp_get_thread_num() << " : " << vec.size() << endl;
#pragma omp barrier
iota( vec.begin(),vec.end(), omp_get_thread_num() );
#pragma omp barrier
}
return vec;
}
vector<int> reduction_vec_append(int n)
{
vector<int> vec(n);
#pragma omp parallel default(none) shared(cout,n) reduction(VecAppend:vec)
{
int tid = omp_get_thread_num();
vector<int> local(n);
iota(local.begin(), local.end(), tid);
#pragma omp critical
cout << tid << " : " << local.size() << endl;
vec = local;
}
return vec;
}
double scalar_trans(vector<double> const &x, vector<double> const &y)
{
assert(x.size() == y.size()); // switch off via compile flag: -DNDEBUG
vector<double> z(x.size());
//list<double> z(x.size()); // parallel for-loop on iterators not possible (missing 'operator-')
// c++-20 CLANG_, ONEAPI_:condition of OpenMP for loop must be a relational comparison
transform(cbegin(x),cend(x),cbegin(y),begin(z),std::multiplies<>{});
double sum = 0.0;
#pragma omp parallel for default(none) shared(z) reduction(+:sum)
for (auto pi = cbegin(z); pi!=cend(z); ++pi)
{
sum += *pi;
}
//for (auto val: z)
//{
//sum += val;
//}
return sum;
}

View file

86
sheet5/1/mylib.h Normal file
View file

@ -0,0 +1,86 @@
#pragma once
#include <cassert>
#include <iomanip> // setw()
#include <iostream>
#include <omp.h>
#include <vector>
using namespace std;
/** Inner product
@param[in] x vector
@param[in] y vector
@return resulting Euclidian inner product <x,y>
*/
double scalar(std::vector<double> const &x, std::vector<double> const &y);
double scalar_trans(std::vector<double> const &x, std::vector<double> const &y);
double scalar_parrallel_env(std::vector<double> const &x, std::vector<double> const &y);
/** l2-norm
@param[in] x vector
@return resulting Euclidian norm
*/
double norm(std::vector<double> const &x);
/** Vector @p b adds its elements to vector @p a .
@param[in] a vector
@param[in] b vector
@return a+=b componentwise
*/
template<class T>
std::vector<T> &operator+=(std::vector<T> &a, std::vector<T> const &b)
{
assert(a.size()==b.size());
for (size_t k = 0; k < a.size(); ++k) {
a[k] += b[k];
}
return a;
}
// Declare the reduction operation in OpenMP for an STL-vector
// omp_out += omp_in requires operator+=(vector<int> &, vector<int> const &) from above
// ------------------------------------------------------------
// https://scc.ustc.edu.cn/zlsc/tc4600/intel/2016.0.109/compiler_c/common/core/GUID-7312910C-D175-4544-99C5-29C12D980744.htm
// https://gist.github.com/eruffaldi/7180bdec4c8c9a11f019dd0ba9a2d68c
// https://stackoverflow.com/questions/29633531/user-defined-reduction-on-vector-of-varying-size
// see also p.74ff in https://www.fz-juelich.de/ias/jsc/EN/AboutUs/Staff/Hagemeier_A/docs-parallel-programming/OpenMP-Slides.pdf
#pragma omp declare reduction(VecAdd : std::vector<int> : omp_out += omp_in) \
initializer (omp_priv=omp_orig)
#pragma omp declare reduction(VecAppend: std::vector<int> : omp_out.insert(omp_out.end(), omp_in.begin(), omp_in.end())) \
initializer (omp_priv=vector<int>())
// Templates are n o t possible, i.e. the reduction has to be declared fore a specified type.
//template <class T>
//#pragma omp declare reduction(VecAdd : std::vector<T> : omp_out += omp_in) initializer (omp_priv(omp_orig))
// MS: template nach #pragma !?
// ------------------------------------------------------------
/** Test for vector reduction.
*
* The thread-private vectors of size @p n are initialized via @f$v_k^{tID}=tID+k@f$.
* Afterwards these vectors are accumulated, i.e.,
* @f$v_k= \sum_{tID=0}^{numThreads} v_k^{tID}@f$.
*
* @param[in] n size of global/private vector
* @return resulting global vector.
*/
std::vector<int> reduction_vec(int n);
std::vector<int> reduction_vec_append(int n);
/** Output of a vector.
@param[in,out] s output stream
@param[in] x vector
@return modified output stream
*/
template <class T>
std::ostream &operator<<(std::ostream &s, std::vector<T> const &x)
{
for (auto const &v : x) s << std::setw(4) << v << " ";
return s;
}

View file

BIN
sheet5/1/mylib.o Normal file

Binary file not shown.

70
sheet5/1/timing.h Normal file
View file

@ -0,0 +1,70 @@
#pragma once
#include <chrono> // timing
#include <stack>
using Clock = std::chrono::system_clock; //!< The wall clock timer chosen
//using Clock = std::chrono::high_resolution_clock;
using TPoint= std::chrono::time_point<Clock>;
// [Galowicz, C++17 STL Cookbook, p. 29]
inline
std::stack<TPoint> MyStopWatch; //!< starting time of stopwatch
/** Starts stopwatch timer.
* Use as @code tic(); myfunction(...) ; double tsec = toc(); @endcode
*
* The timining is allowed to be nested and the recent time is stored on top of the stack.
*
* @return recent time
* @see toc
*/
inline auto tic()
{
MyStopWatch.push(Clock::now());
return MyStopWatch.top();
}
/** Returns the elapsed time from stopwatch.
*
* The time from top of the stack is used
* if time point @p t_b is not passed as input parameter.
* Use as @code tic(); myfunction(...) ; double tsec = toc(); @endcode
* or as @code auto t_b = tic(); myfunction(...) ; double tsec = toc(t_b); @endcode
* The last option is to be used in the case of
* non-nested but overlapping time measurements.
*
* @param[in] t_b start time of some stop watch
* @return elapsed time in seconds.
*
*/
inline double toc(TPoint const &t_b = MyStopWatch.top())
{
// https://en.cppreference.com/w/cpp/chrono/treat_as_floating_point
using Unit = std::chrono::seconds;
using FpSeconds = std::chrono::duration<double, Unit::period>;
auto t_e = Clock::now();
MyStopWatch.pop();
return FpSeconds(t_e-t_b).count();
}
#include <iostream>
#include <string>
/** Executes function @p f and measures/prints elapsed wall clock time in seconds
*
* Call as
* @code measure("Time for (b = b + 1)", [&]() {
thrust::transform(b.begin(), b.end(), b.begin(), increment());
}); @endcode
*
* @param[in] label additional string to be printed with the measurement.
* @param[in] f function to execute.
* @author Therese Bösmüller, 2025
*
*/
auto measure = [](const std::string& label, auto&& f) {
auto start = std::chrono::high_resolution_clock::now();
f();
auto stop = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start).count();
std::cout << label << ": " << duration << " microseconds" << std::endl;
}; // ';' is needed for a visible documentation of this lambda-function

View file

16
sheet5/2/.vscode/c_cpp_properties.json vendored Normal file
View file

@ -0,0 +1,16 @@
{
"configurations": [
{
"name": "Linux",
"includePath": [
"${workspaceFolder}/**"
],
"defines": [],
"compilerPath": "/usr/bin/gcc",
"cStandard": "c17",
"cppStandard": "gnu++17",
"intelliSenseMode": "linux-gcc-x64"
}
],
"version": 4
}

59
sheet5/2/.vscode/settings.json vendored Normal file
View file

@ -0,0 +1,59 @@
{
"files.associations": {
"iostream": "cpp",
"cmath": "cpp",
"array": "cpp",
"atomic": "cpp",
"bit": "cpp",
"cctype": "cpp",
"charconv": "cpp",
"chrono": "cpp",
"clocale": "cpp",
"compare": "cpp",
"concepts": "cpp",
"cstdarg": "cpp",
"cstddef": "cpp",
"cstdint": "cpp",
"cstdio": "cpp",
"cstdlib": "cpp",
"cstring": "cpp",
"ctime": "cpp",
"cwchar": "cpp",
"cwctype": "cpp",
"deque": "cpp",
"list": "cpp",
"string": "cpp",
"unordered_map": "cpp",
"vector": "cpp",
"exception": "cpp",
"algorithm": "cpp",
"functional": "cpp",
"iterator": "cpp",
"memory": "cpp",
"memory_resource": "cpp",
"numeric": "cpp",
"optional": "cpp",
"random": "cpp",
"ratio": "cpp",
"string_view": "cpp",
"system_error": "cpp",
"tuple": "cpp",
"type_traits": "cpp",
"utility": "cpp",
"format": "cpp",
"initializer_list": "cpp",
"iomanip": "cpp",
"iosfwd": "cpp",
"istream": "cpp",
"limits": "cpp",
"new": "cpp",
"numbers": "cpp",
"ostream": "cpp",
"span": "cpp",
"sstream": "cpp",
"stdexcept": "cpp",
"streambuf": "cpp",
"typeinfo": "cpp",
"variant": "cpp"
}
}

View file

39
sheet5/2/Makefile Normal file
View file

@ -0,0 +1,39 @@
#
# use GNU-Compiler tools
COMPILER=GCC_
# alternatively from the shell
# export COMPILER=GCC_
# or, alternatively from the shell
# make COMPILER=GCC_
# use Intel compilers
#COMPILER=ICC_
# use PGI compilers
# COMPILER=PGI_
# use CLANG compilers
# COMPILER=CLANG_
SOURCES = main.cpp file_io.cpp means.cpp
OBJECTS = $(SOURCES:.cpp=.o)
PROGRAM = main.${COMPILER}
# uncomment the next to lines for debugging and detailed performance analysis
CXXFLAGS += -g
LINKFLAGS += -g
# do not use -pg with PGI compilers
ifndef COMPILER
COMPILER=GCC_
endif
include ../${COMPILER}default.mk
task:
@pdflatex task
@pdflatex task

View file

500
sheet5/2/data_1.txt Normal file
View file

@ -0,0 +1,500 @@
141
261
87
430
258
298
425
120
496
707
244
786
75
394
4
221
2
190
143
269
175
139
599
902
940
222
483
377
524
265
69
437
174
27
955
431
962
763
8
681
706
646
553
219
773
229
371
891
857
403
319
609
911
910
592
333
854
443
905
34
533
717
180
337
188
322
404
549
49
553
275
242
244
155
957
936
819
729
176
361
189
2
317
700
626
544
440
288
502
762
763
577
748
646
124
505
348
93
148
199
673
432
695
257
10
533
280
947
907
393
25
672
838
972
57
451
583
687
720
651
727
374
582
117
58
980
285
595
963
186
194
342
933
391
274
152
398
375
132
436
92
615
11
574
790
236
449
570
62
497
643
222
838
972
847
506
279
747
237
958
621
601
173
91
256
859
912
700
726
230
577
811
404
989
90
321
512
61
726
557
530
830
859
790
318
453
753
110
110
270
525
973
711
312
292
851
912
640
256
89
839
585
949
62
585
286
828
191
443
394
827
677
208
319
134
672
571
170
148
477
909
553
33
54
806
452
383
790
365
533
712
872
329
651
975
76
588
414
310
264
759
996
187
782
196
993
803
425
729
499
809
357
74
591
911
194
433
750
40
947
764
559
184
498
518
995
855
963
679
404
935
480
232
397
706
559
757
996
963
536
964
116
52
305
581
531
902
541
432
543
713
17
801
143
479
257
370
662
170
279
199
196
327
881
472
404
180
969
408
845
616
377
878
785
465
814
899
430
335
597
902
703
378
735
955
543
541
312
72
182
93
464
10
916
643
2
31
209
455
128
9
728
355
781
437
437
50
50
92
595
242
842
858
964
489
221
227
537
763
348
462
640
918
162
716
578
434
885
394
179
634
625
328
803
1000
981
128
233
24
608
111
408
885
549
370
209
441
957
125
471
857
44
692
979
284
134
686
910
611
900
194
755
347
419
156
820
625
739
806
68
951
498
756
743
832
157
458
619
933
836
896
583
583
855
35
886
408
37
747
155
144
606
255
325
402
407
387
610
167
189
95
324
770
235
741
693
825
828
294
310
524
326
832
811
557
263
681
234
457
385
539
992
756
981
235
529
52
757
602
858
989
930
410
1
541
208
220
326
96
748
749
544
339
833
553
958
893
357
547
347
623
797
746
126
823
26
415
732
782
368

View file

60
sheet5/2/file_io.cbp Normal file
View file

@ -0,0 +1,60 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
<CodeBlocks_project_file>
<FileVersion major="1" minor="6" />
<Project>
<Option title="file_io" />
<Option pch_mode="2" />
<Option compiler="gcc" />
<Build>
<Target title="Debug">
<Option output="bin/Debug/file_io" prefix_auto="1" extension_auto="1" />
<Option object_output="obj/Debug/" />
<Option type="1" />
<Option compiler="gcc" />
<Compiler>
<Add option="-g" />
</Compiler>
</Target>
<Target title="Release">
<Option output="bin/Release/file_io" prefix_auto="1" extension_auto="1" />
<Option object_output="obj/Release/" />
<Option type="1" />
<Option compiler="gcc" />
<Compiler>
<Add option="-O2" />
</Compiler>
<Linker>
<Add option="-s" />
</Linker>
</Target>
</Build>
<Compiler>
<Add option="-Wshadow" />
<Add option="-Winit-self" />
<Add option="-Wunreachable-code" />
<Add option="-pedantic" />
<Add option="-std=c++11" />
<Add option="-Wextra" />
<Add option="-Wall" />
<Add option="-fexceptions" />
</Compiler>
<Unit filename="file_io.cpp" />
<Unit filename="file_io.h" />
<Unit filename="main.cpp" />
<Extensions>
<envvars />
<code_completion />
<lib_finder disable_auto="1" />
<debugger />
<DoxyBlocks>
<comment_style block="0" line="0" />
<doxyfile_project />
<doxyfile_build extract_all="1" />
<doxyfile_warnings />
<doxyfile_output />
<doxyfile_dot />
<general use_at_in_tags="1" />
</DoxyBlocks>
</Extensions>
</Project>
</CodeBlocks_project_file>

View file

65
sheet5/2/file_io.cpp Normal file
View file

@ -0,0 +1,65 @@
#include "file_io.h"
#include <cassert>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>
using namespace std;
// [Str10, p.364]
void fill_vector(istream& istr, vector<short>& v)
{
double d=0;
while ( istr >> d) v.push_back(d); // Einlesen
if (!istr.eof())
{ // Fehlerbehandlung
cout << " Error handling \n";
if ( istr.bad() ) throw runtime_error("Schwerer Fehler in istr");
if ( istr.fail() ) // Versuch des Aufraeumens
{
cout << " Failed in reading all data.\n";
istr.clear();
}
}
v.shrink_to_fit(); // C++11
return;
}
void read_vector_from_file(const string& file_name, vector<short>& v)
{
ifstream fin(file_name); // Oeffne das File im ASCII-Modus
if( fin.is_open() ) // File gefunden:
{
v.clear(); // Vektor leeren
fill_vector(fin, v);
}
else // File nicht gefunden:
{
cout << "\nFile " << file_name << " has not been found.\n\n" ;
assert( fin.is_open() && "File not found." ); // exeption handling for the poor programmer
}
return;
}
void write_vector_to_file(const string& file_name, const vector<double>& v)
{
ofstream fout(file_name); // Oeffne das File im ASCII-Modus
if( fout.is_open() )
{
for (unsigned int k=0; k<v.size(); ++k)
{
fout << v.at(k) << endl;
}
}
else
{
cout << "\nFile " << file_name << " has not been opened.\n\n" ;
assert( fout.is_open() && "File not opened." ); // exeption handling for the poor programmer
}
return;
}

View file

40
sheet5/2/file_io.h Normal file
View file

@ -0,0 +1,40 @@
#ifndef FILE_IO_H_INCLUDED
#define FILE_IO_H_INCLUDED
#include <string>
#include <vector>
//using namespace std;
/**
This function opens the ASCII-file named @p file_name and reads the
double data into the C++ vector @p v.
If the file @p file_name does not exist then the code stops with an appropriate message.
@param[in] file_name name of the ASCII-file
@param[out] v C++ vector with double values
*/
void read_vector_from_file(const std::string& file_name, std::vector<short>& v);
/**
This function opens the ASCII-file named @p file_name and rewrites its with the
double data from the C++ vector @p v.
If there are problems in opening/generating file @p file_name
then the code stops with an appropriate message.
@param[in] file_name name of the ASCII-file
@param[in] v C++ vector with double values
*/
void write_vector_to_file(const std::string& file_name, const std::vector<double>& v);
/**
Fills the double-vector @p v with data from an input stream @p istr until this input stream
ends regularily. The vector is cleared and its memory is automatically allocated.
@param[in] istr input stream
@param[out] v C++ vector with double values
@warning An exception is thrown in case of wrong data format or corrupted data.
*/
void fill_vector(std::istream& istr, std::vector<double>& v);
#endif // FILE_IO_H_INCLUDED

View file

BIN
sheet5/2/file_io.o Normal file

Binary file not shown.

BIN
sheet5/2/main.GCC_ Executable file

Binary file not shown.

129
sheet5/2/main.cpp Normal file
View file

@ -0,0 +1,129 @@
#include "file_io.h"
#include "means.h"
#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
#include <cmath>
#include <execution>
#include <omp.h>
using namespace std;
int main()
{
cout << "File einlesen." << endl;
const string name("data_1.txt"); // name of input file
const string name2("out_1.txt"); // name of output file
vector<short> a; //-2^15 to 2^15-1 fits the values from the file
double min, max, ar, ge, ha, std;
read_vector_from_file(name, a);
const unsigned size = a.size();
double t_omp_start = omp_get_wtime();
min = a[0];
max = a[0];
#pragma omp parallel for reduction(min:min) reduction(max:max)
for (int i = 0; i < static_cast<int>(size); ++i) {
if (a[i] < min)
{
min = a[i];
}
if (a[i] > max) {
max = a[i];
}
}
means_vector(a, ar, ge, ha);
std = 0.0;
#pragma omp parallel for reduction(+:std)
for(unsigned int i = 0; i < size; i++)
{
std += pow(a.at(i)-ar,2);
}
std = sqrt(std/size);
double t_omp_end = omp_get_wtime();
double time_omp = t_omp_end - t_omp_start;
cout << "min: " << min << ", max: " << max << endl;
cout << "Arithmetic mean: " << ar
<< ", geometric mean: " << ge
<< ", harmonic mean: " << ha << endl;
cout << "std: " << std << endl;
vector<double> results = {min, max, ar, ge, ha, std};
write_vector_to_file(name2, results);
//C++17 execution policies
double min_ep = 0.0, max_ep = 0.0, ar_ep = 0.0, ge_ep = 0.0, ha_ep = 0.0, std_ep = 0.0;
double t_ep_start = omp_get_wtime();
auto p = minmax_element(execution::par, a.begin(), a.end());
min_ep = *p.first;
max_ep = *p.second;
double sum_ep = reduce(
execution::par,
a.begin(), a.end(),
0.0
);
ar_ep = sum_ep / size;
ge_ep = transform_reduce(
execution::par,
a.begin(), a.end(),
1.0,
multiplies<>(),
[size](short x){
return pow((double)x, 1.0 / size);
}
);
double sum_inv_ep = transform_reduce(
execution::par,
a.begin(), a.end(),
0.0,
plus<>(),
[](short x){ return 1.0 / (double)x; }
);
ha_ep = size / sum_inv_ep;
double sum_sq_ep = transform_reduce(
execution::par,
a.begin(), a.end(),
0.0,
plus<>(),
[ar_ep](short x){
double d = x - ar_ep;
return d * d;
}
);
std_ep = sqrt(sum_sq_ep / size);
double t_ep_end = omp_get_wtime();
double time_ep = t_ep_end - t_ep_start;
cout << "min: " << min_ep << ", max: " << max_ep << endl;
cout << "Arithmetic mean: " << ar_ep
<< ", geometric mean: " << ge_ep
<< ", harmonic mean: " << ha_ep << endl;
cout << "std: " << std_ep << endl;
cout << "OpenMP per run: " << (time_omp) << endl;
cout << "Execution policy per run: " << (time_ep) << endl;
return 0;
}

View file

BIN
sheet5/2/main.o Normal file

Binary file not shown.

37
sheet5/2/means.cpp Normal file
View file

@ -0,0 +1,37 @@
#include <iostream>
#include <cmath>
#include <vector>
#include <omp.h>
using namespace std;
void means(int a, int b, int c, double &ar, double &ge, double &ha) {
ar = (a+b+c) / 3.0;
ge = pow(a,1.0/3.0) * pow(b,1.0/3.0) * pow(c,1.0/3.0); //do it instead of pow(a*b*c,1.0/3.0) to prevent integer overflow
ha = 3.0 / (1.0/a +1.0/b +1.0/c);
}
void means_vector(const vector<short> &input, double &ar, double &ge, double &ha) {
int size = input.size();
if (size == 0) {
cout << "Empty input" << endl;
return;
}
ar = 0;
ge = 1;
ha = 0;
#pragma omp parallel for reduction(+:ar,ha) reduction(*:ge)
for (int i = 0; i < size; i++) {
ar += input.at(i);
ge *= pow(input.at(i), 1.0 / size);
ha += 1.0 / input.at(i);
}
ar /= size;
ha = size / ha;
}

View file

9
sheet5/2/means.h Normal file
View file

@ -0,0 +1,9 @@
#ifndef MEANS_H_INCLUDED
#define MEANS_H_INCLUDED
#include <vector>
void means(int a, int b, int c, double &ar, double &ge, double &ha);
void means_vector(const std::vector<short> &input, double &ar, double &ge, double &ha);
#endif // MEANS_H_INCLUDED

View file

BIN
sheet5/2/means.o Normal file

Binary file not shown.

6
sheet5/2/out_1.txt Normal file
View file

@ -0,0 +1,6 @@
1
1000
498.184
364.412
95.6857
287.905

View file

16
sheet5/3/.vscode/c_cpp_properties.json vendored Normal file
View file

@ -0,0 +1,16 @@
{
"configurations": [
{
"name": "Linux",
"includePath": [
"${workspaceFolder}/**"
],
"defines": [],
"compilerPath": "/usr/bin/gcc",
"cStandard": "c17",
"cppStandard": "gnu++17",
"intelliSenseMode": "linux-gcc-x64"
}
],
"version": 4
}

5
sheet5/3/.vscode/settings.json vendored Normal file
View file

@ -0,0 +1,5 @@
{
"files.associations": {
"ostream": "cpp"
}
}

View file

30
sheet5/3/Makefile Normal file
View file

@ -0,0 +1,30 @@
#
# use GNU-Compiler tools
COMPILER=GCC_
# alternatively from the shell
# export COMPILER=GCC_
# or, alternatively from the shell
# make COMPILER=GCC_
# use Intel compilers
#COMPILER=ICC_
# use PGI compilers
# COMPILER=PGI_
SOURCES = main.cpp
OBJECTS = $(SOURCES:.cpp=.o)
PROGRAM = main.${COMPILER}
# uncomment the next to lines for debugging and detailed performance analysis
CXXFLAGS += -g
LINKFLAGS += -g
# do not use -pg with PGI compilers
ifndef COMPILER
COMPILER=GCC_
endif
include ../${COMPILER}default.mk

View file

BIN
sheet5/3/main.GCC_ Executable file

Binary file not shown.

77
sheet5/3/main.cpp Normal file
View file

@ -0,0 +1,77 @@
#include <iostream>
#include "mayer_primes.h"
#include <vector>
#include <algorithm>
#include "timing.h"
#include "omp.h"
using namespace std;
vector<unsigned int> count_goldbach(unsigned int n)
{
vector<unsigned int> primes = get_primes(n);
size_t npr = primes.size();
int nthreads = omp_get_max_threads();
vector<vector<unsigned int>> local_counts(nthreads, vector<unsigned int>(n+1, 0));
#pragma omp parallel
{
int tid = omp_get_thread_num();
auto &lc = local_counts[tid];
#pragma omp for schedule(static)
for (int j = 0; j < (int)npr; ++j)
{
unsigned int p = primes[j];
for (unsigned int i = j;i < primes.size() && p + primes[i] <= n; ++i)
{
unsigned int q = primes[i];
unsigned int s = p + q;
lc[s] +=1;
}
}
}
// combine
vector<unsigned int> counts(n+1, 0);
for (int t = 0; t < nthreads; ++t)
for (unsigned int k = 0; k <= n; ++k)
counts[k] += local_counts[t][k];
return counts;
}
int main()
{
//3
vector<unsigned int> counts = count_goldbach(100000);
cout << (max_element(counts.begin(), counts.end()) - counts.begin()) << endl;
//4
vector<unsigned int> nvalues = {10000, 100000, 400000, 1000000, 2000000};
double time;
unsigned int n;
for(unsigned int i = 0; i< nvalues.size(); i++)
{
n = nvalues.at(i);
tic();
count_goldbach(n);
time = toc();
cout << "Time for n=" << n << ": " << time << endl;
}
return 0;
}

View file

BIN
sheet5/3/main.o Normal file

Binary file not shown.

73
sheet5/3/mayer_primes.h Normal file
View file

@ -0,0 +1,73 @@
#pragma once
#include <cstring> //memset
#include <vector>
//using namespace std;
/** \brief Determines all prime numbers in interval [2, @p max].
*
* The sieve of Eratosthenes is used.
*
* The implementation originates from <a href="http://code.activestate.com/recipes/576559-fast-prime-generator/">Florian Mayer</a>.
*
* \param[in] max end of interval for the prime number search.
* \return vector of prime numbers @f$2,3,5, ..., p<=max @f$.
*
* \copyright
* Copyright (c) 2008 Florian Mayer (adapted by Gundolf Haase 2018)
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
template <class T>
std::vector<T> get_primes(T max)
{
std::vector<T> primes;
char *sieve;
sieve = new char[max / 8 + 1];
// Fill sieve with 1
memset(sieve, 0xFF, (max / 8 + 1) * sizeof(char));
for (T x = 2; x <= max; x++)
{
if (sieve[x / 8] & (0x01 << (x % 8))) {
primes.push_back(x);
// Is prime. Mark multiplicates.
for (T j = 2 * x; j <= max; j += x)
{
sieve[j / 8] &= ~(0x01 << (j % 8));
}
}
}
delete[] sieve;
return primes;
}
//---------------------------------------------------------------
//int main() // by Florian Mayer
//{g++ -O3 -std=c++14 -fopenmp main.cpp && ./a.out
// vector<unsigned long> primes;
// primes = get_primes(10000000);
// // return 0;
// // Print out result.
// vector<unsigned long>::iterator it;
// for(it=primes.begin(); it < primes.end(); it++)
// cout << *it << " ";
//
// cout << endl;
// return 0;
//}

View file

@ -0,0 +1,3 @@
[ZoneTransfer]
ZoneId=3
HostUrl=https://imsc.uni-graz.at/haasegu/Lectures/Math2CPP/Examples/goldbach/mayer_primes.h

51
sheet5/3/timing.h Normal file
View file

@ -0,0 +1,51 @@
//
// Gundolf Haase, Oct 18 2024
//
#pragma once
#include <chrono> // timing
#include <stack>
//using Clock = std::chrono::system_clock; //!< The wall clock timer chosen
using Clock = std::chrono::high_resolution_clock;
using TPoint= std::chrono::time_point<Clock>;
// [Galowicz, C++17 STL Cookbook, p. 29]
std::stack<TPoint> MyStopWatch; //!< starting time of stopwatch
/** Starts stopwatch timer.
* Use as @code tic(); myfunction(...) ; double tsec = toc(); @endcode
*
* The timining can be nested and the recent time point is stored on top of the stack.
*
* @return recent time point
* @see toc
*/
auto tic()
{
MyStopWatch.push(Clock::now());
return MyStopWatch.top();
}
/** Returns the elapsed time from stopwatch.
*
* The time point from top of the stack is used
* if time point @p t_b is not passed as input parameter.
* Use as @code tic(); myfunction(...) ; double tsec = toc(); @endcode
* or as @code auto t_b = tic(); myfunction(...) ; double tsec = toc(t_b); @endcode
* The last option is to be used in the case of
* non-nested but overlapping time measurements.
*
* @param[in] t_b start time of some stop watch
* @return elapsed time in seconds.
*
*/
double toc(TPoint const &t_b = MyStopWatch.top())
{
// https://en.cppreference.com/w/cpp/chrono/treat_as_floating_point
using Unit = std::chrono::seconds;
using FpSeconds = std::chrono::duration<double, Unit::period>;
auto t_e = Clock::now();
MyStopWatch.pop();
return FpSeconds(t_e-t_b).count();
}

View file

6
sheet5/4/.vscode/settings.json vendored Normal file
View file

@ -0,0 +1,6 @@
{
"files.associations": {
"ostream": "cpp",
"iostream": "cpp"
}
}

View file

2563
sheet5/4/Doxyfile Normal file

File diff suppressed because it is too large Load diff

View file

30
sheet5/4/Makefile Normal file
View file

@ -0,0 +1,30 @@
#
# use GNU-Compiler tools
COMPILER=GCC_
# alternatively from the shell
# export COMPILER=GCC_
# or, alternatively from the shell
# make COMPILER=GCC_
# use Intel compilers
#COMPILER=ICC_
# use PGI compilers
# COMPILER=PGI_
SOURCES = main.cpp mylib.cpp benchmark.cpp
OBJECTS = $(SOURCES:.cpp=.o)
PROGRAM = main.${COMPILER}
# uncomment the next to lines for debugging and detailed performance analysis
CXXFLAGS += -g
LINKFLAGS += -g
# do not use -pg with PGI compilers
ifndef COMPILER
COMPILER=GCC_
endif
include ../${COMPILER}default.mk

View file

BIN
sheet5/4/bench Executable file

Binary file not shown.

115
sheet5/4/benchmark.cpp Normal file
View file

@ -0,0 +1,115 @@
#include <iostream>
#include <vector>
#include <cmath>
using namespace std;
// Inner product
double benchmark_A(const vector<double> &x, const vector<double> &y)
{
double sum = 0.0;
#pragma omp parallel for reduction(+:sum)
for (unsigned int i = 0; i < x.size(); i++)
{
sum += x[i]*y[i];
}
return sum;
}
// Inner product
double benchmark_A_sum(const vector<double> &x)
{
double sum = 0.0;
#pragma omp parallel for reduction(+:sum)
for (unsigned int i = 0; i < x.size(); i++)
{
sum += x[i];
}
return sum;
}
//Matrix-vector product
vector<double> benchmark_B(const vector<double> &A, const vector<double> &x)
{
unsigned int N = x.size();
unsigned int M = A.size() / N;
vector<double> b(M, 0.0);
#pragma omp parallel for
for (unsigned int i = 0; i < M; i++)
{
double bi = 0.0;
for (unsigned int j = 0; j < N; j++)
{
bi += A[i*N+j]*x[j];
}
b[i] = bi;
}
return b;
}
//Matrix-Matrix product
vector<double> benchmark_C(const vector<double> &A, const vector<double> &B, unsigned int M)
{
unsigned int L = A.size()/M;
unsigned int N = B.size()/L;
vector<double> C(M*N,0.0);
#pragma omp parallel for collapse(2)
for (unsigned int i = 0; i < M; i++)
{
for (unsigned int j = 0; j < N; j++)
{
double sum = 0.0;
for (unsigned int k = 0; k < L; k++)
{
sum += A[i*L+k]*B[k*N+j];
}
C[i*N+j] = sum;
}
}
return C;
}
//polynomial evaluation
vector<double> benchmark_D(const vector<double>& coeff, const vector<double>& x)
{
unsigned int p = coeff.size(); // p coefficients, degree p-1
unsigned int N = x.size();
vector<double> y(N);
#pragma omp parallel for
for (unsigned int i = 0; i < N; i++){
double yi = coeff[p-1];
double xi = x[i];
for(int j=p-2; j>=0; --j)
{
yi = yi*xi+coeff[j];
}
y[i] = yi;
}
return y;
}
double benchmark_A_old(const vector<double> &x, const vector<double> &y)
{
double sum = 0.0;
for (unsigned int i = 0; i < x.size(); i++)
{
sum += x[i]*y[i];
}
return sum;
}
double benchmark_A_sum_old(const vector<double> &x)
{
double sum = 0.0;
for (unsigned int i = 0; i < x.size(); i++)
{
sum += x[i];
}
return sum;
}

View file

27
sheet5/4/benchmark.h Normal file
View file

@ -0,0 +1,27 @@
#ifndef BENCHMARK_H
#define BENCHMARK_H
#include <vector>
using namespace std;
double benchmark_A(const vector<double> &x,
const vector<double> &y);
double benchmark_A_sum(const vector<double> &x);
vector<double> benchmark_B(const vector<double> &A,
const vector<double> &x);
vector<double> benchmark_C(const vector<double> &A,
const vector<double> &B,
unsigned int M);
vector<double> benchmark_D(const vector<double> &coefficients,
const vector<double> &x);
double benchmark_A_old(const vector<double> &x,
const vector<double> &y);
double benchmark_A_sum_old(const vector<double> &x);
#endif

View file

BIN
sheet5/4/benchmark.o Normal file

Binary file not shown.

BIN
sheet5/4/main.GCC_ Executable file

Binary file not shown.

234
sheet5/4/main.cpp Normal file
View file

@ -0,0 +1,234 @@
#include "mylib.h"
#include <cassert>
#include <chrono> // timing
#include <cmath> // sqrt()
#include <cstdlib> // atoi()
#include <cstring> // strncmp()
#include <ctime>
#include <iostream>
#include <sstream>
#include "benchmark.h"
#include "omp.h"
using namespace std;
using namespace std::chrono; // timing
int main(int argc, char **argv)
{
const unsigned int NA = 1400000;
const unsigned int NLOOPSA = 2000;
//const unsigned int NLOOPS = 10;
const unsigned int MC = 1000;
int const NLOOPSC = 5;
// ---------- Benchmark A ----------
{
vector<double> xA(NA), yA(NA);
for (unsigned int i = 0; i < NA; ++i)
{
double xi= (i % 219) + 1;
xA[i] = xi;
yA[i] = 1.0 / xi;
}
auto tA1 = system_clock::now();
double sA = 0.0, sumA = 0.0;
for (unsigned int loop = 0; loop < NLOOPSA; ++loop)
{
sA = benchmark_A(xA, yA);
sumA += sA;
}
auto tA2 = system_clock::now();
auto durA = duration_cast<microseconds>(tA2 - tA1);
double tA = static_cast<double>(durA.count()) / 1e6 / NLOOPSA; //duration per loop seconds
cout << "\n===== Benchmark A =====\n";
cout << "<xA,yA> = " << sA << endl;
cout << "Timing in sec. : " << tA << endl;
cout << "GFLOPS : " << 2.0 * NA / tA / 1024 / 1024 / 1024 << endl;
cout << "GiByte/s : "
<< 2.0 * NA * sizeof(xA[0]) / tA / 1024 / 1024 / 1024 << endl;
}
// ---------- Benchmark B----------
{
const unsigned int MB = 1700;
const unsigned int NB = MB;
const unsigned int NLOOPSB = 200;//50;
vector<double> AB(MB * NB);
vector<double> xB(NB);
for (unsigned int i = 0; i < MB; ++i)
for (unsigned int j = 0; j < NB; ++j)
AB[i * NB + j] = (i+j) %219 +1;
for (unsigned int j = 0; j < NB; ++j)
{
xB[j] = 1.0 / AB[17*NB+j];
}
vector<double> bB;
auto tB1 = system_clock::now();
double guardB = 0.0;
for (unsigned int loop = 0; loop < NLOOPSB; ++loop)
{
bB = benchmark_B(AB, xB);
guardB += bB[17];
}
auto tB2 = system_clock::now();
auto durB = duration_cast<microseconds>(tB2 - tB1);
double tB = static_cast<double>(durB.count()) / 1e6 / NLOOPSB;
double flopsB = 2.0 * MB * NB;
double bytesB = (MB * NB + NB + MB) * sizeof(double);
cout << "\n===== Benchmark B =====\n";
cout << guardB << endl;
cout << "bytes: " << bytesB << endl;
cout << "Timing in sec. : " << tB << endl;
cout << "GFLOPS : " << flopsB / tB / 1024 / 1024 / 1024 << endl;
cout << "GiByte/s : " << bytesB / tB / 1024 / 1024 / 1024 << endl;
}
// ---------- Benchmark C ----------
{
const unsigned int LC = MC;
const unsigned int NC = MC;
vector<double> AC(MC * LC), BC(LC * NC);
for (unsigned int i = 0; i < MC; ++i)
for (unsigned int j = 0; j < LC; ++j)
AC[i * LC + j] = (i+j) %219 +1;
for (unsigned int i = 0; i < LC; ++i)
for (unsigned int j = 0; j < NC; ++j)
BC[i * NC + j] = (i+j) %219 +1;
vector<double> CC;
auto tC1 = system_clock::now();
double guardC = 0.0;
for (unsigned int loop = 0; loop < NLOOPSC; ++loop)
{
CC = benchmark_C(AC, BC, MC);
guardC += CC[0];
}
auto tC2 = system_clock::now();
auto durC = duration_cast<microseconds>(tC2 - tC1);
double tC = static_cast<double>(durC.count()) / 1e6 / NLOOPSC;
double flopsC = 2.0 * MC * LC * NC;
double bytesC = (MC * LC + LC * NC + MC * NC)* sizeof(double);
cout << "\n===== Benchmark C =====\n";
cout << guardC << endl;
cout << "bytes: " << bytesC << endl;
cout << "Timing in sec. : " << tC << endl;
cout << "GFLOPS : " << flopsC / tC / 1024 / 1024 / 1024 << endl;
cout << "GiByte/s : " << bytesC / tC / 1024 / 1024 / 1024 << endl;
}
// ---------- Benchmark D----------
{
const unsigned int ND = 2000000;
const unsigned int p = 14; // degree p-1 = 15
const unsigned int NLOOPSD = 100;
vector<double> coeff(p, 0.0);
vector<double> xD(ND);
for (unsigned int k = 0; k < p; ++k)
coeff[k] = k%219+1;
for (unsigned int i = 0; i < ND; ++i)
xD[i] = i%219+1;
vector<double> yD;
auto tD1 = system_clock::now();
double guardD = 0.0;
for (unsigned int loop = 0; loop < NLOOPSD; ++loop)
{
yD = benchmark_D(coeff, xD);
guardD += yD[0];
}
auto tD2 = system_clock::now();
auto durD = duration_cast<microseconds>(tD2 - tD1);
double tD = static_cast<double>(durD.count()) / 1e6 / NLOOPSD;
double flopsD = ND * 2 * p;
double bytesD = (p + 2 * ND)*sizeof(double);
cout << "\n===== Benchmark D =====\n";
cout << guardD << endl;
cout << "bytes: " << bytesD << endl;
cout << "Timing in sec. : " << tD << endl;
cout << "GFLOPS : " << flopsD / tD / 1024 / 1024 / 1024 << endl;
cout << "GiByte/s : " << bytesD / tD / 1024 / 1024 / 1024 << endl;
}
for (int k = 3; k <= 8; ++k)
{
size_t n = (size_t)pow(10, k);
vector<double> x(n), y(n);
for (unsigned int i = 0; i < n; ++i)
{
double xi= (i % 219) + 1;
x[i] = xi;
y[i] = 1.0 / xi;
}
// ---- SUM benchmark (sequential) ----
double t0 = omp_get_wtime();
double s1 = benchmark_A_sum_old(x);
double t_sum_seq = omp_get_wtime() - t0;
// ---- SUM benchmark (parallel) ----
t0 = omp_get_wtime();
double s2 = benchmark_A_sum(x);
double t_sum_omp = omp_get_wtime() - t0;
double sum_speedup = t_sum_seq / t_sum_omp;
// ---- INNER PRODUCT benchmark (sequential) ----
t0 = omp_get_wtime();
double ip1 = benchmark_A_old(x, y);
double t_inner_seq = omp_get_wtime() - t0;
// ---- INNER PRODUCT benchmark (parallel) ----
t0 = omp_get_wtime();
double ip2 = benchmark_A(x, y);
double t_inner_omp = omp_get_wtime() - t0;
double inner_speedup = t_inner_seq / t_inner_omp;
// ---- Print results ----
cout << k << endl;
cout << t_sum_seq << ", " << t_sum_omp << ", " << sum_speedup << endl;
cout << t_inner_seq << ", " << t_inner_omp << ", " << inner_speedup << endl;
cout << endl;
}
return 0;
}

View file

BIN
sheet5/4/main.o Normal file

Binary file not shown.

65
sheet5/4/mylib.cpp Normal file
View file

@ -0,0 +1,65 @@
#include "mylib.h"
#include <cassert> // assert()
#include <cmath>
#include <vector>
#ifdef __INTEL_CLANG_COMPILER
#pragma message(" ########## Use of MKL ###############")
#include <mkl.h>
#else
#pragma message(" ########## Use of CBLAS ###############")
//extern "C"
//{
#include <cblas.h> // cBLAS Library
#include <lapacke.h> // Lapack
//}
#endif
using namespace std;
double scalar(vector<double> const &x, vector<double> const &y)
{
assert(x.size() == y.size()); // switch off via compile flag: -DNDEBUG
size_t const N = x.size();
double sum = 0.0;
for (size_t i = 0; i < N; ++i)
{
sum += x[i] * y[i];
//sum += exp(x[i])*log(y[i]);
}
return sum;
}
double scalar_cblas(vector<double> const &x, vector<double> const &y)
{
int const asize = static_cast<int>(size(x));
int const bsize = static_cast<int>(size(y));
assert(asize == bsize); // switch off via compile flag: -DNDEBUG
return cblas_ddot(asize,x.data(),1,y.data(),1);
//assert(x.size() == y.size()); // switch off via compile flag: -DNDEBUG
//return cblas_ddot(x.size(),x.data(),1,y.data(),1);
}
float scalar_cblas(vector<float> const &x, vector<float> const &y)
{
int const asize = static_cast<int>(size(x));
int const bsize = static_cast<int>(size(y));
assert(asize == bsize); // switch off via compile flag: -DNDEBUG
return cblas_sdot(asize,x.data(),1,y.data(),1);
//assert(x.size() == y.size()); // switch off via compile flag: -DNDEBUG
//return cblas_ddot(x.size(),x.data(),1,y.data(),1);
}
double norm(vector<double> const &x)
{
size_t const N = x.size();
double sum = 0.0;
for (size_t i = 0; i < N; ++i)
{
sum += x[i] * x[i];
}
return std::sqrt(sum);
}

View file

30
sheet5/4/mylib.h Normal file
View file

@ -0,0 +1,30 @@
#ifndef FILE_MYLIB
#define FILE_MYLIB
#include <vector>
/** Inner product
@param[in] x vector
@param[in] y vector
@return resulting Euclidian inner product <x,y>
*/
double scalar(std::vector<double> const &x, std::vector<double> const &y);
/** Inner product using BLAS routines
@param[in] x vector
@param[in] y vector
@return resulting Euclidian inner product <x,y>
*/
double scalar_cblas(std::vector<double> const &x, std::vector<double> const &y);
float scalar_cblas(std::vector<float> const &x, std::vector<float> const &y);
/** L_2 Norm of a vector
@param[in] x vector
@return resulting Euclidian norm <x,y>
*/
double norm(std::vector<double> const &x);
#endif

View file

BIN
sheet5/4/mylib.o Normal file

Binary file not shown.

51
sheet5/4/output.txt Normal file
View file

@ -0,0 +1,51 @@
g++ -c -g -O0 -funroll-all-loops -std=c++17 -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -Wredundant-decls -Winline -fmax-errors=1 -flto -o main.o main.cpp
g++ -c -g -O0 -funroll-all-loops -std=c++17 -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -Wredundant-decls -Winline -fmax-errors=1 -flto -o mylib.o mylib.cpp
g++ -c -g -O0 -funroll-all-loops -std=c++17 -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -Wredundant-decls -Winline -fmax-errors=1 -flto -o benchmark.o benchmark.cpp
g++ main.o mylib.o benchmark.o -g -O0 -llapack -lblas -flto -o main.GCC_
./main.GCC_
===== Benchmark A =====
<xA,yA> = 1.4e+06
Timing in sec. : 0.00893637
GFLOPS : 0.291808
GiByte/s : 2.33446
===== Benchmark B =====
340000
bytes: 2.31472e+07
Timing in sec. : 0.0133897
GFLOPS : 0.402029
GiByte/s : 1.61001
===== Benchmark C =====
7.37196e+07
bytes: 2.4e+07
Timing in sec. : 8.67235
GFLOPS : 0.21478
GiByte/s : 0.00257736
===== Benchmark D =====
10500
bytes: 3.20001e+07
Timing in sec. : 0.101087
GFLOPS : 0.515935
GiByte/s : 0.294821
===== Benchmark 5A =====
NORM = 150114
Timing in sec. : 0.00703533
GFLOPS : 0.370658
GiByte/s : 1.48263
===== Benchmark 5B =====
<xA,yA> = 1.4e+06
Timing in sec. : 0.0108377
GFLOPS : 0.601533
GiByte/s : 1.92491
===== Benchmark 5C =====
7.37196e+07
bytes: 2.4e+07
Timing in sec. : 15.2407
GFLOPS : 0.122215
GiByte/s : 0.00146658

View file

1826
sheet5/4/small_Doxyfile Normal file

File diff suppressed because it is too large Load diff

View file

2877
sheet5/5/Doxyfile Normal file

File diff suppressed because it is too large Load diff

View file

54
sheet5/5/Makefile Normal file
View file

@ -0,0 +1,54 @@
#
# use GNU-Compiler tools
COMPILER=GCC_
# COMPILER=GCC_SEQ_
# alternatively from the shell
# export COMPILER=GCC_
# or, alternatively from the shell
# make COMPILER=GCC_
MAIN = main
SOURCES = ${MAIN}.cpp vdop.cpp geom.cpp\
getmatrix.cpp jacsolve.cpp userset.cpp
# dexx.cpp debugd.cpp skalar.cpp vecaccu.cpp accudiag.cpp
OBJECTS = $(SOURCES:.cpp=.o)
PROGRAM = ${MAIN}.${COMPILER}
# uncomment the next to lines for debugging and detailed performance analysis
CXXFLAGS += -g
# -pg slows down the code on my laptop when using CLANG_
#LINKFLAGS += -pg
#CXXFLAGS += -Q --help=optimizers
#CXXFLAGS += -fopt-info
include ../${COMPILER}default.mk
#############################################################################
# additional specific cleaning in this directory
clean_all::
@rm -f t.dat*
#############################################################################
# special testing
# NPROCS = 4
#
TFILE = t.dat
# TTMP = t.tmp
#
graph: $(PROGRAM)
# @rm -f $(TFILE).*
# next two lines only sequentially
./$(PROGRAM)
@mv $(TFILE).000 $(TFILE)
# $(MPIRUN) $(MPIFLAGS) -np $(NPROCS) $(PROGRAM)
# @echo " "; echo "Manipulate data for graphics."; echo " "
# @cat $(TFILE).* > $(TTMP)
# @sort -b -k 2 $(TTMP) -o $(TTMP).1
# @sort -b -k 1 $(TTMP).1 -o $(TTMP).2
# @awk -f nl.awk $(TTMP).2 > $(TFILE)
# @rm -f $(TTMP).* $(TTMP) $(TFILE).*
#
-gnuplot jac.dem

View file

5
sheet5/5/ToDo.txt Normal file
View file

@ -0,0 +1,5 @@
// Jan 15, 2019
geom.h:75 void SetValues(std::vector<double> &v) const; // GH: TODO with functor
Set vector values using a functor ff(x,y).
See solution in Progs/cds

View file

View file

@ -0,0 +1,43 @@
function [ xc, ia, v ] = ascii_read_meshvector( fname )
%
% Loads the 2D triangular mesh (coordinates, vertex connectivity)
% together with values on its vertices from an ASCII file.
% Matlab indexing is stored (starts with 1).
%
% The input file format is compatible
% with Mesh_2d_3_matlab:Write_ascii_matlab(..) in jacobi_oo_stl/geom.h
%
%
% IN: fname - filename
% OUT: xc - coordinates
% ia - mesh connectivity
% v - solution vector
DELIMETER = ' ';
fprintf('Read file %s\n',fname)
% Read mesh constants
nn = dlmread(fname,DELIMETER,[0 0 0 3]); %% row_1, col_1, row_2, col_2 in C indexing!!!
nnode = nn(1);
ndim = nn(2);
nelem = nn(3);
nvert = nn(4);
% Read coordinates
row_start = 0+1;
row_end = 0+nnode;
xc = dlmread(fname,DELIMETER,[row_start 0 row_end ndim-1]);
% Read connectivity
row_start = row_end+1;
row_end = row_end+nelem;
ia = dlmread(fname,DELIMETER,[row_start 0 row_end nvert-1]);
% Read solution
row_start = row_end+1;
row_end = row_end+nnode;
v = dlmread(fname,DELIMETER,[row_start 0 row_end 0]);
end

View file

@ -0,0 +1,49 @@
function ascii_write_mesh( xc, ia, e, basename)
%
% Saves the 2D triangular mesh in the minimal way (only coordinates, vertex connectivity, minimal boundary edge info)
% in an ASCII file.
% Matlab indexing is stored (starts with 1).
%
% The output file format is compatible with Mesh_2d_3_matlab:Mesh_2d_3_matlab(std::string const &fname) in jacobi_oo_stl/geom.h
%
% IN:
% coordinates xc: [2][nnode]
% connectivity ia: [4][nelem] with t(4,:) are the subdomain numbers
% edges e: [7][nedges] boundary edges
% e([1,2],:) - start/end vertex of edge
% e([3,4],:) - start/end values
% e(5,:) - segment number
% e([6,7],:) - left/right subdomain
% basename: file name without extension
%
% Data have been generated via <https://de.mathworks.com/help/pde/ug/initmesh.html initmesh>.
%
fname = [basename, '.txt'];
nnode = int32(size(xc,2));
ndim = int32(size(xc,1));
nelem = int32(size(ia,2));
nvert_e = int32(3);
dlmwrite(fname,nnode,'delimiter','\t','precision',16) % number of nodes
dlmwrite(fname,ndim,'-append','delimiter','\t','precision',16) % space dimension
dlmwrite(fname,nelem,'-append','delimiter','\t','precision',16) % number of elements
dlmwrite(fname,nvert_e,'-append','delimiter','\t','precision',16) % number of vertices per element
% dlmwrite(fname,xc(:),'-append','delimiter','\t','precision',16) % coordinates
dlmwrite(fname,xc([1,2],:).','-append','delimiter','\t','precision',16) % coordinates
% no subdomain info transferred
tmp=int32(ia(1:3,:));
% dlmwrite(fname,tmp(:),'-append','delimiter','\t','precision',16) % connectivity in Matlab indexing
dlmwrite(fname,tmp(:,:).','-append','delimiter','\t','precision',16) % connectivity in Matlab indexing
% store only start and end point of boundary edges,
nbedges = size(e,2);
dlmwrite(fname,nbedges,'-append','delimiter','\t','precision',16) % number boundary edges
tmp=int32(e(1:2,:));
% dlmwrite(fname,tmp(:),'-append','delimiter','\t','precision',16) % boundary edges in Matlab indexing
dlmwrite(fname,tmp(:,:).','-append','delimiter','\t','precision',16) % boundary edges in Matlab indexing
end

522
sheet5/5/geom.cpp Normal file
View file

@ -0,0 +1,522 @@
// see: http://llvm.org/docs/CodingStandards.html#include-style
#include "geom.h"
#include <algorithm>
#include <cassert>
#include <fstream>
#include <iostream>
#include <list>
#include <string>
#include <vector>
using namespace std;
Mesh::Mesh(int ndim, int nvert_e, int ndof_e)
: _nelem(0), _nvert_e(nvert_e), _ndof_e(ndof_e), _nnode(0), _ndim(ndim), _ia(0), _xc(0)
{
}
Mesh::~Mesh()
{}
void Mesh::SetValues(std::vector<double> &v, const std::function<double(double, double)> &func) const
{
int const nnode = Nnodes(); // number of vertices in mesh
assert( nnode == static_cast<int>(v.size()) );
for (int k = 0; k < nnode; ++k)
{
v[k] = func( _xc[2 * k], _xc[2 * k + 1] );
}
}
void Mesh::Debug() const
{
cout << "\n ############### Debug M E S H ###################\n";
cout << "\n ............... Coordinates ...................\n";
for (int k = 0; k < _nnode; ++k)
{
cout << k << " : " ;
for (int i = 0; i < _ndof_e; ++i )
{
cout << _xc[2*k+i] << " ";
}
cout << endl;
}
cout << "\n ............... Elements ...................\n";
for (int k = 0; k < _nelem; ++k)
{
cout << k << " : ";
for (int i = 0; i < _ndof_e; ++i )
cout << _ia[_ndof_e * k + i] << " ";
cout << endl;
}
return;
}
void Mesh::Write_ascii_matlab(std::string const &fname, std::vector<double> const &v) const
{
assert(Nnodes() == static_cast<int>(v.size())); // fits vector length to mesh information?
ofstream fout(fname); // open file ASCII mode
if ( !fout.is_open() )
{
cout << "\nFile " << fname << " has not been opened.\n\n" ;
assert( fout.is_open() && "File not opened." );
}
string const DELIMETER(" "); // define the same delimeter as in matlab/ascii_read*.m
int const OFFSET(1); // convert C-indexing to matlab
// Write data: #nodes, #space dimensions, #elements, #vertices per element
fout << Nnodes() << DELIMETER << Ndims() << DELIMETER << Nelems() << DELIMETER << NverticesElements() << endl;
// Write cordinates: x_k, y_k in seperate lines
assert( Nnodes()*Ndims() == static_cast<int>(_xc.size()));
for (int k = 0, kj = 0; k < Nnodes(); ++k)
{
for (int j = 0; j < Ndims(); ++j, ++kj)
{
fout << _xc[kj] << DELIMETER;
}
fout << endl;
}
// Write connectivity: ia_k,0, ia_k,1 etc in seperate lines
assert( Nelems()*NverticesElements() == static_cast<int>(_ia.size()));
for (int k = 0, kj = 0; k < Nelems(); ++k)
{
for (int j = 0; j < NverticesElements(); ++j, ++kj)
{
fout << _ia[kj] + OFFSET << DELIMETER; // C to matlab
}
fout << endl;
}
// Write vector
for (int k = 0; k < Nnodes(); ++k)
{
fout << v[k] << endl;
}
fout.close();
return;
}
void Mesh::Visualize(std::vector<double> const &v) const
{
// define external command
const string exec_m("matlab -nosplash < visualize_results.m"); // Matlab
//const string exec_m("octave --no-window-system --no-gui visualize_results.m"); // Octave
//const string exec_m("flatpak run org.octave.Octave visualize_results.m"); // Octave (flatpak): desktop GH
const string fname("uv.txt");
Write_ascii_matlab(fname, v);
int ierror = system(exec_m.c_str()); // call external command
if (ierror != 0)
{
cout << endl << "Check path to Matlab/octave on your system" << endl;
}
cout << endl;
return;
}
// #####################################################################
Mesh_2d_3_square::Mesh_2d_3_square(int nx, int ny, int myid, int procx, int procy)
: Mesh(2, 3, 3), // two dimensions, 3 vertices, 3 dofs
_myid(myid), _procx(procx), _procy(procy), _neigh{{-1, -1, -1, -1}}, _color(0),
_xl(0.0), _xr(1.0), _yb(0.0), _yt(1.0), _nx(nx), _ny(ny)
{
//void IniGeom(int const myid, int const procx, int const procy, int neigh[], int &color)
int const ky = _myid / _procx;
int const kx = _myid % _procy; // MOD(myid,procx)
// Determine the neighbors of domain/rank myid
_neigh[0] = (ky == 0) ? -1 : _myid - _procx; // South
_neigh[1] = (kx == _procx - 1) ? -1 : _myid + 1; // East
_neigh[2] = (ky == _procy - 1) ? -1 : _myid + _procx; // North
_neigh[3] = (kx == 0) ? -1 : _myid - 1; // West
_color = (kx + ky) & 1 ;
// subdomain is part of unit square
double const hx = 1. / _procx;
double const hy = 1. / _procy;
_xl = kx * hx; // left
_xr = (kx + 1) * hx; // right
_yb = ky * hy; // bottom
_yt = (ky + 1) * hy; // top
// Calculate coordinates
int const nnode = (_nx + 1) * (_ny + 1); // number of nodes
Resize_Coords(nnode, 2); // coordinates in 2D [nnode][ndim]
GetCoordsInRectangle(_nx, _ny, _xl, _xr, _yb, _yt, GetCoords().data());
// Calculate element connectivity (linear triangles)
int const nelem = 2 * _nx * _ny; // number of elements
Resize_Connectivity(nelem, 3); // connectivity matrix [nelem][3]
GetConnectivityInRectangle(_nx, _ny, GetConnectivity().data());
return;
}
void Mesh_2d_3_square::SetU(std::vector<double> &u) const
{
int dx = _nx + 1;
for (int j = 0; j <= _ny; ++j)
{
int k = j * dx;
for (int i = 0; i <= _nx; ++i, ++k)
{
u[k] = 0.0;
}
}
}
void Mesh_2d_3_square::SetF(std::vector<double> &f) const
{
int dx = _nx + 1;
for (int j = 0; j <= _ny; ++j)
{
int k = j * dx;
for (int i = 0; i <= _nx; ++i, ++k)
{
f[k] = 1.0;
}
}
}
std::vector<int> Mesh_2d_3_square::Index_DirichletNodes() const
{
int const dx = 1,
dy = _nx + 1,
bl = 0,
br = _nx,
tl = _ny * (_nx + 1),
tr = (_ny + 1) * (_nx + 1) - 1;
int const start[4] = { bl, br, tl, bl},
end[4] = { br, tr, tr, tl},
step[4] = { dx, dy, dx, dy};
vector<int> idx(0);
for (int j = 0; j < 4; j++)
{
if (_neigh[j] < 0)
{
for (int i = start[j]; i <= end[j]; i += step[j])
{
idx.push_back(i); // node i is Dirichlet node
}
}
}
// remove multiple elements
sort(idx.begin(), idx.end()); // sort
idx.erase( unique(idx.begin(), idx.end()), idx.end() ); // remove duplicate data
return idx;
}
void Mesh_2d_3_square::SaveVectorP(std::string const &name, vector<double> const &u) const
{
// construct the file name for subdomain myid
const string tmp( std::to_string(_myid / 100) + to_string((_myid % 100) / 10) + to_string(_myid % 10) );
const string namep = name + "." + tmp;
ofstream ff(namep.c_str());
ff.precision(6);
ff.setf(ios::fixed, ios::floatfield);
// assumes tensor product grid in unit square; rowise numbered (as generated in class constructor)
// output is provided for tensor product grid visualization ( similar to Matlab-surf() )
auto const &xc = GetCoords();
int k = 0;
for (int j = 0; j <= _ny; ++j)
{
for (int i = 0; i <= _nx; ++i, ++k)
ff << xc[2 * k + 0] << " " << xc[2 * k + 1] << " " << u[k] << endl;
ff << endl;
}
ff.close();
return;
}
void Mesh_2d_3_square::GetCoordsInRectangle(int const nx, int const ny,
double const xl, double const xr, double const yb, double const yt,
double xc[])
{
const double hx = (xr - xl) / nx,
hy = (yt - yb) / ny;
int k = 0;
for (int j = 0; j <= ny; ++j)
{
const double y0 = yb + j * hy;
for (int i = 0; i <= nx; ++i, k += 2)
{
xc[k ] = xl + i * hx;
xc[k + 1] = y0;
}
}
return;
}
void Mesh_2d_3_square::GetConnectivityInRectangle(int const nx, int const ny, int ia[])
{
const int dx = nx + 1;
int k = 0;
int l = 0;
for (int j = 0; j < ny; ++j, ++k)
{
for (int i = 0; i < nx; ++i, ++k)
{
ia[l ] = k;
ia[l + 1] = k + 1;
ia[l + 2] = k + dx + 1;
l += 3;
ia[l ] = k;
ia[l + 1] = k + dx;
ia[l + 2] = k + dx + 1;
l += 3;
}
}
return;
}
// #################### still some old code (--> MPI) ############################
// Copies the values of w corresponding to the boundary
// South (ib==1), East (ib==2), North (ib==3), West (ib==4)
void GetBound(int const ib, int const nx, int const ny, double const w[], double s[])
{
const int //dx = 1,
dy = nx + 1,
bl = 0,
br = nx,
tl = ny * (nx + 1),
tr = (ny + 1) * (nx + 1) - 1;
switch (ib)
{
case 1:
{
for (int i = bl, j = 0; i <= br; ++i, ++j)
s[j] = w[i];
break;
}
case 3:
{
for (int i = tl, j = 0; i <= tr; ++i, ++j)
s[j] = w[i];
break;
}
case 4:
{
for (int i = bl, j = 0; i <= tl; i += dy, ++j)
s[j] = w[i];
break;
}
case 2:
{
for (int i = br, j = 0; i <= tr; i += dy, ++j)
s[j] = w[i];
break;
}
default:
{
cout << endl << "Wrong parameter ib in " << __FILE__ << ":" << __LINE__ << endl;
}
}
return;
}
// ----------------------------------------------------------------------------------------------------------
// Computes w: = w + s at nodes on the boundary
// South (ib == 1), East (ib == 2), North (ib == 3), West (ib == 4)
void AddBound(int const ib, int const nx, int const ny, double w[], double const s[])
{
int const dy = nx + 1,
bl = 0,
br = nx,
tl = ny * (nx + 1),
tr = (ny + 1) * (nx + 1) - 1;
switch (ib)
{
case 1:
{
for (int i = bl, j = 0; i <= br; ++i, ++j)
w[i] += s[j];
break;
}
case 3:
{
for (int i = tl, j = 0; i <= tr; ++i, ++j)
w[i] += s[j];
break;
}
case 4:
{
for (int i = bl, j = 0; i <= tl; i += dy, ++j)
w[i] += s[j];
break;
}
case 2:
{
for (int i = br, j = 0; i <= tr; i += dy, ++j)
w[i] += s[j];
break;
}
default:
{
cout << endl << "Wrong parameter ib in " << __FILE__ << ":" << __LINE__ << endl;
}
}
return;
}
// ####################################################################
Mesh_2d_3_matlab::Mesh_2d_3_matlab(string const &fname)
: Mesh(2, 3, 3), // two dimensions, 3 vertices, 3 dofs
bedges(0)
{
ifstream ifs(fname);
if (!(ifs.is_open() && ifs.good()))
{
cerr << "Mesh_2d_3_matlab: Error cannot open file " << fname << endl;
assert(ifs.is_open());
}
int const OFFSET(1); // Matlab to C indexing
cout << "ASCI file " << fname << " opened" << endl;
// Read some mesh constants
int nnode, ndim, nelem, nvert_e;
ifs >> nnode >> ndim >> nelem >> nvert_e;
cout << nnode << " " << ndim << " " << nelem << " " << nvert_e << endl;
assert(ndim == 2 && nvert_e == 3);
// Allocate memory
Resize_Coords(nnode, ndim); // coordinates in 2D [nnode][ndim]
Resize_Connectivity(nelem, nvert_e); // connectivity matrix [nelem][nvert]
// Read ccordinates
auto &xc = GetCoords();
for (int k = 0; k < nnode * ndim; ++k)
{
ifs >> xc[k];
}
// Read connectivity
auto &ia = GetConnectivity();
for (int k = 0; k < nelem * nvert_e; ++k)
{
ifs >> ia[k];
ia[k] -= OFFSET; // Matlab to C indexing
}
// additional read of boundary information (only start/end point)
int nbedges;
ifs >> nbedges;
bedges.resize(nbedges * 2);
for (int k = 0; k < nbedges * 2; ++k)
{
ifs >> bedges[k];
bedges[k] -= OFFSET; // Matlab to C indexing
}
return;
}
// binary
//{
//ifstream ifs(fname, ios_base::in | ios_base::binary);
//if(!(ifs.is_open() && ifs.good()))
//{
//cerr << "ReadBinMatrix: Error cannot open file " << file << endl;
//assert(ifs.is_open());
//}
//cout << "ReadBinMatrix: file opened" << file << endl;
//}
// binaryIO.cpp
//void read_binMatrix(const string& file, vector<int> &cnt, vector<int> &col, vector<double> &ele)
//{
//ifstream ifs(file, ios_base::in | ios_base::binary);
//if(!(ifs.is_open() && ifs.good()))
//{
//cerr << "ReadBinMatrix: Error cannot open file " << file << endl;
//assert(ifs.is_open());
//}
//cout << "ReadBinMatrix: Opened file " << file << endl;
//int _size;
//ifs.read(reinterpret_cast<char*>(&_size), sizeof(int)); // old: ifs.read((char*)&_size, sizeof(int));
//cnt.resize(_size);
//cout << "ReadBinMatrix: cnt size: " << _size << endl;
//ifs.read((char*)&_size, sizeof(int));
//col.resize(_size);
//cout << "ReadBinMatrix: col size: " << _size << endl;
//ifs.read((char*)&_size, sizeof(int));
//ele.resize(_size);
//cout << "ReadBinMatrix: ele size: " << _size << endl;
//ifs.read((char*)cnt.data(), cnt.size() * sizeof(int));
//ifs.read((char*)col.data(), col.size() * sizeof(int));
//ifs.read((char*)ele.data(), ele.size() * sizeof(double));
//ifs.close();
//cout << "ReadBinMatrix: Finished reading matrix.." << endl;
//}
std::vector<int> Mesh_2d_3_matlab::Index_DirichletNodes() const
{
vector<int> idx(bedges); // copy
sort(idx.begin(), idx.end()); // sort
idx.erase( unique(idx.begin(), idx.end()), idx.end() ); // remove duplicate data
return idx;
}

View file

381
sheet5/5/geom.h Normal file
View file

@ -0,0 +1,381 @@
#ifndef GEOM_FILE
#define GEOM_FILE
#include <array>
#include <functional> // function; C++11
#include <string>
#include <vector>
/**
* Basis class for finite element meshes.
*/
class Mesh
{
public:
/**
* Constructor initializing the members with default values.
*
* @param[in] ndim space dimensions (dimension for coordinates)
* @param[in] nvert_e number of vertices per element (dimension for connectivity)
* @param[in] ndof_e degrees of freedom per element (= @p nvert_e for linear elements)
*/
explicit Mesh(int ndim, int nvert_e = 0, int ndof_e = 0);
/**
* Destructor.
*
* See clang warning on
* <a href="https://stackoverflow.com/questions/28786473/clang-no-out-of-line-virtual-method-definitions-pure-abstract-c-class/40550578">weak-vtables</a>.
*/
virtual ~Mesh();
/**
* Number of finite elements in (sub)domain.
* @return number of elements.
*/
int Nelems() const
{
return _nelem;
}
/**
* Global number of vertices for each finite element.
* @return number of vertices per element.
*/
int NverticesElements() const
{
return _nvert_e;
}
/**
* Global number of degrees of freedom (dof) for each finite element.
* @return degrees of freedom per element.
*/
int NdofsElement() const
{
return _ndof_e;
}
/**
* Number of vertices in mesh.
* @return number of vertices.
*/
int Nnodes() const
{
return _nnode;
}
/**
* Space dimension.
* @return number of dimensions.
*/
int Ndims() const
{
return _ndim;
}
/**
* (Re-)Allocates memory for the element connectivity and redefines the appropriate dimensions.
*
* @param[in] nelem number of elements
* @param[in] nvert_e number of vertices per element
*/
void Resize_Connectivity(int nelem, int nvert_e)
{
SetNelem(nelem); // number of elements
SetNverticesElement(nvert_e); // vertices per element
_ia.resize(nelem * nvert_e);
}
/**
* Read connectivity information (g1,g2,g3)_i.
* @return convectivity vector [nelems*ndofs].
*/
const std::vector<int> &GetConnectivity() const
{
return _ia;
}
/**
* Access/Change connectivity information (g1,g2,g3)_i.
* @return convectivity vector [nelems*ndofs].
*/
std::vector<int> &GetConnectivity()
{
return _ia;
}
/**
* (Re-)Allocates memory for the element connectivity and redefines the appropriate dimensions.
*
* @param[in] nnodes number of nodes
* @param[in] ndim space dimension
*/
void Resize_Coords(int nnodes, int ndim)
{
SetNnode(nnodes); // number of nodes
SetNdim(ndim); // space dimension
_xc.resize(nnodes * ndim);
}
/**
* Read coordinates of vertices (x,y)_i.
* @return coordinates vector [nnodes*2].
*/
const std::vector<double> &GetCoords() const
{
return _xc;
}
/**
* Access/Change coordinates of vertices (x,y)_i.
* @return coordinates vector [nnodes*2].
*/
std::vector<double> &GetCoords()
{
return _xc;
}
/**
* Calculate values in vector @p v via function @p func(x,y)
* @param[in] v vector
* @param[in] func function of (x,y) returning a double value.
*/
void SetValues(std::vector<double> &v, const std::function<double(double, double)> &func) const;
/**
* Prints the information for a finite element mesh
*/
void Debug() const;
/**
* Determines the indices of those vertices with Dirichlet boundary conditions
* @return index vector.
*/
virtual std::vector<int> Index_DirichletNodes() const = 0;
/**
* Write vector @p v toghether with its mesh information to an ASCii file @p fname.
*
* The data are written in C-style.
*
* @param[in] fname file name
* @param[in] v vector
*/
void Write_ascii_matlab(std::string const &fname, std::vector<double> const &v) const;
/**
* Visualize @p v together with its mesh information via matlab or octave.
*
* Comment/uncomment those code lines in method Mesh:Visualize (geom.cpp)
* that are supported on your system.
*
* @param[in] v vector
*
* @warning matlab files ascii_read_meshvector.m visualize_results.m
* must be in the executing directory.
*/
void Visualize(std::vector<double> const &v) const;
protected:
void SetNelem(int nelem)
{
_nelem = nelem;
}
void SetNverticesElement(int nvert)
{
_nvert_e = nvert;
}
void SetNdofsElement(int ndof)
{
_ndof_e = ndof;
}
void SetNnode(int nnode)
{
_nnode = nnode;
}
void SetNdim(int ndim)
{
_ndim = ndim;
}
private:
int _nelem; //!< number elements
int _nvert_e; //!< number of vertices per element
int _ndof_e; //!< degrees of freedom (d.o.f.) per element
int _nnode; //!< number nodes/vertices
int _ndim; //!< space dimension of the problem (1, 2, or 3)
std::vector<int> _ia; //!< element connectivity
std::vector<double> _xc; //!< coordinates
};
/**
* 2D finite element mesh of the square consiting of linear triangular elements.
*/
class Mesh_2d_3_square: public Mesh
{
public:
/**
* Generates the f.e. mesh for the unit square.
*
* @param[in] nx number of discretization intervals in x-direction
* @param[in] ny number of discretization intervals in y-direction
* @param[in] myid my MPI-rank / subdomain
* @param[in] procx number of ranks/subdomains in x-direction
* @param[in] procy number of processes in y-direction
*/
Mesh_2d_3_square(int nx, int ny, int myid = 0, int procx = 1, int procy = 1);
/**
* Destructor
*/
~Mesh_2d_3_square() override
{}
/**
* Set solution vector based on a tensor product grid in the rectangle.
* @param[in] u solution vector
*/
void SetU(std::vector<double> &u) const;
/**
* Set right hand side (rhs) vector on a tensor product grid in the rectangle.
* @param[in] f rhs vector
*/
void SetF(std::vector<double> &f) const;
/**
* Determines the indices of those vertices with Dirichlet boundary conditions
* @return index vector.
*/
std::vector<int> Index_DirichletNodes() const override;
/**
* Stores the values of vector @p u of (sub)domain into a file @p name for further processing in gnuplot.
* The file stores rowise the x- and y- coordinates together with the value from @p u .
* The domain [@p xl, @p xr] x [@p yb, @p yt] is discretized into @p nx x @p ny intervals.
*
* @param[in] name basename of file name (file name will be extended by the rank number)
* @param[in] u local vector
*
* @warning Assumes tensor product grid in unit square; rowise numbered
* (as generated in class constructor).
* The output is provided for tensor product grid visualization
* ( similar to Matlab-surf() ).
*
* @see Mesh_2d_3_square
*/
void SaveVectorP(std::string const &name, std::vector<double> const &u) const;
// here will still need to implement in the class
// GetBound(), AddBound()
// or better a generalized way with indices and their appropriate ranks for MPI communication
private:
/**
* Determines the coordinates of the dicretization nodes of the domain [@p xl, @p xr] x [@p yb, @p yt]
* which is discretized into @p nx x @p ny intervals.
*
* @param[in] ny number of discretization intervals in y-direction
* @param[in] xl x-coordinate of left boundary
* @param[in] xr x-coordinate of right boundary
* @param[in] yb y-coordinate of lower boundary
* @param[in] yt y-coordinate of upper boundary
* @param[out] xc coordinate vector of length 2n with x(2*k,2*k+1) as coodinates of node k
*/
void GetCoordsInRectangle(int nx, int ny, double xl, double xr, double yb, double yt,
double xc[]);
/**
* Determines the element connectivity of linear triangular elements of a FEM discretization
* of a rectangle using @p nx x @p ny equidistant intervals for discretization.
* @param[in] nx number of discretization intervals in x-direction
* @param[in] ny number of discretization intervals in y-direction
* @param[out] ia element connectivity matrix with ia(3*s,3*s+1,3*s+2) as node numbers od element s
*/
void GetConnectivityInRectangle(int nx, int ny, int ia[]);
private:
int _myid; //!< my MPI rank
int _procx; //!< number of MPI ranks in x-direction
int _procy; //!< number of MPI ranks in y-direction
std::array<int, 4> _neigh; //!< MPI ranks of neighbors (negative: no neighbor but b.c.)
int _color; //!< red/black coloring (checker board) of subdomains
double _xl; //!< x coordinate of lower left corner of square
double _xr; //!< x coordinate of lower right corner of square
double _yb; //!< y coordinate or lower left corner of square
double _yt; //!< y coordinate of upper right corner of square
int _nx; //!< number of intervals in x-direction
int _ny; //!< number of intervals in y-direction
};
// #################### still some old code (--> MPI) ############################
/**
* Copies the values of @p w corresponding to boundary @p ib
* onto vector s. South (ib==1), East (ib==2), North (ib==3), West (ib==4).
* The vector @p s has to be long enough!!
* @param[in] ib my local boundary
* @param[in] nx number of discretization intervals in x-direction
* @param[in] ny number of discretization intervals in y-direction
* @param[in] w vector for all nodes of local discretization
* @param[out] s short vector with values on boundary @p ib
*/
// GH_NOTE: Absicherung bei s !!
void GetBound(int ib, int nx, int ny, double const w[], double s[]);
/**
* Computes @p w := @p w + @p s at the interface/boundary nodes on the
* boundary @p ib . South (ib==1), East (ib==2), North (ib==3), West (ib==4)
* @param[in] ib my local boundary
* @param[in] nx number of discretization intervals in x-direction
* @param[in] ny number of discretization intervals in y-direction
* @param[in,out] w vector for all nodes of local discretization
* @param[in] s short vector with values on boundary @p ib
*/
void AddBound(int ib, int nx, int ny, double w[], double const s[]);
// #################### Mesh from Matlab ############################
/**
* 2D finite element mesh of the square consiting of linear triangular elements.
*/
class Mesh_2d_3_matlab: public Mesh
{
public:
/**
* Reads mesh data from a binary file.
*
* File format, see ascii_write_mesh.m
*
* @param[in] fname file name
*/
explicit Mesh_2d_3_matlab(std::string const &fname);
/**
* Determines the indices of those vertices with Dirichlet boundary conditions.
* @return index vector.
*
* @warning All boundary nodes are considered as Dirchlet nodes.
*/
std::vector<int> Index_DirichletNodes() const override;
private:
/**
* Determines the indices of those vertices with Dirichlet boundary conditions
* @return index vector.
*/
int Nnbedges() const
{
return static_cast<int>(bedges.size());
}
std::vector<int> bedges; //!< boundary edges [nbedges][2] storing start/end vertex
};
#endif

View file

Some files were not shown because too many files have changed in this diff Show more