Pushing everything again, accidentally deleted my remote repository

2025-12-09 22:06:13 +01:00 · 2025-12-09 22:06:13 +01:00 · 1bee3e8e5b
commit 1bee3e8e5b
101 changed files with 9428 additions and 0 deletions
--- a/ex1/CLANG_default.mk
+++ b/ex1/CLANG_default.mk
@ -0,0 +1,123 @@
+# Basic definitions for using the Clang/LLVM compiler suite sequentially
+# requires setting of COMPILER=CLANG_
+
+#CLANGPATH=//usr/lib/llvm-10/bin/
+CC     = ${CLANGPATH}clang
+CXX    = ${CLANGPATH}clang++
+#CXX   = ${CLANGPATH}clang++ -lomptarget  -fopenmp-targets=nvptx64-nvidia-cuda --cuda-path=/opt/pgi/linux86-64/2017/cuda/8.0
+#F77   = gfortran
+LINKER = ${CXX}
+
+#http://clang.llvm.org/docs/UsersManual.html#options-to-control-error-and-warning-messages
+WARNINGS += -Weverything -Wno-c++98-compat -Wno-sign-conversion -Wno-date-time -Wno-shorten-64-to-32 -Wno-padded -ferror-limit=1
+WARNINGS += -Wdocumentation -Wconversion -Wshadow -Wfloat-conversion -pedantic
+#-fsyntax-only -Wdocumentation -Wconversion -Wshadow -Wfloat-conversion -pedantic
+
+CXXFLAGS += -O3 -std=c++17 -ferror-limit=1 ${WARNINGS}
+# don't use -Ofast
+# -ftrapv
+LINKFLAGS += -O3
+
+# different libraries in Ubuntu or Manjaro
+ifndef UBUNTU
+UBUNTU=1
+endif
+
+# BLAS, LAPACK
+LINKFLAGS += -llapack -lblas
+# -lopenblas
+ifeq ($(UBUNTU),1)
+# ubuntu
+else
+# on  archlinux
+LINKFLAGS += -lcblas
+endif
+
+# interprocedural optimization
+CXXFLAGS  += -flto
+LINKFLAGS += -flto
+
+#   very good check
+# http://clang.llvm.org/extra/clang-tidy/
+#   good check, see:  http://llvm.org/docs/CodingStandards.html#include-style
+# clang-tidy checks to switch off when READABILITY is enabled. The value is spliced into the
+# comma-separated -checks=... list, so it must not contain blanks: it is extended with ':='
+# instead of '+=' (which would insert a blank), and each check is listed only once.
+SWITCH_OFF=,-readability-magic-numbers,-readability-redundant-control-flow,-readability-redundant-member-init
+SWITCH_OFF:=${SWITCH_OFF},-readability-isolate-declaration
+#READABILITY=,readability*${SWITCH_OFF}
+#TIDYFLAGS = -checks=llvm-*,-llvm-header-guard -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp"
+TIDYFLAGS = -checks=llvm-*,-llvm-header-guard${READABILITY} -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp"
+#TIDYFLAGS += -checks='modernize*
+#   ???
+#TIDYFLAGS = -checks='cert*'  -header-filter=.*
+#   MPI checks ??
+#TIDYFLAGS = -checks='mpi*'
+#   ??
+#TIDYFLAGS = -checks='performance*'   -header-filter=.*
+#TIDYFLAGS = -checks='portability-*'  -header-filter=.*
+#TIDYFLAGS = -checks='readability-*'  -header-filter=.*
+
+default: ${PROGRAM}
+
+${PROGRAM}:	${OBJECTS}
+	$(LINKER)  $^  ${LINKFLAGS} -o $@
+
+clean:
+	@rm -f ${PROGRAM} ${OBJECTS}
+
+clean_all:: clean
+	@rm -f *_ *~ *.bak *.log *.out *.tar
+
+codecheck: tidy_check
+tidy_check:
+	clang-tidy ${SOURCES} ${TIDYFLAGS} -- ${SOURCES}
+# see also http://clang-developers.42468.n3.nabble.com/Error-while-trying-to-load-a-compilation-database-td4049722.html
+
+run: clean ${PROGRAM}
+#	time  ./${PROGRAM} ${PARAMS}
+	./${PROGRAM} ${PARAMS}
+
+# tar the current directory into ../<dirname>.tar (together with the shared *default.mk files)
+# CURDIR is maintained by make itself, so the name is also right with 'make -C <dir>'
+MY_DIR := $(notdir $(CURDIR))
+tar: clean_all
+	@echo "Tar the directory: " ${MY_DIR}
+#	'&&' skips tar if the cd fails and lets a tar failure fail the target
+	@cd .. && tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk
+# 	tar cf `basename ${PWD}`.tar *
+
+doc:
+	doxygen Doxyfile
+
+#########################################################################
+
+.cpp.o:
+	$(CXX) -c $(CXXFLAGS) -o $@ $<
+
+.c.o:
+	$(CC) -c $(CFLAGS) -o $@ $<
+
+.f.o:
+	$(F77) -c $(FFLAGS) -o $@ $<
+
+##################################################################################################
+#    some tools
+# Cache behaviour (CXXFLAGS += -g  tracks down to source lines; no -pg in linkflags)
+cache: ${PROGRAM}
+	valgrind --tool=callgrind --simulate-cache=yes ./$^  ${PARAMS}
+#	kcachegrind callgrind.out.<pid> &
+	kcachegrind `ls -1tr  callgrind.out.* |tail -1`
+
+# Check for wrong memory accesses, memory leaks, ...
+# use smaller data sets
+mem: ${PROGRAM}
+	valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^  ${PARAMS}
+
+#  Simple run time profiling of your code
+#  CXXFLAGS += -g -pg
+#  LINKFLAGS += -pg
+prof: ${PROGRAM}
+	perf record ./$^  ${PARAMS}
+	perf report
+#	gprof -b ./$^ > gp.out
+#	kprof -f gp.out -p gprof &
+
+# None of these targets is a file; without .PHONY an existing file or directory of the same
+# name (e.g. a 'doc' directory) would turn the target into a no-op.
+# (This replaces a second, redundant 'codecheck: tidy_check' line; the rule is defined above.)
+.PHONY: default clean clean_all codecheck tidy_check run tar doc cache mem prof
--- a/ex1/GCC_AMD32_default.mk
+++ b/ex1/GCC_AMD32_default.mk
@ -0,0 +1,130 @@
+# Basic definitions for using the GNU compiler suite sequentially
+# requires setting of COMPILER=GCC_AMD32_
+
+CC	= gcc
+CXX     = g++
+F77	= gfortran
+LINKER  = ${CXX}
+
+# on mephisto:
+#CXXFLAGS  += -I/share/apps/atlas/include
+#LINKFLAGS += -L/share/apps/atlas/lib
+#LINKFLAGS   += -lcblas -latlas
+
+#LINKFLAGS   += -lblas
+# The <cblas.h> header must be wrapped in extern "C" so that g++ finds all symbols.
+
+
+#WARNINGS = -pedantic -pedantic-errors -Wall -Wextra -Werror -Wconversion -Weffc++ -Woverloaded-virtual  -Wfloat-equal -Wshadow
+WARNINGS = -pedantic -Wall -Wextra -Wconversion -Weffc++ -Woverloaded-virtual  -Wfloat-equal -Wshadow \
+           -Wredundant-decls -Winline -fmax-errors=1
+#           -Wunreachable-code
+#  -Wunreachable-code
+CXXFLAGS += -ffast-math -O3 -march=native -std=c++17 ${WARNINGS}
+#CXXFLAGS += -Ofast -funroll-all-loops -std=c++17 ${WARNINGS}
+#-msse3
+# -ftree-vectorizer-verbose=2  -DNDEBUG
+# -ftree-vectorizer-verbose=5
+# -ftree-vectorize -fdump-tree-vect-blocks=foo.dump  -fdump-tree-pre=stderr
+
+# CFLAGS	= -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details
+# CFLAGS	= -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2
+# #CFLAGS	= -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
+# FFLAGS	= -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
+# LFLAGS  = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
+LINKFLAGS   += -O3
+
+# BLAS, LAPACK
+OPENBLAS_DIR = /opt/openblas_GCCseq
+#OPENBLAS_DIR = /opt/openblas_GCC
+OPENBLAS_LIBDIR = ${OPENBLAS_DIR}/lib
+OPENBLAS_INCDIR = ${OPENBLAS_DIR}/include
+CXXFLAGS += -I${OPENBLAS_INCDIR}
+LINKFLAGS += -L${OPENBLAS_LIBDIR} -lopenblas
+
+# interprocedural optimization
+CXXFLAGS += -flto
+LINKFLAGS += -flto
+
+# profiling tools
+#CXXFLAGS  += -pg
+#LINKFLAGS += -pg
+
+default: ${PROGRAM}
+
+${PROGRAM}:	${OBJECTS}
+	$(LINKER)  $^  ${LINKFLAGS} -o $@
+
+clean:
+	@rm -f ${PROGRAM} ${OBJECTS}
+
+# remove backup files, logs, archives and the html directory in addition to 'clean'
+clean_all:: clean
+	-@rm -f *_ *~ *.bak *.log *.out *.tar *.orig
+#	-f: a missing html directory is not an error (same as in GCC_default.mk)
+	-@rm -rf html
+
+run: clean ${PROGRAM}
+#	time  ./${PROGRAM}
+#	./${PROGRAM}
+	( export LD_LIBRARY_PATH=${OPENBLAS_LIBDIR}:${LD_LIBRARY_PATH} ; ./${PROGRAM} )
+#            or  'export LD_LIBRARY_PATH=/opt/openblas_gcc/lib:${LD_LIBRARY_PATH}'  in your ~/.bashrc 
+
+# tar the current directory into ../<dirname>.tar (together with the shared *default.mk files)
+# CURDIR is maintained by make itself, so the name is also right with 'make -C <dir>'
+MY_DIR := $(notdir $(CURDIR))
+# clean first, as the other *default.mk files do, so that no build products are archived
+tar: clean_all
+	@echo "Tar the directory: " ${MY_DIR}
+#	'&&' skips tar if the cd fails and lets a tar failure fail the target
+	@cd .. && tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk
+# 	tar cf `basename ${PWD}`.tar *
+
+doc:
+	doxygen Doxyfile
+
+#########################################################################
+
+.cpp.o:
+	$(CXX) -c $(CXXFLAGS) -o $@ $<
+
+.c.o:
+	$(CC) -c $(CFLAGS) -o $@ $<
+
+.f.o:
+	$(F77) -c $(FFLAGS) -o $@ $<
+
+##################################################################################################
+#    some tools
+# Cache behaviour (CXXFLAGS += -g  tracks down to source lines; no -pg in linkflags)
+cache: ${PROGRAM}
+	valgrind --tool=callgrind --simulate-cache=yes ./$^
+#	kcachegrind callgrind.out.<pid> &
+	kcachegrind `ls -1tr  callgrind.out.* |tail -1`
+
+# Check for wrong memory accesses, memory leaks, ...
+# use smaller data sets
+# no "-pg"  in compile/link options
+mem: ${PROGRAM}
+	valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
+
+#  Simple run time profiling of your code
+#  CXXFLAGS += -g -pg
+#  LINKFLAGS += -pg
+prof: ${PROGRAM}
+	./$^
+	gprof -b ./$^ > gp.out
+#	kprof -f gp.out -p gprof &
+
+#Trace your heap:
+#> heaptrack ./main.GCC_
+#> heaptrack_gui heaptrack.main.GCC_.<pid>.gz
+heap: ${PROGRAM}
+	heaptrack ./$^ 11
+	heaptrack_gui  `ls -1tr  heaptrack.$^.* |tail -1` &
+
+
+
+########################################################################
+#  get the detailed  status of all optimization flags
+info:
+	echo "detailed  status of all optimization flags"
+	$(CXX) --version
+	$(CXX) -Q $(CXXFLAGS) --help=optimizers
--- a/ex1/GCC_default.mk
+++ b/ex1/GCC_default.mk
@ -0,0 +1,183 @@
+# Basic definitions for using the GNU compiler suite sequentially
+# requires setting of COMPILER=GCC_
+
+CC	= gcc
+CXX     = g++
+F77	= gfortran
+LINKER  = ${CXX}
+
+WARNINGS = -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow \
+           -Wredundant-decls -Winline -fmax-errors=1
+#  -Wunreachable-code
+CXXFLAGS += -ffast-math -O3 -march=native -std=c++17 ${WARNINGS}
+#CXXFLAGS += -Ofast -funroll-all-loops -std=c++17 ${WARNINGS}
+#-msse3
+# -ftree-vectorizer-verbose=2  -DNDEBUG
+# -ftree-vectorizer-verbose=5
+# -ftree-vectorize -fdump-tree-vect-blocks=foo.dump  -fdump-tree-pre=stderr
+
+# CFLAGS	= -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details
+# CFLAGS	= -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2
+# #CFLAGS	= -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
+# FFLAGS	= -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
+# LFLAGS  = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
+LINKFLAGS   += -O3
+
+#architecture
+#CPU = -march=znver2
+CXXFLAGS  += ${CPU}
+LINKFLAGS += ${CPU}
+
+# different libraries in Ubuntu or Manjaro
+ifndef UBUNTU
+UBUNTU=1
+endif
+
+# BLAS, LAPACK
+ifeq ($(UBUNTU),1)
+LINKFLAGS += -llapack -lblas
+# -lopenblas
+else
+# on  archlinux
+LINKFLAGS += -llapack -lopenblas -lcblas
+endif
+
+# interprocedural optimization
+CXXFLAGS  += -flto
+LINKFLAGS += -flto
+
+# for debugging purposes (safe code)
+# -fsanitize=leak         # only one out the three can be used
+# -fsanitize=address
+# -fsanitize=thread
+SANITARY =  -fsanitize=address  -fsanitize=undefined -fsanitize=null -fsanitize=return \
+ -fsanitize=bounds -fsanitize=alignment -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow \
+ -fsanitize=bool -fsanitize=enum -fsanitize=vptr
+#CXXFLAGS  += ${SANITARY}
+#LINKFLAGS += ${SANITARY}
+
+# profiling tools
+#CXXFLAGS  += -pg
+#LINKFLAGS += -pg
+
+
+default: ${PROGRAM}
+
+${PROGRAM}:	${OBJECTS}
+	$(LINKER)  $^  ${LINKFLAGS} -o $@
+
+clean:
+	@rm -f ${PROGRAM} ${OBJECTS}
+
+clean_all:: clean
+	-@rm -f *_ *~ *.bak *.log *.out *.tar *.orig *.optrpt
+	-@rm -rf html
+
+run: clean ${PROGRAM}
+#run: ${PROGRAM}
+#	time  ./${PROGRAM} ${PARAMS}
+	./${PROGRAM} ${PARAMS}
+
+# tar / zip the current directory into the parent directory (together with the shared *default.mk files)
+# CURDIR is maintained by make itself, so the name is also right with 'make -C <dir>'
+MY_DIR := $(notdir $(CURDIR))
+tar: clean_all
+	@echo "Tar the directory: " ${MY_DIR}
+#	'&&' skips tar if the cd fails and lets a tar failure fail the target
+	@cd .. && tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk
+# 	tar cf `basename ${PWD}`.tar *
+#find . -size +10M > large_files
+#--exclude-from ${MY_DIR}/large_files
+
+zip: clean
+	@echo "Zip the directory: " ${MY_DIR}
+#	same chaining as in 'tar': no zip in the wrong directory, zip's exit status is kept
+	@cd .. && zip -r ${MY_DIR}.zip ${MY_DIR} *default.mk
+
+doc:
+	doxygen Doxyfile
+
+#########################################################################
+.SUFFIXES: .f90
+
+.cpp.o:
+	$(CXX) -c $(CXXFLAGS) -o $@ $<
+#	$(CXX) -c $(CXXFLAGS) -o $@ $<  2>&1 | tee -a $<.log 
+#	$(CXX) -c $(CXXFLAGS) -o $@ $<  2>&1 | tee -a $(<:.cpp=.log)
+
+.c.o:
+	$(CC) -c $(CFLAGS) -o $@ $<
+
+.f.o:
+	$(F77) -c $(FFLAGS) -o $@ $<
+
+.f90.o:
+	$(F77) -c $(FFLAGS) -o $@ $<
+
+##################################################################################################
+#    some tools
+# Cache behaviour (CXXFLAGS += -g  tracks down to source lines; no -pg in linkflags)
+cache: ${PROGRAM}
+	valgrind --tool=callgrind --simulate-cache=yes ./$^  ${PARAMS}
+#	kcachegrind callgrind.out.<pid> &
+	kcachegrind `ls -1tr  callgrind.out.* |tail -1`
+
+# Check for wrong memory accesses, memory leaks, ...
+# use smaller data sets
+# no "-pg"  in compile/link options
+mem: ${PROGRAM}
+	valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^  ${PARAMS}
+# Graphical interface
+# valkyrie
+
+#  Simple run time profiling of your code
+#  CXXFLAGS += -g -pg
+#  LINKFLAGS += -pg
+prof: ${PROGRAM}
+	perf record ./$^  ${PARAMS}
+	perf report
+#	gprof -b ./$^ > gp.out
+#	kprof -f gp.out -p gprof &
+
+#  perf in Ubuntu 20.04:   https://www.howtoforge.com/how-to-install-perf-performance-analysis-tool-on-ubuntu-20-04/
+#  * install 
+#  * sudo vi /etc/sysctl.conf
+#                add   kernel.perf_event_paranoid = 0
+
+#Trace your heap:
+#> heaptrack ./main.GCC_
+#> heaptrack_gui heaptrack.main.GCC_.<pid>.gz
+heap: ${PROGRAM}
+	heaptrack ./$^  ${PARAMS}
+	heaptrack_gui  `ls -1tr  heaptrack.$^.* |tail -1` &
+
+codecheck: $(SOURCES)
+	cppcheck --enable=all --inconclusive --std=c++17 --suppress=missingIncludeSystem $^
+
+
+########################################################################
+#  get the detailed  status of all optimization flags
+info:
+	echo "detailed  status of all optimization flags"
+	$(CXX) --version
+	$(CXX) -Q $(CXXFLAGS) --help=optimizers
+	lscpu
+	inxi -C
+	lstopo
+
+# Excellent hardware info
+#	hardinfo
+# Live monitoring of CPU frequency etc.
+#	sudo i7z
+
+# Memory  consumption
+#	vmstat -at -SM 3
+#	xfce4-taskmanager
+
+
+# https://www.tecmint.com/check-linux-cpu-information/
+#https://www.tecmint.com/monitor-cpu-and-gpu-temperature-in-ubuntu/
+
+# Debugging:
+# https://wiki.archlinux.org/index.php/Debugging
--- a/ex1/ICC_default.mk
+++ b/ex1/ICC_default.mk
@ -0,0 +1,137 @@
+# Basic definitions for using the INTEL compiler suite sequentially
+# requires setting of COMPILER=ICC_
+
+#BINDIR = /opt/intel/bin/
+
+# special on my sony [GH]
+#BINDIR = /opt/save.intel/bin/
+# very special on my sony [GH]
+# FIND_LIBS = -L /opt/save.intel/composer_xe_2013.1.117/mkl/lib/intel64/libmkl_intel_lp64.so
+
+# Error with g++-4.8 using icpc14.0,x:
+#   find directory wherein bits/c++config.h is located
+#   'locate bits/c++config.h'
+#FOUND_CONFIG = -I/usr/include/x86_64-linux-gnu/c++/4.8
+
+
+CC	= ${BINDIR}icc
+CXX     = ${BINDIR}icpc
+F77	= ${BINDIR}ifort
+LINKER  = ${CXX}
+
+
+WARNINGS = -Wall -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -wd2015,2012 -wn3
+#    -Winline -Wredundant-decls -Wunreachable-code
+CXXFLAGS +=  -O3 -fargument-noalias -std=c++17 -DNDEBUG ${WARNINGS} -mkl ${FOUND_CONFIG}
+# profiling tools
+#CXXFLAGS  += -pg
+#LINKFLAGS += -pg
+# -vec-report=3
+# -qopt-report=5 -qopt-report-phase=vec
+# -guide -parallel
+# -guide-opts=string  -guide-par[=n]  -guide-vec[=n]
+# -auto-p32 -simd
+CXXFLAGS += -align
+
+# use MKL by INTEL
+#  https://software.intel.com/content/www/us/en/develop/documentation/mkl-linux-developer-guide/top/linking-your-application-with-the-intel-math-kernel-library/linking-quick-start/using-the-mkl-compiler-option.html
+# https://software.intel.com/content/www/us/en/develop/articles/intel-mkl-link-line-advisor.html
+# LINKFLAGS += -L${BINDIR}../composer_xe_2013.1.117/mkl/lib/intel64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
+#LINKFLAGS += -O3 -L/opt/intel/mkl/lib -mkl
+LINKFLAGS += -O3 -mkl=sequential
+
+# interprocedural optimization
+CXXFLAGS += -ipo
+LINKFLAGS += -ipo
+
+# annotated assembler file
+ANNOTED = -fsource-asm -S
+
+default:	${PROGRAM}
+
+${PROGRAM}:	${OBJECTS}
+	$(LINKER)  $^  ${LINKFLAGS} -o $@
+
+clean:
+	rm -f ${PROGRAM} ${OBJECTS}
+
+clean_all:: clean
+	@rm -f *_ *~ *.bak *.log *.out *.tar
+
+run: clean ${PROGRAM}
+	./${PROGRAM}
+
+# tar the current directory into ../<dirname>.tar (together with the shared *default.mk files)
+# CURDIR is maintained by make itself, so the name is also right with 'make -C <dir>'
+MY_DIR := $(notdir $(CURDIR))
+tar: clean_all
+	@echo "Tar the directory: " ${MY_DIR}
+#	'&&' skips tar if the cd fails and lets a tar failure fail the target
+	@cd .. && tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk
+# 	tar cf `basename ${PWD}`.tar *
+
+doc:
+	doxygen Doxyfile
+
+#########################################################################
+
+.cpp.o:
+	$(CXX) -c $(CXXFLAGS) -o $@ $<
+
+.c.o:
+	$(CC) -c $(CFLAGS) -o $@ $<
+
+.f.o:
+	$(F77) -c $(FFLAGS) -o $@ $<
+
+##################################################################################################
+# #    some tools
+# # Cache behaviour (CXXFLAGS += -g  tracks down to source lines)
+# cache: ${PROGRAM}
+# 	valgrind --tool=callgrind --simulate-cache=yes ./$^
+# #	kcachegrind callgrind.out.<pid> &
+#
+# # Check for wrong memory accesses, memory leaks, ...
+# # use smaller data sets
+# mem: ${PROGRAM}
+# 	valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
+#
+# #  Simple run time profiling of your code
+# #  CXXFLAGS += -g -pg
+# #  LINKFLAGS += -pg
+# prof: ${PROGRAM}
+# 	./$^
+# 	gprof -b ./$^ > gp.out
+# #	kprof -f gp.out -p gprof &
+#
+
+
+mem: inspector
+prof: amplifier
+cache: amplifier
+
+gap_par_report:
+	${CXX}  -c -guide -parallel $(SOURCES) 2> gap.txt
+
+# GUI for performance report
+amplifier: ${PROGRAM}
+	echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
+#	alternatively to the solution above:
+            #edit file  /etc/sysctl.d/10-ptrace.conf     and set variable   kernel.yama.ptrace_scope   variable to 0 .
+	amplxe-gui &
+
+# GUI for Memory and Thread analyzer (race condition)
+inspector: ${PROGRAM}
+	echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
+	inspxe-gui &
+
+advisor:
+	echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
+	advixe-gui &
+
+icc-info:
+	icpc -# main.cpp
+
+
+
+
--- a/ex1/ONEAPI_default.mk
+++ b/ex1/ONEAPI_default.mk
@ -0,0 +1,176 @@
+# Basic definitions for using the INTEL compiler suite sequentially
+# requires setting of COMPILER=ONEAPI_
+
+#         https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onemkl/link-line-advisor.html
+# requires
+# source /opt/intel/oneapi/setvars.sh
+# on  AMD:    export MKL_DEBUG_CPU_TYPE=5
+
+#BINDIR = /opt/intel/oneapi/compiler/latest/linux/bin/
+#MKL_ROOT = /opt/intel/oneapi/mkl/latest/
+#export KMP_AFFINITY=verbose,compact
+
+CC	= ${BINDIR}icc
+CXX     = ${BINDIR}dpcpp
+F77	= ${BINDIR}ifort
+LINKER  = ${CXX}
+
+## Compiler flags
+WARNINGS = -Wall -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -pedantic
+WARNINGS += -Wpessimizing-move -Wredundant-move
+#-wd2015,2012,2014 -wn3
+#    -Winline -Wredundant-decls -Wunreachable-code
+# -qopt-subscript-in-range
+# -vec-threshold0
+
+CXXFLAGS += -O3 -std=c++17  ${WARNINGS}
+#CXXFLAGS += -DMKL_ILP64  -I"${MKLROOT}/include"
+#CXXFLAGS += -DMKL_ILP32  -I"${MKLROOT}/include"
+LINKFLAGS += -O3
+
+# interprocedural optimization
+CXXFLAGS  += -ipo
+LINKFLAGS += -ipo
+LINKFLAGS += -flto
+
+# annotated Assembler file
+ANNOTED = -fsource-asm -S 
+
+#architecture
+CPU  = -march=core-avx2
+#CPU += -mtp=zen
+# -xCORE-AVX2
+# -axcode COMMON-AVX512 -axcode MIC-AVX512 -axcode CORE-AVX512 -axcode CORE-AVX2
+CXXFLAGS  += ${CPU}
+LINKFLAGS += ${CPU}
+
+# use MKL by INTEL
+# https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onemkl/link-line-advisor.html
+# sequential MKL
+#                              use the 32 bit interface (LP64) instead of 64 bit interface (ILP64)
+CXXFLAGS +=  -qmkl=sequential  -UMKL_ILP64
+LINKFLAGS += -O3 -qmkl=sequential -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
+#LINKFLAGS += -O3 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
+
+# shared libs:  https://aur.archlinux.org/packages/intel-oneapi-compiler-static
+#     install intel-oneapi-compiler-static   
+# or 
+LINKFLAGS += -shared-intel
+
+
+OPENMP = -qopenmp
+CXXFLAGS += ${OPENMP}
+LINKFLAGS += ${OPENMP}
+
+
+# profiling tools
+#CXXFLAGS  += -pg
+#LINKFLAGS += -pg
+# -vec-report=3
+# -qopt-report=5 -qopt-report-phase=vec -qopt-report-phase=openmp
+# -guide -parallel
+# -guide-opts=string  -guide-par[=n]  -guide-vec[=n]
+# -auto-p32 -simd
+
+# Reports: https://software.intel.com/en-us/articles/getting-the-most-out-of-your-intel-compiler-with-the-new-optimization-reports
+#CXXFLAGS +=  -qopt-report=5 -qopt-report-phase=vec,par
+#CXXFLAGS +=  -qopt-report=5 -qopt-report-phase=cg
+# Redirect report from *.optrpt to stderr
+#    -qopt-report-file=stderr
+# Guided parallelization
+#    -guide -parallel
+#    -guide-opts=string  -guide-par[=n]  -guide-vec[=n]
+#    -auto-p32 -simd
+
+## run time checks
+# https://www.intel.com/content/www/us/en/develop/documentation/fortran-compiler-oneapi-dev-guide-and-reference/top/compiler-reference/compiler-options/offload-openmp-and-parallel-processing-options/par-runtime-control-qpar-runtime-control.html
+
+
+default:	${PROGRAM}
+
+${PROGRAM}:	${OBJECTS}
+	$(LINKER)  $^  ${LINKFLAGS} -o $@
+
+clean:
+	rm -f ${PROGRAM} ${OBJECTS} *.optrpt
+
+clean_all:: clean
+	@rm -f *_ *~ *.bak *.log *.out *.tar
+
+run: clean ${PROGRAM}
+	./${PROGRAM}
+
+# tar the current directory into ../<dirname>.tar (together with the shared *default.mk files)
+# CURDIR is maintained by make itself, so the name is also right with 'make -C <dir>'
+MY_DIR := $(notdir $(CURDIR))
+tar: clean_all
+	@echo "Tar the directory: " ${MY_DIR}
+#	'&&' skips tar if the cd fails and lets a tar failure fail the target
+	@cd .. && tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk
+# 	tar cf `basename ${PWD}`.tar *
+
+doc:
+	doxygen Doxyfile
+
+#########################################################################
+
+.cpp.o:
+	$(CXX) -c $(CXXFLAGS) -o $@ $<
+
+.c.o:
+	$(CC) -c $(CFLAGS) -o $@ $<
+
+.f.o:
+	$(F77) -c $(FFLAGS) -o $@ $<
+
+##################################################################################################
+#    some tools
+# Cache behaviour (CXXFLAGS += -g  tracks down to source lines)
+# https://software.intel.com/content/www/us/en/develop/documentation/vtune-help/top/analyze-performance/microarchitecture-analysis-group/memory-access-analysis.html
+
+mem: inspector
+prof: vtune
+cache: inspector
+
+gap_par_report:
+	${CXX}  -c -guide -parallel $(SOURCES) 2> gap.txt
+
+# GUI for performance report
+amplifier: ${PROGRAM}
+	echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
+	echo 0 | sudo tee /proc/sys/kernel/perf_event_paranoid
+	amplxe-gui &
+
+# GUI for Memory and Thread analyzer (race condition)
+inspector: ${PROGRAM}
+	echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
+#	inspxe-gui &
+	vtune-gui ./${PROGRAM} &
+
+# Command-line survey with Intel Advisor; the CSV report goes to ./out/survey.csv.
+# Every recipe line runs in its own shell, so the environment script has to be read on the
+# same line as each advisor call ('.' is the POSIX spelling of 'source').
+ADVISOR_VARS = /opt/intel/oneapi/advisor/2021.2.0/advixe-vars.sh
+advisor: ${PROGRAM}
+#	/opt/intel/oneapi/advisor/latest/bin64/advixe-gui &
+	. ${ADVISOR_VARS} && advisor --collect=survey ./${PROGRAM}
+#	advisor --collect=roofline ./${PROGRAM}
+	. ${ADVISOR_VARS} && advisor --report=survey --project-dir=./ src:r=./ --format=csv --report-output=./out/survey.csv
+
+# Hotspot analysis on the command line; the text report is written to vtune.out.
+# The program is a prerequisite because the recipe runs it.
+vtune: ${PROGRAM}
+	echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
+#	https://software.intel.com/en-us/articles/intel-advisor-2017-update-1-what-s-new
+#	the variable is set on the command line itself: an 'export' on a recipe line of its
+#	own is lost, because every recipe line runs in a new shell
+	ADVIXE_EXPERIMENTAL=roofline vtune -collect hotspots ./${PROGRAM}
+	vtune -report hotspots -r r000hs > vtune.out
+#	vtune-gui ./${PROGRAM} &
+
+icc-info:
+	icpc -# main.cpp
+
+# MKL on AMD
+# https://www.computerbase.de/2019-11/mkl-workaround-erhoeht-leistung-auf-amd-ryzen/
+#
+# https://sites.google.com/a/uci.edu/mingru-yang/programming/mkl-has-bad-performance-on-an-amd-cpu
+# export MKL_DEBUG_CPU_TYPE=5
+# export MKL_NUM_THREADS=1
+# export MKL_DYNAMIC=false
+#  on Intel compiler
+# http://publicclu2.blogspot.com/2013/05/intel-complier-suite-reference-card.html
--- a/ex1/PGI_default.mk
+++ b/ex1/PGI_default.mk
@ -0,0 +1,93 @@
+# Basic definitions for using the PGI compiler suite sequentially
+# requires setting of COMPILER=PGI_
+# OPTIRUN = optirun
+
+
+CC	= pgcc
+CXX     = pgc++
+F77	= pgfortran
+LINKER  = ${CXX}
+
+# on mephisto:
+#CXXFLAGS  += -I/share/apps/atlas/include
+#LINKFLAGS += -L/share/apps/atlas/lib
+#LINKFLAGS   += -lcblas -latlas
+
+#LINKFLAGS   += -lblas
+# The <cblas.h> header must be wrapped in extern "C" so that g++ finds all symbols.
+
+WARNINGS = -Minform=warn
+# -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -W -Wfloat-equal -Wshadow  -Wredundant-decls
+#           -pedantic -Wunreachable-code -Wextra -Winline
+#  -Wunreachable-code
+
+#PGI_PROFILING = -Minfo=ccff,loop,vect,opt,intensity,mp,accel
+PGI_PROFILING = -Minfo=ccff,accel,ipa,loop,lre,mp,opt,par,unified,vect,intensity
+# -Minfo
+# -Mprof=time
+# -Mprof=lines
+#        take care with option      -Msafeptr
+CXXFLAGS += -O3 -std=c++17  ${WARNINGS}
+#CXXFLAGS += -O3 -std=c++11  -DNDEBUG ${PGI_PROFILING} ${WARNINGS}
+#  -fastsse  -fargument-noalias ${WARNINGS}  -msse3 -vec-report=3
+
+default:	${PROGRAM}
+
+${PROGRAM}:	${OBJECTS}
+	$(LINKER)  $^  ${LINKFLAGS} -o $@
+
+clean:
+	@rm -f ${PROGRAM} ${OBJECTS}
+
+clean_all:: clean
+	@rm -f *_ *~ *.bak *.log *.out *.tar
+
+run: clean ${PROGRAM}
+	./${PROGRAM}
+
+# tar the current directory into ../<dirname>.tar (together with the shared *default.mk files)
+# CURDIR is maintained by make itself, so the name is also right with 'make -C <dir>'
+MY_DIR := $(notdir $(CURDIR))
+tar: clean_all
+	@echo "Tar the directory: " ${MY_DIR}
+#	'&&' skips tar if the cd fails and lets a tar failure fail the target
+	@cd .. && tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk
+# 	tar cf `basename ${PWD}`.tar *
+
+doc:
+	doxygen Doxyfile
+
+#########################################################################
+
+.cpp.o:
+	$(CXX) -c $(CXXFLAGS) -o $@ $<
+
+.c.o:
+	$(CC) -c $(CFLAGS) -o $@ $<
+
+.f.o:
+	$(F77) -c $(FFLAGS) -o $@ $<
+
+##################################################################################################
+# #    some tools
+# #  Simple run time profiling of your code
+# #  CXXFLAGS += -g -pg
+# #  LINKFLAGS += -pg
+
+
+# Profiling options PGI, see: pgcollect -help
+# CPU_PROF = -allcache
+CPU_PROF = -time
+# GPU_PROF = -cuda=gmem,branch,cc13 -cudainit
+#GPU_PROF = -cuda=branch:cc20
+#
+PROF_FILE = pgprof.out
+
+cache: prof
+
+prof: ${PROGRAM}
+	${OPTIRUN} ${BINDIR}pgcollect $(CPU_PROF) ./$^
+	${OPTIRUN} ${BINDIR}pgprof -exe ./$^  $(PROF_FILE) &
+
+info:
+	pgaccelinfo -v
--- a/ex1/ex1A_mean_values/.vscode/tasks.json
+++ b/ex1/ex1A_mean_values/.vscode/tasks.json
@ -0,0 +1,28 @@
+{
+    "tasks": [
+        {
+            "type": "cppbuild",
+            "label": "C/C++: gcc build active file",
+            "command": "/usr/bin/gcc",
+            "args": [
+                "-fdiagnostics-color=always",
+                "-g",
+                "${file}",
+                "-o",
+                "${fileDirname}/${fileBasenameNoExtension}"
+            ],
+            "options": {
+                "cwd": "${fileDirname}"
+            },
+            "problemMatcher": [
+                "$gcc"
+            ],
+            "group": {
+                "kind": "build",
+                "isDefault": true
+            },
+            "detail": "Task generated by Debugger."
+        }
+    ],
+    "version": "2.0.0"
+}
--- a/ex1/ex1A_mean_values/Makefile
+++ b/ex1/ex1A_mean_values/Makefile
@ -0,0 +1,30 @@
+#
+# use GNU-Compiler tools by default; '?=' keeps a value exported in the environment
+COMPILER ?= GCC_
+# alternatively from the shell
+# export COMPILER=GCC_
+# or, alternatively from the shell
+# make COMPILER=GCC_
+
+# use Intel compilers
+#COMPILER=ICC_
+
+# use PGI compilers
+# COMPILER=PGI_
+
+
+SOURCES = main.cpp ../ex1A_mean_values/means.cpp
+OBJECTS = $(SOURCES:.cpp=.o)
+
+PROGRAM	= main.${COMPILER}
+
+# the next two lines enable debugging and detailed performance analysis; comment them out otherwise
+CXXFLAGS += -g
+LINKFLAGS += -g
+# do not use -pg with PGI compilers
+
+# fallback if COMPILER is still undefined or empty at this point
+ifndef COMPILER
+  COMPILER=GCC_
+endif
+
+# compiler-specific settings and all rules come from the shared file one directory up
+include ../${COMPILER}default.mk
--- a/ex1/ex1A_mean_values/main.GCC_
+++ b/ex1/ex1A_mean_values/main.GCC_
--- a/ex1/ex1A_mean_values/main.cpp
+++ b/ex1/ex1A_mean_values/main.cpp
@ -0,0 +1,35 @@
+#include "means.h"
+#include <vector>
+#include <iostream>
+using namespace std;
+
+/**
+  Runs both calculate_means() overloads on three sample triples and prints one line
+  "arithmetic, geometric, harmonic" per call: first the three-integer version, then,
+  after a separator line, the vector version.
+  The command line is not evaluated, hence main() takes no parameters (this avoids
+  unused-parameter warnings).
+  @return 0
+*/
+int main()
+{
+    double arithmetic_mean, geometric_mean, harmonic_mean;
+
+    // prints the three most recently computed means on one line
+    auto print_means = [&]()
+    {
+        cout << arithmetic_mean << ", " << geometric_mean << ", " << harmonic_mean << endl;
+    };
+
+    // Fixed version
+    calculate_means(1, 4, 16, arithmetic_mean, geometric_mean, harmonic_mean);
+    print_means();
+
+    calculate_means(2, 3, 5, arithmetic_mean, geometric_mean, harmonic_mean);
+    print_means();
+
+    calculate_means(1000, 4000, 16000, arithmetic_mean, geometric_mean, harmonic_mean);
+    print_means();
+    cout << "--------------------------------" << endl;
+
+    // Scalable version
+    calculate_means(vector<int> {1, 4, 16}, arithmetic_mean, geometric_mean, harmonic_mean);
+    print_means();
+
+    calculate_means(vector<int> {2, 3, 5}, arithmetic_mean, geometric_mean, harmonic_mean);
+    print_means();
+
+    calculate_means(vector<int> {1000, 4000, 16000}, arithmetic_mean, geometric_mean, harmonic_mean);
+    print_means();
+
+    return 0;
+}
--- a/ex1/ex1A_mean_values/main.o
+++ b/ex1/ex1A_mean_values/main.o
--- a/ex1/ex1A_mean_values/means.cpp
+++ b/ex1/ex1A_mean_values/means.cpp
@ -0,0 +1,30 @@
+#include "../ex1A_mean_values/means.h"
+#include <cmath>
+#include <vector>
+
+// Computes arithmetic, geometric and harmonic mean of a, b and c (parameters: see means.h).
+// The operands are converted to double before they are added, so the sum cannot overflow
+// int for large inputs. Geometric and harmonic mean are only meaningful for strictly
+// positive values, because log() and 1/x are applied to every operand.
+void calculate_means(int a, int b, int c, double &am, double &gm, double &hm)
+{
+    const double x = a;
+    const double y = b;
+    const double z = c;
+
+    am = (x + y + z)/3.0;
+    // mean of the logarithms instead of the cube root of the product
+    gm = std::exp((std::log(x) + std::log(y) + std::log(z))/3.0);
+    hm = 3.0/(1.0/x + 1.0/y + 1.0/z);
+}
+
+// Computes arithmetic, geometric and harmonic mean of all entries of numbers
+// (parameters: see means.h; the vector is taken by value to match that declaration).
+// The geometric mean is accumulated as a sum of logarithms. Geometric and harmonic mean
+// are only meaningful for strictly positive entries.
+// An empty vector has no mean: all three results are set to NaN explicitly instead of
+// relying on a 0/0 division.
+void calculate_means(std::vector<int> numbers, double &am, double &gm, double &hm)
+{
+    if (numbers.empty())
+    {
+        am = gm = hm = std::nan("");
+        return;
+    }
+
+    // element count as double: no narrowing of size_t to int
+    const double n = static_cast<double>(numbers.size());
+
+    double sum     = 0.;
+    double log_sum = 0.;
+    double inv_sum = 0.;
+
+    for (const int value : numbers)
+    {
+        sum     += value;
+        log_sum += std::log(value);
+        inv_sum += 1.0/value;
+    }
+
+    am = sum/n;
+    gm = std::exp(log_sum/n);
+    hm = n/inv_sum;
+}
--- a/ex1/ex1A_mean_values/means.h
+++ b/ex1/ex1A_mean_values/means.h
@ -0,0 +1,23 @@
+#pragma once
+#include <vector>
+
+/**
+  This function calculates arithmetic mean, geometric mean and harmonic mean of three integers.
+  @param[in]    a            first integer
+  @param[in]    b            second integer
+  @param[in]    c            third integer
+  @param[out]   am           arithmetic mean
+  @param[out]   gm           geometric mean
+  @param[out]   hm           harmonic mean
+*/
+void calculate_means(int a, int b, int c, double &am, double &gm, double &hm);
+
+/**
+  This function calculates arithmetic mean, geometric mean and harmonic mean of an integer vector.
+  @param[in]    numbers      vector containing integers
+  @param[out]   am           arithmetic mean
+  @param[out]   gm           geometric mean
+  @param[out]   hm           harmonic mean
+*/
+void calculate_means(std::vector<int> numbers, double &am, double &gm, double &hm);
+
--- a/ex1/ex1A_mean_values/means.o
+++ b/ex1/ex1A_mean_values/means.o
--- a/ex1/ex1B_data-IO_and_vectors/Makefile
+++ b/ex1/ex1B_data-IO_and_vectors/Makefile
@ -0,0 +1,30 @@
+#
+# use GNU-Compiler tools by default; '?=' keeps a value exported in the environment
+COMPILER ?= GCC_
+# alternatively from the shell
+# export COMPILER=GCC_
+# or, alternatively from the shell
+# make COMPILER=GCC_
+
+# use Intel compilers
+#COMPILER=ICC_
+
+# use PGI compilers
+# COMPILER=PGI_
+
+
+SOURCES = main.cpp ../ex1A_mean_values/means.cpp
+OBJECTS = $(SOURCES:.cpp=.o)
+
+PROGRAM	= main.${COMPILER}
+
+# the next two lines enable debugging and detailed performance analysis; comment them out otherwise
+CXXFLAGS += -g
+LINKFLAGS += -g
+# do not use -pg with PGI compilers
+
+# fallback if COMPILER is still undefined or empty at this point
+ifndef COMPILER
+  COMPILER=GCC_
+endif
+
+# compiler-specific settings and all rules come from the shared file one directory up
+include ../${COMPILER}default.mk
--- a/ex1/ex1B_data-IO_and_vectors/data_1.txt
+++ b/ex1/ex1B_data-IO_and_vectors/data_1.txt
@ -0,0 +1,501 @@
+141
+261
+87
+430
+258
+298
+425
+120
+496
+707
+244
+786
+75
+394
+4
+221
+2
+190
+143
+269
+175
+139
+599
+902
+940
+222
+483
+377
+524
+265
+69
+437
+174
+27
+955
+431
+962
+763
+8
+681
+706
+646
+553
+219
+773
+229
+371
+891
+857
+403
+319
+609
+911
+910
+592
+333
+854
+443
+905
+34
+533
+717
+180
+337
+188
+322
+404
+549
+49
+553
+275
+242
+244
+155
+957
+936
+819
+729
+176
+361
+189
+2
+317
+700
+626
+544
+440
+288
+502
+762
+763
+577
+748
+646
+124
+505
+348
+93
+148
+199
+673
+432
+695
+257
+10
+533
+280
+947
+907
+393
+25
+672
+838
+972
+57
+451
+583
+687
+720
+651
+727
+374
+582
+117
+58
+980
+285
+595
+963
+186
+194
+342
+933
+391
+274
+152
+398
+375
+132
+436
+92
+615
+11
+574
+790
+236
+449
+570
+62
+497
+643
+222
+838
+972
+847
+506
+279
+747
+237
+958
+621
+601
+173
+91
+256
+859
+912
+700
+726
+230
+577
+811
+404
+989
+90
+321
+512
+61
+726
+557
+530
+830
+859
+790
+318
+453
+753
+110
+110
+270
+525
+973
+711
+312
+292
+851
+912
+640
+256
+89
+839
+585
+949
+62
+585
+286
+828
+191
+443
+394
+827
+677
+208
+319
+134
+672
+571
+170
+148
+477
+909
+553
+33
+54
+806
+452
+383
+790
+365
+533
+712
+872
+329
+651
+975
+76
+588
+414
+310
+264
+759
+996
+187
+782
+196
+993
+803
+425
+729
+499
+809
+357
+74
+591
+911
+194
+433
+750
+40
+947
+764
+559
+184
+498
+518
+995
+855
+963
+679
+404
+935
+480
+232
+397
+706
+559
+757
+996
+963
+536
+964
+116
+52
+305
+581
+531
+902
+541
+432
+543
+713
+17
+801
+143
+479
+257
+370
+662
+170
+279
+199
+196
+327
+881
+472
+404
+180
+969
+408
+845
+616
+377
+878
+785
+465
+814
+899
+430
+335
+597
+902
+703
+378
+735
+955
+543
+541
+312
+72
+182
+93
+464
+10
+916
+643
+2
+31
+209
+455
+128
+9
+728
+355
+781
+437
+437
+50
+50
+92
+595
+242
+842
+858
+964
+489
+221
+227
+537
+763
+348
+462
+640
+918
+162
+716
+578
+434
+885
+394
+179
+634
+625
+328
+803
+1000
+981
+128
+233
+24
+608
+111
+408
+885
+549
+370
+209
+441
+957
+125
+471
+857
+44
+692
+979
+284
+134
+686
+910
+611
+900
+194
+755
+347
+419
+156
+820
+625
+739
+806
+68
+951
+498
+756
+743
+832
+157
+458
+619
+933
+836
+896
+583
+583
+855
+35
+886
+408
+37
+747
+155
+144
+606
+255
+325
+402
+407
+387
+610
+167
+189
+95
+324
+770
+235
+741
+693
+825
+828
+294
+310
+524
+326
+832
+811
+557
+263
+681
+234
+457
+385
+539
+992
+756
+981
+235
+529
+52
+757
+602
+858
+989
+930
+410
+1
+541
+208
+220
+326
+96
+748
+749
+544
+339
+833
+553
+958
+893
+357
+547
+347
+623
+797
+746
+126
+823
+26
+415
+732
+782
+368
+
--- a/ex1/ex1B_data-IO_and_vectors/main.GCC_
+++ b/ex1/ex1B_data-IO_and_vectors/main.GCC_
--- a/ex1/ex1B_data-IO_and_vectors/main.cpp
+++ b/ex1/ex1B_data-IO_and_vectors/main.cpp
@ -0,0 +1,53 @@
+#include "../ex1A_mean_values/means.h"
+#include <iostream>
+#include <fstream>
+#include <cmath>
+#include <vector>
+#include <algorithm>
+using namespace std;
+
+
+/**
+  Reads whitespace separated integers from the file "data_1.txt" in the working directory and prints
+  their minimum, maximum, arithmetic mean, geometric mean, harmonic mean and standard deviation
+  (population form, i.e. divided by the number of values).
+  @param[in]    argc         number of command line arguments (not used)
+  @param[in]    argv         command line arguments (not used)
+  @return       0 on success, 1 if the file cannot be opened or contains no integer
+*/
+int main(int argc, char **argv)
+{
+    // read vector from file
+    ifstream input_stream("data_1.txt");
+    if (!input_stream)
+    {
+        cerr << "Error: cannot open data_1.txt" << endl;
+        return 1;
+    }
+
+    vector<int> data_vector;
+    int value;
+    while (input_stream >> value)       // stops at the end of the file or at the first non-integer token
+    {
+        data_vector.push_back(value);
+    }
+
+    // Everything below needs at least one value: the min/max iterators are dereferenced
+    // and the sums are divided by the number of values.
+    if (data_vector.empty())
+    {
+        cerr << "Error: data_1.txt contains no integers" << endl;
+        return 1;
+    }
+
+
+    // calculate minimum and maximum in a single pass
+    auto const min_max = minmax_element(data_vector.begin(), data_vector.end());
+
+    // calculate arithmetic mean, geometric mean and harmonic mean
+    double am, gm, hm;
+    calculate_means(data_vector, am, gm, hm);
+
+
+    // calculate standard deviation  sqrt( 1/n * sum_i (x_i - am)^2 )
+    double sd = 0.;
+    for (int const entry : data_vector)
+    {
+        double const deviation = entry - am;
+        sd += deviation*deviation;
+    }
+    sd = sqrt(sd/static_cast<double>(data_vector.size()));
+
+
+    // print results
+    cout << "minimum: " << *min_max.first << endl;
+    cout << "maximum: " << *min_max.second << endl;
+    cout << "arithmetic mean: " << am << endl;
+    cout << "geometric mean: " << gm << endl;
+    cout << "harmonic mean: " << hm << endl;
+    cout << "standard deviation: " << sd << endl;
+
+    return 0;
+}
--- a/ex1/ex1B_data-IO_and_vectors/main.o
+++ b/ex1/ex1B_data-IO_and_vectors/main.o
--- a/ex1/ex1C_summation_of_specified_numbers/main.cpp
+++ b/ex1/ex1C_summation_of_specified_numbers/main.cpp
@ -0,0 +1,31 @@
+#include "special_sum.h"
+#include "../utils/timing.h"
+#include <iostream>
+#include <chrono>
+#include <stack>
+using namespace std;
+
+/**
+  Compares the loop based and the loop free summation for a few values of n:
+  both results are printed together with the time needed for 1000 repetitions.
+*/
+int main(int argc, char **argv)
+{
+    size_t const repetitions = 1000;
+    size_t const test_sizes[] = {15, 1001, 1432987};
+
+    // check results and compare speeds
+    for (size_t const n : test_sizes)
+    {
+        cout << "n = " << n << endl;
+
+        // variant with a loop over all candidates
+        size_t sum_1 = 0;
+        tic();
+        for (size_t rep = 0; rep != repetitions; ++rep)
+        {
+            sum_1 = special_sum_loop(n);
+        }
+        double const time_1 = toc();
+
+        // closed formula
+        size_t sum_2 = 0;
+        tic();
+        for (size_t rep = 0; rep != repetitions; ++rep)
+        {
+            sum_2 = special_sum_noloop(n);
+        }
+        double const time_2 = toc();
+
+        cout << "loop: " << sum_1 << "\t\tDuration: " << time_1 << endl;
+        cout << "no loop: " << sum_2 << "\t\tDuration: " << time_2 << endl;
+        cout << "---------------------------------------------------" << endl;
+    }
+
+    return 0;
+}
--- a/ex1/ex1C_summation_of_specified_numbers/special_sum.cpp
+++ b/ex1/ex1C_summation_of_specified_numbers/special_sum.cpp
@ -0,0 +1,28 @@
+#include "special_sum.h"
+
+/**
+  Sum of the first n positive integers: 1 + 2 + ... + n = n*(n+1)/2.
+  Exactly one of n and n+1 is even; that factor is halved before the multiplication so the
+  intermediate product does not overflow as long as the result itself fits into size_t.
+  @param[in]    n            number of summands
+  @return       the sum 1 + 2 + ... + n
+*/
+size_t gauss_sum(size_t n)
+{
+    return (n % 2 == 0) ? (n/2)*(n + 1) : n*((n + 1)/2);
+}
+
+// Adds up every integer in [1,n] that is divisible by 3 or by 5 by testing each candidate.
+size_t special_sum_loop(size_t n)
+{
+    size_t const end = n + 1;      // one past the last candidate
+    size_t total = 0;
+    size_t candidate = 1;
+    while (candidate < end)
+    {
+        bool const multiple_of_3 = (candidate % 3 == 0);
+        bool const multiple_of_5 = (candidate % 5 == 0);
+        if (multiple_of_3 || multiple_of_5)
+        {
+            total += candidate;
+        }
+        ++candidate;
+    }
+    return total;
+}
+
+// Same sum as special_sum_loop, but via inclusion-exclusion:
+// (multiples of 3) + (multiples of 5) - (multiples of 15, which were counted twice).
+size_t special_sum_noloop(size_t n)
+{
+    // integer division truncates, so n/d is the number of multiples of d in [1,n]
+    size_t const multiples_of_3  = gauss_sum(n/3)*3;
+    size_t const multiples_of_5  = gauss_sum(n/5)*5;
+    size_t const multiples_of_15 = gauss_sum(n/15)*15;
+
+    return multiples_of_3 + multiples_of_5 - multiples_of_15;
+}
--- a/ex1/ex1C_summation_of_specified_numbers/special_sum.h
+++ b/ex1/ex1C_summation_of_specified_numbers/special_sum.h
@ -0,0 +1,18 @@
+#include <cstddef>
+
+/**
+  This function returns the sum of all positive integers less than or equal to n which are multiples of 3 or of 5,
+  computed WITH a loop over all candidates.
+  @param[in]    n            upper bound (inclusive) of the integers taken into account
+  @return       the sum M of all multiples of 3 or 5 in [1,n]
+*/
+size_t special_sum_loop(size_t n);
+
+
+/**
+  This function returns the sum of all positive integers less than or equal to n which are multiples of 3 or of 5,
+  computed WITHOUT a loop.
+  Example: For n=15, we have 60 = 3+5+6+9+10+12+15 = (1+2+3+4+5)*3 + (1+2+3)*5 - 1*15
+  Formula: M = (\sum_{i=1}^{k_3} i)*3 + (\sum_{i=1}^{k_5} i)*5 - (\sum_{i=1}^{k_15} i)*15
+           where k_d = n/d (integer division) is the number of multiples of d in [1,n]
+  @param[in]    n            upper bound (inclusive) of the integers taken into account
+  @return       the sum M of all multiples of 3 or 5 in [1,n]
+*/
+size_t special_sum_noloop(size_t n);
--- a/ex1/ex1D_kahan_summation/main
+++ b/ex1/ex1D_kahan_summation/main
--- a/ex1/ex1D_kahan_summation/main.cpp
+++ b/ex1/ex1D_kahan_summation/main.cpp
@ -0,0 +1,33 @@
+#include "mylib.h"
+#include <cmath>
+#include <iostream>
+using namespace std;
+
+/**
+  For n = 10, 10^2, ..., 10^7 the vector x with x[k] = 1/(k+1) is built and the scalar product <x,x>
+  (i.e. the partial sum of 1/k^2) is computed with the plain and with the Kahan summation.
+  Both results are compared with the limit of the series, pi^2/6.
+*/
+int main(int argc, char **argv)
+{
+    // limit of sum_{k>=1} 1/k^2
+    double const reference = pow(M_PI,2)/6;
+
+    size_t n = 1;
+    for(size_t i = 1; i < 8; ++i)
+    {
+        // n = 10^i in exact integer arithmetic; converting the floating point result of
+        // pow(10,i) to an integer truncates and would be off by one if pow() is slightly too small
+        n *= 10;
+
+        vector<double> x(n);
+        for (size_t k = 0; k < n; ++k)
+            x[k] = 1.0/(k + 1);
+
+
+        // compute scalar products
+        double const sum_1 = scalar(x, x);
+        double const sum_2 = Kahan_skalar(x, x);
+
+        // compute error
+        double const err_1 = fabs(sum_1 - reference);
+        double const err_2 = fabs(sum_2 - reference);
+
+        cout << "n = " << n << endl;
+        cout << "Normal scalar product: " << sum_1 << "\terror: " << err_1 << endl;
+        cout << "Kahan scalar product: " << sum_2 << "\terror: " << err_2 << endl;
+        cout << endl;
+    }
+
+    return 0;
+}
--- a/ex1/ex1D_kahan_summation/mylib.cpp
+++ b/ex1/ex1D_kahan_summation/mylib.cpp
@ -0,0 +1,34 @@
+#include "mylib.h"
+#include <cassert>       // assert()
+#include <cmath>
+#include <vector>
+using namespace std;
+
+// Plain Euclidean inner product <x,y>: the products x[i]*y[i] are added up from front to back.
+// Both vectors must have the same length (checked by an assert).
+double scalar(vector<double> const &x, vector<double> const &y)
+{
+    assert(x.size() == y.size());
+
+    double result = 0.0;
+    auto y_it = y.begin();
+    for (auto x_it = x.begin(); x_it != x.end(); ++x_it, ++y_it)
+    {
+        result += (*x_it) * (*y_it);
+    }
+    return result;
+}
+
+
+
+// Euclidean inner product <x,y> with Kahan (compensated) summation:
+// the rounding error of every addition is carried along in c and fed back into the next summand.
+// Both vectors must have the same length (checked by an assert, as in scalar()).
+// Note: the compensation only works if the compiler keeps the floating point operations as written.
+double Kahan_skalar(vector<double> const &x, vector<double> const &y)
+{
+    assert(x.size() == y.size());
+    double sum = 0;
+    double c = 0;
+    size_t const n = x.size();
+    for (size_t i = 0; i < n; ++i)
+    {
+        double const z = x[i]*y[i] - c;    // c is the part that got lost in the last iteration
+        double const t = sum + z;          // when adding sum + z, the lower digits are lost if sum is large
+        c = (t - sum) - z;                 // now we recover the lower digits to add in the next iteration
+        sum = t;
+    }
+    return sum;
+}
--- a/ex1/ex1D_kahan_summation/mylib.h
+++ b/ex1/ex1D_kahan_summation/mylib.h
@ -0,0 +1,30 @@
+#ifndef FILE_MYLIB
+#define FILE_MYLIB
+#include <vector>
+
+/**     Inner product
+    @param[in] x    vector
+    @param[in] y    vector of the same length as x
+    @return         resulting Euclidean inner product <x,y>
+*/
+double scalar(std::vector<double> const &x, std::vector<double> const &y);
+
+/**     Inner product using BLAS routines
+    NOTE(review): only declared here; the mylib.cpp of this exercise shown alongside does not define it -- confirm.
+    @param[in] x    vector
+    @param[in] y    vector
+    @return         resulting Euclidean inner product <x,y>
+*/
+double scalar_cblas(std::vector<double> const &x, std::vector<double> const &y);
+float scalar_cblas(std::vector<float> const &x, std::vector<float> const &y);
+
+
+/**     L_2 Norm of a vector
+    NOTE(review): only declared here; the mylib.cpp of this exercise shown alongside does not define it -- confirm.
+    @param[in] x    vector
+    @return         resulting Euclidean norm ||x||
+*/
+double norm(std::vector<double> const &x);
+
+/**     Inner product using Kahan (compensated) summation
+    @param[in] x    vector
+    @param[in] y    vector of the same length as x
+    @return         resulting Euclidean inner product <x,y>
+*/
+double Kahan_skalar(std::vector<double> const &x,  std::vector<double> const &y);
+
+
+#endif
--- a/ex1/ex1E_vector_vs_list/main
+++ b/ex1/ex1E_vector_vs_list/main
--- a/ex1/ex1E_vector_vs_list/main.cpp
+++ b/ex1/ex1E_vector_vs_list/main.cpp
@ -0,0 +1,63 @@
+#include "../utils/timing.h"
+#include <iostream>
+#include <random>
+#include <chrono>
+#include <vector>
+#include <list>
+#include <algorithm>
+using namespace std;
+
+
+/**
+  Returns a uniformly distributed random integer from the closed interval [lower_bound, upper_bound].
+  The generator is seeded from the system clock once, on the first call, and then reused; seeding a
+  fresh generator on every call would return the same number for all calls within one clock tick.
+  @param[in]    lower_bound  smallest possible result
+  @param[in]    upper_bound  largest possible result, must not be smaller than lower_bound
+  @return       random integer, converted to size_t
+*/
+size_t random_integer(int lower_bound, int upper_bound)
+{
+    static minstd_rand0 generator(static_cast<unsigned>(chrono::system_clock::now().time_since_epoch().count()));
+
+    uniform_int_distribution<int> distribution(lower_bound, upper_bound);
+
+    return static_cast<size_t>(distribution(generator));
+}
+
+/**
+  Compares the cost of sorted insertion into a vector and into a list:
+  both containers start with the entries 1,...,n, then n random entries from [1,n] are inserted
+  at the position found by lower_bound so that the container stays sorted.
+*/
+int main(int argc, char **argv)
+{
+    // start with generating a sorted vector/list
+    size_t const n = 10000;
+    vector<int> x_vec(n);
+    list<int> x_list;               // starts empty: list<int>(n) would already hold n zeros before the push_back calls
+    for(size_t k = 0; k < n; ++k)
+    {
+        int const value = static_cast<int>(k + 1);
+        x_vec[k] = value;
+        x_list.push_back(value);
+    }
+
+
+    // insert new random entries such that the container stays sorted
+    tic();
+    for(size_t i = 0; i < n; ++i)
+    {
+        // int like the container entries, so lower_bound compares equal types
+        int const new_entry = static_cast<int>(random_integer(1, static_cast<int>(n)));
+        auto it = lower_bound(x_vec.begin(), x_vec.end(), new_entry);
+        x_vec.insert(it, new_entry);
+    }
+    double time_1 = toc();
+
+    tic();
+    for(size_t i = 0; i < n; ++i)
+    {
+        int const new_entry = static_cast<int>(random_integer(1, static_cast<int>(n)));
+        auto it = lower_bound(x_list.begin(), x_list.end(), new_entry);
+        x_list.insert(it, new_entry);
+    }
+    double time_2 = toc();
+
+
+    // check results
+    cout << "New vector is sorted: " << std::boolalpha << is_sorted(x_vec.cbegin(), x_vec.cend()) << "\tsize: " << x_vec.size() << "\tduration: " << time_1 << endl;
+    cout << "New list is sorted: " << std::boolalpha << is_sorted(x_list.cbegin(), x_list.cend()) << "\tsize: " << x_list.size() << "\tduration: " << time_2 << endl;
+
+    // Vector stores 3 pointers
+    // List stores two pointers for every element: one to the previous, one to the next element
+
+    return 0;
+}
--- a/ex1/ex1F_goldbachs_conjecture/goldbach
+++ b/ex1/ex1F_goldbachs_conjecture/goldbach
--- a/ex1/ex1F_goldbachs_conjecture/goldbach.cpp
+++ b/ex1/ex1F_goldbachs_conjecture/goldbach.cpp
@ -0,0 +1,42 @@
+#include "goldbach.h"
+
+// Counts the decompositions k = p + q with primes p <= q.
+// get_primes() delivers the primes in ascending order, so the pairs are found by moving one index
+// up from the smallest prime and one down from the largest prime: O(m) instead of testing all pairs.
+size_t single_goldbach(size_t k)
+{
+    const std::vector<size_t> relevant_primes = get_primes(k);
+
+    size_t counter = 0;
+    size_t low = 0;                             // index of the smaller summand
+    size_t high = relevant_primes.size();       // one past the index of the larger summand
+
+    while(low < high)
+    {
+        const size_t sum = relevant_primes[low] + relevant_primes[high - 1];
+        if(sum == k)
+        {
+            // the primes are distinct, so neither summand can be part of another pair
+            ++counter;
+            ++low;
+            --high;
+        }
+        else if(sum < k)
+        {
+            ++low;      // the smaller summand is too small for every remaining partner
+        }
+        else
+        {
+            --high;     // the larger summand is too large for every remaining partner
+        }
+    }
+
+    return counter;
+}
+
+
+// Returns a vector of length n+1 whose entry s is the number of decompositions s = p + q
+// with primes p <= q (entries that cannot be written this way stay 0).
+std::vector<size_t> count_goldbach(size_t n)
+{
+    const std::vector<size_t> relevant_primes = get_primes(n);
+    const size_t m = relevant_primes.size();
+
+    std::vector<size_t> counter_vector(n + 1, 0);
+
+    for(size_t i = 0; i < m; ++i)
+    {
+        for(size_t j = i; j < m; ++j)
+        {
+            const size_t sum = relevant_primes[i] + relevant_primes[j];
+            // get_primes() delivers the primes in ascending order:
+            // once the sum exceeds n, it does so for all larger j as well
+            if(sum > n)
+                break;
+            ++counter_vector[sum];
+        }
+    }
+
+    return counter_vector;
+}
--- a/ex1/ex1F_goldbachs_conjecture/goldbach.h
+++ b/ex1/ex1F_goldbachs_conjecture/goldbach.h
@ -0,0 +1,21 @@
+#pragma once
+#include "mayer_primes.h"
+#include <iostream>
+#include <vector>
+#include <iterator>
+#include <cassert>
+
+
+/**
+  This function returns the number of possible decompositions of an integer into a sum of two prime numbers.
+  The order of the two summands does not matter, i.e. p + q and q + p are counted once.
+  @param[in]    k            integer to decompose
+  @return       number of decompositions of k
+*/
+size_t single_goldbach(size_t k);
+
+/**
+  This function returns the number of possible decompositions into a sum of two prime numbers for all integers in the interval [0,n].
+  The order of the two summands does not matter, i.e. p + q and q + p are counted once.
+  For Goldbach's conjecture the entries of the even integers in [4,n] are the ones of interest.
+  @param[in]    n            upper integer bound
+  @return       vector of length n+1; the entry with index s contains the number of decompositions of s
+*/
+std::vector<size_t> count_goldbach(size_t n);
--- a/ex1/ex1F_goldbachs_conjecture/main
+++ b/ex1/ex1F_goldbachs_conjecture/main
--- a/ex1/ex1F_goldbachs_conjecture/main.cpp
+++ b/ex1/ex1F_goldbachs_conjecture/main.cpp
@ -0,0 +1,37 @@
+#include "../utils/timing.h"
+#include "goldbach.h"
+#include <iostream>
+#include <algorithm>
+using namespace std;
+
+
+/**
+  Prints the number of Goldbach decompositions of 694 as a check and then, for several upper bounds n,
+  the number in [0,n] with the most decompositions together with the time needed to find it.
+*/
+int main(int argc, char **argv)
+{
+    cout << "Check: 694 has "<< single_goldbach(694) << " decompositions." << endl;
+    cout << "----------------------------------------" << endl;
+
+    size_t const upper_bounds[] = {10000, 100000, 400000, 1000000, 2000000};
+
+    for(size_t const n : upper_bounds)
+    {
+        tic();
+
+        // counting and searching the maximum are both part of the measured time
+        auto const decompositions = count_goldbach(n);
+        auto const best = max_element(decompositions.begin(), decompositions.end());
+        size_t const best_number = static_cast<size_t>(best - decompositions.begin());
+
+        double const elapsed = toc();
+
+        cout << "The number " <<  best_number << " has " << *best << " decompositions. Duration: " << elapsed << endl;
+    }
+
+    /*
+    Output of one run:
+    The number 9240 has 329 decompositions. Duration: 0.00572876
+    The number 99330 has 2168 decompositions. Duration: 0.3342
+    The number 390390 has 7094 decompositions. Duration: 4.23734
+    The number 990990 has 15594 decompositions. Duration: 29.5817
+    The number 1981980 has 27988 decompositions. Duration: 135.985
+    */
+
+    return 0;
+}
--- a/ex1/ex1F_goldbachs_conjecture/mayer_primes.h
+++ b/ex1/ex1F_goldbachs_conjecture/mayer_primes.h
@ -0,0 +1,73 @@
+#pragma once
+
+#include <cstring> //memset
+#include <vector>
+//using namespace std;
+
+/** \brief Determines all prime numbers in interval [2, @p max].
+ *
+ *  The sieve of Eratosthenes is used.
+ *
+ *  The implementation originates from <a href="http://code.activestate.com/recipes/576559-fast-prime-generator/">Florian Mayer</a>.
+ *
+ * \param[in]   max end of interval for the prime number search.
+ * \return  vector of prime numbers @f$2,3,5, ..., p<=max @f$.
+ *
+ * \copyright
+ * Copyright (c) 2008 Florian Mayer          (adapted by Gundolf Haase 2018)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+template <class T>
+std::vector<T> get_primes(T max)
+{
+    std::vector<T> primes;
+    if (max < 2)
+    {
+        // no primes below 2; this also keeps a negative max (signed T) away from the size computation
+        return primes;
+    }
+
+    // One bit per candidate, a set bit means "not yet known to be composite".
+    // The vector owns the storage, so it is released on every exit path (also if push_back throws).
+    std::vector<unsigned char> sieve(static_cast<std::size_t>(max) / 8 + 1, 0xFF);
+    for (T x = 2; x <= max; x++)
+    {
+        if (sieve[static_cast<std::size_t>(x / 8)] & (0x01 << (x % 8)))
+        {
+            primes.push_back(x);
+            // Is prime. Clear the bits of all its multiples.
+            for (T j = 2 * x; j <= max; j += x)
+            {
+                unsigned char &cell = sieve[static_cast<std::size_t>(j / 8)];
+                cell = static_cast<unsigned char>(cell & ~(0x01 << (j % 8)));
+            }
+        }
+    }
+    return primes;
+}
+
+//---------------------------------------------------------------
+//int main()        // by  Florian Mayer
+//{g++ -O3  -std=c++14 -fopenmp main.cpp && ./a.out
+//    vector<unsigned long> primes;
+//    primes = get_primes(10000000);
+//    // return 0;
+//    // Print out result.
+//    vector<unsigned long>::iterator it;
+//    for(it=primes.begin(); it < primes.end(); it++)
+//        cout << *it << " ";
+//
+//    cout << endl;
+//    return 0;
+//}
--- a/ex1/ex1G_dense_matrices_access/DenseMatrix.h
+++ b/ex1/ex1G_dense_matrices_access/DenseMatrix.h
@ -0,0 +1,71 @@
+#pragma once
+#include "sigmoid.h"
+#include <iostream>
+#include <vector>
+using namespace std;
+
+/**
+  Dense matrix with n rows and m columns, stored row by row in one contiguous array:
+  the entry in row i and column j is M[i*m + j].
+*/
+class DenseMatrix
+{
+    private:
+        vector<double> M;       // entries, row by row
+        size_t n;               // number of rows
+        size_t m;               // number of columns
+
+
+    public:
+        /** Creates the n x m matrix with the entries sigmoid(x_entry(i,nm))*sigmoid(x_entry(j,nm)), nm = max(n,m). */
+        DenseMatrix(size_t n, size_t m)
+            : M(n*m), n(n), m(m)
+        {
+            size_t const nm = max(n,m);
+
+            size_t position = 0;                            // runs through M in storage order
+            for(size_t row = 0; row < n; ++row)
+            {
+                for(size_t col = 0; col < m; ++col)
+                {
+                    M[position] = sigmoid(x_entry(row,nm))*sigmoid(x_entry(col,nm));
+                    ++position;
+                }
+            }
+        }
+
+        /** Returns y = M*x (length n); the matrix is traversed row by row. */
+        vector<double> Mult(const vector<double> &x) const
+        {
+            vector<double> y(n,0);
+            for(size_t row = 0; row < n; ++row)
+            {
+                size_t const row_start = row*m;
+                double row_sum = 0;
+                for(size_t col = 0; col < m; ++col)
+                {
+                    row_sum += M[row_start + col]*x[col];
+                }
+                y[row] = row_sum;
+            }
+            return y;
+        }
+
+        /** Returns x = M^T*y (length m); the matrix is traversed column by column. */
+        vector<double> MultT(const vector<double> &y) const
+        {
+            vector<double> x(m,0);
+            for(size_t col = 0; col < m; ++col)
+            {
+                double col_sum = 0;
+                for(size_t row = 0; row < n; ++row)
+                {
+                    col_sum += M[row*m + col]*y[row];
+                }
+                x[col] = col_sum;
+            }
+            return x;
+        }
+
+        /** Prints the matrix to cout, one line per row, followed by an empty line. */
+        void Print() const
+        {
+            for(size_t row = 0; row < n; ++row)
+            {
+                size_t const row_start = row*m;
+                for(size_t col = 0; col < m; ++col)
+                {
+                    cout << M[row_start + col] << "  ";
+                }
+                cout << endl;
+            }
+            cout << endl;
+        }
+};
--- a/ex1/ex1G_dense_matrices_access/ProductMatrix.h
+++ b/ex1/ex1G_dense_matrices_access/ProductMatrix.h
@ -0,0 +1,52 @@
+#pragma once
+#include <cmath>
+#include <iostream>
+#include <vector>
+using namespace std;
+
+/**
+  Rank one matrix M = u*v^T with n = u.size() rows and m = v.size() columns.
+  Only the two vectors are stored; a matrix-vector product then needs one inner product
+  and one scaling, i.e. O(n+m) operations instead of O(n*m).
+*/
+class ProductMatrix
+{
+    private:
+        vector<double> u;       // column vector, length n
+        vector<double> v;       // row vector, length m
+        size_t n;
+        size_t m;
+
+    public:
+        /** Stores copies of u and v. */
+        ProductMatrix(const vector<double> &u, const vector<double> &v)
+            : u(u), v(v), n(u.size()), m(v.size())
+        {
+        }
+
+        /** Returns y = M*x = u*(v^T x); x must have (at least) m entries. */
+        vector<double> Mult(const vector<double> &x) const
+        {
+            // the inner product v^T x is the same for every row, so it is computed only once
+            double v_dot_x = 0;
+            for(size_t j = 0; j < m; ++j)
+            {
+                v_dot_x += v[j]*x[j];
+            }
+
+            vector<double> y(n,0);
+            for(size_t i = 0; i < n; ++i)
+            {
+                y[i] = v_dot_x*u[i];
+            }
+            return y;
+        }
+
+        /** Returns x = M^T*y = v*(u^T y); y must have (at least) n entries. */
+        vector<double> MultT(const vector<double> &y) const
+        {
+            // the inner product u^T y is the same for every column, so it is computed only once
+            double u_dot_y = 0;
+            for(size_t i = 0; i < n; ++i)
+            {
+                u_dot_y += y[i]*u[i];
+            }
+
+            vector<double> x(m,0);
+            for(size_t j = 0; j < m; ++j)
+            {
+                x[j] = u_dot_y*v[j];
+            }
+            return x;
+        }
+};
--- a/ex1/ex1G_dense_matrices_access/main
+++ b/ex1/ex1G_dense_matrices_access/main
--- a/ex1/ex1G_dense_matrices_access/main.cpp
+++ b/ex1/ex1G_dense_matrices_access/main.cpp
@ -0,0 +1,93 @@
+#include "../utils/timing.h"
+#include "DenseMatrix.h"
+#include "ProductMatrix.h"
+#include <algorithm>
+
+/**
+  b) small example for Mult/MultT of a dense matrix,
+  c) timing of Mult and MultT for a dense n x n matrix,
+  d) the same timing for the matrix stored as a product of two vectors.
+*/
+int main()
+{
+    // b) ------------------------------------------------------------------------------------------------------
+    DenseMatrix const M(5,3);
+    vector<double> const u = {1, 2, 3};
+    vector<double> const v = {-1, 2, -3, 4, -5};
+    vector<double> const f1 = M.Mult(u);
+    vector<double> const f2 = M.MultT(v);
+
+    M.Print();
+
+    for(double const entry : f1)
+        cout << entry << endl;
+    cout << endl;
+
+    for(double const entry : f2)
+        cout << entry << "  ";
+    cout << endl << "-------------------------------------------------" << endl;
+
+    // helpers shared by c) and d) -----------------------------------------------------------------------------
+    size_t const n = pow(10,3);
+    size_t const n_loops = 100;
+
+    // largest absolute difference between the first n entries of two vectors
+    auto const sup_distance = [n](vector<double> const &a, vector<double> const &b)
+    {
+        vector<double> difference(n,0);
+        for(size_t i = 0; i < n; ++i)
+            difference[i] = abs(a[i] - b[i]);
+        return *max_element(difference.begin(), difference.end());
+    };
+
+    // prints the averaged durations and the error
+    auto const report = [n, n_loops](double total_1, double total_2, double error)
+    {
+        cout << "n = " << n << endl;
+        cout << "Average duration for Mult: " << total_1/n_loops << endl;
+        cout << "Average duration for MultT: " << total_2/n_loops << endl;
+        cout << "sup-error: " << error << endl;
+        cout << "-------------------------------------------------" << endl;
+    };
+
+    vector<double> const x(n, 1.0);
+    vector<double> y_1;
+    vector<double> y_2;
+
+    // c) ------------------------------------------------------------------------------------------------------
+    DenseMatrix const M_1(n,n);
+
+    tic();
+    for(size_t l = 0; l < n_loops; ++l)
+        y_1 = M_1.Mult(x);
+    double time_1 = toc();
+
+    tic();
+    for(size_t l = 0; l < n_loops; ++l)
+        y_2 = M_1.MultT(x);
+    double time_2 = toc();
+
+    report(time_1, time_2, sup_distance(y_1, y_2));
+
+    // d) ------------------------------------------------------------------------------------------------------
+    vector<double> u_M(n,0);
+    for(size_t i = 0; i < n; ++i)
+        u_M[i] = sigmoid(x_entry(i, n));
+
+    ProductMatrix const M_2(u_M, u_M);
+
+    tic();
+    for(size_t l = 0; l < n_loops; ++l)
+        y_1 = M_2.Mult(x);
+    time_1 = toc();
+
+    tic();
+    for(size_t l = 0; l < n_loops; ++l)
+        y_2 = M_2.MultT(x);
+    time_2 = toc();
+
+    report(time_1, time_2, sup_distance(y_1, y_2));
+
+
+    return 0;
+}
--- a/ex1/ex1G_dense_matrices_access/sigmoid.h
+++ b/ex1/ex1G_dense_matrices_access/sigmoid.h
@ -0,0 +1,12 @@
+#pragma once
+#include <cmath>
+
+/**
+  Logistic function 1/(1 + exp(-x)).
+  Declared inline because the definition lives in a header: without it, every source file
+  including this header emits its own definition and linking several of them fails.
+  @param[in]    x            argument
+  @return       function value in (0,1)
+*/
+inline double sigmoid(double x)
+{
+    return 1./(1. + exp(-x));
+}
+
+/**
+  k-th point of the equidistant grid with nm points on [-5,5]:  10*k/(nm-1) - 5.
+  Declared inline because the definition lives in a header (see the one-definition rule).
+  @param[in]    k            index of the grid point, 0 <= k < nm
+  @param[in]    nm           number of grid points, must be larger than 1 (nm = 1 divides by zero)
+  @return       grid point; -5 for k = 0 and 5 for k = nm-1
+*/
+inline double x_entry(size_t k, size_t nm)
+{
+    return (10.*k)/(nm - 1) - 5.;
+}