many changes

2025-11-03 22:35:52 +01:00 · 2025-11-03 22:35:52 +01:00 · 7e2626266e
commit 7e2626266e
parent 3e9f2d5053
35 changed files with 276 additions and 6417 deletions
--- a/ex1/ABCEFG/.vscode/settings.json
+++ b/ex1/ABCEFG/.vscode/settings.json
@ -1,64 +0,0 @@
-{
-    "files.associations": {
-        "algorithm": "cpp",
-        "format": "cpp",
-        "iostream": "cpp",
-        "ostream": "cpp",
-        "array": "cpp",
-        "atomic": "cpp",
-        "bit": "cpp",
-        "cctype": "cpp",
-        "charconv": "cpp",
-        "chrono": "cpp",
-        "clocale": "cpp",
-        "cmath": "cpp",
-        "compare": "cpp",
-        "concepts": "cpp",
-        "cstdarg": "cpp",
-        "cstddef": "cpp",
-        "cstdint": "cpp",
-        "cstdio": "cpp",
-        "cstdlib": "cpp",
-        "cstring": "cpp",
-        "ctime": "cpp",
-        "cwchar": "cpp",
-        "cwctype": "cpp",
-        "deque": "cpp",
-        "list": "cpp",
-        "string": "cpp",
-        "unordered_map": "cpp",
-        "vector": "cpp",
-        "exception": "cpp",
-        "functional": "cpp",
-        "iterator": "cpp",
-        "memory": "cpp",
-        "memory_resource": "cpp",
-        "numeric": "cpp",
-        "optional": "cpp",
-        "random": "cpp",
-        "ratio": "cpp",
-        "string_view": "cpp",
-        "system_error": "cpp",
-        "tuple": "cpp",
-        "type_traits": "cpp",
-        "utility": "cpp",
-        "fstream": "cpp",
-        "initializer_list": "cpp",
-        "iomanip": "cpp",
-        "iosfwd": "cpp",
-        "istream": "cpp",
-        "limits": "cpp",
-        "new": "cpp",
-        "numbers": "cpp",
-        "span": "cpp",
-        "sstream": "cpp",
-        "stdexcept": "cpp",
-        "streambuf": "cpp",
-        "cinttypes": "cpp",
-        "typeinfo": "cpp",
-        "variant": "cpp",
-        "thread": "cpp",
-        "semaphore": "cpp",
-        "stop_token": "cpp"
-    }
-}
--- a/ex1/ABCEFG/CLANG_default.mk
+++ b/ex1/ABCEFG/CLANG_default.mk
@ -1,123 +0,0 @@
-# Basic Defintions for using GNU-compiler suite sequentially
-# requires setting of COMPILER=CLANG_
-
-#CLANGPATH=//usr/lib/llvm-10/bin/
-CC     = ${CLANGPATH}clang
-CXX    = ${CLANGPATH}clang++
-#CXX   = ${CLANGPATH}clang++ -lomptarget  -fopenmp-targets=nvptx64-nvidia-cuda --cuda-path=/opt/pgi/linux86-64/2017/cuda/8.0
-#F77   = gfortran
-LINKER = ${CXX}
-
-#http://clang.llvm.org/docs/UsersManual.html#options-to-control-error-and-warning-messages
-WARNINGS += -Weverything -Wno-c++98-compat -Wno-sign-conversion -Wno-date-time -Wno-shorten-64-to-32 -Wno-padded -ferror-limit=1
-WARNINGS += -Wdocumentation -Wconversion -Wshadow -Wfloat-conversion -pedantic
-#-fsyntax-only -Wdocumentation -Wconversion -Wshadow -Wfloat-conversion -pedantic
-
-CXXFLAGS += -O3 -std=c++17 -ferror-limit=1 ${WARNINGS}
-# don't use -Ofast
-# -ftrapv
-LINKFLAGS += -O3
-
-# different libraries in Ubuntu or manajaró
-ifndef UBUNTU
-UBUNTU=1
-endif
-
-# BLAS, LAPACK
-LINKFLAGS += -llapack -lblas
-# -lopenblas
-ifeq ($(UBUNTU),1)
-# ubuntu
-else
-# on  archlinux
-LINKFLAGS += -lcblas
-endif
-
-# interprocedural optimization
-CXXFLAGS  += -flto
-LINKFLAGS += -flto
-
-#   very good check
-# http://clang.llvm.org/extra/clang-tidy/
-#   good check, see:  http://llvm.org/docs/CodingStandards.html#include-style
-SWITCH_OFF=,-readability-magic-numbers,-readability-redundant-control-flow,-readability-redundant-member-init
-SWITCH_OFF+=,-readability-redundant-member-init,-readability-isolate-declaration
-#READABILITY=,readability*${SWITCH_OFF}
-#TIDYFLAGS = -checks=llvm-*,-llvm-header-guard -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp"
-TIDYFLAGS = -checks=llvm-*,-llvm-header-guard${READABILITY} -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp"
-#TIDYFLAGS += -checks='modernize*
-#   ???
-#TIDYFLAGS = -checks='cert*'  -header-filter=.*
-#   MPI checks ??
-#TIDYFLAGS = -checks='mpi*'
-#   ??
-#TIDYFLAGS = -checks='performance*'   -header-filter=.*
-#TIDYFLAGS = -checks='portability-*'  -header-filter=.*
-#TIDYFLAGS = -checks='readability-*'  -header-filter=.*
-
-default: ${PROGRAM}
-
-${PROGRAM}:	${OBJECTS}
-	$(LINKER)  $^  ${LINKFLAGS} -o $@
-
-clean:
-	@rm -f ${PROGRAM} ${OBJECTS}
-
-clean_all:: clean
-	@rm -f *_ *~ *.bak *.log *.out *.tar
-
-codecheck: tidy_check
-tidy_check:
-	clang-tidy ${SOURCES} ${TIDYFLAGS} -- ${SOURCES}
-# see also http://clang-developers.42468.n3.nabble.com/Error-while-trying-to-load-a-compilation-database-td4049722.html
-
-run: clean ${PROGRAM}
-#	time  ./${PROGRAM} ${PARAMS}
-	./${PROGRAM} ${PARAMS}
-
-# tar the current directory
-MY_DIR = `basename ${PWD}`
-tar: clean_all
-	@echo "Tar the directory: " ${MY_DIR}
-	@cd .. ;\
-	tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
-	cd ${MY_DIR}
-# 	tar cf `basename ${PWD}`.tar *
-
-doc:
-	doxygen Doxyfile
-
-#########################################################################
-
-.cpp.o:
-	$(CXX) -c $(CXXFLAGS) -o $@ $<
-
-.c.o:
-	$(CC) -c $(CFLAGS) -o $@ $<
-
-.f.o:
-	$(F77) -c $(FFLAGS) -o $@ $<
-
-##################################################################################################
-#    some tools
-# Cache behaviour (CXXFLAGS += -g  tracks down to source lines; no -pg in linkflags)
-cache: ${PROGRAM}
-	valgrind --tool=callgrind --simulate-cache=yes ./$^  ${PARAMS}
-#	kcachegrind callgrind.out.<pid> &
-	kcachegrind `ls -1tr  callgrind.out.* |tail -1`
-
-# Check for wrong memory accesses, memory leaks, ...
-# use smaller data sets
-mem: ${PROGRAM}
-	valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^  ${PARAMS}
-
-#  Simple run time profiling of your code
-#  CXXFLAGS += -g -pg
-#  LINKFLAGS += -pg
-prof: ${PROGRAM}
-	perf record ./$^  ${PARAMS}
-	perf report
-#	gprof -b ./$^ > gp.out
-#	kprof -f gp.out -p gprof &
-
-codecheck: tidy_check
--- a/ex1/ABCEFG/GCC_AMD32_default.mk
+++ b/ex1/ABCEFG/GCC_AMD32_default.mk
@ -1,130 +0,0 @@
-# Basic Defintions for using GNU-compiler suite sequentially
-# requires setting of COMPILER=GCC_
-
-CC	= gcc
-CXX     = g++
-F77	= gfortran
-LINKER  = ${CXX}
-
-# on mephisto:
-#CXXFLAGS  += -I/share/apps/atlas/include
-#LINKFLAGS += -L/share/apps/atlas/lib
-#LINKFLAGS   += -lcblas -latlas
-
-#LINKFLAGS   += -lblas
-# Der <cblas.h> Header muss mit extern "C" versehen werden, damit g++ alles findet.
-
-
-#WARNINGS = -pedantic -pedantic-errors -Wall -Wextra -Werror -Wconversion -Weffc++ -Woverloaded-virtual  -Wfloat-equal -Wshadow
-WARNINGS = -pedantic -Wall -Wextra -Wconversion -Weffc++ -Woverloaded-virtual  -Wfloat-equal -Wshadow \
-           -Wredundant-decls -Winline -fmax-errors=1
-#           -Wunreachable-code
-#  -Wunreachable-code
-CXXFLAGS += -ffast-math -O3 -march=native -std=c++17 ${WARNINGS}
-#CXXFLAGS += -Ofast -funroll-all-loops -std=c++17 ${WARNINGS}
-#-msse3
-# -ftree-vectorizer-verbose=2  -DNDEBUG
-# -ftree-vectorizer-verbose=5
-# -ftree-vectorize -fdump-tree-vect-blocks=foo.dump  -fdump-tree-pre=stderr
-
-# CFLAGS	= -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details
-# CFLAGS	= -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2
-# #CFLAGS	= -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
-# FFLAGS	= -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
-# LFLAGS  = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
-LINKFLAGS   += -O3
-
-# BLAS, LAPACK
-OPENBLAS_DIR = /opt/openblas_GCCseq
-#OPENBLAS_DIR = /opt/openblas_GCC
-OPENBLAS_LIBDIR = ${OPENBLAS_DIR}/lib
-OPENBLAS_INCDIR = ${OPENBLAS_DIR}/include
-CXXFLAGS += -I${OPENBLAS_INCDIR}
-LINKFLAGS += -L${OPENBLAS_LIBDIR} -lopenblas
-
-# interprocedural optimization
-CXXFLAGS += -flto
-LINKFLAGS += -flto
-
-# profiling tools
-#CXXFLAGS  += -pg
-#LINKFLAGS += -pg
-
-default: ${PROGRAM}
-
-${PROGRAM}:	${OBJECTS}
-	$(LINKER)  $^  ${LINKFLAGS} -o $@
-
-clean:
-	@rm -f ${PROGRAM} ${OBJECTS}
-
-clean_all:: clean
-	-@rm -f *_ *~ *.bak *.log *.out *.tar *.orig
-	-@rm -r html
-
-run: clean ${PROGRAM}
-#	time  ./${PROGRAM}
-#	./${PROGRAM}
-	( export LD_LIBRARY_PATH=${OPENBLAS_LIBDIR}:${LD_LIBRARY_PATH} ; ./${PROGRAM} )
-#            or  'export LD_LIBRARY_PATH=/opt/openblas_gcc/lib:${LD_LIBRARY_PATH}'  in your ~/.bashrc 
-
-# tar the current directory
-MY_DIR = `basename ${PWD}`
-tar:
-	@echo "Tar the directory: " ${MY_DIR}
-	@cd .. ;\
-	tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
-	cd ${MY_DIR}
-# 	tar cf `basename ${PWD}`.tar *
-
-doc:
-	doxygen Doxyfile
-
-#########################################################################
-
-.cpp.o:
-	$(CXX) -c $(CXXFLAGS) -o $@ $<
-
-.c.o:
-	$(CC) -c $(CFLAGS) -o $@ $<
-
-.f.o:
-	$(F77) -c $(FFLAGS) -o $@ $<
-
-##################################################################################################
-#    some tools
-# Cache behaviour (CXXFLAGS += -g  tracks down to source lines; no -pg in linkflags)
-cache: ${PROGRAM}
-	valgrind --tool=callgrind --simulate-cache=yes ./$^
-#	kcachegrind callgrind.out.<pid> &
-	kcachegrind `ls -1tr  callgrind.out.* |tail -1`
-
-# Check for wrong memory accesses, memory leaks, ...
-# use smaller data sets
-# no "-pg"  in compile/link options
-mem: ${PROGRAM}
-	valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
-
-#  Simple run time profiling of your code
-#  CXXFLAGS += -g -pg
-#  LINKFLAGS += -pg
-prof: ${PROGRAM}
-	./$^
-	gprof -b ./$^ > gp.out
-#	kprof -f gp.out -p gprof &
-
-#Trace your heap:
-#> heaptrack ./main.GCC_
-#> heaptrack_gui heaptrack.main.GCC_.<pid>.gz
-heap: ${PROGRAM}
-	heaptrack ./$^ 11
-	heaptrack_gui  `ls -1tr  heaptrack.$^.* |tail -1` &
-
-
-
-########################################################################
-#  get the detailed  status of all optimization flags
-info:
-	echo "detailed  status of all optimization flags"
-	$(CXX) --version
-	$(CXX) -Q $(CXXFLAGS) --help=optimizers
--- a/ex1/ABCEFG/GCC_default.mk
+++ b/ex1/ABCEFG/GCC_default.mk
@ -1,183 +0,0 @@
-# Basic Defintions for using GNU-compiler suite sequentially
-# requires setting of COMPILER=GCC_
-
-CC	= gcc
-CXX     = g++
-F77	= gfortran
-LINKER  = ${CXX}
-
-WARNINGS = -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow \
-           -Wredundant-decls
-#  -Wunreachable-code
-CXXFLAGS += -ffast-math -O3 -march=native -std=c++17 ${WARNINGS}
-#CXXFLAGS += -Ofast -funroll-all-loops -std=c++17 ${WARNINGS}
-#-msse3
-# -ftree-vectorizer-verbose=2  -DNDEBUG
-# -ftree-vectorizer-verbose=5
-# -ftree-vectorize -fdump-tree-vect-blocks=foo.dump  -fdump-tree-pre=stderr
-
-# CFLAGS	= -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details
-# CFLAGS	= -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2
-# #CFLAGS	= -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
-# FFLAGS	= -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
-# LFLAGS  = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
-LINKFLAGS   += -O3
-
-#architecture
-#CPU = -march=znver2
-CXXFLAGS  += ${CPU}
-LINKFLAGS += ${CPU}
-
-# different libraries in Ubuntu or manajaró
-ifndef UBUNTU
-UBUNTU=1
-endif
-
-# BLAS, LAPACK
-ifeq ($(UBUNTU),1)
-LINKFLAGS += -llapack -lblas
-# -lopenblas
-else
-# on  archlinux
-LINKFLAGS += -llapack -lopenblas -lcblas
-endif
-
-# interprocedural optimization
-CXXFLAGS  += -flto
-LINKFLAGS += -flto
-
-# for debugging purpose (save code)
-# -fsanitize=leak         # only one out the three can be used
-# -fsanitize=address
-# -fsanitize=thread
-SANITARY =  -fsanitize=address  -fsanitize=undefined -fsanitize=null -fsanitize=return \
- -fsanitize=bounds -fsanitize=alignment -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow \
- -fsanitize=bool -fsanitize=enum -fsanitize=vptr
-#CXXFLAGS  += ${SANITARY}
-#LINKFLAGS += ${SANITARY}
-
-# profiling tools
-#CXXFLAGS  += -pg
-#LINKFLAGS += -pg
-
-
-default: ${PROGRAM}
-
-${PROGRAM}:	${OBJECTS}
-	$(LINKER)  $^  ${LINKFLAGS} -o $@
-
-clean:
-	@rm -f ${PROGRAM} ${OBJECTS}
-
-clean_all:: clean
-	-@rm -f *_ *~ *.bak *.log *.out *.tar *.orig *.optrpt
-	-@rm -rf html
-
-run: clean ${PROGRAM}
-#run: ${PROGRAM}
-#	time  ./${PROGRAM} ${PARAMS}
-	./${PROGRAM} ${PARAMS}
-
-# tar the current directory
-MY_DIR = `basename ${PWD}`
-tar: clean_all
-	@echo "Tar the directory: " ${MY_DIR}
-	@cd .. ;\
-	tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
-	cd ${MY_DIR}
-# 	tar cf `basename ${PWD}`.tar *
-#find . -size +10M > large_files
-#--exclude-from ${MY_DIR}/large_files
-
-zip: clean
-	@echo "Zip the directory: " ${MY_DIR}
-	@cd .. ;\
-	zip -r ${MY_DIR}.zip ${MY_DIR} *default.mk ;\
-	cd ${MY_DIR}
-
-doc:
-	doxygen Doxyfile
-
-#########################################################################
-.SUFFIXES: .f90
-
-.cpp.o:
-	$(CXX) -c $(CXXFLAGS) -o $@ $<
-#	$(CXX) -c $(CXXFLAGS) -o $@ $<  2>&1 | tee -a $<.log 
-#	$(CXX) -c $(CXXFLAGS) -o $@ $<  2>&1 | tee -a $(<:.cpp=.log)
-
-.c.o:
-	$(CC) -c $(CFLAGS) -o $@ $<
-
-.f.o:
-	$(F77) -c $(FFLAGS) -o $@ $<
-
-.f90.o:
-	$(F77) -c $(FFLAGS) -o $@ $<
-
-##################################################################################################
-#    some tools
-# Cache behaviour (CXXFLAGS += -g  tracks down to source lines; no -pg in linkflags)
-cache: ${PROGRAM}
-	valgrind --tool=callgrind --simulate-cache=yes ./$^  ${PARAMS}
-#	kcachegrind callgrind.out.<pid> &
-	kcachegrind `ls -1tr  callgrind.out.* |tail -1`
-
-# Check for wrong memory accesses, memory leaks, ...
-# use smaller data sets
-# no "-pg"  in compile/link options
-mem: ${PROGRAM}
-	valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^  ${PARAMS}
-# Graphical interface
-# valkyrie
-
-#  Simple run time profiling of your code
-#  CXXFLAGS += -g -pg
-#  LINKFLAGS += -pg
-prof: ${PROGRAM}
-	perf record ./$^  ${PARAMS}
-	perf report
-#	gprof -b ./$^ > gp.out
-#	kprof -f gp.out -p gprof &
-
-#  perf in Ubuntu 20.04:   https://www.howtoforge.com/how-to-install-perf-performance-analysis-tool-on-ubuntu-20-04/
-#  * install 
-#  * sudo vi /etc/sysctl.conf
-#                add   kernel.perf_event_paranoid = 0
-
-#Trace your heap:
-#> heaptrack ./main.GCC_
-#> heaptrack_gui heaptrack.main.GCC_.<pid>.gz
-heap: ${PROGRAM}
-	heaptrack ./$^  ${PARAMS}
-	heaptrack_gui  `ls -1tr  heaptrack.$^.* |tail -1` &
-
-codecheck: $(SOURCES)
-	cppcheck --enable=all --inconclusive --std=c++17 --suppress=missingIncludeSystem $^
-
-
-########################################################################
-#  get the detailed  status of all optimization flags
-info:
-	echo "detailed  status of all optimization flags"
-	$(CXX) --version
-	$(CXX) -Q $(CXXFLAGS) --help=optimizers
-	lscpu
-	inxi -C
-	lstopo
-
-# Excellent hardware info
-#	hardinfo
-# Life monitoring of CPU frequency etc.
-#	sudo i7z
-
-# Memory  consumption
-#	vmstat -at -SM 3
-#	xfce4-taskmanager
-
-
-# https://www.tecmint.com/check-linux-cpu-information/
-#https://www.tecmint.com/monitor-cpu-and-gpu-temperature-in-ubuntu/
-
-# Debugging:
-# https://wiki.archlinux.org/index.php/Debugging
--- a/ex1/ABCEFG/ICC_default.mk
+++ b/ex1/ABCEFG/ICC_default.mk
@ -1,137 +0,0 @@
-# Basic Defintions for using INTEL compiler suite sequentially
-# requires setting of COMPILER=ICC_
-
-#BINDIR = /opt/intel/bin/
-
-# special on my sony [GH]
-#BINDIR = /opt/save.intel/bin/
-# very special on my sony [GH]
-# FIND_LIBS = -L /opt/save.intel/composer_xe_2013.1.117/mkl/lib/intel64/libmkl_intel_lp64.so
-
-# Error with g++-4.8 using icpc14.0,x:
-#   find directory wherein bits/c++config.h is located
-#   'locate bits/c++config.h'
-#FOUND_CONFIG = -I/usr/include/x86_64-linux-gnu/c++/4.8
-
-
-CC	= ${BINDIR}icc
-CXX     = ${BINDIR}icpc
-F77	= ${BINDIR}ifort
-LINKER  = ${CXX}
-
-
-WARNINGS = -Wall -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -wd2015,2012 -wn3
-#    -Winline -Wredundant-decls -Wunreachable-code
-CXXFLAGS +=  -O3 -fargument-noalias -std=c++17 -DNDEBUG ${WARNINGS} -mkl ${FOUND_CONFIG}
-# profiling tools
-#CXXFLAGS  += -pg
-#LINKFLAGS += -pg
-# -vec-report=3
-# -qopt-report=5 -qopt-report-phase=vec
-# -guide -parallel
-# -guide-opts=string  -guide-par[=n]  -guide-vec[=n]
-# -auto-p32 -simd
-CXXFLAGS += -align
-
-# use MKL by INTEL
-#  https://software.intel.com/content/www/us/en/develop/documentation/mkl-linux-developer-guide/top/linking-your-application-with-the-intel-math-kernel-library/linking-quick-start/using-the-mkl-compiler-option.html
-# https://software.intel.com/content/www/us/en/develop/articles/intel-mkl-link-line-advisor.html
-# LINKFLAGS += -L${BINDIR}../composer_xe_2013.1.117/mkl/lib/intel64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
-#LINKFLAGS += -O3 -L/opt/intel/mkl/lib -mkl
-LINKFLAGS += -O3 -mkl=sequential
-
-# interprocedural optimization
-CXXFLAGS += -ipo
-LINKFLAGS += -ipo
-
-# annotated assembler file
-ANNOTED = -fsource-asm -S
-
-default:	${PROGRAM}
-
-${PROGRAM}:	${OBJECTS}
-	$(LINKER)  $^  ${LINKFLAGS} -o $@
-
-clean:
-	rm -f ${PROGRAM} ${OBJECTS}
-
-clean_all:: clean
-	@rm -f *_ *~ *.bak *.log *.out *.tar
-
-run: clean ${PROGRAM}
-	./${PROGRAM}
-
-# tar the current directory
-MY_DIR = `basename ${PWD}`
-tar: clean_all
-	@echo "Tar the directory: " ${MY_DIR}
-	@cd .. ;\
-	tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
-	cd ${MY_DIR}
-# 	tar cf `basename ${PWD}`.tar *
-
-doc:
-	doxygen Doxyfile
-
-#########################################################################
-
-.cpp.o:
-	$(CXX) -c $(CXXFLAGS) -o $@ $<
-
-.c.o:
-	$(CC) -c $(CFLAGS) -o $@ $<
-
-.f.o:
-	$(F77) -c $(FFLAGS) -o $@ $<
-
-##################################################################################################
-# #    some tools
-# # Cache behaviour (CXXFLAGS += -g  tracks down to source lines)
-# cache: ${PROGRAM}
-# 	valgrind --tool=callgrind --simulate-cache=yes ./$^
-# #	kcachegrind callgrind.out.<pid> &
-#
-# # Check for wrong memory accesses, memory leaks, ...
-# # use smaller data sets
-# mem: ${PROGRAM}
-# 	valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
-#
-# #  Simple run time profiling of your code
-# #  CXXFLAGS += -g -pg
-# #  LINKFLAGS += -pg
-# prof: ${PROGRAM}
-# 	./$^
-# 	gprof -b ./$^ > gp.out
-# #	kprof -f gp.out -p gprof &
-#
-
-
-mem: inspector
-prof: amplifier
-cache: amplifier
-
-gap_par_report:
-	${CXX}  -c -guide -parallel $(SOURCES) 2> gap.txt
-
-# GUI for performance report
-amplifier: ${PROGRAM}
-	echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
-#	alternatively to the solution above:
-            #edit file  /etc/sysctl.d/10-ptrace.conf     and set variable   kernel.yama.ptrace_scope   variable to 0 .
-	amplxe-gui &
-
-# GUI for Memory and Thread analyzer (race condition)
-inspector: ${PROGRAM}
-	echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
-	inspxe-gui &
-
-advisor:
-	echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
-	advixe-gui &
-
-icc-info:
-	icpc -# main.cpp
-
-
-
-
--- a/ex1/ABCEFG/ONEAPI_default.mk
+++ b/ex1/ABCEFG/ONEAPI_default.mk
@ -1,176 +0,0 @@
-# Basic Defintions for using INTEL compiler suite sequentially
-# requires setting of COMPILER=ONEAPI_
-
-#         https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onemkl/link-line-advisor.html
-# requires
-# source /opt/intel/oneapi/setvars.sh
-# on  AMD:    export MKL_DEBUG_CPU_TYPE=5
-
-#BINDIR = /opt/intel/oneapi/compiler/latest/linux/bin/
-#MKL_ROOT = /opt/intel/oneapi/mkl/latest/
-#export KMP_AFFINITY=verbose,compact
-
-CC	= ${BINDIR}icc
-CXX     = ${BINDIR}dpcpp
-F77	= ${BINDIR}ifort
-LINKER  = ${CXX}
-
-## Compiler flags
-WARNINGS = -Wall -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -pedantic
-WARNINGS += -Wpessimizing-move -Wredundant-move
-#-wd2015,2012,2014 -wn3
-#    -Winline -Wredundant-decls -Wunreachable-code
-# -qopt-subscript-in-range
-# -vec-threshold0
-
-CXXFLAGS += -O3 -std=c++17  ${WARNINGS}
-#CXXFLAGS += -DMKL_ILP64  -I"${MKLROOT}/include"
-#CXXFLAGS += -DMKL_ILP32  -I"${MKLROOT}/include"
-LINKFLAGS += -O3
-
-# interprocedural optimization
-CXXFLAGS  += -ipo
-LINKFLAGS += -ipo
-LINKFLAGS += -flto
-
-# annotated Assembler file
-ANNOTED = -fsource-asm -S 
-
-#architecture
-CPU  = -march=core-avx2
-#CPU += -mtp=zen
-# -xCORE-AVX2
-# -axcode COMMON-AVX512 -axcode MIC-AVX512 -axcode CORE-AVX512 -axcode CORE-AVX2
-CXXFLAGS  += ${CPU}
-LINKFLAGS += ${CPU}
-
-# use MKL by INTEL
-# https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onemkl/link-line-advisor.html
-# sequential MKL
-#                              use the 32 bit interface (LP64) instead of 64 bit interface (ILP64)
-CXXFLAGS +=  -qmkl=sequential  -UMKL_ILP64
-LINKFLAGS += -O3 -qmkl=sequential -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
-#LINKFLAGS += -O3 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
-
-# shared libs:  https://aur.archlinux.org/packages/intel-oneapi-compiler-static
-#     install intel-oneapi-compiler-static   
-# or 
-LINKFLAGS += -shared-intel
-
-
-OPENMP = -qopenmp
-CXXFLAGS += ${OPENMP}
-LINKFLAGS += ${OPENMP}
-
-
-# profiling tools
-#CXXFLAGS  += -pg
-#LINKFLAGS += -pg
-# -vec-report=3
-# -qopt-report=5 -qopt-report-phase=vec -qopt-report-phase=openmp
-# -guide -parallel
-# -guide-opts=string  -guide-par[=n]  -guide-vec[=n]
-# -auto-p32 -simd
-
-# Reports: https://software.intel.com/en-us/articles/getting-the-most-out-of-your-intel-compiler-with-the-new-optimization-reports
-#CXXFLAGS +=  -qopt-report=5 -qopt-report-phase=vec,par
-#CXXFLAGS +=  -qopt-report=5 -qopt-report-phase=cg
-# Redirect report from *.optrpt to stderr
-#    -qopt-report-file=stderr
-# Guided paralellization
-#    -guide -parallel
-#    -guide-opts=string  -guide-par[=n]  -guide-vec[=n]
-#    -auto-p32 -simd
-
-## run time checks
-# https://www.intel.com/content/www/us/en/develop/documentation/fortran-compiler-oneapi-dev-guide-and-reference/top/compiler-reference/compiler-options/offload-openmp-and-parallel-processing-options/par-runtime-control-qpar-runtime-control.html
-
-
-default:	${PROGRAM}
-
-${PROGRAM}:	${OBJECTS}
-	$(LINKER)  $^  ${LINKFLAGS} -o $@
-
-clean:
-	rm -f ${PROGRAM} ${OBJECTS} *.optrpt
-
-clean_all:: clean
-	@rm -f *_ *~ *.bak *.log *.out *.tar
-
-run: clean ${PROGRAM}
-	./${PROGRAM}
-
-# tar the current directory
-MY_DIR = `basename ${PWD}`
-tar: clean_all
-	@echo "Tar the directory: " ${MY_DIR}
-	@cd .. ;\
-	tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
-	cd ${MY_DIR}
-# 	tar cf `basename ${PWD}`.tar *
-
-doc:
-	doxygen Doxyfile
-
-#########################################################################
-
-.cpp.o:
-	$(CXX) -c $(CXXFLAGS) -o $@ $<
-
-.c.o:
-	$(CC) -c $(CFLAGS) -o $@ $<
-
-.f.o:
-	$(F77) -c $(FFLAGS) -o $@ $<
-
-##################################################################################################
-#    some tools
-# Cache behaviour (CXXFLAGS += -g  tracks down to source lines)
-# https://software.intel.com/content/www/us/en/develop/documentation/vtune-help/top/analyze-performance/microarchitecture-analysis-group/memory-access-analysis.html
-
-mem: inspector
-prof: vtune
-cache: inspector
-
-gap_par_report:
-	${CXX}  -c -guide -parallel $(SOURCES) 2> gap.txt
-
-# GUI for performance report
-amplifier: ${PROGRAM}
-	echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
-	echo 0 | sudo tee /proc/sys/kernel/perf_event_paranoid
-	amplxe-gui &
-
-# GUI for Memory and Thread analyzer (race condition)
-inspector: ${PROGRAM}
-	echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
-#	inspxe-gui &
-	vtune-gui ./${PROGRAM} &
-
-advisor:
-	source /opt/intel/oneapi/advisor/2021.2.0/advixe-vars.sh
-#	/opt/intel/oneapi/advisor/latest/bin64/advixe-gui &
-	advisor --collect=survey ./${PROGRAM} 
-#	advisor --collect=roofline ./${PROGRAM} 
-	advisor --report=survey --project-dir=./ src:r=./ --format=csv --report-output=./out/survey.csv
-
-vtune:
-	echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
-#	https://software.intel.com/en-us/articles/intel-advisor-2017-update-1-what-s-new
-	export ADVIXE_EXPERIMENTAL=roofline
-	vtune -collect hotspots ./${PROGRAM}
-	vtune -report hotspots -r r000hs > vtune.out
-#	vtune-gui ./${PROGRAM} &	
-
-icc-info:
-	icpc -# main.cpp
-
-# MKL on AMD
-# https://www.computerbase.de/2019-11/mkl-workaround-erhoeht-leistung-auf-amd-ryzen/
-#
-# https://sites.google.com/a/uci.edu/mingru-yang/programming/mkl-has-bad-performance-on-an-amd-cpu
-# export MKL_DEBUG_CPU_TYPE=5
-# export MKL_NUM_THRAEDS=1
-# export MKL_DYNAMIC=false
-#  on Intel compiler
-# http://publicclu2.blogspot.com/2013/05/intel-complier-suite-reference-card.html
--- a/ex1/ABCEFG/PGI_default.mk
+++ b/ex1/ABCEFG/PGI_default.mk
@ -1,93 +0,0 @@
-# Basic Defintions for using PGI-compiler suite sequentially
-# requires setting of COMPILER=PGI_
-# OPTIRUN = optirun
-
-
-CC	= pgcc
-CXX     = pgc++
-F77	= pgfortran
-LINKER  = ${CXX}
-
-# on mephisto:
-#CXXFLAGS  += -I/share/apps/atlas/include
-#LINKFLAGS += -L/share/apps/atlas/lib
-#LINKFLAGS   += -lcblas -latlas
-
-#LINKFLAGS   += -lblas
-# Der <cblas.h> Header muss mit extern "C" versehen werden, damit g++ alles findet.
-
-WARNINGS = -Minform=warn
-# -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -W -Wfloat-equal -Wshadow  -Wredundant-decls
-#           -pedantic -Wunreachable-code -Wextra -Winline
-#  -Wunreachable-code
-
-#PGI_PROFILING = -Minfo=ccff,loop,vect,opt,intensity,mp,accel
-PGI_PROFILING = -Minfo=ccff,accel,ipa,loop,lre,mp,opt,par,unified,vect,intensity
-# -Minfo
-# -Mprof=time
-# -Mprof=lines
-#        take care with option      -Msafeptr
-CXXFLAGS += -O3 -std=c++17  ${WARNINGS}
-#CXXFLAGS += -O3 -std=c++11  -DNDEBUG ${PGI_PROFILING} ${WARNINGS}
-#  -fastsse  -fargument-noalias ${WARNINGS}  -msse3 -vec-report=3
-
-default:	${PROGRAM}
-
-${PROGRAM}:	${OBJECTS}
-	$(LINKER)  $^  ${LINKFLAGS} -o $@
-
-clean:
-	@rm -f ${PROGRAM} ${OBJECTS}
-
-clean_all:: clean
-	@rm -f *_ *~ *.bak *.log *.out *.tar
-
-run: clean ${PROGRAM}
-	./${PROGRAM}
-
-# tar the current directory
-MY_DIR = `basename ${PWD}`
-tar: clean_all
-	@echo "Tar the directory: " ${MY_DIR}
-	@cd .. ;\
-	tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
-	cd ${MY_DIR}
-# 	tar cf `basename ${PWD}`.tar *
-
-doc:
-	doxygen Doxyfile
-
-#########################################################################
-
-.cpp.o:
-	$(CXX) -c $(CXXFLAGS) -o $@ $<
-
-.c.o:
-	$(CC) -c $(CFLAGS) -o $@ $<
-
-.f.o:
-	$(F77) -c $(FFLAGS) -o $@ $<
-
-##################################################################################################
-# #    some tools
-# #  Simple run time profiling of your code
-# #  CXXFLAGS += -g -pg
-# #  LINKFLAGS += -pg
-
-
-# Profiling options PGI, see: pgcollect -help
-# CPU_PROF = -allcache
-CPU_PROF = -time
-# GPU_PROF = -cuda=gmem,branch,cc13 -cudainit
-#GPU_PROF = -cuda=branch:cc20
-#
-PROF_FILE = pgprof.out
-
-cache: prof
-
-prof: ${PROGRAM}
-	${OPTIRUN} ${BINDIR}pgcollect $(CPU_PROF) ./$^
-	${OPTIRUN} ${BINDIR}pgprof -exe ./$^  $(PROF_FILE) &
-
-info:
-	pgaccelinfo -v
--- a/ex1/ABCEFG/skalar_stl/Makefile
+++ b/ex1/ABCEFG/skalar_stl/Makefile
@ -1,30 +0,0 @@
-#
-# use GNU-Compiler tools
-COMPILER=GCC_
-# alternatively from the shell
-# export COMPILER=GCC_
-# or, alternatively from the shell
-# make COMPILER=GCC_
-
-# use Intel compilers
-#COMPILER=ICC_
-
-# use PGI compilers
-# COMPILER=PGI_
-
-
-SOURCES = main.cpp mylib.cpp
-OBJECTS = $(SOURCES:.cpp=.o)
-
-PROGRAM	= main.${COMPILER}
-
-# uncomment the next to lines for debugging and detailed performance analysis
-CXXFLAGS += -g
-LINKFLAGS += -g
-# do not use -pg with PGI compilers
-
-ifndef COMPILER
-  COMPILER=GCC_
-endif
-
-include ../${COMPILER}default.mk
--- a/ex1/D/.vscode/settings.json
+++ b/ex1/D/.vscode/settings.json
@ -1,60 +0,0 @@
-{
-    "files.associations": {
-        "array": "cpp",
-        "atomic": "cpp",
-        "bit": "cpp",
-        "cctype": "cpp",
-        "charconv": "cpp",
-        "chrono": "cpp",
-        "clocale": "cpp",
-        "cmath": "cpp",
-        "compare": "cpp",
-        "complex": "cpp",
-        "concepts": "cpp",
-        "cstdarg": "cpp",
-        "cstddef": "cpp",
-        "cstdint": "cpp",
-        "cstdio": "cpp",
-        "cstdlib": "cpp",
-        "cstring": "cpp",
-        "ctime": "cpp",
-        "cwchar": "cpp",
-        "cwctype": "cpp",
-        "deque": "cpp",
-        "string": "cpp",
-        "unordered_map": "cpp",
-        "vector": "cpp",
-        "exception": "cpp",
-        "algorithm": "cpp",
-        "functional": "cpp",
-        "iterator": "cpp",
-        "memory": "cpp",
-        "memory_resource": "cpp",
-        "numeric": "cpp",
-        "optional": "cpp",
-        "random": "cpp",
-        "ratio": "cpp",
-        "string_view": "cpp",
-        "system_error": "cpp",
-        "tuple": "cpp",
-        "type_traits": "cpp",
-        "utility": "cpp",
-        "format": "cpp",
-        "initializer_list": "cpp",
-        "iomanip": "cpp",
-        "iosfwd": "cpp",
-        "iostream": "cpp",
-        "istream": "cpp",
-        "limits": "cpp",
-        "new": "cpp",
-        "numbers": "cpp",
-        "ostream": "cpp",
-        "span": "cpp",
-        "sstream": "cpp",
-        "stdexcept": "cpp",
-        "streambuf": "cpp",
-        "cinttypes": "cpp",
-        "typeinfo": "cpp",
-        "variant": "cpp"
-    }
-}
--- a/ex1/D/CLANG_default.mk
+++ b/ex1/D/CLANG_default.mk
@ -1,123 +0,0 @@
-# Basic Defintions for using GNU-compiler suite sequentially
-# requires setting of COMPILER=CLANG_
-
-#CLANGPATH=//usr/lib/llvm-10/bin/
-CC     = ${CLANGPATH}clang
-CXX    = ${CLANGPATH}clang++
-#CXX   = ${CLANGPATH}clang++ -lomptarget  -fopenmp-targets=nvptx64-nvidia-cuda --cuda-path=/opt/pgi/linux86-64/2017/cuda/8.0
-#F77   = gfortran
-LINKER = ${CXX}
-
-#http://clang.llvm.org/docs/UsersManual.html#options-to-control-error-and-warning-messages
-WARNINGS += -Weverything -Wno-c++98-compat -Wno-sign-conversion -Wno-date-time -Wno-shorten-64-to-32 -Wno-padded -ferror-limit=1
-WARNINGS += -Wdocumentation -Wconversion -Wshadow -Wfloat-conversion -pedantic
-#-fsyntax-only -Wdocumentation -Wconversion -Wshadow -Wfloat-conversion -pedantic
-
-CXXFLAGS += -O3 -std=c++17 -ferror-limit=1 ${WARNINGS}
-# don't use -Ofast
-# -ftrapv
-LINKFLAGS += -O3
-
-# different libraries in Ubuntu or manajaró
-ifndef UBUNTU
-UBUNTU=1
-endif
-
-# BLAS, LAPACK
-LINKFLAGS += -llapack -lblas
-# -lopenblas
-ifeq ($(UBUNTU),1)
-# ubuntu
-else
-# on  archlinux
-LINKFLAGS += -lcblas
-endif
-
-# interprocedural optimization
-CXXFLAGS  += -flto
-LINKFLAGS += -flto
-
-#   very good check
-# http://clang.llvm.org/extra/clang-tidy/
-#   good check, see:  http://llvm.org/docs/CodingStandards.html#include-style
-SWITCH_OFF=,-readability-magic-numbers,-readability-redundant-control-flow,-readability-redundant-member-init
-SWITCH_OFF+=,-readability-redundant-member-init,-readability-isolate-declaration
-#READABILITY=,readability*${SWITCH_OFF}
-#TIDYFLAGS = -checks=llvm-*,-llvm-header-guard -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp"
-TIDYFLAGS = -checks=llvm-*,-llvm-header-guard${READABILITY} -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp"
-#TIDYFLAGS += -checks='modernize*
-#   ???
-#TIDYFLAGS = -checks='cert*'  -header-filter=.*
-#   MPI checks ??
-#TIDYFLAGS = -checks='mpi*'
-#   ??
-#TIDYFLAGS = -checks='performance*'   -header-filter=.*
-#TIDYFLAGS = -checks='portability-*'  -header-filter=.*
-#TIDYFLAGS = -checks='readability-*'  -header-filter=.*
-
-default: ${PROGRAM}
-
-${PROGRAM}:	${OBJECTS}
-	$(LINKER)  $^  ${LINKFLAGS} -o $@
-
-clean:
-	@rm -f ${PROGRAM} ${OBJECTS}
-
-clean_all:: clean
-	@rm -f *_ *~ *.bak *.log *.out *.tar
-
-codecheck: tidy_check
-tidy_check:
-	clang-tidy ${SOURCES} ${TIDYFLAGS} -- ${SOURCES}
-# see also http://clang-developers.42468.n3.nabble.com/Error-while-trying-to-load-a-compilation-database-td4049722.html
-
-run: clean ${PROGRAM}
-#	time  ./${PROGRAM} ${PARAMS}
-	./${PROGRAM} ${PARAMS}
-
-# tar the current directory
-MY_DIR = `basename ${PWD}`
-tar: clean_all
-	@echo "Tar the directory: " ${MY_DIR}
-	@cd .. ;\
-	tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
-	cd ${MY_DIR}
-# 	tar cf `basename ${PWD}`.tar *
-
-doc:
-	doxygen Doxyfile
-
-#########################################################################
-
-.cpp.o:
-	$(CXX) -c $(CXXFLAGS) -o $@ $<
-
-.c.o:
-	$(CC) -c $(CFLAGS) -o $@ $<
-
-.f.o:
-	$(F77) -c $(FFLAGS) -o $@ $<
-
-##################################################################################################
-#    some tools
-# Cache behaviour (CXXFLAGS += -g  tracks down to source lines; no -pg in linkflags)
-cache: ${PROGRAM}
-	valgrind --tool=callgrind --simulate-cache=yes ./$^  ${PARAMS}
-#	kcachegrind callgrind.out.<pid> &
-	kcachegrind `ls -1tr  callgrind.out.* |tail -1`
-
-# Check for wrong memory accesses, memory leaks, ...
-# use smaller data sets
-mem: ${PROGRAM}
-	valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^  ${PARAMS}
-
-#  Simple run time profiling of your code
-#  CXXFLAGS += -g -pg
-#  LINKFLAGS += -pg
-prof: ${PROGRAM}
-	perf record ./$^  ${PARAMS}
-	perf report
-#	gprof -b ./$^ > gp.out
-#	kprof -f gp.out -p gprof &
-
-codecheck: tidy_check
--- a/ex1/D/GCC_AMD32_default.mk
+++ b/ex1/D/GCC_AMD32_default.mk
@ -1,130 +0,0 @@
-# Basic Defintions for using GNU-compiler suite sequentially
-# requires setting of COMPILER=GCC_
-
-CC	= gcc
-CXX     = g++
-F77	= gfortran
-LINKER  = ${CXX}
-
-# on mephisto:
-#CXXFLAGS  += -I/share/apps/atlas/include
-#LINKFLAGS += -L/share/apps/atlas/lib
-#LINKFLAGS   += -lcblas -latlas
-
-#LINKFLAGS   += -lblas
-# Der <cblas.h> Header muss mit extern "C" versehen werden, damit g++ alles findet.
-
-
-#WARNINGS = -pedantic -pedantic-errors -Wall -Wextra -Werror -Wconversion -Weffc++ -Woverloaded-virtual  -Wfloat-equal -Wshadow
-WARNINGS = -pedantic -Wall -Wextra -Wconversion -Weffc++ -Woverloaded-virtual  -Wfloat-equal -Wshadow \
-           -Wredundant-decls -Winline -fmax-errors=1
-#           -Wunreachable-code
-#  -Wunreachable-code
-CXXFLAGS += -ffast-math -O3 -march=native -std=c++17 ${WARNINGS}
-#CXXFLAGS += -Ofast -funroll-all-loops -std=c++17 ${WARNINGS}
-#-msse3
-# -ftree-vectorizer-verbose=2  -DNDEBUG
-# -ftree-vectorizer-verbose=5
-# -ftree-vectorize -fdump-tree-vect-blocks=foo.dump  -fdump-tree-pre=stderr
-
-# CFLAGS	= -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details
-# CFLAGS	= -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2
-# #CFLAGS	= -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
-# FFLAGS	= -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
-# LFLAGS  = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
-LINKFLAGS   += -O3
-
-# BLAS, LAPACK
-OPENBLAS_DIR = /opt/openblas_GCCseq
-#OPENBLAS_DIR = /opt/openblas_GCC
-OPENBLAS_LIBDIR = ${OPENBLAS_DIR}/lib
-OPENBLAS_INCDIR = ${OPENBLAS_DIR}/include
-CXXFLAGS += -I${OPENBLAS_INCDIR}
-LINKFLAGS += -L${OPENBLAS_LIBDIR} -lopenblas
-
-# interprocedural optimization
-CXXFLAGS += -flto
-LINKFLAGS += -flto
-
-# profiling tools
-#CXXFLAGS  += -pg
-#LINKFLAGS += -pg
-
-default: ${PROGRAM}
-
-${PROGRAM}:	${OBJECTS}
-	$(LINKER)  $^  ${LINKFLAGS} -o $@
-
-clean:
-	@rm -f ${PROGRAM} ${OBJECTS}
-
-clean_all:: clean
-	-@rm -f *_ *~ *.bak *.log *.out *.tar *.orig
-	-@rm -r html
-
-run: clean ${PROGRAM}
-#	time  ./${PROGRAM}
-#	./${PROGRAM}
-	( export LD_LIBRARY_PATH=${OPENBLAS_LIBDIR}:${LD_LIBRARY_PATH} ; ./${PROGRAM} )
-#            or  'export LD_LIBRARY_PATH=/opt/openblas_gcc/lib:${LD_LIBRARY_PATH}'  in your ~/.bashrc 
-
-# tar the current directory
-MY_DIR = `basename ${PWD}`
-tar:
-	@echo "Tar the directory: " ${MY_DIR}
-	@cd .. ;\
-	tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
-	cd ${MY_DIR}
-# 	tar cf `basename ${PWD}`.tar *
-
-doc:
-	doxygen Doxyfile
-
-#########################################################################
-
-.cpp.o:
-	$(CXX) -c $(CXXFLAGS) -o $@ $<
-
-.c.o:
-	$(CC) -c $(CFLAGS) -o $@ $<
-
-.f.o:
-	$(F77) -c $(FFLAGS) -o $@ $<
-
-##################################################################################################
-#    some tools
-# Cache behaviour (CXXFLAGS += -g  tracks down to source lines; no -pg in linkflags)
-cache: ${PROGRAM}
-	valgrind --tool=callgrind --simulate-cache=yes ./$^
-#	kcachegrind callgrind.out.<pid> &
-	kcachegrind `ls -1tr  callgrind.out.* |tail -1`
-
-# Check for wrong memory accesses, memory leaks, ...
-# use smaller data sets
-# no "-pg"  in compile/link options
-mem: ${PROGRAM}
-	valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
-
-#  Simple run time profiling of your code
-#  CXXFLAGS += -g -pg
-#  LINKFLAGS += -pg
-prof: ${PROGRAM}
-	./$^
-	gprof -b ./$^ > gp.out
-#	kprof -f gp.out -p gprof &
-
-#Trace your heap:
-#> heaptrack ./main.GCC_
-#> heaptrack_gui heaptrack.main.GCC_.<pid>.gz
-heap: ${PROGRAM}
-	heaptrack ./$^ 11
-	heaptrack_gui  `ls -1tr  heaptrack.$^.* |tail -1` &
-
-
-
-########################################################################
-#  get the detailed  status of all optimization flags
-info:
-	echo "detailed  status of all optimization flags"
-	$(CXX) --version
-	$(CXX) -Q $(CXXFLAGS) --help=optimizers
--- a/ex1/D/GCC_default.mk
+++ b/ex1/D/GCC_default.mk
@ -1,183 +0,0 @@
-# Basic Defintions for using GNU-compiler suite sequentially
-# requires setting of COMPILER=GCC_
-
-CC	= gcc
-CXX     = g++
-F77	= gfortran
-LINKER  = ${CXX}
-
-WARNINGS = -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow \
-           -Wredundant-decls -Winline -fmax-errors=1
-#  -Wunreachable-code
-CXXFLAGS += -ffast-math -O1 -march=native -std=c++17 ${WARNINGS}
-#CXXFLAGS += -Ofast -funroll-all-loops -std=c++17 ${WARNINGS}
-#-msse3
-# -ftree-vectorizer-verbose=2  -DNDEBUG
-# -ftree-vectorizer-verbose=5
-# -ftree-vectorize -fdump-tree-vect-blocks=foo.dump  -fdump-tree-pre=stderr
-
-# CFLAGS	= -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details
-# CFLAGS	= -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2
-# #CFLAGS	= -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
-# FFLAGS	= -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
-# LFLAGS  = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
-LINKFLAGS   += -O1
-
-#architecture
-#CPU = -march=znver2
-CXXFLAGS  += ${CPU}
-LINKFLAGS += ${CPU}
-
-# different libraries in Ubuntu or manajaró
-ifndef UBUNTU
-UBUNTU=1
-endif
-
-# BLAS, LAPACK
-ifeq ($(UBUNTU),1)
-LINKFLAGS += -llapack -lblas
-# -lopenblas
-else
-# on  archlinux
-LINKFLAGS += -llapack -lopenblas -lcblas
-endif
-
-# interprocedural optimization
-CXXFLAGS  += -flto
-LINKFLAGS += -flto
-
-# for debugging purpose (save code)
-# -fsanitize=leak         # only one out the three can be used
-# -fsanitize=address
-# -fsanitize=thread
-SANITARY =  -fsanitize=address  -fsanitize=undefined -fsanitize=null -fsanitize=return \
- -fsanitize=bounds -fsanitize=alignment -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow \
- -fsanitize=bool -fsanitize=enum -fsanitize=vptr
-#CXXFLAGS  += ${SANITARY}
-#LINKFLAGS += ${SANITARY}
-
-# profiling tools
-#CXXFLAGS  += -pg
-#LINKFLAGS += -pg
-
-
-default: ${PROGRAM}
-
-${PROGRAM}:	${OBJECTS}
-	$(LINKER)  $^  ${LINKFLAGS} -o $@
-
-clean:
-	@rm -f ${PROGRAM} ${OBJECTS}
-
-clean_all:: clean
-	-@rm -f *_ *~ *.bak *.log *.out *.tar *.orig *.optrpt
-	-@rm -rf html
-
-run: clean ${PROGRAM}
-#run: ${PROGRAM}
-#	time  ./${PROGRAM} ${PARAMS}
-	./${PROGRAM} ${PARAMS}
-
-# tar the current directory
-MY_DIR = `basename ${PWD}`
-tar: clean_all
-	@echo "Tar the directory: " ${MY_DIR}
-	@cd .. ;\
-	tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
-	cd ${MY_DIR}
-# 	tar cf `basename ${PWD}`.tar *
-#find . -size +10M > large_files
-#--exclude-from ${MY_DIR}/large_files
-
-zip: clean
-	@echo "Zip the directory: " ${MY_DIR}
-	@cd .. ;\
-	zip -r ${MY_DIR}.zip ${MY_DIR} *default.mk ;\
-	cd ${MY_DIR}
-
-doc:
-	doxygen Doxyfile
-
-#########################################################################
-.SUFFIXES: .f90
-
-.cpp.o:
-	$(CXX) -c $(CXXFLAGS) -o $@ $<
-#	$(CXX) -c $(CXXFLAGS) -o $@ $<  2>&1 | tee -a $<.log 
-#	$(CXX) -c $(CXXFLAGS) -o $@ $<  2>&1 | tee -a $(<:.cpp=.log)
-
-.c.o:
-	$(CC) -c $(CFLAGS) -o $@ $<
-
-.f.o:
-	$(F77) -c $(FFLAGS) -o $@ $<
-
-.f90.o:
-	$(F77) -c $(FFLAGS) -o $@ $<
-
-##################################################################################################
-#    some tools
-# Cache behaviour (CXXFLAGS += -g  tracks down to source lines; no -pg in linkflags)
-cache: ${PROGRAM}
-	valgrind --tool=callgrind --simulate-cache=yes ./$^  ${PARAMS}
-#	kcachegrind callgrind.out.<pid> &
-	kcachegrind `ls -1tr  callgrind.out.* |tail -1`
-
-# Check for wrong memory accesses, memory leaks, ...
-# use smaller data sets
-# no "-pg"  in compile/link options
-mem: ${PROGRAM}
-	valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^  ${PARAMS}
-# Graphical interface
-# valkyrie
-
-#  Simple run time profiling of your code
-#  CXXFLAGS += -g -pg
-#  LINKFLAGS += -pg
-prof: ${PROGRAM}
-	perf record ./$^  ${PARAMS}
-	perf report
-#	gprof -b ./$^ > gp.out
-#	kprof -f gp.out -p gprof &
-
-#  perf in Ubuntu 20.04:   https://www.howtoforge.com/how-to-install-perf-performance-analysis-tool-on-ubuntu-20-04/
-#  * install 
-#  * sudo vi /etc/sysctl.conf
-#                add   kernel.perf_event_paranoid = 0
-
-#Trace your heap:
-#> heaptrack ./main.GCC_
-#> heaptrack_gui heaptrack.main.GCC_.<pid>.gz
-heap: ${PROGRAM}
-	heaptrack ./$^  ${PARAMS}
-	heaptrack_gui  `ls -1tr  heaptrack.$^.* |tail -1` &
-
-codecheck: $(SOURCES)
-	cppcheck --enable=all --inconclusive --std=c++17 --suppress=missingIncludeSystem $^
-
-
-########################################################################
-#  get the detailed  status of all optimization flags
-info:
-	echo "detailed  status of all optimization flags"
-	$(CXX) --version
-	$(CXX) -Q $(CXXFLAGS) --help=optimizers
-	lscpu
-	inxi -C
-	lstopo
-
-# Excellent hardware info
-#	hardinfo
-# Life monitoring of CPU frequency etc.
-#	sudo i7z
-
-# Memory  consumption
-#	vmstat -at -SM 3
-#	xfce4-taskmanager
-
-
-# https://www.tecmint.com/check-linux-cpu-information/
-#https://www.tecmint.com/monitor-cpu-and-gpu-temperature-in-ubuntu/
-
-# Debugging:
-# https://wiki.archlinux.org/index.php/Debugging
--- a/ex1/D/ICC_default.mk
+++ b/ex1/D/ICC_default.mk
@ -1,137 +0,0 @@
-# Basic Defintions for using INTEL compiler suite sequentially
-# requires setting of COMPILER=ICC_
-
-#BINDIR = /opt/intel/bin/
-
-# special on my sony [GH]
-#BINDIR = /opt/save.intel/bin/
-# very special on my sony [GH]
-# FIND_LIBS = -L /opt/save.intel/composer_xe_2013.1.117/mkl/lib/intel64/libmkl_intel_lp64.so
-
-# Error with g++-4.8 using icpc14.0,x:
-#   find directory wherein bits/c++config.h is located
-#   'locate bits/c++config.h'
-#FOUND_CONFIG = -I/usr/include/x86_64-linux-gnu/c++/4.8
-
-
-CC	= ${BINDIR}icc
-CXX     = ${BINDIR}icpc
-F77	= ${BINDIR}ifort
-LINKER  = ${CXX}
-
-
-WARNINGS = -Wall -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -wd2015,2012 -wn3
-#    -Winline -Wredundant-decls -Wunreachable-code
-CXXFLAGS +=  -O3 -fargument-noalias -std=c++17 -DNDEBUG ${WARNINGS} -mkl ${FOUND_CONFIG}
-# profiling tools
-#CXXFLAGS  += -pg
-#LINKFLAGS += -pg
-# -vec-report=3
-# -qopt-report=5 -qopt-report-phase=vec
-# -guide -parallel
-# -guide-opts=string  -guide-par[=n]  -guide-vec[=n]
-# -auto-p32 -simd
-CXXFLAGS += -align
-
-# use MKL by INTEL
-#  https://software.intel.com/content/www/us/en/develop/documentation/mkl-linux-developer-guide/top/linking-your-application-with-the-intel-math-kernel-library/linking-quick-start/using-the-mkl-compiler-option.html
-# https://software.intel.com/content/www/us/en/develop/articles/intel-mkl-link-line-advisor.html
-# LINKFLAGS += -L${BINDIR}../composer_xe_2013.1.117/mkl/lib/intel64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
-#LINKFLAGS += -O3 -L/opt/intel/mkl/lib -mkl
-LINKFLAGS += -O3 -mkl=sequential
-
-# interprocedural optimization
-CXXFLAGS += -ipo
-LINKFLAGS += -ipo
-
-# annotated assembler file
-ANNOTED = -fsource-asm -S
-
-default:	${PROGRAM}
-
-${PROGRAM}:	${OBJECTS}
-	$(LINKER)  $^  ${LINKFLAGS} -o $@
-
-clean:
-	rm -f ${PROGRAM} ${OBJECTS}
-
-clean_all:: clean
-	@rm -f *_ *~ *.bak *.log *.out *.tar
-
-run: clean ${PROGRAM}
-	./${PROGRAM}
-
-# tar the current directory
-MY_DIR = `basename ${PWD}`
-tar: clean_all
-	@echo "Tar the directory: " ${MY_DIR}
-	@cd .. ;\
-	tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
-	cd ${MY_DIR}
-# 	tar cf `basename ${PWD}`.tar *
-
-doc:
-	doxygen Doxyfile
-
-#########################################################################
-
-.cpp.o:
-	$(CXX) -c $(CXXFLAGS) -o $@ $<
-
-.c.o:
-	$(CC) -c $(CFLAGS) -o $@ $<
-
-.f.o:
-	$(F77) -c $(FFLAGS) -o $@ $<
-
-##################################################################################################
-# #    some tools
-# # Cache behaviour (CXXFLAGS += -g  tracks down to source lines)
-# cache: ${PROGRAM}
-# 	valgrind --tool=callgrind --simulate-cache=yes ./$^
-# #	kcachegrind callgrind.out.<pid> &
-#
-# # Check for wrong memory accesses, memory leaks, ...
-# # use smaller data sets
-# mem: ${PROGRAM}
-# 	valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
-#
-# #  Simple run time profiling of your code
-# #  CXXFLAGS += -g -pg
-# #  LINKFLAGS += -pg
-# prof: ${PROGRAM}
-# 	./$^
-# 	gprof -b ./$^ > gp.out
-# #	kprof -f gp.out -p gprof &
-#
-
-
-mem: inspector
-prof: amplifier
-cache: amplifier
-
-gap_par_report:
-	${CXX}  -c -guide -parallel $(SOURCES) 2> gap.txt
-
-# GUI for performance report
-amplifier: ${PROGRAM}
-	echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
-#	alternatively to the solution above:
-            #edit file  /etc/sysctl.d/10-ptrace.conf     and set variable   kernel.yama.ptrace_scope   variable to 0 .
-	amplxe-gui &
-
-# GUI for Memory and Thread analyzer (race condition)
-inspector: ${PROGRAM}
-	echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
-	inspxe-gui &
-
-advisor:
-	echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
-	advixe-gui &
-
-icc-info:
-	icpc -# main.cpp
-
-
-
-
--- a/ex1/D/ONEAPI_default.mk
+++ b/ex1/D/ONEAPI_default.mk
@ -1,176 +0,0 @@
-# Basic Defintions for using INTEL compiler suite sequentially
-# requires setting of COMPILER=ONEAPI_
-
-#         https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onemkl/link-line-advisor.html
-# requires
-# source /opt/intel/oneapi/setvars.sh
-# on  AMD:    export MKL_DEBUG_CPU_TYPE=5
-
-#BINDIR = /opt/intel/oneapi/compiler/latest/linux/bin/
-#MKL_ROOT = /opt/intel/oneapi/mkl/latest/
-#export KMP_AFFINITY=verbose,compact
-
-CC	= ${BINDIR}icc
-CXX     = ${BINDIR}dpcpp
-F77	= ${BINDIR}ifort
-LINKER  = ${CXX}
-
-## Compiler flags
-WARNINGS = -Wall -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -pedantic
-WARNINGS += -Wpessimizing-move -Wredundant-move
-#-wd2015,2012,2014 -wn3
-#    -Winline -Wredundant-decls -Wunreachable-code
-# -qopt-subscript-in-range
-# -vec-threshold0
-
-CXXFLAGS += -O3 -std=c++17  ${WARNINGS}
-#CXXFLAGS += -DMKL_ILP64  -I"${MKLROOT}/include"
-#CXXFLAGS += -DMKL_ILP32  -I"${MKLROOT}/include"
-LINKFLAGS += -O3
-
-# interprocedural optimization
-CXXFLAGS  += -ipo
-LINKFLAGS += -ipo
-LINKFLAGS += -flto
-
-# annotated Assembler file
-ANNOTED = -fsource-asm -S 
-
-#architecture
-CPU  = -march=core-avx2
-#CPU += -mtp=zen
-# -xCORE-AVX2
-# -axcode COMMON-AVX512 -axcode MIC-AVX512 -axcode CORE-AVX512 -axcode CORE-AVX2
-CXXFLAGS  += ${CPU}
-LINKFLAGS += ${CPU}
-
-# use MKL by INTEL
-# https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onemkl/link-line-advisor.html
-# sequential MKL
-#                              use the 32 bit interface (LP64) instead of 64 bit interface (ILP64)
-CXXFLAGS +=  -qmkl=sequential  -UMKL_ILP64
-LINKFLAGS += -O3 -qmkl=sequential -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
-#LINKFLAGS += -O3 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
-
-# shared libs:  https://aur.archlinux.org/packages/intel-oneapi-compiler-static
-#     install intel-oneapi-compiler-static   
-# or 
-LINKFLAGS += -shared-intel
-
-
-OPENMP = -qopenmp
-CXXFLAGS += ${OPENMP}
-LINKFLAGS += ${OPENMP}
-
-
-# profiling tools
-#CXXFLAGS  += -pg
-#LINKFLAGS += -pg
-# -vec-report=3
-# -qopt-report=5 -qopt-report-phase=vec -qopt-report-phase=openmp
-# -guide -parallel
-# -guide-opts=string  -guide-par[=n]  -guide-vec[=n]
-# -auto-p32 -simd
-
-# Reports: https://software.intel.com/en-us/articles/getting-the-most-out-of-your-intel-compiler-with-the-new-optimization-reports
-#CXXFLAGS +=  -qopt-report=5 -qopt-report-phase=vec,par
-#CXXFLAGS +=  -qopt-report=5 -qopt-report-phase=cg
-# Redirect report from *.optrpt to stderr
-#    -qopt-report-file=stderr
-# Guided paralellization
-#    -guide -parallel
-#    -guide-opts=string  -guide-par[=n]  -guide-vec[=n]
-#    -auto-p32 -simd
-
-## run time checks
-# https://www.intel.com/content/www/us/en/develop/documentation/fortran-compiler-oneapi-dev-guide-and-reference/top/compiler-reference/compiler-options/offload-openmp-and-parallel-processing-options/par-runtime-control-qpar-runtime-control.html
-
-
-default:	${PROGRAM}
-
-${PROGRAM}:	${OBJECTS}
-	$(LINKER)  $^  ${LINKFLAGS} -o $@
-
-clean:
-	rm -f ${PROGRAM} ${OBJECTS} *.optrpt
-
-clean_all:: clean
-	@rm -f *_ *~ *.bak *.log *.out *.tar
-
-run: clean ${PROGRAM}
-	./${PROGRAM}
-
-# tar the current directory
-MY_DIR = `basename ${PWD}`
-tar: clean_all
-	@echo "Tar the directory: " ${MY_DIR}
-	@cd .. ;\
-	tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
-	cd ${MY_DIR}
-# 	tar cf `basename ${PWD}`.tar *
-
-doc:
-	doxygen Doxyfile
-
-#########################################################################
-
-.cpp.o:
-	$(CXX) -c $(CXXFLAGS) -o $@ $<
-
-.c.o:
-	$(CC) -c $(CFLAGS) -o $@ $<
-
-.f.o:
-	$(F77) -c $(FFLAGS) -o $@ $<
-
-##################################################################################################
-#    some tools
-# Cache behaviour (CXXFLAGS += -g  tracks down to source lines)
-# https://software.intel.com/content/www/us/en/develop/documentation/vtune-help/top/analyze-performance/microarchitecture-analysis-group/memory-access-analysis.html
-
-mem: inspector
-prof: vtune
-cache: inspector
-
-gap_par_report:
-	${CXX}  -c -guide -parallel $(SOURCES) 2> gap.txt
-
-# GUI for performance report
-amplifier: ${PROGRAM}
-	echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
-	echo 0 | sudo tee /proc/sys/kernel/perf_event_paranoid
-	amplxe-gui &
-
-# GUI for Memory and Thread analyzer (race condition)
-inspector: ${PROGRAM}
-	echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
-#	inspxe-gui &
-	vtune-gui ./${PROGRAM} &
-
-advisor:
-	source /opt/intel/oneapi/advisor/2021.2.0/advixe-vars.sh
-#	/opt/intel/oneapi/advisor/latest/bin64/advixe-gui &
-	advisor --collect=survey ./${PROGRAM} 
-#	advisor --collect=roofline ./${PROGRAM} 
-	advisor --report=survey --project-dir=./ src:r=./ --format=csv --report-output=./out/survey.csv
-
-vtune:
-	echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
-#	https://software.intel.com/en-us/articles/intel-advisor-2017-update-1-what-s-new
-	export ADVIXE_EXPERIMENTAL=roofline
-	vtune -collect hotspots ./${PROGRAM}
-	vtune -report hotspots -r r000hs > vtune.out
-#	vtune-gui ./${PROGRAM} &	
-
-icc-info:
-	icpc -# main.cpp
-
-# MKL on AMD
-# https://www.computerbase.de/2019-11/mkl-workaround-erhoeht-leistung-auf-amd-ryzen/
-#
-# https://sites.google.com/a/uci.edu/mingru-yang/programming/mkl-has-bad-performance-on-an-amd-cpu
-# export MKL_DEBUG_CPU_TYPE=5
-# export MKL_NUM_THRAEDS=1
-# export MKL_DYNAMIC=false
-#  on Intel compiler
-# http://publicclu2.blogspot.com/2013/05/intel-complier-suite-reference-card.html
--- a/ex1/D/PGI_default.mk
+++ b/ex1/D/PGI_default.mk
@ -1,93 +0,0 @@
-# Basic Defintions for using PGI-compiler suite sequentially
-# requires setting of COMPILER=PGI_
-# OPTIRUN = optirun
-
-
-CC	= pgcc
-CXX     = pgc++
-F77	= pgfortran
-LINKER  = ${CXX}
-
-# on mephisto:
-#CXXFLAGS  += -I/share/apps/atlas/include
-#LINKFLAGS += -L/share/apps/atlas/lib
-#LINKFLAGS   += -lcblas -latlas
-
-#LINKFLAGS   += -lblas
-# Der <cblas.h> Header muss mit extern "C" versehen werden, damit g++ alles findet.
-
-WARNINGS = -Minform=warn
-# -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -W -Wfloat-equal -Wshadow  -Wredundant-decls
-#           -pedantic -Wunreachable-code -Wextra -Winline
-#  -Wunreachable-code
-
-#PGI_PROFILING = -Minfo=ccff,loop,vect,opt,intensity,mp,accel
-PGI_PROFILING = -Minfo=ccff,accel,ipa,loop,lre,mp,opt,par,unified,vect,intensity
-# -Minfo
-# -Mprof=time
-# -Mprof=lines
-#        take care with option      -Msafeptr
-CXXFLAGS += -O3 -std=c++17  ${WARNINGS}
-#CXXFLAGS += -O3 -std=c++11  -DNDEBUG ${PGI_PROFILING} ${WARNINGS}
-#  -fastsse  -fargument-noalias ${WARNINGS}  -msse3 -vec-report=3
-
-default:	${PROGRAM}
-
-${PROGRAM}:	${OBJECTS}
-	$(LINKER)  $^  ${LINKFLAGS} -o $@
-
-clean:
-	@rm -f ${PROGRAM} ${OBJECTS}
-
-clean_all:: clean
-	@rm -f *_ *~ *.bak *.log *.out *.tar
-
-run: clean ${PROGRAM}
-	./${PROGRAM}
-
-# tar the current directory
-MY_DIR = `basename ${PWD}`
-tar: clean_all
-	@echo "Tar the directory: " ${MY_DIR}
-	@cd .. ;\
-	tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
-	cd ${MY_DIR}
-# 	tar cf `basename ${PWD}`.tar *
-
-doc:
-	doxygen Doxyfile
-
-#########################################################################
-
-.cpp.o:
-	$(CXX) -c $(CXXFLAGS) -o $@ $<
-
-.c.o:
-	$(CC) -c $(CFLAGS) -o $@ $<
-
-.f.o:
-	$(F77) -c $(FFLAGS) -o $@ $<
-
-##################################################################################################
-# #    some tools
-# #  Simple run time profiling of your code
-# #  CXXFLAGS += -g -pg
-# #  LINKFLAGS += -pg
-
-
-# Profiling options PGI, see: pgcollect -help
-# CPU_PROF = -allcache
-CPU_PROF = -time
-# GPU_PROF = -cuda=gmem,branch,cc13 -cudainit
-#GPU_PROF = -cuda=branch:cc20
-#
-PROF_FILE = pgprof.out
-
-cache: prof
-
-prof: ${PROGRAM}
-	${OPTIRUN} ${BINDIR}pgcollect $(CPU_PROF) ./$^
-	${OPTIRUN} ${BINDIR}pgprof -exe ./$^  $(PROF_FILE) &
-
-info:
-	pgaccelinfo -v
--- a/ex1/D/skalar_stl/Doxyfile
+++ b/ex1/D/skalar_stl/Doxyfile
--- a/ex1/D/skalar_stl/Makefile
+++ b/ex1/D/skalar_stl/Makefile
@ -1,30 +0,0 @@
-#
-# use GNU-Compiler tools
-COMPILER=GCC_
-# alternatively from the shell
-# export COMPILER=GCC_
-# or, alternatively from the shell
-# make COMPILER=GCC_
-
-# use Intel compilers
-#COMPILER=ICC_
-
-# use PGI compilers
-# COMPILER=PGI_
-
-
-SOURCES = main.cpp mylib.cpp
-OBJECTS = $(SOURCES:.cpp=.o)
-
-PROGRAM	= main.${COMPILER}
-
-# uncomment the next to lines for debugging and detailed performance analysis
-CXXFLAGS += -g
-LINKFLAGS += -g
-# do not use -pg with PGI compilers
-
-ifndef COMPILER
-  COMPILER=GCC_
-endif
-
-include ../${COMPILER}default.mk
--- a/ex1/D/skalar_stl/main.cpp
+++ b/ex1/D/skalar_stl/main.cpp
@ -1,124 +0,0 @@
-#include "mylib.h"
-#include <cassert>
-#include <chrono>           // timing
-#include <cmath>            // sqrt()
-#include <cstdlib>          // atoi()
-#include <cstring>          // strncmp()
-#include <ctime>
-#include <iostream>
-#include <sstream>
-using namespace std;
-using namespace std::chrono;  // timing
-
-int main(int argc, char **argv)
-{
-    int const NLOOPS = 50;        // chose a value such that the benchmark runs at least 10 sec.
-    unsigned int N = 50000001;
-//##########################################################################
-//   Read Paramater from command line  (C++ style)
-    cout << "Checking command line parameters for: -n <number> " << endl;
-    for (int i = 1; i < argc; i++)
-    {
-        cout << " arg[" << i << "] = " << argv[i] << endl;
-        if (std::strncmp(argv[i], "-n", 2) == 0 && i + 1 < argc) // found "-n" followed by another parameter
-        {
-            N = static_cast<unsigned int>(atoi(argv[i + 1]));
-        }
-        else
-        {
-            cout << "Corect call: " << argv[0] << " -n  <number>\n";
-        }
-    }
-
-    cout << "\nN = " << N << endl;
-
-//##########################################################################
-//  Memory allocation
-    cout << "Memory allocation\n";
-
-    vector<double> x(N), y(N);
-
-    cout.precision(2);
-    cout << 2.0 * N *sizeof(x[0]) / 1024 / 1024 / 1024 << " GByte Memory allocated\n";
-    cout.precision(6);
-
-//##########################################################################
-//  Data initialization
-//  Special:  x_i = i+1;  y_i = 1/x_i  ==> <x,y> == N
-    for (unsigned int i = 0; i < N; ++i)
-    {
-        x[i] = i + 1;
-        y[i] = 1.0 / pow(x[i], 2);
-    }
-
-//##########################################################################
-    cout << "\nStart Benchmarking Normal sum\n";
-
-// Do calculation
-    auto t1 = system_clock::now(); // start timer
-    double sk1(0.0),ss(0.0);
-    for (int i = 0; i < NLOOPS; ++i)
-    {
-        sk1 = normal_sum(y);
-        ss += sk1;                   // prevents the optimizer from removing unused calculation results.
-    }
-
-    auto t2 = system_clock::now();  // stop timer
-    auto duration = duration_cast<microseconds>(t2 - t1);        // duration in microseconds
-    double t_diff = static_cast<double>(duration.count()) / 1e6; // overall duration in seconds
-    t_diff = t_diff/NLOOPS;     
-
-
-// Print result
-    printf("\nSum = %.16f\n", sk1);
-    
-//##########################################################################
-
-// Timings  and Performance
-    cout << endl;
-    cout.precision(2);
-    cout << "Timing in sec. : " << t_diff << endl;
-    cout << "GFLOPS         : " << 2.0 * N / t_diff / 1024 / 1024 / 1024 << endl;
-    cout << "GiByte/s       : " << 2.0 * N / t_diff / 1024 / 1024 / 1024 * sizeof(x[0]) << endl;
-
-//##########################################################################
-
-    cout << "\nStart Benchmarking Kahan summation\n";
-
-// Do calculation
-    t1 = system_clock::now(); // start timer
-    double sk2(0.0),sss(0.0);
-    for (int i = 0; i < NLOOPS; ++i)
-    {
-        sk2 = Kahan_skalar(y);
-        sss += sk2;                   // prevents the optimizer from removing unused calculation results.
-    }
-
-    t2 = system_clock::now();  // stop timer
-    duration = duration_cast<microseconds>(t2 - t1);        // duration in microseconds
-    t_diff = static_cast<double>(duration.count()) / 1e6; // overall duration in seconds
-    t_diff = t_diff/NLOOPS;                                      // duration per loop seconds
-                                 // duration per loop seconds
-
-// Print result
-    printf("\nSum = %.16f\n", sk2);
-    
-
-//##########################################################################
-
-// Timings  and Performance
-    cout << endl;
-    cout.precision(2);
-    cout << "Timing in sec. : " << t_diff << endl;
-    cout << "GFLOPS         : " << 2.0 * N / t_diff / 1024 / 1024 / 1024 << endl;
-    cout << "GiByte/s       : " << 2.0 * N / t_diff / 1024 / 1024 / 1024 * sizeof(x[0]) << endl;
-
-//##########################################################################
-
-// Print limit
-    printf("\nLimit = %.16f\n\n", pow(M_PI,2) / 6.0f);
-
-//##########################################################################
-
-    return 0;
-}  // memory for x and y will be deallocated by their destructors
--- a/ex1/D/skalar_stl/mylib.cpp
+++ b/ex1/D/skalar_stl/mylib.cpp
@ -1,29 +0,0 @@
-#include "mylib.h"
-#include <cassert>       // assert()
-#include <cmath>
-#include <vector>
-
-using namespace std;
-
-long double Kahan_skalar(vector<double> const &input)
-{
-    long double sum = 0.0;
-    long double c = 0.0;
-
-    for (long unsigned int i=0; i<input.size(); i++){
-        long double y = input[i] - c;
-        long double t = sum + y;
-        c = (t-sum) - y;
-        sum = t;
-    }
-    return sum;
-}
-
-long double normal_sum(vector<double> const &input)
-{
-    long double sum = 0.0;
-    for (long unsigned int i=0; i<input.size(); i++){
-        sum += input[i];
-    }
-    return sum;
-}
--- a/ex1/D/skalar_stl/mylib.h
+++ b/ex1/D/skalar_stl/mylib.h
@ -1,6 +0,0 @@
-#pragma once
-#include <vector>
-
-long double Kahan_skalar(std::vector<double> const &input);
-
-long double normal_sum(std::vector<double> const &input);
--- a/ex1/D/skalar_stl/small_Doxyfile
+++ b/ex1/D/skalar_stl/small_Doxyfile
--- a/ex1/code/Makefile
+++ b/ex1/code/Makefile
@ -0,0 +1,30 @@
+PROGRAM	= main
+
+SOURCES = main.cpp mylib.cpp
+OBJECTS = ${SOURCES:.cpp=.o}
+
+CXX     = g++
+LINKER  = g++
+
+WARNINGS = -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow \
+           -Wredundant-decls -fmax-errors=1
+
+CXXFLAGS   = -g -flto -O3 -ffast-math -march=native ${WARNINGS}
+LINKFLAGS  = -g -flto -O3
+
+
+all: ${PROGRAM}
+
+%.o: %.cpp
+	${CXX} ${CXXFLAGS} -c $< -o $@
+
+${PROGRAM}:	${OBJECTS}
+	$(LINKER) ${OBJECTS} ${LINKFLAGS} -o ${PROGRAM}
+
+clean:
+	rm -f ${OBJECTS} ${PROGRAM}
+
+
+run: ${PROGRAM}
+# run: clean ${PROGRAM}
+	./${PROGRAM}
--- a/ex1/ABCEFG/skalar_stl/data_1.txt
+++ b/ex1/ABCEFG/skalar_stl/data_1.txt
--- a/ex1/ABCEFG/skalar_stl/main.cpp
+++ b/ex1/ABCEFG/skalar_stl/main.cpp
@ -1,3 +1,7 @@
+// g++ *.cpp -o main
+// g++ -g -ffast-math -O3 -march=native -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -Wredundant-decls -fmax-errors=1 *.cpp -o main
+
+
 #include "mylib.h"
 #include "timing.h"

@ -12,6 +16,7 @@
 #include <list>
 #include <stdexcept>
 using namespace std;
+using namespace std::chrono;  // timing

 static void task_a() {
    printf("\n\n-------------- Task A --------------\n\n");
@ -98,11 +103,106 @@ static void task_c() {
        }
 }

+#ifdef __GNUC__
+#pragma GCC push_options
+#pragma GCC optimize("O1")
+#endif
+
 static void task_d() {
    printf("\n\n-------------- Task D --------------\n\n");
-    printf("See folder D.\n");
+    int const NLOOPS = 25;        // chose a value such that the benchmark runs at least 10 sec.
+    unsigned int N = 50000001;
+    //##########################################################################
+    //  Memory allocation
+        cout << "Memory allocation\n";
+
+        vector<double> x(N), y(N);
+
+        cout.precision(2);
+        cout << 2.0 * N *sizeof(x[0]) / 1024 / 1024 / 1024 << " GByte Memory allocated\n";
+        cout.precision(6);
+
+    //##########################################################################
+    //  Data initialization
+    //  Special:  x_i = i+1;  y_i = 1/x_i  ==> <x,y> == N
+        for (unsigned int i = 0; i < N; ++i)
+        {
+            x[i] = i + 1;
+            y[i] = 1.0 / pow(x[i], 2);
+        }
+
+    //##########################################################################
+        cout << "\nStart Benchmarking Normal sum\n";
+
+    // Do calculation
+        auto t1 = system_clock::now(); // start timer
+        double sk1(0.0),ss(0.0);
+        for (int i = 0; i < NLOOPS; ++i)
+        {
+            sk1 = normal_sum(y);
+            ss += sk1;                   // prevents the optimizer from removing unused calculation results.
+        }
+
+        auto t2 = system_clock::now();  // stop timer
+        auto duration = duration_cast<microseconds>(t2 - t1);        // duration in microseconds
+        double t_diff = static_cast<double>(duration.count()) / 1e6; // overall duration in seconds
+        t_diff = t_diff/NLOOPS;     
+
+
+    // Print result
+        printf("\nSum = %.16f\n", sk1);
+        
+    //##########################################################################
+
+    // Timings  and Performance
+        cout << endl;
+        cout.precision(2);
+        cout << "Timing in sec. : " << t_diff << endl;
+        cout << "GFLOPS         : " << 2.0 * N / t_diff / 1024 / 1024 / 1024 << endl;
+        cout << "GiByte/s       : " << 2.0 * N / t_diff / 1024 / 1024 / 1024 * sizeof(x[0]) << endl;
+
+    //##########################################################################
+
+        cout << "\nStart Benchmarking Kahan summation\n";
+
+    // Do calculation
+        t1 = system_clock::now(); // start timer
+        double sk2(0.0),sss(0.0);
+        for (int i = 0; i < NLOOPS; ++i)
+        {
+            sk2 = Kahan_skalar(y);
+            sss += sk2;                   // prevents the optimizer from removing unused calculation results.
+        }
+
+        t2 = system_clock::now();  // stop timer
+        duration = duration_cast<microseconds>(t2 - t1);        // duration in microseconds
+        t_diff = static_cast<double>(duration.count()) / 1e6; // overall duration in seconds
+        t_diff = t_diff/NLOOPS;                                      // duration per loop seconds
+                                    // duration per loop seconds
+
+    // Print result
+        printf("\nSum = %.16f\n", sk2);
+        
+
+    //##########################################################################
+
+    // Timings  and Performance
+        cout << endl;
+        cout.precision(2);
+        cout << "Timing in sec. : " << t_diff << endl;
+        cout << "GFLOPS         : " << 2.0 * N / t_diff / 1024 / 1024 / 1024 << endl;
+        cout << "GiByte/s       : " << 2.0 * N / t_diff / 1024 / 1024 / 1024 * sizeof(x[0]) << endl;
+
+    //##########################################################################
+
+    // Print limit
+    printf("\nLimit = %.16f\n\n", pow(M_PI,2) / 6.0f);
 }

+#ifdef __GNUC__
+#pragma GCC pop_options
+#endif
+
 static void task_e() {
    printf("\n\n-------------- Task E --------------\n\n");

--- a/ex1/ABCEFG/skalar_stl/mayer_primes.h
+++ b/ex1/ABCEFG/skalar_stl/mayer_primes.h
--- a/ex1/ABCEFG/skalar_stl/mylib.cpp
+++ b/ex1/ABCEFG/skalar_stl/mylib.cpp
@ -126,6 +126,40 @@ double formula(int n)
    return sum;
 }

+// -------------- Task D --------------
+
+#ifdef __GNUC__
+#pragma GCC push_options
+#pragma GCC optimize("O1")
+#endif
+
+long double Kahan_skalar(vector<double> const &input)
+{
+    long double sum = 0.0;
+    long double c = 0.0;
+
+    for (long unsigned int i=0; i<input.size(); i++){
+        long double y = input[i] - c;
+        long double t = sum + y;
+        c = (t-sum) - y;
+        sum = t;
+    }
+    return sum;
+}
+
+long double normal_sum(vector<double> const &input)
+{
+    long double sum = 0.0;
+    for (long unsigned int i=0; i<input.size(); i++){
+        sum += input[i];
+    }
+    return sum;
+}
+
+#ifdef __GNUC__
+#pragma GCC pop_options
+#endif
+
 // -------------- Task E --------------

 void insert_into_vector(vector<int>& vec, int n) {
--- a/ex1/ABCEFG/skalar_stl/mylib.h
+++ b/ex1/ABCEFG/skalar_stl/mylib.h
@ -61,6 +61,12 @@ long int sum_of_spec(int n);
 // Sums up all positive integers less or equal n which are multiples of 3 or of 5 (including or!) by inclusion-exclusion principle. 
 double formula(int n);

+// -------------- Task D --------------
+
+long double Kahan_skalar(std::vector<double> const &input);
+
+long double normal_sum(std::vector<double> const &input);
+
 // -------------- Task E --------------

 // Inserts n random numbers into sorted vector v such that v remains sorted.
--- a/ex1/ABCEFG/skalar_stl/timing.h
+++ b/ex1/ABCEFG/skalar_stl/timing.h
--- a/ex1/ex1_results.txt
+++ b/ex1/ex1_results.txt
@ -0,0 +1,104 @@
+
+
+-------------- Task A --------------
+
+means(1,4,16)          = (7.000000, 4.000000, 2.285714)
+means(2,3,5)           = (3.333333, 3.107233, 2.903226)
+means(1000,4000,16000) = (7000.000000, 4000.000000, 2285.714286)
+means(4,8,15,16,23,42) = (18.000000, 13.965497, 10.499524)
+
+
+-------------- Task B --------------
+
+Minimum: 1.000000
+Maximum: 1000.000000
+Arithmetic: 498.184000
+Geometric: 364.411859
+Harmonic: 95.685690
+Deviation: 287.905085
+
+
+-------------- Task C --------------
+
+n = 15
+For-loop funtion: result = 60 | time = 0.018645 milliseconds
+Formula funtion:  result = 60 | time = 0.000038 milliseconds
+n = 1001
+For-loop funtion: result = 234168 | time = 1.219296 milliseconds
+Formula funtion:  result = 234168 | time = 0.000039 milliseconds
+n = 1432987
+For-loop funtion: result = 479139074204 | time = 1625.893479 milliseconds
+Formula funtion:  result = 479139074204 | time = 0.000106 milliseconds
+
+
+-------------- Task D --------------
+
+Memory allocation
+0.75 GByte Memory allocated
+
+Start Benchmarking Normal sum
+
+Sum = 1.6449340468482272
+
+Timing in sec. : 0.36
+GFLOPS         : 0.26
+GiByte/s       : 2.1
+
+Start Benchmarking Kahan summation
+
+Sum = 1.6449340468482272
+
+Timing in sec. : 0.31
+GFLOPS         : 0.3
+GiByte/s       : 2.4
+
+Limit = 1.6449340668482264
+
+
+
+-------------- Task E --------------
+
+Vector insertion time for n = 100: 26 microseconds.
+List insertion time for   n = 100: 45 microseconds.
+Vector insertion time for n = 1000: 121 microseconds.
+List insertion time for   n = 1000: 4059 microseconds.
+Vector insertion time for n = 10000: 5932 microseconds.
+List insertion time for   n = 10000: 568042 microseconds.
+
+
+-------------- Task F --------------
+
+single_goldbach(k = 694) = 19
+
+Decompositions for k = 694: 3 + 691, 11 + 683, 17 + 677, 41 + 653, 47 + 647, 53 + 641, 101 + 593, 107 + 587, 131 + 563, 137 + 557, 173 + 521, 191 + 503, 227 + 467, 233 + 461, 251 + 443, 263 + 431, 293 + 401, 311 + 383, 347 + 347, 
+
+NOTE: For n=2'000'000 it will take ~30 seconds.
+count_goldbach(n = 10000): k = 9240, decompositions = 329, time elapsed: 1.055767 milliseconds
+count_goldbach(n = 100000): k = 99330, decompositions = 2168, time elapsed: 42.025990 milliseconds
+count_goldbach(n = 400000): k = 390390, decompositions = 7094, time elapsed: 530.372813 milliseconds
+count_goldbach(n = 1000000): k = 990990, decompositions = 15594, time elapsed: 3973.183267 milliseconds
+
+
+-------------- Task G --------------
+
+M = 
+0.000045 0.000508 0.003346 
+0.000508 0.005754 0.037929 
+0.003346 0.037929 0.250000 
+0.006185 0.070104 0.462071 
+0.006648 0.075350 0.496654 
+
+u       = 1 2 3 
+M * u   = 0.011099 0.1258 0.8292 1.5326 1.6473 
+v       = -1 2 -3 4 -5 
+M^T * v = -0.017568 -0.19912 -1.3125 
+
+Results for 3000x3000 matrix vector multiplication doing 100 loops
+Time for initialization: 0.392896 seconds.
+Time for Mult          : 0.996829 seconds, 0.009968 per loop.
+Time for MultT         : 6.502881 seconds, 0.065029 per loop.
+
+Results for 3000x3000 matrix vector multiplication doing 100 loops taking advantage of tensor product structure of the matrix
+Time for initialization: 0.000088 seconds.
+Time for Mult          : 0.000164 seconds, 0.000002 per loop.
+Time for MultT         : 0.000197 seconds, 0.000002 per loop.
--- a/ex1/ex1_sheet.pdf
+++ b/ex1/ex1_sheet.pdf