diff --git a/Sheet5/Ex2_Second_Attempt/CLANG_default.mk b/Sheet5/Ex2_Second_Attempt/CLANG_default.mk new file mode 100644 index 0000000..4bc290d --- /dev/null +++ b/Sheet5/Ex2_Second_Attempt/CLANG_default.mk @@ -0,0 +1,123 @@ +# Basic Defintions for using GNU-compiler suite sequentially +# requires setting of COMPILER=CLANG_ + +#CLANGPATH=//usr/lib/llvm-10/bin/ +CC = ${CLANGPATH}clang +CXX = ${CLANGPATH}clang++ +#CXX = ${CLANGPATH}clang++ -lomptarget -fopenmp-targets=nvptx64-nvidia-cuda --cuda-path=/opt/pgi/linux86-64/2017/cuda/8.0 +#F77 = gfortran +LINKER = ${CXX} + +#http://clang.llvm.org/docs/UsersManual.html#options-to-control-error-and-warning-messages +WARNINGS += -Weverything -Wno-c++98-compat -Wno-sign-conversion -Wno-date-time -Wno-shorten-64-to-32 -Wno-padded -ferror-limit=1 +WARNINGS += -Wdocumentation -Wconversion -Wshadow -Wfloat-conversion -pedantic +#-fsyntax-only -Wdocumentation -Wconversion -Wshadow -Wfloat-conversion -pedantic + +CXXFLAGS += -O3 -std=c++17 -ferror-limit=1 ${WARNINGS} +# don't use -Ofast +# -ftrapv +LINKFLAGS += -O3 + +# different libraries in Ubuntu or manajaró +ifndef UBUNTU +UBUNTU=1 +endif + +# BLAS, LAPACK +LINKFLAGS += -llapack -lblas +# -lopenblas +ifeq ($(UBUNTU),1) +# ubuntu +else +# on archlinux +LINKFLAGS += -lcblas +endif + +# interprocedural optimization +CXXFLAGS += -flto +LINKFLAGS += -flto + +# very good check +# http://clang.llvm.org/extra/clang-tidy/ +# good check, see: http://llvm.org/docs/CodingStandards.html#include-style +SWITCH_OFF=,-readability-magic-numbers,-readability-redundant-control-flow,-readability-redundant-member-init +SWITCH_OFF+=,-readability-redundant-member-init,-readability-isolate-declaration +#READABILITY=,readability*${SWITCH_OFF} +#TIDYFLAGS = -checks=llvm-*,-llvm-header-guard -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp" +TIDYFLAGS = -checks=llvm-*,-llvm-header-guard${READABILITY} -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp" +#TIDYFLAGS += -checks='modernize* +# ??? +#TIDYFLAGS = -checks='cert*' -header-filter=.* +# MPI checks ?? +#TIDYFLAGS = -checks='mpi*' +# ?? +#TIDYFLAGS = -checks='performance*' -header-filter=.* +#TIDYFLAGS = -checks='portability-*' -header-filter=.* +#TIDYFLAGS = -checks='readability-*' -header-filter=.* + +default: ${PROGRAM} + +${PROGRAM}: ${OBJECTS} + $(LINKER) $^ ${LINKFLAGS} -o $@ + +clean: + @rm -f ${PROGRAM} ${OBJECTS} + +clean_all:: clean + @rm -f *_ *~ *.bak *.log *.out *.tar + +codecheck: tidy_check +tidy_check: + clang-tidy ${SOURCES} ${TIDYFLAGS} -- ${SOURCES} +# see also http://clang-developers.42468.n3.nabble.com/Error-while-trying-to-load-a-compilation-database-td4049722.html + +run: clean ${PROGRAM} +# time ./${PROGRAM} ${PARAMS} + ./${PROGRAM} ${PARAMS} + +# tar the current directory +MY_DIR = `basename ${PWD}` +tar: clean_all + @echo "Tar the directory: " ${MY_DIR} + @cd .. ;\ + tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\ + cd ${MY_DIR} +# tar cf `basename ${PWD}`.tar * + +doc: + doxygen Doxyfile + +######################################################################### + +.cpp.o: + $(CXX) -c $(CXXFLAGS) -o $@ $< + +.c.o: + $(CC) -c $(CFLAGS) -o $@ $< + +.f.o: + $(F77) -c $(FFLAGS) -o $@ $< + +################################################################################################## +# some tools +# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags) +cache: ${PROGRAM} + valgrind --tool=callgrind --simulate-cache=yes ./$^ ${PARAMS} +# kcachegrind callgrind.out. & + kcachegrind `ls -1tr callgrind.out.* |tail -1` + +# Check for wrong memory accesses, memory leaks, ... +# use smaller data sets +mem: ${PROGRAM} + valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^ ${PARAMS} + +# Simple run time profiling of your code +# CXXFLAGS += -g -pg +# LINKFLAGS += -pg +prof: ${PROGRAM} + perf record ./$^ ${PARAMS} + perf report +# gprof -b ./$^ > gp.out +# kprof -f gp.out -p gprof & + +codecheck: tidy_check diff --git a/Sheet5/Ex2_Second_Attempt/GCC_AMD32_default.mk b/Sheet5/Ex2_Second_Attempt/GCC_AMD32_default.mk new file mode 100644 index 0000000..a911b6b --- /dev/null +++ b/Sheet5/Ex2_Second_Attempt/GCC_AMD32_default.mk @@ -0,0 +1,130 @@ +# Basic Defintions for using GNU-compiler suite sequentially +# requires setting of COMPILER=GCC_ + +CC = gcc +CXX = g++ +F77 = gfortran +LINKER = ${CXX} + +# on mephisto: +#CXXFLAGS += -I/share/apps/atlas/include +#LINKFLAGS += -L/share/apps/atlas/lib +#LINKFLAGS += -lcblas -latlas + +#LINKFLAGS += -lblas +# Der Header muss mit extern "C" versehen werden, damit g++ alles findet. + + +#WARNINGS = -pedantic -pedantic-errors -Wall -Wextra -Werror -Wconversion -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow +WARNINGS = -pedantic -Wall -Wextra -Wconversion -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow \ + -Wredundant-decls -Winline -fmax-errors=1 +# -Wunreachable-code +# -Wunreachable-code +CXXFLAGS += -ffast-math -O3 -march=native -std=c++17 ${WARNINGS} +#CXXFLAGS += -Ofast -funroll-all-loops -std=c++17 ${WARNINGS} +#-msse3 +# -ftree-vectorizer-verbose=2 -DNDEBUG +# -ftree-vectorizer-verbose=5 +# -ftree-vectorize -fdump-tree-vect-blocks=foo.dump -fdump-tree-pre=stderr + +# CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details +# CFLAGS = -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2 +# #CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp +# FFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp +# LFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp +LINKFLAGS += -O3 + +# BLAS, LAPACK +OPENBLAS_DIR = /opt/openblas_GCCseq +#OPENBLAS_DIR = /opt/openblas_GCC +OPENBLAS_LIBDIR = ${OPENBLAS_DIR}/lib +OPENBLAS_INCDIR = ${OPENBLAS_DIR}/include +CXXFLAGS += -I${OPENBLAS_INCDIR} +LINKFLAGS += -L${OPENBLAS_LIBDIR} -lopenblas + +# interprocedural optimization +CXXFLAGS += -flto +LINKFLAGS += -flto + +# profiling tools +#CXXFLAGS += -pg +#LINKFLAGS += -pg + +default: ${PROGRAM} + +${PROGRAM}: ${OBJECTS} + $(LINKER) $^ ${LINKFLAGS} -o $@ + +clean: + @rm -f ${PROGRAM} ${OBJECTS} + +clean_all:: clean + -@rm -f *_ *~ *.bak *.log *.out *.tar *.orig + -@rm -r html + +run: clean ${PROGRAM} +# time ./${PROGRAM} +# ./${PROGRAM} + ( export LD_LIBRARY_PATH=${OPENBLAS_LIBDIR}:${LD_LIBRARY_PATH} ; ./${PROGRAM} ) +# or 'export LD_LIBRARY_PATH=/opt/openblas_gcc/lib:${LD_LIBRARY_PATH}' in your ~/.bashrc + +# tar the current directory +MY_DIR = `basename ${PWD}` +tar: + @echo "Tar the directory: " ${MY_DIR} + @cd .. ;\ + tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\ + cd ${MY_DIR} +# tar cf `basename ${PWD}`.tar * + +doc: + doxygen Doxyfile + +######################################################################### + +.cpp.o: + $(CXX) -c $(CXXFLAGS) -o $@ $< + +.c.o: + $(CC) -c $(CFLAGS) -o $@ $< + +.f.o: + $(F77) -c $(FFLAGS) -o $@ $< + +################################################################################################## +# some tools +# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags) +cache: ${PROGRAM} + valgrind --tool=callgrind --simulate-cache=yes ./$^ +# kcachegrind callgrind.out. & + kcachegrind `ls -1tr callgrind.out.* |tail -1` + +# Check for wrong memory accesses, memory leaks, ... +# use smaller data sets +# no "-pg" in compile/link options +mem: ${PROGRAM} + valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^ + +# Simple run time profiling of your code +# CXXFLAGS += -g -pg +# LINKFLAGS += -pg +prof: ${PROGRAM} + ./$^ + gprof -b ./$^ > gp.out +# kprof -f gp.out -p gprof & + +#Trace your heap: +#> heaptrack ./main.GCC_ +#> heaptrack_gui heaptrack.main.GCC_..gz +heap: ${PROGRAM} + heaptrack ./$^ 11 + heaptrack_gui `ls -1tr heaptrack.$^.* |tail -1` & + + + +######################################################################## +# get the detailed status of all optimization flags +info: + echo "detailed status of all optimization flags" + $(CXX) --version + $(CXX) -Q $(CXXFLAGS) --help=optimizers diff --git a/Sheet5/Ex2_Second_Attempt/GCC_default.mk b/Sheet5/Ex2_Second_Attempt/GCC_default.mk new file mode 100644 index 0000000..e9e0a0c --- /dev/null +++ b/Sheet5/Ex2_Second_Attempt/GCC_default.mk @@ -0,0 +1,196 @@ +# Basic Defintions for using GNU-compiler suite sequentially +# requires setting of COMPILER=GCC_ + +CC = gcc +CXX = g++ +F77 = gfortran +LINKER = ${CXX} + +WARNINGS = -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow \ + -Wredundant-decls -fmax-errors=1 +# -Wunreachable-code -Winline +CXXFLAGS += -ffast-math -O3 -march=native -std=c++17 ${WARNINGS} +#CXXFLAGS += -Ofast -funroll-all-loops -std=c++17 ${WARNINGS} +#-msse3 +# -ftree-vectorizer-verbose=2 -DNDEBUG +# -ftree-vectorizer-verbose=5 +# -ftree-vectorize -fdump-tree-vect-blocks=foo.dump -fdump-tree-pre=stderr + +# CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details +# CFLAGS = -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2 +# #CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp +# FFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp +# LFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp +LINKFLAGS += -O3 + +#architecture +#CPU = -march=znver2 +CXXFLAGS += ${CPU} +LINKFLAGS += ${CPU} + +# different libraries in Ubuntu or manajaró +ifndef UBUNTU +UBUNTU=1 +endif + +# BLAS, LAPACK +ifeq ($(UBUNTU),1) +LINKFLAGS += -llapack -lblas +# -lopenblas +else +# on archlinux +LINKFLAGS += -llapack -lopenblas -lcblas +endif + +# interprocedural optimization +#CXXFLAGS += -flto +#LINKFLAGS += -flto + +# for debugging purpose (save code) +# -fsanitize=leak # only one out the three can be used +# -fsanitize=address +# -fsanitize=thread +SANITARY = -fsanitize=address -fsanitize=undefined -fsanitize=null -fsanitize=return \ + -fsanitize=bounds -fsanitize=alignment -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow \ + -fsanitize=bool -fsanitize=enum -fsanitize=vptr +#CXXFLAGS += ${SANITARY} +#LINKFLAGS += ${SANITARY} + +# profiling tools +#CXXFLAGS += -pg +#LINKFLAGS += -pg + + +default: ${PROGRAM} + +${PROGRAM}: ${OBJECTS} + $(LINKER) $^ ${LINKFLAGS} -o $@ + +clean: + @rm -f ${PROGRAM} ${OBJECTS} + +clean_all:: clean + -@rm -f *_ *~ *.bak *.log *.out *.tar *.orig *.optrpt + -@rm -rf html + +run: clean ${PROGRAM} +#run: ${PROGRAM} +# time ./${PROGRAM} ${PARAMS} + ./${PROGRAM} ${PARAMS} + +# tar the current directory +MY_DIR = `basename ${PWD}` +tar: clean_all + @echo "Tar the directory: " ${MY_DIR} + @cd .. ;\ + tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\ + cd ${MY_DIR} +# tar cf `basename ${PWD}`.tar * +#find . -size +10M > large_files +#--exclude-from ${MY_DIR}/large_files + +zip: clean + @echo "Zip the directory: " ${MY_DIR} + @cd .. ;\ + zip -r ${MY_DIR}.zip ${MY_DIR} *default.mk ;\ + cd ${MY_DIR} + +doc: + doxygen Doxyfile + +######################################################################### +.SUFFIXES: .f90 + +.cpp.o: + $(CXX) -c $(CXXFLAGS) -o $@ $< +# $(CXX) -c $(CXXFLAGS) -o $@ $< 2>&1 | tee -a $<.log +# $(CXX) -c $(CXXFLAGS) -o $@ $< 2>&1 | tee -a $(<:.cpp=.log) + +.c.o: + $(CC) -c $(CFLAGS) -o $@ $< + +.f.o: + $(F77) -c $(FFLAGS) -o $@ $< + +.f90.o: + $(F77) -c $(FFLAGS) -o $@ $< + +################################################################################################## +# some tools +# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags) +cache: ${PROGRAM} + valgrind --tool=callgrind --simulate-cache=yes ./$^ ${PARAMS} +# kcachegrind callgrind.out. & + kcachegrind `ls -1tr callgrind.out.* |tail -1` + +# Check for wrong memory accesses, memory leaks, ... +# use smaller data sets +# no "-pg" in compile/link options +mem: ${PROGRAM} + valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^ ${PARAMS} +# Graphical interface +# valkyrie + +# Simple run time profiling of your code +CXXFLAGS += -g -pg +LINKFLAGS += -pg +prof: ${PROGRAM} + ./$^ ${PARAMS} + gprof -b ./$^ > gp.out +# kprof -f gp.out -p gprof & + +# sudo apt install gprofng-gui +# https://parallel.computer/presentations/PPoPP2023/2023-Ruud-Slides.pdf +# read §3 in https://sourceware.org/binutils/docs/gprofng.html +# /usr/bin/gp-collect-app -o test.1.er -p on -S on /home/ghaase/Lectures/Math2CPP/Codes/seq/jacobi_oo_stl/main.GCC_ +prof2: ${PROGRAM} + gprofng collect app -h auto ./$^ ${PARAMS} +# gprofng display text -functions `ls -1tdr test.*.er |tail -1` + gprofng display text -script gprofng_script2 `ls -1tdr test.*.er |tail -1` +# gprofng display text -script gprofng_script2 test.*.er +# gprofng display gui & + +prof3: ${PROGRAM} + perf record ./$^ ${PARAMS} + perf report +# perf in Ubuntu 20.04: https://www.howtoforge.com/how-to-install-perf-performance-analysis-tool-on-ubuntu-20-04/ +# * install +# * sudo vi /etc/sysctl.conf +# add kernel.perf_event_paranoid = 0 + +#Trace your heap: +#> heaptrack ./main.GCC_ +#> heaptrack_gui heaptrack.main.GCC_..gz +heap: ${PROGRAM} + heaptrack ./$^ ${PARAMS} + heaptrack_gui `ls -1tr heaptrack.$^.* |tail -1` & + +codecheck: $(SOURCES) + cppcheck --enable=all --inconclusive --std=c++17 --suppress=missingIncludeSystem $^ + + +######################################################################## +# get the detailed status of all optimization flags +info: + echo "detailed status of all optimization flags" + $(CXX) --version + $(CXX) -Q $(CXXFLAGS) --help=optimizers + lscpu + inxi -C + lstopo + +# Excellent hardware info +# hardinfo +# Life monitoring of CPU frequency etc. +# sudo i7z + +# Memory consumption +# vmstat -at -SM 3 +# xfce4-taskmanager + + +# https://www.tecmint.com/check-linux-cpu-information/ +#https://www.tecmint.com/monitor-cpu-and-gpu-temperature-in-ubuntu/ + +# Debugging: +# https://wiki.archlinux.org/index.php/Debugging diff --git a/Sheet5/Ex2_Second_Attempt/ICC_default.mk b/Sheet5/Ex2_Second_Attempt/ICC_default.mk new file mode 100644 index 0000000..d4bd4db --- /dev/null +++ b/Sheet5/Ex2_Second_Attempt/ICC_default.mk @@ -0,0 +1,137 @@ +# Basic Defintions for using INTEL compiler suite sequentially +# requires setting of COMPILER=ICC_ + +#BINDIR = /opt/intel/bin/ + +# special on my sony [GH] +#BINDIR = /opt/save.intel/bin/ +# very special on my sony [GH] +# FIND_LIBS = -L /opt/save.intel/composer_xe_2013.1.117/mkl/lib/intel64/libmkl_intel_lp64.so + +# Error with g++-4.8 using icpc14.0,x: +# find directory wherein bits/c++config.h is located +# 'locate bits/c++config.h' +#FOUND_CONFIG = -I/usr/include/x86_64-linux-gnu/c++/4.8 + + +CC = ${BINDIR}icc +CXX = ${BINDIR}icpc +F77 = ${BINDIR}ifort +LINKER = ${CXX} + + +WARNINGS = -Wall -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -wd2015,2012 -wn3 +# -Winline -Wredundant-decls -Wunreachable-code +CXXFLAGS += -O3 -fargument-noalias -std=c++17 -DNDEBUG ${WARNINGS} -mkl ${FOUND_CONFIG} +# profiling tools +#CXXFLAGS += -pg +#LINKFLAGS += -pg +# -vec-report=3 +# -qopt-report=5 -qopt-report-phase=vec +# -guide -parallel +# -guide-opts=string -guide-par[=n] -guide-vec[=n] +# -auto-p32 -simd +CXXFLAGS += -align + +# use MKL by INTEL +# https://software.intel.com/content/www/us/en/develop/documentation/mkl-linux-developer-guide/top/linking-your-application-with-the-intel-math-kernel-library/linking-quick-start/using-the-mkl-compiler-option.html +# https://software.intel.com/content/www/us/en/develop/articles/intel-mkl-link-line-advisor.html +# LINKFLAGS += -L${BINDIR}../composer_xe_2013.1.117/mkl/lib/intel64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread +#LINKFLAGS += -O3 -L/opt/intel/mkl/lib -mkl +LINKFLAGS += -O3 -mkl=sequential + +# interprocedural optimization +CXXFLAGS += -ipo +LINKFLAGS += -ipo + +# annotated assembler file +ANNOTED = -fsource-asm -S + +default: ${PROGRAM} + +${PROGRAM}: ${OBJECTS} + $(LINKER) $^ ${LINKFLAGS} -o $@ + +clean: + rm -f ${PROGRAM} ${OBJECTS} + +clean_all:: clean + @rm -f *_ *~ *.bak *.log *.out *.tar + +run: clean ${PROGRAM} + ./${PROGRAM} + +# tar the current directory +MY_DIR = `basename ${PWD}` +tar: clean_all + @echo "Tar the directory: " ${MY_DIR} + @cd .. ;\ + tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\ + cd ${MY_DIR} +# tar cf `basename ${PWD}`.tar * + +doc: + doxygen Doxyfile + +######################################################################### + +.cpp.o: + $(CXX) -c $(CXXFLAGS) -o $@ $< + +.c.o: + $(CC) -c $(CFLAGS) -o $@ $< + +.f.o: + $(F77) -c $(FFLAGS) -o $@ $< + +################################################################################################## +# # some tools +# # Cache behaviour (CXXFLAGS += -g tracks down to source lines) +# cache: ${PROGRAM} +# valgrind --tool=callgrind --simulate-cache=yes ./$^ +# # kcachegrind callgrind.out. & +# +# # Check for wrong memory accesses, memory leaks, ... +# # use smaller data sets +# mem: ${PROGRAM} +# valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^ +# +# # Simple run time profiling of your code +# # CXXFLAGS += -g -pg +# # LINKFLAGS += -pg +# prof: ${PROGRAM} +# ./$^ +# gprof -b ./$^ > gp.out +# # kprof -f gp.out -p gprof & +# + + +mem: inspector +prof: amplifier +cache: amplifier + +gap_par_report: + ${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt + +# GUI for performance report +amplifier: ${PROGRAM} + echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope +# alternatively to the solution above: + #edit file /etc/sysctl.d/10-ptrace.conf and set variable kernel.yama.ptrace_scope variable to 0 . + amplxe-gui & + +# GUI for Memory and Thread analyzer (race condition) +inspector: ${PROGRAM} + echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope + inspxe-gui & + +advisor: + echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope + advixe-gui & + +icc-info: + icpc -# main.cpp + + + +