This commit is contained in:
Lisa Pizzo 2025-12-17 08:46:11 +01:00
commit 91794eed4a
4 changed files with 586 additions and 0 deletions

View file

@ -0,0 +1,123 @@
# Basic Defintions for using GNU-compiler suite sequentially
# requires setting of COMPILER=CLANG_
#CLANGPATH=//usr/lib/llvm-10/bin/
CC = ${CLANGPATH}clang
CXX = ${CLANGPATH}clang++
#CXX = ${CLANGPATH}clang++ -lomptarget -fopenmp-targets=nvptx64-nvidia-cuda --cuda-path=/opt/pgi/linux86-64/2017/cuda/8.0
#F77 = gfortran
LINKER = ${CXX}
#http://clang.llvm.org/docs/UsersManual.html#options-to-control-error-and-warning-messages
WARNINGS += -Weverything -Wno-c++98-compat -Wno-sign-conversion -Wno-date-time -Wno-shorten-64-to-32 -Wno-padded -ferror-limit=1
WARNINGS += -Wdocumentation -Wconversion -Wshadow -Wfloat-conversion -pedantic
#-fsyntax-only -Wdocumentation -Wconversion -Wshadow -Wfloat-conversion -pedantic
CXXFLAGS += -O3 -std=c++17 -ferror-limit=1 ${WARNINGS}
# don't use -Ofast
# -ftrapv
LINKFLAGS += -O3
# different libraries in Ubuntu or manajaró
ifndef UBUNTU
UBUNTU=1
endif
# BLAS, LAPACK
LINKFLAGS += -llapack -lblas
# -lopenblas
ifeq ($(UBUNTU),1)
# ubuntu
else
# on archlinux
LINKFLAGS += -lcblas
endif
# interprocedural optimization
CXXFLAGS += -flto
LINKFLAGS += -flto
# very good check
# http://clang.llvm.org/extra/clang-tidy/
# good check, see: http://llvm.org/docs/CodingStandards.html#include-style
SWITCH_OFF=,-readability-magic-numbers,-readability-redundant-control-flow,-readability-redundant-member-init
SWITCH_OFF+=,-readability-redundant-member-init,-readability-isolate-declaration
#READABILITY=,readability*${SWITCH_OFF}
#TIDYFLAGS = -checks=llvm-*,-llvm-header-guard -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp"
TIDYFLAGS = -checks=llvm-*,-llvm-header-guard${READABILITY} -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp"
#TIDYFLAGS += -checks='modernize*
# ???
#TIDYFLAGS = -checks='cert*' -header-filter=.*
# MPI checks ??
#TIDYFLAGS = -checks='mpi*'
# ??
#TIDYFLAGS = -checks='performance*' -header-filter=.*
#TIDYFLAGS = -checks='portability-*' -header-filter=.*
#TIDYFLAGS = -checks='readability-*' -header-filter=.*
default: ${PROGRAM}
${PROGRAM}: ${OBJECTS}
$(LINKER) $^ ${LINKFLAGS} -o $@
clean:
@rm -f ${PROGRAM} ${OBJECTS}
clean_all:: clean
@rm -f *_ *~ *.bak *.log *.out *.tar
codecheck: tidy_check
tidy_check:
clang-tidy ${SOURCES} ${TIDYFLAGS} -- ${SOURCES}
# see also http://clang-developers.42468.n3.nabble.com/Error-while-trying-to-load-a-compilation-database-td4049722.html
run: clean ${PROGRAM}
# time ./${PROGRAM} ${PARAMS}
./${PROGRAM} ${PARAMS}
# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *
doc:
doxygen Doxyfile
#########################################################################
.cpp.o:
$(CXX) -c $(CXXFLAGS) -o $@ $<
.c.o:
$(CC) -c $(CFLAGS) -o $@ $<
.f.o:
$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# some tools
# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags)
cache: ${PROGRAM}
valgrind --tool=callgrind --simulate-cache=yes ./$^ ${PARAMS}
# kcachegrind callgrind.out.<pid> &
kcachegrind `ls -1tr callgrind.out.* |tail -1`
# Check for wrong memory accesses, memory leaks, ...
# use smaller data sets
mem: ${PROGRAM}
valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^ ${PARAMS}
# Simple run time profiling of your code
# CXXFLAGS += -g -pg
# LINKFLAGS += -pg
prof: ${PROGRAM}
perf record ./$^ ${PARAMS}
perf report
# gprof -b ./$^ > gp.out
# kprof -f gp.out -p gprof &
codecheck: tidy_check

View file

@ -0,0 +1,130 @@
# Basic Defintions for using GNU-compiler suite sequentially
# requires setting of COMPILER=GCC_
CC = gcc
CXX = g++
F77 = gfortran
LINKER = ${CXX}
# on mephisto:
#CXXFLAGS += -I/share/apps/atlas/include
#LINKFLAGS += -L/share/apps/atlas/lib
#LINKFLAGS += -lcblas -latlas
#LINKFLAGS += -lblas
# Der <cblas.h> Header muss mit extern "C" versehen werden, damit g++ alles findet.
#WARNINGS = -pedantic -pedantic-errors -Wall -Wextra -Werror -Wconversion -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow
WARNINGS = -pedantic -Wall -Wextra -Wconversion -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow \
-Wredundant-decls -Winline -fmax-errors=1
# -Wunreachable-code
# -Wunreachable-code
CXXFLAGS += -ffast-math -O3 -march=native -std=c++17 ${WARNINGS}
#CXXFLAGS += -Ofast -funroll-all-loops -std=c++17 ${WARNINGS}
#-msse3
# -ftree-vectorizer-verbose=2 -DNDEBUG
# -ftree-vectorizer-verbose=5
# -ftree-vectorize -fdump-tree-vect-blocks=foo.dump -fdump-tree-pre=stderr
# CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details
# CFLAGS = -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2
# #CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
# FFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
# LFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
LINKFLAGS += -O3
# BLAS, LAPACK
OPENBLAS_DIR = /opt/openblas_GCCseq
#OPENBLAS_DIR = /opt/openblas_GCC
OPENBLAS_LIBDIR = ${OPENBLAS_DIR}/lib
OPENBLAS_INCDIR = ${OPENBLAS_DIR}/include
CXXFLAGS += -I${OPENBLAS_INCDIR}
LINKFLAGS += -L${OPENBLAS_LIBDIR} -lopenblas
# interprocedural optimization
CXXFLAGS += -flto
LINKFLAGS += -flto
# profiling tools
#CXXFLAGS += -pg
#LINKFLAGS += -pg
default: ${PROGRAM}
${PROGRAM}: ${OBJECTS}
$(LINKER) $^ ${LINKFLAGS} -o $@
clean:
@rm -f ${PROGRAM} ${OBJECTS}
clean_all:: clean
-@rm -f *_ *~ *.bak *.log *.out *.tar *.orig
-@rm -r html
run: clean ${PROGRAM}
# time ./${PROGRAM}
# ./${PROGRAM}
( export LD_LIBRARY_PATH=${OPENBLAS_LIBDIR}:${LD_LIBRARY_PATH} ; ./${PROGRAM} )
# or 'export LD_LIBRARY_PATH=/opt/openblas_gcc/lib:${LD_LIBRARY_PATH}' in your ~/.bashrc
# tar the current directory
MY_DIR = `basename ${PWD}`
tar:
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *
doc:
doxygen Doxyfile
#########################################################################
.cpp.o:
$(CXX) -c $(CXXFLAGS) -o $@ $<
.c.o:
$(CC) -c $(CFLAGS) -o $@ $<
.f.o:
$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# some tools
# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags)
cache: ${PROGRAM}
valgrind --tool=callgrind --simulate-cache=yes ./$^
# kcachegrind callgrind.out.<pid> &
kcachegrind `ls -1tr callgrind.out.* |tail -1`
# Check for wrong memory accesses, memory leaks, ...
# use smaller data sets
# no "-pg" in compile/link options
mem: ${PROGRAM}
valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
# Simple run time profiling of your code
# CXXFLAGS += -g -pg
# LINKFLAGS += -pg
prof: ${PROGRAM}
./$^
gprof -b ./$^ > gp.out
# kprof -f gp.out -p gprof &
#Trace your heap:
#> heaptrack ./main.GCC_
#> heaptrack_gui heaptrack.main.GCC_.<pid>.gz
heap: ${PROGRAM}
heaptrack ./$^ 11
heaptrack_gui `ls -1tr heaptrack.$^.* |tail -1` &
########################################################################
# get the detailed status of all optimization flags
info:
echo "detailed status of all optimization flags"
$(CXX) --version
$(CXX) -Q $(CXXFLAGS) --help=optimizers

View file

@ -0,0 +1,196 @@
# Basic Defintions for using GNU-compiler suite sequentially
# requires setting of COMPILER=GCC_
CC = gcc
CXX = g++
F77 = gfortran
LINKER = ${CXX}
WARNINGS = -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow \
-Wredundant-decls -fmax-errors=1
# -Wunreachable-code -Winline
CXXFLAGS += -ffast-math -O3 -march=native -std=c++17 ${WARNINGS}
#CXXFLAGS += -Ofast -funroll-all-loops -std=c++17 ${WARNINGS}
#-msse3
# -ftree-vectorizer-verbose=2 -DNDEBUG
# -ftree-vectorizer-verbose=5
# -ftree-vectorize -fdump-tree-vect-blocks=foo.dump -fdump-tree-pre=stderr
# CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details
# CFLAGS = -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2
# #CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
# FFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
# LFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
LINKFLAGS += -O3
#architecture
#CPU = -march=znver2
CXXFLAGS += ${CPU}
LINKFLAGS += ${CPU}
# different libraries in Ubuntu or manajaró
ifndef UBUNTU
UBUNTU=1
endif
# BLAS, LAPACK
ifeq ($(UBUNTU),1)
LINKFLAGS += -llapack -lblas
# -lopenblas
else
# on archlinux
LINKFLAGS += -llapack -lopenblas -lcblas
endif
# interprocedural optimization
#CXXFLAGS += -flto
#LINKFLAGS += -flto
# for debugging purpose (save code)
# -fsanitize=leak # only one out the three can be used
# -fsanitize=address
# -fsanitize=thread
SANITARY = -fsanitize=address -fsanitize=undefined -fsanitize=null -fsanitize=return \
-fsanitize=bounds -fsanitize=alignment -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow \
-fsanitize=bool -fsanitize=enum -fsanitize=vptr
#CXXFLAGS += ${SANITARY}
#LINKFLAGS += ${SANITARY}
# profiling tools
#CXXFLAGS += -pg
#LINKFLAGS += -pg
default: ${PROGRAM}
${PROGRAM}: ${OBJECTS}
$(LINKER) $^ ${LINKFLAGS} -o $@
clean:
@rm -f ${PROGRAM} ${OBJECTS}
clean_all:: clean
-@rm -f *_ *~ *.bak *.log *.out *.tar *.orig *.optrpt
-@rm -rf html
run: clean ${PROGRAM}
#run: ${PROGRAM}
# time ./${PROGRAM} ${PARAMS}
./${PROGRAM} ${PARAMS}
# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *
#find . -size +10M > large_files
#--exclude-from ${MY_DIR}/large_files
zip: clean
@echo "Zip the directory: " ${MY_DIR}
@cd .. ;\
zip -r ${MY_DIR}.zip ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
doc:
doxygen Doxyfile
#########################################################################
.SUFFIXES: .f90
.cpp.o:
$(CXX) -c $(CXXFLAGS) -o $@ $<
# $(CXX) -c $(CXXFLAGS) -o $@ $< 2>&1 | tee -a $<.log
# $(CXX) -c $(CXXFLAGS) -o $@ $< 2>&1 | tee -a $(<:.cpp=.log)
.c.o:
$(CC) -c $(CFLAGS) -o $@ $<
.f.o:
$(F77) -c $(FFLAGS) -o $@ $<
.f90.o:
$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# some tools
# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags)
cache: ${PROGRAM}
valgrind --tool=callgrind --simulate-cache=yes ./$^ ${PARAMS}
# kcachegrind callgrind.out.<pid> &
kcachegrind `ls -1tr callgrind.out.* |tail -1`
# Check for wrong memory accesses, memory leaks, ...
# use smaller data sets
# no "-pg" in compile/link options
mem: ${PROGRAM}
valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^ ${PARAMS}
# Graphical interface
# valkyrie
# Simple run time profiling of your code
CXXFLAGS += -g -pg
LINKFLAGS += -pg
prof: ${PROGRAM}
./$^ ${PARAMS}
gprof -b ./$^ > gp.out
# kprof -f gp.out -p gprof &
# sudo apt install gprofng-gui
# https://parallel.computer/presentations/PPoPP2023/2023-Ruud-Slides.pdf
# read §3 in https://sourceware.org/binutils/docs/gprofng.html
# /usr/bin/gp-collect-app -o test.1.er -p on -S on /home/ghaase/Lectures/Math2CPP/Codes/seq/jacobi_oo_stl/main.GCC_
prof2: ${PROGRAM}
gprofng collect app -h auto ./$^ ${PARAMS}
# gprofng display text -functions `ls -1tdr test.*.er |tail -1`
gprofng display text -script gprofng_script2 `ls -1tdr test.*.er |tail -1`
# gprofng display text -script gprofng_script2 test.*.er
# gprofng display gui &
prof3: ${PROGRAM}
perf record ./$^ ${PARAMS}
perf report
# perf in Ubuntu 20.04: https://www.howtoforge.com/how-to-install-perf-performance-analysis-tool-on-ubuntu-20-04/
# * install
# * sudo vi /etc/sysctl.conf
# add kernel.perf_event_paranoid = 0
#Trace your heap:
#> heaptrack ./main.GCC_
#> heaptrack_gui heaptrack.main.GCC_.<pid>.gz
heap: ${PROGRAM}
heaptrack ./$^ ${PARAMS}
heaptrack_gui `ls -1tr heaptrack.$^.* |tail -1` &
codecheck: $(SOURCES)
cppcheck --enable=all --inconclusive --std=c++17 --suppress=missingIncludeSystem $^
########################################################################
# get the detailed status of all optimization flags
info:
echo "detailed status of all optimization flags"
$(CXX) --version
$(CXX) -Q $(CXXFLAGS) --help=optimizers
lscpu
inxi -C
lstopo
# Excellent hardware info
# hardinfo
# Life monitoring of CPU frequency etc.
# sudo i7z
# Memory consumption
# vmstat -at -SM 3
# xfce4-taskmanager
# https://www.tecmint.com/check-linux-cpu-information/
#https://www.tecmint.com/monitor-cpu-and-gpu-temperature-in-ubuntu/
# Debugging:
# https://wiki.archlinux.org/index.php/Debugging

View file

@ -0,0 +1,137 @@
# Basic Defintions for using INTEL compiler suite sequentially
# requires setting of COMPILER=ICC_
#BINDIR = /opt/intel/bin/
# special on my sony [GH]
#BINDIR = /opt/save.intel/bin/
# very special on my sony [GH]
# FIND_LIBS = -L /opt/save.intel/composer_xe_2013.1.117/mkl/lib/intel64/libmkl_intel_lp64.so
# Error with g++-4.8 using icpc14.0,x:
# find directory wherein bits/c++config.h is located
# 'locate bits/c++config.h'
#FOUND_CONFIG = -I/usr/include/x86_64-linux-gnu/c++/4.8
CC = ${BINDIR}icc
CXX = ${BINDIR}icpc
F77 = ${BINDIR}ifort
LINKER = ${CXX}
WARNINGS = -Wall -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -wd2015,2012 -wn3
# -Winline -Wredundant-decls -Wunreachable-code
CXXFLAGS += -O3 -fargument-noalias -std=c++17 -DNDEBUG ${WARNINGS} -mkl ${FOUND_CONFIG}
# profiling tools
#CXXFLAGS += -pg
#LINKFLAGS += -pg
# -vec-report=3
# -qopt-report=5 -qopt-report-phase=vec
# -guide -parallel
# -guide-opts=string -guide-par[=n] -guide-vec[=n]
# -auto-p32 -simd
CXXFLAGS += -align
# use MKL by INTEL
# https://software.intel.com/content/www/us/en/develop/documentation/mkl-linux-developer-guide/top/linking-your-application-with-the-intel-math-kernel-library/linking-quick-start/using-the-mkl-compiler-option.html
# https://software.intel.com/content/www/us/en/develop/articles/intel-mkl-link-line-advisor.html
# LINKFLAGS += -L${BINDIR}../composer_xe_2013.1.117/mkl/lib/intel64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
#LINKFLAGS += -O3 -L/opt/intel/mkl/lib -mkl
LINKFLAGS += -O3 -mkl=sequential
# interprocedural optimization
CXXFLAGS += -ipo
LINKFLAGS += -ipo
# annotated assembler file
ANNOTED = -fsource-asm -S
default: ${PROGRAM}
${PROGRAM}: ${OBJECTS}
$(LINKER) $^ ${LINKFLAGS} -o $@
clean:
rm -f ${PROGRAM} ${OBJECTS}
clean_all:: clean
@rm -f *_ *~ *.bak *.log *.out *.tar
run: clean ${PROGRAM}
./${PROGRAM}
# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *
doc:
doxygen Doxyfile
#########################################################################
.cpp.o:
$(CXX) -c $(CXXFLAGS) -o $@ $<
.c.o:
$(CC) -c $(CFLAGS) -o $@ $<
.f.o:
$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# # some tools
# # Cache behaviour (CXXFLAGS += -g tracks down to source lines)
# cache: ${PROGRAM}
# valgrind --tool=callgrind --simulate-cache=yes ./$^
# # kcachegrind callgrind.out.<pid> &
#
# # Check for wrong memory accesses, memory leaks, ...
# # use smaller data sets
# mem: ${PROGRAM}
# valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
#
# # Simple run time profiling of your code
# # CXXFLAGS += -g -pg
# # LINKFLAGS += -pg
# prof: ${PROGRAM}
# ./$^
# gprof -b ./$^ > gp.out
# # kprof -f gp.out -p gprof &
#
mem: inspector
prof: amplifier
cache: amplifier
gap_par_report:
${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt
# GUI for performance report
amplifier: ${PROGRAM}
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
# alternatively to the solution above:
#edit file /etc/sysctl.d/10-ptrace.conf and set variable kernel.yama.ptrace_scope variable to 0 .
amplxe-gui &
# GUI for Memory and Thread analyzer (race condition)
inspector: ${PROGRAM}
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
inspxe-gui &
advisor:
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
advixe-gui &
icc-info:
icpc -# main.cpp