.
This commit is contained in:
parent
f19b2856e4
commit
0125d25357
3 changed files with 343 additions and 0 deletions
131
Sheet5/CLANG_default.mk
Normal file
131
Sheet5/CLANG_default.mk
Normal file
|
|
@ -0,0 +1,131 @@
|
||||||
|
# Basic Definitions for using the Clang/LLVM compiler suite sequentially
|
||||||
|
# requires setting of COMPILER=CLANG_
|
||||||
|
# https://llvm.org/docs/CompileCudaWithLLVM.html
|
||||||
|
# https://llvm.org/docs/NVPTXUsage.html
|
||||||
|
|
||||||
|
#CLANGPATH=//usr/lib/llvm-10/bin/
|
||||||
|
# Compiler drivers.  CLANGPATH is an optional directory prefix (including the
# trailing slash); it expands to nothing while it is not set.
CC     = $(CLANGPATH)clang
CXX    = $(CLANGPATH)clang++
#CXX = ${CLANGPATH}clang++ -lomptarget -fopenmp-targets=nvptx64-nvidia-cuda --cuda-path=/opt/pgi/linux86-64/2017/cuda/8.0
#F77 = gfortran
# The link step goes through the C++ driver.
LINKER = $(CXX)
|
||||||
|
|
||||||
|
#http://clang.llvm.org/docs/UsersManual.html#options-to-control-error-and-warning-messages
|
||||||
|
# Warning options appended to whatever WARNINGS already holds: the enabled
# groups first, then the -Wno-* switches.
WARNINGS += -pedantic -Weverything -Wdocumentation -Wconversion -Wshadow -Wfloat-conversion \
            -Wno-c++98-compat -Wno-sign-conversion -Wno-date-time -Wno-shorten-64-to-32 -Wno-padded -ferror-limit=1 \
            -Wno-unsafe-buffer-usage
|
||||||
|
#-fsyntax-only -Wdocumentation -Wconversion -Wshadow -Wfloat-conversion -pedantic
|
||||||
|
|
||||||
|
# Compile options: optimisation level, language standard, error limit and the
# WARNINGS collection.
CXXFLAGS  += -O3 -std=c++17 -ferror-limit=1 $(WARNINGS)
# don't use -Ofast
#   -ftrapv
# The same optimisation level is handed to the link step.
LINKFLAGS += -O3
|
||||||
|
|
||||||
|
# different libraries in Ubuntu or Manjaro
|
||||||
|
# Select the Ubuntu library set (UBUNTU = 1) unless the caller already
# supplied a non-empty UBUNTU.
ifndef UBUNTU
  UBUNTU = 1
endif
|
||||||
|
|
||||||
|
# BLAS, LAPACK: lapack and blas are always linked; any UBUNTU value other
# than 1 additionally links cblas.
LINKFLAGS += -llapack -lblas
# -lopenblas
ifneq ($(UBUNTU),1)
  # on archlinux
  LINKFLAGS += -lcblas
endif
|
||||||
|
|
||||||
|
# interprocedural optimization: flag goes to the compile and to the link step
CXXFLAGS  += -flto
LINKFLAGS += -flto

#sudo apt install libomp-dev
# OpenMP: flag goes to the compile and to the link step
CXXFLAGS  += -fopenmp
LINKFLAGS += -fopenmp
|
||||||
|
|
||||||
|
# very good check
|
||||||
|
# http://clang.llvm.org/extra/clang-tidy/
|
||||||
|
# good check, see: http://llvm.org/docs/CodingStandards.html#include-style
|
||||||
|
# clang-tidy checks to switch off again when the readability group is enabled
# (see the commented READABILITY line).  The list must stay ONE comma-joined
# word: extending it with '+=' would insert a blank and split the -checks
# argument.  Each check is listed once.
SWITCH_OFF = ,-readability-magic-numbers,-readability-redundant-control-flow,-readability-redundant-member-init,-readability-isolate-declaration
#READABILITY=,readability*${SWITCH_OFF}
#TIDYFLAGS = -checks=llvm-*,-llvm-header-guard -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp"
# Options for clang-tidy.  The check list and the header filter are quoted so
# that the shell never tries to expand '*' or '.*' as file-name patterns.
# Recursive '=' is kept so that READABILITY may still be defined later.
TIDYFLAGS = -checks='llvm-*,-llvm-header-guard$(READABILITY)' -header-filter='.*' -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp"
|
||||||
|
#TIDYFLAGS += -checks='modernize*
|
||||||
|
# ???
|
||||||
|
#TIDYFLAGS = -checks='cert*' -header-filter=.*
|
||||||
|
# MPI checks ??
|
||||||
|
#TIDYFLAGS = -checks='mpi*'
|
||||||
|
# ??
|
||||||
|
#TIDYFLAGS = -checks='performance*' -header-filter=.*
|
||||||
|
#TIDYFLAGS = -checks='portability-*' -header-filter=.*
|
||||||
|
#TIDYFLAGS = -checks='readability-*' -header-filter=.*
|
||||||
|
|
||||||
|
# 'default' is a command name, not a file: declare it phony so that a file
# called 'default' cannot suppress the build.
.PHONY: default
default: $(PROGRAM)

# Link all object files into the executable.
$(PROGRAM): $(OBJECTS)
	$(LINKER) $^ $(LINKFLAGS) -o $@
|
||||||
|
|
||||||
|
# Neither target creates a file; declared phony so that files named 'clean'
# or 'clean_all' cannot disable them.
.PHONY: clean clean_all

# Remove the executable and the object files.
clean:
	@rm -f $(PROGRAM) $(OBJECTS)

# Runs 'clean' first, then removes the listed backup, log, output and archive
# patterns in the current directory.
clean_all:: clean
	@rm -f *_ *~ *.bak *.log *.out *.tar
|
||||||
|
|
||||||
|
# Command targets, never files.
.PHONY: codecheck tidy_check

# 'codecheck' is an alias for the clang-tidy run.
codecheck: tidy_check

# Run clang-tidy with TIDYFLAGS on all sources.
# NOTE(review): the words after '--' are handed to clang-tidy as the compile
# command line; listing the source files there again looks unintended -
# confirm whether compiler options were meant instead.
tidy_check:
	clang-tidy $(SOURCES) $(TIDYFLAGS) -- $(SOURCES)
|
||||||
|
# see also http://clang-developers.42468.n3.nabble.com/Error-while-trying-to-load-a-compilation-database-td4049722.html
|
||||||
|
|
||||||
|
# Rebuild from scratch and start the program with PARAMS as its arguments.
# NOTE(review): 'clean' and the program are independent prerequisites, so a
# parallel build (make -j) may run them concurrently - confirm that 'run' is
# only used serially.
.PHONY: run
run: clean $(PROGRAM)
#	time  ./${PROGRAM} ${PARAMS}
	./$(PROGRAM) $(PARAMS)
|
||||||
|
|
||||||
|
# tar the current directory
|
||||||
|
# Name of the directory make is running in.  Taken from make's own CURDIR
# instead of the inherited PWD environment variable, so it is also correct
# when make was started with -C <dir>.
MY_DIR := $(notdir $(CURDIR))

# Archive this directory into ../$(MY_DIR).tar.
#  * 'cd .. && tar': tar only runs if the directory change worked, and a
#    failing tar now fails the target (a trailing 'cd' used to hide it).
#  * Only those *default.mk files that really exist in the parent directory
#    are added; an unmatched shell glob would make tar report an error.
.PHONY: tar
tar: clean_all
	@echo "Tar the directory: " $(MY_DIR)
	@cd .. && tar cf $(MY_DIR).tar $(MY_DIR) $(notdir $(wildcard ../*default.mk))
|
||||||
|
# tar cf `basename ${PWD}`.tar *
|
||||||
|
|
||||||
|
# Generate the documentation with doxygen, configured by ./Doxyfile.
# Phony: a file or directory named 'doc' must not mark the target up to date.
.PHONY: doc
doc:
	doxygen Doxyfile
|
||||||
|
|
||||||
|
#########################################################################
|
||||||
|
|
||||||
|
# Compile rules, written as pattern rules.  The previous suffix-rule form
# ('.cpp.o:') is the obsolete notation and only works for suffixes that are
# on the .SUFFIXES list.

# C++ source -> object file
%.o: %.cpp
	$(CXX) -c $(CXXFLAGS) -o $@ $<

# C source -> object file
%.o: %.c
	$(CC) -c $(CFLAGS) -o $@ $<

# Fortran source -> object file
# NOTE(review): this file sets F77 only in a commented-out line, so the
# caller has to provide it before this rule can work.
%.o: %.f
	$(F77) -c $(FFLAGS) -o $@ $<
|
||||||
|
|
||||||
|
##################################################################################################
|
||||||
|
# some tools
|
||||||
|
# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags)
|
||||||
|
# Run the program under callgrind with cache simulation, then open the most
# recently written callgrind.out.* file in kcachegrind.
.PHONY: cache
cache: $(PROGRAM)
	valgrind --tool=callgrind --simulate-cache=yes ./$< $(PARAMS)
#	kcachegrind callgrind.out.<pid> &
	kcachegrind `ls -1tr callgrind.out.* |tail -1`
|
||||||
|
|
||||||
|
# Check for wrong memory accesses, memory leaks, ...
|
||||||
|
# use smaller data sets
|
||||||
|
# Run the program under valgrind memcheck; the report is written to
# <program>.addr.out.
.PHONY: mem
mem: $(PROGRAM)
	valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$<.addr.out --show-reachable=yes ./$< $(PARAMS)
|
||||||
|
|
||||||
|
# Simple run time profiling of your code
|
||||||
|
# CXXFLAGS += -g -pg
|
||||||
|
# LINKFLAGS += -pg
|
||||||
|
# Record a profile of one program run with perf and show the report.
.PHONY: prof
prof: $(PROGRAM)
	perf record ./$< $(PARAMS)
	perf report
|
||||||
|
# gprof -b ./$^ > gp.out
|
||||||
|
# kprof -f gp.out -p gprof &
|
||||||
|
|
||||||
|
# NOTE(review): repeats the prerequisite-only rule 'codecheck: tidy_check' that already appears earlier in this file; the repeat adds nothing new.
codecheck: tidy_check
|
||||||
182
Sheet5/GCC_default.mk
Normal file
182
Sheet5/GCC_default.mk
Normal file
|
|
@ -0,0 +1,182 @@
|
||||||
|
# Basic Definitions for using GNU-compiler suite sequentially
|
||||||
|
# requires setting of COMPILER=GCC_
|
||||||
|
|
||||||
|
# GNU drivers for C, C++ and Fortran.
CC     = gcc
CXX    = g++
F77    = gfortran
# The link step goes through the C++ driver.
LINKER = $(CXX)
|
||||||
|
|
||||||
|
# Warning options; they reach the compiler through CXXFLAGS below.
WARNINGS  = -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow
WARNINGS += -Wredundant-decls -Winline -fmax-errors=1
#  -Wunreachable-code
# Compile options: fast-math, optimisation level, host architecture, language
# standard and the warning set.
CXXFLAGS += -ffast-math -O3 -march=native -std=c++20 $(WARNINGS)
|
||||||
|
#CXXFLAGS += -Ofast -funroll-all-loops -std=c++17 ${WARNINGS}
|
||||||
|
#-msse3
|
||||||
|
# -ftree-vectorizer-verbose=2 -DNDEBUG
|
||||||
|
# -ftree-vectorizer-verbose=5
|
||||||
|
# -ftree-vectorize -fdump-tree-vect-blocks=foo.dump -fdump-tree-pre=stderr
|
||||||
|
|
||||||
|
# CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details
|
||||||
|
# CFLAGS = -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2
|
||||||
|
# #CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
|
||||||
|
# FFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
|
||||||
|
# LFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
|
||||||
|
# Optimisation level for the link step.
LINKFLAGS += -O3

#architecture
#CPU = -march=znver2
# CPU is appended to both flag sets; it contributes nothing while it is unset.
CXXFLAGS  += $(CPU)
LINKFLAGS += $(CPU)
|
||||||
|
|
||||||
|
# different libraries in Ubuntu or Manjaro
|
||||||
|
# Select the Ubuntu library set (UBUNTU = 1) unless the caller already
# supplied a non-empty UBUNTU.
ifndef UBUNTU
  UBUNTU = 1
endif
|
||||||
|
|
||||||
|
# BLAS, LAPACK
|
||||||
|
ifeq ($(UBUNTU),1)
|
||||||
|
LINKFLAGS += -llapack -lblas
|
||||||
|
# -lopenblas
|
||||||
|
else
|
||||||
|
# on archlinux
|
||||||
|
LINKFLAGS += -llapack -lopenblas -lcblas
|
||||||
|
endif
|
||||||
|
|
||||||
|
# interprocedural optimization: flag goes to the compile and to the link step
CXXFLAGS  += -flto
LINKFLAGS += -flto
|
||||||
|
|
||||||
|
# for debugging purposes (safe code)
|
||||||
|
# -fsanitize=leak # only one out of the three can be used
|
||||||
|
# -fsanitize=address
|
||||||
|
# -fsanitize=thread
|
||||||
|
# Sanitizer options; they only take effect once the two commented lines at
# the end of this group are enabled.
SANITARY  = -fsanitize=address -fsanitize=undefined -fsanitize=null -fsanitize=return
SANITARY += -fsanitize=bounds -fsanitize=alignment -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow
SANITARY += -fsanitize=bool -fsanitize=enum -fsanitize=vptr
#CXXFLAGS += ${SANITARY}
#LINKFLAGS += ${SANITARY}
|
||||||
|
|
||||||
|
# OpenMP: flag goes to the compile and to the link step
CXXFLAGS  += -fopenmp
LINKFLAGS += -fopenmp
|
||||||
|
|
||||||
|
# 'default' is a command name, not a file: declare it phony so that a file
# called 'default' cannot suppress the build.
.PHONY: default
default: $(PROGRAM)

# Link all object files into the executable.
$(PROGRAM): $(OBJECTS)
	$(LINKER) $^ $(LINKFLAGS) -o $@
|
||||||
|
|
||||||
|
# Neither target creates a file; declared phony so that files named 'clean'
# or 'clean_all' cannot disable them.
.PHONY: clean clean_all

# Remove the executable and the object files.
clean:
	@rm -f $(PROGRAM) $(OBJECTS)

# Runs 'clean' first, then removes the listed backup, log, output, archive and
# report patterns plus the html directory.  The leading '-' lets make carry on
# even if one of the removals reports an error.
clean_all:: clean
	-@rm -f *_ *~ *.bak *.log *.out *.tar *.orig *.optrpt
	-@rm -rf html
|
||||||
|
|
||||||
|
# Rebuild from scratch and start the program with PARAMS as its arguments.
# NOTE(review): 'clean' and the program are independent prerequisites, so a
# parallel build (make -j) may run them concurrently - confirm that 'run' is
# only used serially.
.PHONY: run
run: clean $(PROGRAM)
#run: ${PROGRAM}
#	time  ./${PROGRAM} ${PARAMS}
	./$(PROGRAM) $(PARAMS)
|
||||||
|
|
||||||
|
# tar the current directory
|
||||||
|
# Name of the directory make is running in.  Taken from make's own CURDIR
# instead of the inherited PWD environment variable, so it is also correct
# when make was started with -C <dir>.
MY_DIR := $(notdir $(CURDIR))

# Archive this directory into ../$(MY_DIR).tar.
#  * 'cd .. && tar': tar only runs if the directory change worked, and a
#    failing tar now fails the target (a trailing 'cd' used to hide it).
#  * Only those *default.mk files that really exist in the parent directory
#    are added; an unmatched shell glob would make tar report an error.
.PHONY: tar
tar: clean_all
	@echo "Tar the directory: " $(MY_DIR)
	@cd .. && tar cf $(MY_DIR).tar $(MY_DIR) $(notdir $(wildcard ../*default.mk))
|
||||||
|
# tar cf `basename ${PWD}`.tar *
|
||||||
|
#find . -size +10M > large_files
|
||||||
|
#--exclude-from ${MY_DIR}/large_files
|
||||||
|
|
||||||
|
# Zip this directory into ../<dir>.zip.
#  * 'cd .. && zip': zip only runs if the directory change worked, and a
#    failing zip now fails the target (a trailing 'cd' used to hide it).
#  * Only those *default.mk files that really exist in the parent directory
#    are added instead of relying on a shell glob that may not match.
.PHONY: zip
zip: clean
	@echo "Zip the directory: " ${MY_DIR}
	@cd .. && zip -r ${MY_DIR}.zip ${MY_DIR} $(notdir $(wildcard ../*default.mk))
|
||||||
|
|
||||||
|
# Generate the documentation with doxygen, configured by ./Doxyfile.
# Phony: a file or directory named 'doc' must not mark the target up to date.
.PHONY: doc
doc:
	doxygen Doxyfile
|
||||||
|
|
||||||
|
#########################################################################
|
||||||
|
# Kept for makefiles that still rely on the .f90 suffix being registered; the
# pattern rules below do not need it.
.SUFFIXES: .f90

# Compile rules, written as pattern rules.  The previous suffix-rule form
# ('.cpp.o:') is the obsolete notation and only works for suffixes that are
# on the .SUFFIXES list.

# C++ source -> object file
%.o: %.cpp
	$(CXX) -c $(CXXFLAGS) -o $@ $<
#	$(CXX) -c $(CXXFLAGS) -o $@ $< 2>&1 | tee -a $<.log
#	$(CXX) -c $(CXXFLAGS) -o $@ $< 2>&1 | tee -a $(<:.cpp=.log)

# C source -> object file
%.o: %.c
	$(CC) -c $(CFLAGS) -o $@ $<

# Fortran (fixed form) source -> object file
%.o: %.f
	$(F77) -c $(FFLAGS) -o $@ $<

# Fortran 90 source -> object file
%.o: %.f90
	$(F77) -c $(FFLAGS) -o $@ $<
|
||||||
|
|
||||||
|
##################################################################################################
|
||||||
|
# some tools
|
||||||
|
# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags)
|
||||||
|
# Run the program under callgrind with cache simulation, then open the most
# recently written callgrind.out.* file in kcachegrind.
.PHONY: cache
cache: $(PROGRAM)
	valgrind --tool=callgrind --simulate-cache=yes ./$< $(PARAMS)
#	kcachegrind callgrind.out.<pid> &
	kcachegrind `ls -1tr callgrind.out.* |tail -1`
|
||||||
|
|
||||||
|
# Check for wrong memory accesses, memory leaks, ...
|
||||||
|
# use smaller data sets
|
||||||
|
# no "-pg" in compile/link options
|
||||||
|
# Run the program under valgrind memcheck; the report is written to
# <program>.addr.out.
.PHONY: mem
mem: $(PROGRAM)
	valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$<.addr.out --show-reachable=yes ./$< $(PARAMS)
|
||||||
|
# Graphical interface
|
||||||
|
# valkyrie
|
||||||
|
|
||||||
|
# Simple run time profiling of your code
|
||||||
|
# CXXFLAGS += -g -pg
|
||||||
|
# LINKFLAGS += -pg
|
||||||
|
# Record a profile of one program run with perf and show the report.
.PHONY: prof
prof: $(PROGRAM)
	perf record ./$< $(PARAMS)
	perf report
|
||||||
|
# gprof -b ./$^ > gp.out
|
||||||
|
# kprof -f gp.out -p gprof &
|
||||||
|
|
||||||
|
# perf in Ubuntu 20.04: https://www.howtoforge.com/how-to-install-perf-performance-analysis-tool-on-ubuntu-20-04/
|
||||||
|
# * install
|
||||||
|
# * sudo vi /etc/sysctl.conf
|
||||||
|
# add kernel.perf_event_paranoid = 0
|
||||||
|
|
||||||
|
#Trace your heap:
|
||||||
|
#> heaptrack ./main.GCC_
|
||||||
|
#> heaptrack_gui heaptrack.main.GCC_.<pid>.gz
|
||||||
|
# Trace heap usage of one program run with heaptrack, then open the most
# recently written heaptrack.<program>.* file in the GUI (in the background).
# NOTE(review): the literal '11' is passed to the program after PARAMS -
# confirm that this extra argument is intended.
.PHONY: heap
heap: $(PROGRAM)
	heaptrack ./$< $(PARAMS) 11
	heaptrack_gui `ls -1tr heaptrack.$<.* |tail -1` &
|
||||||
|
|
||||||
|
# Static analysis of all sources with cppcheck.
# The include option is only emitted when CUDA_INC is non-empty: a bare '-I'
# would otherwise take the following '--suppress=...' option as its directory.
# NOTE(review): cppcheck is told --std=c++17 while this file compiles with
# -std=c++20 - confirm which standard is wanted.
.PHONY: codecheck
codecheck: $(SOURCES)
	cppcheck --enable=all --inconclusive --std=c++17 $(addprefix -I,$(CUDA_INC)) --suppress=missingIncludeSystem $^
|
||||||
|
|
||||||
|
|
||||||
|
########################################################################
|
||||||
|
# get the detailed status of all optimization flags
|
||||||
|
# Print compiler version, the optimizer settings that result from CXXFLAGS,
# and CPU / topology information.  Phony: never produces a file named 'info'.
.PHONY: info
info:
	echo "detailed status of all optimization flags"
	$(CXX) --version
	$(CXX) -Q $(CXXFLAGS) --help=optimizers
	lscpu
	inxi -C
	lstopo
|
||||||
|
|
||||||
|
# Excellent hardware info
|
||||||
|
# hardinfo
|
||||||
|
# Live monitoring of CPU frequency etc.
|
||||||
|
# sudo i7z
|
||||||
|
|
||||||
|
# Memory consumption
|
||||||
|
# vmstat -at -SM 3
|
||||||
|
# xfce4-taskmanager
|
||||||
|
|
||||||
|
|
||||||
|
# https://www.tecmint.com/check-linux-cpu-information/
|
||||||
|
#https://www.tecmint.com/monitor-cpu-and-gpu-temperature-in-ubuntu/
|
||||||
|
|
||||||
|
# Debugging:
|
||||||
|
# https://wiki.archlinux.org/index.php/Debugging
|
||||||
30
Sheet5/Makefile
Normal file
30
Sheet5/Makefile
Normal file
|
|
@ -0,0 +1,30 @@
|
||||||
|
#
|
||||||
|
# use GNU-Compiler tools
|
||||||
|
# '?=' only supplies the default: a COMPILER exported from the shell or given
# on the make command line is respected (a plain '=' overrides the exported
# value and makes the shell variant described below ineffective).
COMPILER ?= GCC_
|
||||||
|
# alternatively from the shell
|
||||||
|
# export COMPILER=GCC_
|
||||||
|
# or, alternatively from the shell
|
||||||
|
# make COMPILER=GCC_
|
||||||
|
|
||||||
|
# use Intel compilers
|
||||||
|
#COMPILER=ICC_
|
||||||
|
|
||||||
|
# use PGI compilers
|
||||||
|
# COMPILER=PGI_
|
||||||
|
# Exercise tag; it is spliced into the name of the main source file.
EX = Ex1

# Translation units of the program and the object files derived from them.
SOURCES = main$(EX).cpp mylib.cpp bench_funcs.cpp
OBJECTS = $(patsubst %.cpp,%.o,$(SOURCES))

# The executable name carries the compiler tag.
PROGRAM = main.$(COMPILER)
|
||||||
|
|
||||||
|
# uncomment the next two lines for debugging and detailed performance analysis
|
||||||
|
# -g is added for the compile and for the link step.
CXXFLAGS  += -g
LINKFLAGS += -g
|
||||||
|
# do not use -pg with PGI compilers
|
||||||
|
|
||||||
|
# Fall back to the GNU settings when COMPILER is undefined or empty.
ifndef COMPILER
  COMPILER = GCC_
endif

# Read the flags and rules of the selected toolchain.
include $(COMPILER)default.mk
|
||||||
Loading…
Add table
Add a link
Reference in a new issue