many changes
This commit is contained in:
parent
3e9f2d5053
commit
7e2626266e
35 changed files with 276 additions and 6417 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -1,3 +1,4 @@
|
||||||
*.o
|
*.o
|
||||||
*.GCC_
|
*.GCC_
|
||||||
|
**/.vscode/
|
||||||
ex1/ABCEFG/skalar_stl/out_1.txt
|
ex1/ABCEFG/skalar_stl/out_1.txt
|
||||||
64
ex1/ABCEFG/.vscode/settings.json
vendored
64
ex1/ABCEFG/.vscode/settings.json
vendored
|
|
@ -1,64 +0,0 @@
|
||||||
{
|
|
||||||
"files.associations": {
|
|
||||||
"algorithm": "cpp",
|
|
||||||
"format": "cpp",
|
|
||||||
"iostream": "cpp",
|
|
||||||
"ostream": "cpp",
|
|
||||||
"array": "cpp",
|
|
||||||
"atomic": "cpp",
|
|
||||||
"bit": "cpp",
|
|
||||||
"cctype": "cpp",
|
|
||||||
"charconv": "cpp",
|
|
||||||
"chrono": "cpp",
|
|
||||||
"clocale": "cpp",
|
|
||||||
"cmath": "cpp",
|
|
||||||
"compare": "cpp",
|
|
||||||
"concepts": "cpp",
|
|
||||||
"cstdarg": "cpp",
|
|
||||||
"cstddef": "cpp",
|
|
||||||
"cstdint": "cpp",
|
|
||||||
"cstdio": "cpp",
|
|
||||||
"cstdlib": "cpp",
|
|
||||||
"cstring": "cpp",
|
|
||||||
"ctime": "cpp",
|
|
||||||
"cwchar": "cpp",
|
|
||||||
"cwctype": "cpp",
|
|
||||||
"deque": "cpp",
|
|
||||||
"list": "cpp",
|
|
||||||
"string": "cpp",
|
|
||||||
"unordered_map": "cpp",
|
|
||||||
"vector": "cpp",
|
|
||||||
"exception": "cpp",
|
|
||||||
"functional": "cpp",
|
|
||||||
"iterator": "cpp",
|
|
||||||
"memory": "cpp",
|
|
||||||
"memory_resource": "cpp",
|
|
||||||
"numeric": "cpp",
|
|
||||||
"optional": "cpp",
|
|
||||||
"random": "cpp",
|
|
||||||
"ratio": "cpp",
|
|
||||||
"string_view": "cpp",
|
|
||||||
"system_error": "cpp",
|
|
||||||
"tuple": "cpp",
|
|
||||||
"type_traits": "cpp",
|
|
||||||
"utility": "cpp",
|
|
||||||
"fstream": "cpp",
|
|
||||||
"initializer_list": "cpp",
|
|
||||||
"iomanip": "cpp",
|
|
||||||
"iosfwd": "cpp",
|
|
||||||
"istream": "cpp",
|
|
||||||
"limits": "cpp",
|
|
||||||
"new": "cpp",
|
|
||||||
"numbers": "cpp",
|
|
||||||
"span": "cpp",
|
|
||||||
"sstream": "cpp",
|
|
||||||
"stdexcept": "cpp",
|
|
||||||
"streambuf": "cpp",
|
|
||||||
"cinttypes": "cpp",
|
|
||||||
"typeinfo": "cpp",
|
|
||||||
"variant": "cpp",
|
|
||||||
"thread": "cpp",
|
|
||||||
"semaphore": "cpp",
|
|
||||||
"stop_token": "cpp"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,123 +0,0 @@
|
||||||
# Basic Definitions for using the Clang/LLVM compiler suite sequentially
|
|
||||||
# requires setting of COMPILER=CLANG_
|
|
||||||
|
|
||||||
#CLANGPATH=//usr/lib/llvm-10/bin/
|
|
||||||
CC = ${CLANGPATH}clang
|
|
||||||
CXX = ${CLANGPATH}clang++
|
|
||||||
#CXX = ${CLANGPATH}clang++ -lomptarget -fopenmp-targets=nvptx64-nvidia-cuda --cuda-path=/opt/pgi/linux86-64/2017/cuda/8.0
|
|
||||||
#F77 = gfortran
|
|
||||||
LINKER = ${CXX}
|
|
||||||
|
|
||||||
#http://clang.llvm.org/docs/UsersManual.html#options-to-control-error-and-warning-messages
|
|
||||||
WARNINGS += -Weverything -Wno-c++98-compat -Wno-sign-conversion -Wno-date-time -Wno-shorten-64-to-32 -Wno-padded -ferror-limit=1
|
|
||||||
WARNINGS += -Wdocumentation -Wconversion -Wshadow -Wfloat-conversion -pedantic
|
|
||||||
#-fsyntax-only -Wdocumentation -Wconversion -Wshadow -Wfloat-conversion -pedantic
|
|
||||||
|
|
||||||
CXXFLAGS += -O3 -std=c++17 -ferror-limit=1 ${WARNINGS}
|
|
||||||
# don't use -Ofast
|
|
||||||
# -ftrapv
|
|
||||||
LINKFLAGS += -O3
|
|
||||||
|
|
||||||
# different libraries in Ubuntu or Manjaro
|
|
||||||
ifndef UBUNTU
|
|
||||||
UBUNTU=1
|
|
||||||
endif
|
|
||||||
|
|
||||||
# BLAS, LAPACK
|
|
||||||
LINKFLAGS += -llapack -lblas
|
|
||||||
# -lopenblas
|
|
||||||
ifeq ($(UBUNTU),1)
|
|
||||||
# ubuntu
|
|
||||||
else
|
|
||||||
# on archlinux
|
|
||||||
LINKFLAGS += -lcblas
|
|
||||||
endif
|
|
||||||
|
|
||||||
# interprocedural optimization
|
|
||||||
CXXFLAGS += -flto
|
|
||||||
LINKFLAGS += -flto
|
|
||||||
|
|
||||||
# very good check
|
|
||||||
# http://clang.llvm.org/extra/clang-tidy/
|
|
||||||
# good check, see: http://llvm.org/docs/CodingStandards.html#include-style
|
|
||||||
# clang-tidy readability checks to switch off; referenced by the (currently commented-out)
# READABILITY definition, whose value is spliced into the -checks=... option of TIDYFLAGS.
# Kept as ONE comma-separated word: extending it with `+=` inserts a space in front of the
# appended text, which would split the -checks argument on the command line.
# The duplicated -readability-redundant-member-init entry has been removed.
SWITCH_OFF=,-readability-magic-numbers,-readability-redundant-control-flow,-readability-redundant-member-init,-readability-isolate-declaration
|
|
||||||
#READABILITY=,readability*${SWITCH_OFF}
|
|
||||||
#TIDYFLAGS = -checks=llvm-*,-llvm-header-guard -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp"
|
|
||||||
TIDYFLAGS = -checks=llvm-*,-llvm-header-guard${READABILITY} -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp"
|
|
||||||
#TIDYFLAGS += -checks='modernize*
|
|
||||||
# ???
|
|
||||||
#TIDYFLAGS = -checks='cert*' -header-filter=.*
|
|
||||||
# MPI checks ??
|
|
||||||
#TIDYFLAGS = -checks='mpi*'
|
|
||||||
# ??
|
|
||||||
#TIDYFLAGS = -checks='performance*' -header-filter=.*
|
|
||||||
#TIDYFLAGS = -checks='portability-*' -header-filter=.*
|
|
||||||
#TIDYFLAGS = -checks='readability-*' -header-filter=.*
|
|
||||||
|
|
||||||
default: ${PROGRAM}
|
|
||||||
|
|
||||||
${PROGRAM}: ${OBJECTS}
|
|
||||||
$(LINKER) $^ ${LINKFLAGS} -o $@
|
|
||||||
|
|
||||||
clean:
|
|
||||||
@rm -f ${PROGRAM} ${OBJECTS}
|
|
||||||
|
|
||||||
clean_all:: clean
|
|
||||||
@rm -f *_ *~ *.bak *.log *.out *.tar
|
|
||||||
|
|
||||||
codecheck: tidy_check
|
|
||||||
tidy_check:
|
|
||||||
clang-tidy ${SOURCES} ${TIDYFLAGS} -- ${SOURCES}
|
|
||||||
# see also http://clang-developers.42468.n3.nabble.com/Error-while-trying-to-load-a-compilation-database-td4049722.html
|
|
||||||
|
|
||||||
run: clean ${PROGRAM}
|
|
||||||
# time ./${PROGRAM} ${PARAMS}
|
|
||||||
./${PROGRAM} ${PARAMS}
|
|
||||||
|
|
||||||
# Archive the current directory (plus the *default.mk files of the parent directory)
# into ../<dirname>.tar
# $(CURDIR) is maintained by make itself, so the name stays correct under `make -C <dir>`,
# where the inherited ${PWD} may still name the directory make was started from.
MY_DIR = $(notdir $(CURDIR))
tar: clean_all
	@echo "Tar the directory: " ${MY_DIR}
# `&&` instead of `;`: a failing cd or tar now fails the target instead of being hidden
# by the exit status of a trailing `cd` (which had no effect, the shell ends afterwards).
	@cd .. && tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk
|
|
||||||
# tar cf `basename ${PWD}`.tar *
|
|
||||||
|
|
||||||
doc:
|
|
||||||
doxygen Doxyfile
|
|
||||||
|
|
||||||
#########################################################################
|
|
||||||
|
|
||||||
.cpp.o:
|
|
||||||
$(CXX) -c $(CXXFLAGS) -o $@ $<
|
|
||||||
|
|
||||||
.c.o:
|
|
||||||
$(CC) -c $(CFLAGS) -o $@ $<
|
|
||||||
|
|
||||||
.f.o:
|
|
||||||
$(F77) -c $(FFLAGS) -o $@ $<
|
|
||||||
|
|
||||||
##################################################################################################
|
|
||||||
# some tools
|
|
||||||
# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags)
|
|
||||||
cache: ${PROGRAM}
|
|
||||||
valgrind --tool=callgrind --simulate-cache=yes ./$^ ${PARAMS}
|
|
||||||
# kcachegrind callgrind.out.<pid> &
|
|
||||||
kcachegrind `ls -1tr callgrind.out.* |tail -1`
|
|
||||||
|
|
||||||
# Check for wrong memory accesses, memory leaks, ...
|
|
||||||
# use smaller data sets
|
|
||||||
mem: ${PROGRAM}
|
|
||||||
valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^ ${PARAMS}
|
|
||||||
|
|
||||||
# Simple run time profiling of your code
|
|
||||||
# CXXFLAGS += -g -pg
|
|
||||||
# LINKFLAGS += -pg
|
|
||||||
prof: ${PROGRAM}
|
|
||||||
perf record ./$^ ${PARAMS}
|
|
||||||
perf report
|
|
||||||
# gprof -b ./$^ > gp.out
|
|
||||||
# kprof -f gp.out -p gprof &
|
|
||||||
|
|
||||||
codecheck: tidy_check
|
|
||||||
|
|
@ -1,130 +0,0 @@
|
||||||
# Basic Definitions for using GNU-compiler suite sequentially
|
|
||||||
# requires setting of COMPILER=GCC_
|
|
||||||
|
|
||||||
CC = gcc
|
|
||||||
CXX = g++
|
|
||||||
F77 = gfortran
|
|
||||||
LINKER = ${CXX}
|
|
||||||
|
|
||||||
# on mephisto:
|
|
||||||
#CXXFLAGS += -I/share/apps/atlas/include
|
|
||||||
#LINKFLAGS += -L/share/apps/atlas/lib
|
|
||||||
#LINKFLAGS += -lcblas -latlas
|
|
||||||
|
|
||||||
#LINKFLAGS += -lblas
|
|
||||||
# The <cblas.h> header has to be wrapped in extern "C" so that g++ finds all symbols.
|
|
||||||
|
|
||||||
|
|
||||||
#WARNINGS = -pedantic -pedantic-errors -Wall -Wextra -Werror -Wconversion -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow
|
|
||||||
WARNINGS = -pedantic -Wall -Wextra -Wconversion -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow \
|
|
||||||
-Wredundant-decls -Winline -fmax-errors=1
|
|
||||||
# -Wunreachable-code
|
|
||||||
# -Wunreachable-code
|
|
||||||
CXXFLAGS += -ffast-math -O3 -march=native -std=c++17 ${WARNINGS}
|
|
||||||
#CXXFLAGS += -Ofast -funroll-all-loops -std=c++17 ${WARNINGS}
|
|
||||||
#-msse3
|
|
||||||
# -ftree-vectorizer-verbose=2 -DNDEBUG
|
|
||||||
# -ftree-vectorizer-verbose=5
|
|
||||||
# -ftree-vectorize -fdump-tree-vect-blocks=foo.dump -fdump-tree-pre=stderr
|
|
||||||
|
|
||||||
# CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details
|
|
||||||
# CFLAGS = -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2
|
|
||||||
# #CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
|
|
||||||
# FFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
|
|
||||||
# LFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
|
|
||||||
LINKFLAGS += -O3
|
|
||||||
|
|
||||||
# BLAS, LAPACK
|
|
||||||
OPENBLAS_DIR = /opt/openblas_GCCseq
|
|
||||||
#OPENBLAS_DIR = /opt/openblas_GCC
|
|
||||||
OPENBLAS_LIBDIR = ${OPENBLAS_DIR}/lib
|
|
||||||
OPENBLAS_INCDIR = ${OPENBLAS_DIR}/include
|
|
||||||
CXXFLAGS += -I${OPENBLAS_INCDIR}
|
|
||||||
LINKFLAGS += -L${OPENBLAS_LIBDIR} -lopenblas
|
|
||||||
|
|
||||||
# interprocedural optimization
|
|
||||||
CXXFLAGS += -flto
|
|
||||||
LINKFLAGS += -flto
|
|
||||||
|
|
||||||
# profiling tools
|
|
||||||
#CXXFLAGS += -pg
|
|
||||||
#LINKFLAGS += -pg
|
|
||||||
|
|
||||||
default: ${PROGRAM}
|
|
||||||
|
|
||||||
${PROGRAM}: ${OBJECTS}
|
|
||||||
$(LINKER) $^ ${LINKFLAGS} -o $@
|
|
||||||
|
|
||||||
clean:
|
|
||||||
@rm -f ${PROGRAM} ${OBJECTS}
|
|
||||||
|
|
||||||
# Extended cleanup on top of `clean`: backup files, logs, outputs, archives and the
# html directory (presumably the doxygen output of the `doc` target).
clean_all:: clean
	-@rm -f *_ *~ *.bak *.log *.out *.tar *.orig
# -f: stay silent when no html directory exists (plain `rm -r` printed an error each time)
	-@rm -rf html
|
|
||||||
|
|
||||||
run: clean ${PROGRAM}
|
|
||||||
# time ./${PROGRAM}
|
|
||||||
# ./${PROGRAM}
|
|
||||||
( export LD_LIBRARY_PATH=${OPENBLAS_LIBDIR}:${LD_LIBRARY_PATH} ; ./${PROGRAM} )
|
|
||||||
# or 'export LD_LIBRARY_PATH=/opt/openblas_gcc/lib:${LD_LIBRARY_PATH}' in your ~/.bashrc
|
|
||||||
|
|
||||||
# Archive the current directory (plus the *default.mk files of the parent directory)
# into ../<dirname>.tar
# $(CURDIR) is maintained by make itself, so the name stays correct under `make -C <dir>`,
# where the inherited ${PWD} may still name the directory make was started from.
MY_DIR = $(notdir $(CURDIR))
tar:
	@echo "Tar the directory: " ${MY_DIR}
# `&&` instead of `;`: a failing cd or tar now fails the target instead of being hidden
# by the exit status of a trailing `cd` (which had no effect, the shell ends afterwards).
	@cd .. && tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk
|
|
||||||
# tar cf `basename ${PWD}`.tar *
|
|
||||||
|
|
||||||
doc:
|
|
||||||
doxygen Doxyfile
|
|
||||||
|
|
||||||
#########################################################################
|
|
||||||
|
|
||||||
.cpp.o:
|
|
||||||
$(CXX) -c $(CXXFLAGS) -o $@ $<
|
|
||||||
|
|
||||||
.c.o:
|
|
||||||
$(CC) -c $(CFLAGS) -o $@ $<
|
|
||||||
|
|
||||||
.f.o:
|
|
||||||
$(F77) -c $(FFLAGS) -o $@ $<
|
|
||||||
|
|
||||||
##################################################################################################
|
|
||||||
# some tools
|
|
||||||
# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags)
|
|
||||||
cache: ${PROGRAM}
|
|
||||||
valgrind --tool=callgrind --simulate-cache=yes ./$^
|
|
||||||
# kcachegrind callgrind.out.<pid> &
|
|
||||||
kcachegrind `ls -1tr callgrind.out.* |tail -1`
|
|
||||||
|
|
||||||
# Check for wrong memory accesses, memory leaks, ...
|
|
||||||
# use smaller data sets
|
|
||||||
# no "-pg" in compile/link options
|
|
||||||
mem: ${PROGRAM}
|
|
||||||
valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
|
|
||||||
|
|
||||||
# Simple run time profiling of your code
|
|
||||||
# CXXFLAGS += -g -pg
|
|
||||||
# LINKFLAGS += -pg
|
|
||||||
prof: ${PROGRAM}
|
|
||||||
./$^
|
|
||||||
gprof -b ./$^ > gp.out
|
|
||||||
# kprof -f gp.out -p gprof &
|
|
||||||
|
|
||||||
#Trace your heap:
|
|
||||||
#> heaptrack ./main.GCC_
|
|
||||||
#> heaptrack_gui heaptrack.main.GCC_.<pid>.gz
|
|
||||||
# Record the heap allocations of ${PROGRAM} with heaptrack, then open the most recently
# written heaptrack.<program>.* file in heaptrack_gui (started in the background).
# The program argument defaults to 11 and can be overridden: make heap PARAMS="..."
heap: ${PROGRAM}
	heaptrack ./$^ $(if ${PARAMS},${PARAMS},11)
	heaptrack_gui `ls -1tr heaptrack.$^.* |tail -1` &
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
########################################################################
|
|
||||||
# get the detailed status of all optimization flags
|
|
||||||
info:
|
|
||||||
echo "detailed status of all optimization flags"
|
|
||||||
$(CXX) --version
|
|
||||||
$(CXX) -Q $(CXXFLAGS) --help=optimizers
|
|
||||||
|
|
@ -1,183 +0,0 @@
|
||||||
# Basic Definitions for using GNU-compiler suite sequentially
|
|
||||||
# requires setting of COMPILER=GCC_
|
|
||||||
|
|
||||||
CC = gcc
|
|
||||||
CXX = g++
|
|
||||||
F77 = gfortran
|
|
||||||
LINKER = ${CXX}
|
|
||||||
|
|
||||||
WARNINGS = -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow \
|
|
||||||
-Wredundant-decls
|
|
||||||
# -Wunreachable-code
|
|
||||||
CXXFLAGS += -ffast-math -O3 -march=native -std=c++17 ${WARNINGS}
|
|
||||||
#CXXFLAGS += -Ofast -funroll-all-loops -std=c++17 ${WARNINGS}
|
|
||||||
#-msse3
|
|
||||||
# -ftree-vectorizer-verbose=2 -DNDEBUG
|
|
||||||
# -ftree-vectorizer-verbose=5
|
|
||||||
# -ftree-vectorize -fdump-tree-vect-blocks=foo.dump -fdump-tree-pre=stderr
|
|
||||||
|
|
||||||
# CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details
|
|
||||||
# CFLAGS = -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2
|
|
||||||
# #CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
|
|
||||||
# FFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
|
|
||||||
# LFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
|
|
||||||
LINKFLAGS += -O3
|
|
||||||
|
|
||||||
#architecture
|
|
||||||
#CPU = -march=znver2
|
|
||||||
CXXFLAGS += ${CPU}
|
|
||||||
LINKFLAGS += ${CPU}
|
|
||||||
|
|
||||||
# different libraries in Ubuntu or Manjaro
|
|
||||||
ifndef UBUNTU
|
|
||||||
UBUNTU=1
|
|
||||||
endif
|
|
||||||
|
|
||||||
# BLAS, LAPACK
|
|
||||||
ifeq ($(UBUNTU),1)
|
|
||||||
LINKFLAGS += -llapack -lblas
|
|
||||||
# -lopenblas
|
|
||||||
else
|
|
||||||
# on archlinux
|
|
||||||
LINKFLAGS += -llapack -lopenblas -lcblas
|
|
||||||
endif
|
|
||||||
|
|
||||||
# interprocedural optimization
|
|
||||||
CXXFLAGS += -flto
|
|
||||||
LINKFLAGS += -flto
|
|
||||||
|
|
||||||
# for debugging purposes (safe code)
|
|
||||||
# -fsanitize=leak # only one out of the three can be used
|
|
||||||
# -fsanitize=address
|
|
||||||
# -fsanitize=thread
|
|
||||||
SANITARY = -fsanitize=address -fsanitize=undefined -fsanitize=null -fsanitize=return \
|
|
||||||
-fsanitize=bounds -fsanitize=alignment -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow \
|
|
||||||
-fsanitize=bool -fsanitize=enum -fsanitize=vptr
|
|
||||||
#CXXFLAGS += ${SANITARY}
|
|
||||||
#LINKFLAGS += ${SANITARY}
|
|
||||||
|
|
||||||
# profiling tools
|
|
||||||
#CXXFLAGS += -pg
|
|
||||||
#LINKFLAGS += -pg
|
|
||||||
|
|
||||||
|
|
||||||
default: ${PROGRAM}
|
|
||||||
|
|
||||||
${PROGRAM}: ${OBJECTS}
|
|
||||||
$(LINKER) $^ ${LINKFLAGS} -o $@
|
|
||||||
|
|
||||||
clean:
|
|
||||||
@rm -f ${PROGRAM} ${OBJECTS}
|
|
||||||
|
|
||||||
clean_all:: clean
|
|
||||||
-@rm -f *_ *~ *.bak *.log *.out *.tar *.orig *.optrpt
|
|
||||||
-@rm -rf html
|
|
||||||
|
|
||||||
run: clean ${PROGRAM}
|
|
||||||
#run: ${PROGRAM}
|
|
||||||
# time ./${PROGRAM} ${PARAMS}
|
|
||||||
./${PROGRAM} ${PARAMS}
|
|
||||||
|
|
||||||
# Archive the current directory (plus the *default.mk files of the parent directory)
# into ../<dirname>.tar
# $(CURDIR) is maintained by make itself, so the name stays correct under `make -C <dir>`,
# where the inherited ${PWD} may still name the directory make was started from.
MY_DIR = $(notdir $(CURDIR))
tar: clean_all
	@echo "Tar the directory: " ${MY_DIR}
# `&&` instead of `;`: a failing cd or tar now fails the target instead of being hidden
# by the exit status of a trailing `cd` (which had no effect, the shell ends afterwards).
	@cd .. && tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk
|
|
||||||
# tar cf `basename ${PWD}`.tar *
|
|
||||||
#find . -size +10M > large_files
|
|
||||||
#--exclude-from ${MY_DIR}/large_files
|
|
||||||
|
|
||||||
# Zip the current directory (plus the *default.mk files of the parent directory)
# into ../<dirname>.zip
zip: clean
	@echo "Zip the directory: " ${MY_DIR}
# `&&` instead of `;`: a failing cd or zip now fails the target instead of being hidden
# by the exit status of a trailing `cd` (which had no effect, the shell ends afterwards).
	@cd .. && zip -r ${MY_DIR}.zip ${MY_DIR} *default.mk
|
|
||||||
|
|
||||||
doc:
|
|
||||||
doxygen Doxyfile
|
|
||||||
|
|
||||||
#########################################################################
|
|
||||||
.SUFFIXES: .f90
|
|
||||||
|
|
||||||
.cpp.o:
|
|
||||||
$(CXX) -c $(CXXFLAGS) -o $@ $<
|
|
||||||
# $(CXX) -c $(CXXFLAGS) -o $@ $< 2>&1 | tee -a $<.log
|
|
||||||
# $(CXX) -c $(CXXFLAGS) -o $@ $< 2>&1 | tee -a $(<:.cpp=.log)
|
|
||||||
|
|
||||||
.c.o:
|
|
||||||
$(CC) -c $(CFLAGS) -o $@ $<
|
|
||||||
|
|
||||||
.f.o:
|
|
||||||
$(F77) -c $(FFLAGS) -o $@ $<
|
|
||||||
|
|
||||||
.f90.o:
|
|
||||||
$(F77) -c $(FFLAGS) -o $@ $<
|
|
||||||
|
|
||||||
##################################################################################################
|
|
||||||
# some tools
|
|
||||||
# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags)
|
|
||||||
cache: ${PROGRAM}
|
|
||||||
valgrind --tool=callgrind --simulate-cache=yes ./$^ ${PARAMS}
|
|
||||||
# kcachegrind callgrind.out.<pid> &
|
|
||||||
kcachegrind `ls -1tr callgrind.out.* |tail -1`
|
|
||||||
|
|
||||||
# Check for wrong memory accesses, memory leaks, ...
|
|
||||||
# use smaller data sets
|
|
||||||
# no "-pg" in compile/link options
|
|
||||||
mem: ${PROGRAM}
|
|
||||||
valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^ ${PARAMS}
|
|
||||||
# Graphical interface
|
|
||||||
# valkyrie
|
|
||||||
|
|
||||||
# Simple run time profiling of your code
|
|
||||||
# CXXFLAGS += -g -pg
|
|
||||||
# LINKFLAGS += -pg
|
|
||||||
prof: ${PROGRAM}
|
|
||||||
perf record ./$^ ${PARAMS}
|
|
||||||
perf report
|
|
||||||
# gprof -b ./$^ > gp.out
|
|
||||||
# kprof -f gp.out -p gprof &
|
|
||||||
|
|
||||||
# perf in Ubuntu 20.04: https://www.howtoforge.com/how-to-install-perf-performance-analysis-tool-on-ubuntu-20-04/
|
|
||||||
# * install
|
|
||||||
# * sudo vi /etc/sysctl.conf
|
|
||||||
# add kernel.perf_event_paranoid = 0
|
|
||||||
|
|
||||||
#Trace your heap:
|
|
||||||
#> heaptrack ./main.GCC_
|
|
||||||
#> heaptrack_gui heaptrack.main.GCC_.<pid>.gz
|
|
||||||
heap: ${PROGRAM}
|
|
||||||
heaptrack ./$^ ${PARAMS}
|
|
||||||
heaptrack_gui `ls -1tr heaptrack.$^.* |tail -1` &
|
|
||||||
|
|
||||||
codecheck: $(SOURCES)
|
|
||||||
cppcheck --enable=all --inconclusive --std=c++17 --suppress=missingIncludeSystem $^
|
|
||||||
|
|
||||||
|
|
||||||
########################################################################
|
|
||||||
# get the detailed status of all optimization flags
|
|
||||||
info:
|
|
||||||
echo "detailed status of all optimization flags"
|
|
||||||
$(CXX) --version
|
|
||||||
$(CXX) -Q $(CXXFLAGS) --help=optimizers
|
|
||||||
lscpu
|
|
||||||
inxi -C
|
|
||||||
lstopo
|
|
||||||
|
|
||||||
# Excellent hardware info
|
|
||||||
# hardinfo
|
|
||||||
# Live monitoring of CPU frequency etc.
|
|
||||||
# sudo i7z
|
|
||||||
|
|
||||||
# Memory consumption
|
|
||||||
# vmstat -at -SM 3
|
|
||||||
# xfce4-taskmanager
|
|
||||||
|
|
||||||
|
|
||||||
# https://www.tecmint.com/check-linux-cpu-information/
|
|
||||||
#https://www.tecmint.com/monitor-cpu-and-gpu-temperature-in-ubuntu/
|
|
||||||
|
|
||||||
# Debugging:
|
|
||||||
# https://wiki.archlinux.org/index.php/Debugging
|
|
||||||
|
|
@ -1,137 +0,0 @@
|
||||||
# Basic Definitions for using INTEL compiler suite sequentially
|
|
||||||
# requires setting of COMPILER=ICC_
|
|
||||||
|
|
||||||
#BINDIR = /opt/intel/bin/
|
|
||||||
|
|
||||||
# special on my sony [GH]
|
|
||||||
#BINDIR = /opt/save.intel/bin/
|
|
||||||
# very special on my sony [GH]
|
|
||||||
# FIND_LIBS = -L /opt/save.intel/composer_xe_2013.1.117/mkl/lib/intel64/libmkl_intel_lp64.so
|
|
||||||
|
|
||||||
# Error with g++-4.8 using icpc14.0,x:
|
|
||||||
# find directory wherein bits/c++config.h is located
|
|
||||||
# 'locate bits/c++config.h'
|
|
||||||
#FOUND_CONFIG = -I/usr/include/x86_64-linux-gnu/c++/4.8
|
|
||||||
|
|
||||||
|
|
||||||
CC = ${BINDIR}icc
|
|
||||||
CXX = ${BINDIR}icpc
|
|
||||||
F77 = ${BINDIR}ifort
|
|
||||||
LINKER = ${CXX}
|
|
||||||
|
|
||||||
|
|
||||||
WARNINGS = -Wall -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -wd2015,2012 -wn3
|
|
||||||
# -Winline -Wredundant-decls -Wunreachable-code
|
|
||||||
CXXFLAGS += -O3 -fargument-noalias -std=c++17 -DNDEBUG ${WARNINGS} -mkl ${FOUND_CONFIG}
|
|
||||||
# profiling tools
|
|
||||||
#CXXFLAGS += -pg
|
|
||||||
#LINKFLAGS += -pg
|
|
||||||
# -vec-report=3
|
|
||||||
# -qopt-report=5 -qopt-report-phase=vec
|
|
||||||
# -guide -parallel
|
|
||||||
# -guide-opts=string -guide-par[=n] -guide-vec[=n]
|
|
||||||
# -auto-p32 -simd
|
|
||||||
CXXFLAGS += -align
|
|
||||||
|
|
||||||
# use MKL by INTEL
|
|
||||||
# https://software.intel.com/content/www/us/en/develop/documentation/mkl-linux-developer-guide/top/linking-your-application-with-the-intel-math-kernel-library/linking-quick-start/using-the-mkl-compiler-option.html
|
|
||||||
# https://software.intel.com/content/www/us/en/develop/articles/intel-mkl-link-line-advisor.html
|
|
||||||
# LINKFLAGS += -L${BINDIR}../composer_xe_2013.1.117/mkl/lib/intel64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
|
|
||||||
#LINKFLAGS += -O3 -L/opt/intel/mkl/lib -mkl
|
|
||||||
LINKFLAGS += -O3 -mkl=sequential
|
|
||||||
|
|
||||||
# interprocedural optimization
|
|
||||||
CXXFLAGS += -ipo
|
|
||||||
LINKFLAGS += -ipo
|
|
||||||
|
|
||||||
# annotated assembler file
|
|
||||||
ANNOTED = -fsource-asm -S
|
|
||||||
|
|
||||||
default: ${PROGRAM}
|
|
||||||
|
|
||||||
${PROGRAM}: ${OBJECTS}
|
|
||||||
$(LINKER) $^ ${LINKFLAGS} -o $@
|
|
||||||
|
|
||||||
clean:
|
|
||||||
rm -f ${PROGRAM} ${OBJECTS}
|
|
||||||
|
|
||||||
clean_all:: clean
|
|
||||||
@rm -f *_ *~ *.bak *.log *.out *.tar
|
|
||||||
|
|
||||||
run: clean ${PROGRAM}
|
|
||||||
./${PROGRAM}
|
|
||||||
|
|
||||||
# Archive the current directory (plus the *default.mk files of the parent directory)
# into ../<dirname>.tar
# $(CURDIR) is maintained by make itself, so the name stays correct under `make -C <dir>`,
# where the inherited ${PWD} may still name the directory make was started from.
MY_DIR = $(notdir $(CURDIR))
tar: clean_all
	@echo "Tar the directory: " ${MY_DIR}
# `&&` instead of `;`: a failing cd or tar now fails the target instead of being hidden
# by the exit status of a trailing `cd` (which had no effect, the shell ends afterwards).
	@cd .. && tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk
|
|
||||||
# tar cf `basename ${PWD}`.tar *
|
|
||||||
|
|
||||||
doc:
|
|
||||||
doxygen Doxyfile
|
|
||||||
|
|
||||||
#########################################################################
|
|
||||||
|
|
||||||
.cpp.o:
|
|
||||||
$(CXX) -c $(CXXFLAGS) -o $@ $<
|
|
||||||
|
|
||||||
.c.o:
|
|
||||||
$(CC) -c $(CFLAGS) -o $@ $<
|
|
||||||
|
|
||||||
.f.o:
|
|
||||||
$(F77) -c $(FFLAGS) -o $@ $<
|
|
||||||
|
|
||||||
##################################################################################################
|
|
||||||
# # some tools
|
|
||||||
# # Cache behaviour (CXXFLAGS += -g tracks down to source lines)
|
|
||||||
# cache: ${PROGRAM}
|
|
||||||
# valgrind --tool=callgrind --simulate-cache=yes ./$^
|
|
||||||
# # kcachegrind callgrind.out.<pid> &
|
|
||||||
#
|
|
||||||
# # Check for wrong memory accesses, memory leaks, ...
|
|
||||||
# # use smaller data sets
|
|
||||||
# mem: ${PROGRAM}
|
|
||||||
# valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
|
|
||||||
#
|
|
||||||
# # Simple run time profiling of your code
|
|
||||||
# # CXXFLAGS += -g -pg
|
|
||||||
# # LINKFLAGS += -pg
|
|
||||||
# prof: ${PROGRAM}
|
|
||||||
# ./$^
|
|
||||||
# gprof -b ./$^ > gp.out
|
|
||||||
# # kprof -f gp.out -p gprof &
|
|
||||||
#
|
|
||||||
|
|
||||||
|
|
||||||
mem: inspector
|
|
||||||
prof: amplifier
|
|
||||||
cache: amplifier
|
|
||||||
|
|
||||||
gap_par_report:
|
|
||||||
${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt
|
|
||||||
|
|
||||||
# GUI for performance report
|
|
||||||
amplifier: ${PROGRAM}
|
|
||||||
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
|
|
||||||
# alternatively to the solution above:
|
|
||||||
#edit file /etc/sysctl.d/10-ptrace.conf and set variable kernel.yama.ptrace_scope variable to 0 .
|
|
||||||
amplxe-gui &
|
|
||||||
|
|
||||||
# GUI for Memory and Thread analyzer (race condition)
|
|
||||||
inspector: ${PROGRAM}
|
|
||||||
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
|
|
||||||
inspxe-gui &
|
|
||||||
|
|
||||||
advisor:
|
|
||||||
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
|
|
||||||
advixe-gui &
|
|
||||||
|
|
||||||
icc-info:
|
|
||||||
icpc -# main.cpp
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,176 +0,0 @@
|
||||||
# Basic Definitions for using INTEL compiler suite sequentially
|
|
||||||
# requires setting of COMPILER=ONEAPI_
|
|
||||||
|
|
||||||
# https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onemkl/link-line-advisor.html
|
|
||||||
# requires
|
|
||||||
# source /opt/intel/oneapi/setvars.sh
|
|
||||||
# on AMD: export MKL_DEBUG_CPU_TYPE=5
|
|
||||||
|
|
||||||
#BINDIR = /opt/intel/oneapi/compiler/latest/linux/bin/
|
|
||||||
#MKL_ROOT = /opt/intel/oneapi/mkl/latest/
|
|
||||||
#export KMP_AFFINITY=verbose,compact
|
|
||||||
|
|
||||||
CC = ${BINDIR}icc
|
|
||||||
CXX = ${BINDIR}dpcpp
|
|
||||||
F77 = ${BINDIR}ifort
|
|
||||||
LINKER = ${CXX}
|
|
||||||
|
|
||||||
## Compiler flags
|
|
||||||
WARNINGS = -Wall -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -pedantic
|
|
||||||
WARNINGS += -Wpessimizing-move -Wredundant-move
|
|
||||||
#-wd2015,2012,2014 -wn3
|
|
||||||
# -Winline -Wredundant-decls -Wunreachable-code
|
|
||||||
# -qopt-subscript-in-range
|
|
||||||
# -vec-threshold0
|
|
||||||
|
|
||||||
CXXFLAGS += -O3 -std=c++17 ${WARNINGS}
|
|
||||||
#CXXFLAGS += -DMKL_ILP64 -I"${MKLROOT}/include"
|
|
||||||
#CXXFLAGS += -DMKL_ILP32 -I"${MKLROOT}/include"
|
|
||||||
LINKFLAGS += -O3
|
|
||||||
|
|
||||||
# interprocedural optimization
|
|
||||||
CXXFLAGS += -ipo
|
|
||||||
LINKFLAGS += -ipo
|
|
||||||
LINKFLAGS += -flto
|
|
||||||
|
|
||||||
# annotated Assembler file
|
|
||||||
ANNOTED = -fsource-asm -S
|
|
||||||
|
|
||||||
#architecture
|
|
||||||
CPU = -march=core-avx2
|
|
||||||
#CPU += -mtp=zen
|
|
||||||
# -xCORE-AVX2
|
|
||||||
# -axcode COMMON-AVX512 -axcode MIC-AVX512 -axcode CORE-AVX512 -axcode CORE-AVX2
|
|
||||||
CXXFLAGS += ${CPU}
|
|
||||||
LINKFLAGS += ${CPU}
|
|
||||||
|
|
||||||
# use MKL by INTEL
|
|
||||||
# https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onemkl/link-line-advisor.html
|
|
||||||
# sequential MKL
|
|
||||||
# use the LP64 interface (32-bit integers) instead of the ILP64 interface (64-bit integers)
|
|
||||||
CXXFLAGS += -qmkl=sequential -UMKL_ILP64
|
|
||||||
LINKFLAGS += -O3 -qmkl=sequential -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
|
|
||||||
#LINKFLAGS += -O3 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
|
|
||||||
|
|
||||||
# shared libs: https://aur.archlinux.org/packages/intel-oneapi-compiler-static
|
|
||||||
# install intel-oneapi-compiler-static
|
|
||||||
# or
|
|
||||||
LINKFLAGS += -shared-intel
|
|
||||||
|
|
||||||
|
|
||||||
OPENMP = -qopenmp
|
|
||||||
CXXFLAGS += ${OPENMP}
|
|
||||||
LINKFLAGS += ${OPENMP}
|
|
||||||
|
|
||||||
|
|
||||||
# profiling tools
|
|
||||||
#CXXFLAGS += -pg
|
|
||||||
#LINKFLAGS += -pg
|
|
||||||
# -vec-report=3
|
|
||||||
# -qopt-report=5 -qopt-report-phase=vec -qopt-report-phase=openmp
|
|
||||||
# -guide -parallel
|
|
||||||
# -guide-opts=string -guide-par[=n] -guide-vec[=n]
|
|
||||||
# -auto-p32 -simd
|
|
||||||
|
|
||||||
# Reports: https://software.intel.com/en-us/articles/getting-the-most-out-of-your-intel-compiler-with-the-new-optimization-reports
|
|
||||||
#CXXFLAGS += -qopt-report=5 -qopt-report-phase=vec,par
|
|
||||||
#CXXFLAGS += -qopt-report=5 -qopt-report-phase=cg
|
|
||||||
# Redirect report from *.optrpt to stderr
|
|
||||||
# -qopt-report-file=stderr
|
|
||||||
# Guided parallelization
|
|
||||||
# -guide -parallel
|
|
||||||
# -guide-opts=string -guide-par[=n] -guide-vec[=n]
|
|
||||||
# -auto-p32 -simd
|
|
||||||
|
|
||||||
## run time checks
|
|
||||||
# https://www.intel.com/content/www/us/en/develop/documentation/fortran-compiler-oneapi-dev-guide-and-reference/top/compiler-reference/compiler-options/offload-openmp-and-parallel-processing-options/par-runtime-control-qpar-runtime-control.html
|
|
||||||
|
|
||||||
|
|
||||||
default: ${PROGRAM}
|
|
||||||
|
|
||||||
${PROGRAM}: ${OBJECTS}
|
|
||||||
$(LINKER) $^ ${LINKFLAGS} -o $@
|
|
||||||
|
|
||||||
clean:
|
|
||||||
rm -f ${PROGRAM} ${OBJECTS} *.optrpt
|
|
||||||
|
|
||||||
clean_all:: clean
|
|
||||||
@rm -f *_ *~ *.bak *.log *.out *.tar
|
|
||||||
|
|
||||||
run: clean ${PROGRAM}
|
|
||||||
./${PROGRAM}
|
|
||||||
|
|
||||||
# Archive the current directory (plus the *default.mk files of the parent directory)
# into ../<dirname>.tar
# $(CURDIR) is maintained by make itself, so the name stays correct under `make -C <dir>`,
# where the inherited ${PWD} may still name the directory make was started from.
MY_DIR = $(notdir $(CURDIR))
tar: clean_all
	@echo "Tar the directory: " ${MY_DIR}
# `&&` instead of `;`: a failing cd or tar now fails the target instead of being hidden
# by the exit status of a trailing `cd` (which had no effect, the shell ends afterwards).
	@cd .. && tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk
|
|
||||||
# tar cf `basename ${PWD}`.tar *
|
|
||||||
|
|
||||||
doc:
|
|
||||||
doxygen Doxyfile
|
|
||||||
|
|
||||||
#########################################################################
|
|
||||||
|
|
||||||
.cpp.o:
|
|
||||||
$(CXX) -c $(CXXFLAGS) -o $@ $<
|
|
||||||
|
|
||||||
.c.o:
|
|
||||||
$(CC) -c $(CFLAGS) -o $@ $<
|
|
||||||
|
|
||||||
.f.o:
|
|
||||||
$(F77) -c $(FFLAGS) -o $@ $<
|
|
||||||
|
|
||||||
##################################################################################################
|
|
||||||
# some tools
|
|
||||||
# Cache behaviour (CXXFLAGS += -g tracks down to source lines)
|
|
||||||
# https://software.intel.com/content/www/us/en/develop/documentation/vtune-help/top/analyze-performance/microarchitecture-analysis-group/memory-access-analysis.html
|
|
||||||
|
|
||||||
mem: inspector
|
|
||||||
prof: vtune
|
|
||||||
cache: inspector
|
|
||||||
|
|
||||||
gap_par_report:
|
|
||||||
${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt
|
|
||||||
|
|
||||||
# GUI for performance report
|
|
||||||
amplifier: ${PROGRAM}
|
|
||||||
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
|
|
||||||
echo 0 | sudo tee /proc/sys/kernel/perf_event_paranoid
|
|
||||||
amplxe-gui &
|
|
||||||
|
|
||||||
# GUI for Memory and Thread analyzer (race condition)
|
|
||||||
inspector: ${PROGRAM}
|
|
||||||
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
|
|
||||||
# inspxe-gui &
|
|
||||||
vtune-gui ./${PROGRAM} &
|
|
||||||
|
|
||||||
# Run an Intel Advisor survey on ${PROGRAM} and export the report as CSV to ./out/survey.csv
# Alternatives kept for reference:
#   /opt/intel/oneapi/advisor/latest/bin64/advixe-gui &
#   advisor --collect=roofline ./${PROGRAM}
# Every recipe line runs in its own shell, so the environment script and the advisor calls
# are chained into ONE shell; on a separate line the sourced settings were lost again.
# `.` is the POSIX spelling of `source`, which the default /bin/sh may not provide.
# NOTE(review): assumes advixe-vars.sh can be read by the recipe shell - confirm, or set SHELL to bash.
advisor:
	. /opt/intel/oneapi/advisor/2021.2.0/advixe-vars.sh && \
	advisor --collect=survey ./${PROGRAM} && \
	advisor --report=survey --project-dir=./ src:r=./ --format=csv --report-output=./out/survey.csv
|
|
||||||
|
|
||||||
# Collect a hotspots profile of ${PROGRAM} with VTune and write the text report to vtune.out
# https://software.intel.com/en-us/articles/intel-advisor-2017-update-1-what-s-new
# Alternative kept for reference:
#   vtune-gui ./${PROGRAM} &
vtune:
	echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
# Every recipe line runs in its own shell: an `export` on a line of its own never reaches
# the following commands, so the export and both vtune calls share one shell here.
	export ADVIXE_EXPERIMENTAL=roofline && \
	vtune -collect hotspots ./${PROGRAM} && \
	vtune -report hotspots -r r000hs > vtune.out
|
|
||||||
|
|
||||||
icc-info:
|
|
||||||
icpc -# main.cpp
|
|
||||||
|
|
||||||
# MKL on AMD
|
|
||||||
# https://www.computerbase.de/2019-11/mkl-workaround-erhoeht-leistung-auf-amd-ryzen/
|
|
||||||
#
|
|
||||||
# https://sites.google.com/a/uci.edu/mingru-yang/programming/mkl-has-bad-performance-on-an-amd-cpu
|
|
||||||
# export MKL_DEBUG_CPU_TYPE=5
|
|
||||||
# export MKL_NUM_THREADS=1
|
|
||||||
# export MKL_DYNAMIC=false
|
|
||||||
# on Intel compiler
|
|
||||||
# http://publicclu2.blogspot.com/2013/05/intel-complier-suite-reference-card.html
|
|
||||||
|
|
@ -1,93 +0,0 @@
|
||||||
# Basic Definitions for using PGI-compiler suite sequentially
|
|
||||||
# requires setting of COMPILER=PGI_
|
|
||||||
# OPTIRUN = optirun
|
|
||||||
|
|
||||||
|
|
||||||
CC = pgcc
|
|
||||||
CXX = pgc++
|
|
||||||
F77 = pgfortran
|
|
||||||
LINKER = ${CXX}
|
|
||||||
|
|
||||||
# on mephisto:
|
|
||||||
#CXXFLAGS += -I/share/apps/atlas/include
|
|
||||||
#LINKFLAGS += -L/share/apps/atlas/lib
|
|
||||||
#LINKFLAGS += -lcblas -latlas
|
|
||||||
|
|
||||||
#LINKFLAGS += -lblas
|
|
||||||
# The <cblas.h> header must be wrapped in extern "C" so that g++ can resolve all symbols.
|
|
||||||
|
|
||||||
WARNINGS = -Minform=warn
|
|
||||||
# -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -W -Wfloat-equal -Wshadow -Wredundant-decls
|
|
||||||
# -pedantic -Wunreachable-code -Wextra -Winline
|
|
||||||
# -Wunreachable-code
|
|
||||||
|
|
||||||
#PGI_PROFILING = -Minfo=ccff,loop,vect,opt,intensity,mp,accel
|
|
||||||
PGI_PROFILING = -Minfo=ccff,accel,ipa,loop,lre,mp,opt,par,unified,vect,intensity
|
|
||||||
# -Minfo
|
|
||||||
# -Mprof=time
|
|
||||||
# -Mprof=lines
|
|
||||||
# take care with option -Msafeptr
|
|
||||||
CXXFLAGS += -O3 -std=c++17 ${WARNINGS}
|
|
||||||
#CXXFLAGS += -O3 -std=c++11 -DNDEBUG ${PGI_PROFILING} ${WARNINGS}
|
|
||||||
# -fastsse -fargument-noalias ${WARNINGS} -msse3 -vec-report=3
|
|
||||||
|
|
||||||
default: ${PROGRAM}
|
|
||||||
|
|
||||||
${PROGRAM}: ${OBJECTS}
|
|
||||||
$(LINKER) $^ ${LINKFLAGS} -o $@
|
|
||||||
|
|
||||||
clean:
|
|
||||||
@rm -f ${PROGRAM} ${OBJECTS}
|
|
||||||
|
|
||||||
clean_all:: clean
|
|
||||||
@rm -f *_ *~ *.bak *.log *.out *.tar
|
|
||||||
|
|
||||||
run: clean ${PROGRAM}
|
|
||||||
./${PROGRAM}
|
|
||||||
|
|
||||||
# tar the current directory
|
|
||||||
MY_DIR = `basename ${PWD}`
|
|
||||||
tar: clean_all
|
|
||||||
@echo "Tar the directory: " ${MY_DIR}
|
|
||||||
@cd .. ;\
|
|
||||||
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
|
|
||||||
cd ${MY_DIR}
|
|
||||||
# tar cf `basename ${PWD}`.tar *
|
|
||||||
|
|
||||||
doc:
|
|
||||||
doxygen Doxyfile
|
|
||||||
|
|
||||||
#########################################################################
|
|
||||||
|
|
||||||
.cpp.o:
|
|
||||||
$(CXX) -c $(CXXFLAGS) -o $@ $<
|
|
||||||
|
|
||||||
.c.o:
|
|
||||||
$(CC) -c $(CFLAGS) -o $@ $<
|
|
||||||
|
|
||||||
.f.o:
|
|
||||||
$(F77) -c $(FFLAGS) -o $@ $<
|
|
||||||
|
|
||||||
##################################################################################################
|
|
||||||
# # some tools
|
|
||||||
# # Simple run time profiling of your code
|
|
||||||
# # CXXFLAGS += -g -pg
|
|
||||||
# # LINKFLAGS += -pg
|
|
||||||
|
|
||||||
|
|
||||||
# Profiling options PGI, see: pgcollect -help
|
|
||||||
# CPU_PROF = -allcache
|
|
||||||
CPU_PROF = -time
|
|
||||||
# GPU_PROF = -cuda=gmem,branch,cc13 -cudainit
|
|
||||||
#GPU_PROF = -cuda=branch:cc20
|
|
||||||
#
|
|
||||||
PROF_FILE = pgprof.out
|
|
||||||
|
|
||||||
cache: prof
|
|
||||||
|
|
||||||
prof: ${PROGRAM}
|
|
||||||
${OPTIRUN} ${BINDIR}pgcollect $(CPU_PROF) ./$^
|
|
||||||
${OPTIRUN} ${BINDIR}pgprof -exe ./$^ $(PROF_FILE) &
|
|
||||||
|
|
||||||
info:
|
|
||||||
pgaccelinfo -v
|
|
||||||
|
|
@ -1,30 +0,0 @@
|
||||||
#
|
|
||||||
# use GNU-Compiler tools
|
|
||||||
COMPILER=GCC_
|
|
||||||
# alternatively from the shell
|
|
||||||
# export COMPILER=GCC_
|
|
||||||
# or, alternatively from the shell
|
|
||||||
# make COMPILER=GCC_
|
|
||||||
|
|
||||||
# use Intel compilers
|
|
||||||
#COMPILER=ICC_
|
|
||||||
|
|
||||||
# use PGI compilers
|
|
||||||
# COMPILER=PGI_
|
|
||||||
|
|
||||||
|
|
||||||
SOURCES = main.cpp mylib.cpp
|
|
||||||
OBJECTS = $(SOURCES:.cpp=.o)
|
|
||||||
|
|
||||||
PROGRAM = main.${COMPILER}
|
|
||||||
|
|
||||||
# uncomment the next two lines for debugging and detailed performance analysis
|
|
||||||
CXXFLAGS += -g
|
|
||||||
LINKFLAGS += -g
|
|
||||||
# do not use -pg with PGI compilers
|
|
||||||
|
|
||||||
ifndef COMPILER
|
|
||||||
COMPILER=GCC_
|
|
||||||
endif
|
|
||||||
|
|
||||||
include ../${COMPILER}default.mk
|
|
||||||
60
ex1/D/.vscode/settings.json
vendored
60
ex1/D/.vscode/settings.json
vendored
|
|
@ -1,60 +0,0 @@
|
||||||
{
|
|
||||||
"files.associations": {
|
|
||||||
"array": "cpp",
|
|
||||||
"atomic": "cpp",
|
|
||||||
"bit": "cpp",
|
|
||||||
"cctype": "cpp",
|
|
||||||
"charconv": "cpp",
|
|
||||||
"chrono": "cpp",
|
|
||||||
"clocale": "cpp",
|
|
||||||
"cmath": "cpp",
|
|
||||||
"compare": "cpp",
|
|
||||||
"complex": "cpp",
|
|
||||||
"concepts": "cpp",
|
|
||||||
"cstdarg": "cpp",
|
|
||||||
"cstddef": "cpp",
|
|
||||||
"cstdint": "cpp",
|
|
||||||
"cstdio": "cpp",
|
|
||||||
"cstdlib": "cpp",
|
|
||||||
"cstring": "cpp",
|
|
||||||
"ctime": "cpp",
|
|
||||||
"cwchar": "cpp",
|
|
||||||
"cwctype": "cpp",
|
|
||||||
"deque": "cpp",
|
|
||||||
"string": "cpp",
|
|
||||||
"unordered_map": "cpp",
|
|
||||||
"vector": "cpp",
|
|
||||||
"exception": "cpp",
|
|
||||||
"algorithm": "cpp",
|
|
||||||
"functional": "cpp",
|
|
||||||
"iterator": "cpp",
|
|
||||||
"memory": "cpp",
|
|
||||||
"memory_resource": "cpp",
|
|
||||||
"numeric": "cpp",
|
|
||||||
"optional": "cpp",
|
|
||||||
"random": "cpp",
|
|
||||||
"ratio": "cpp",
|
|
||||||
"string_view": "cpp",
|
|
||||||
"system_error": "cpp",
|
|
||||||
"tuple": "cpp",
|
|
||||||
"type_traits": "cpp",
|
|
||||||
"utility": "cpp",
|
|
||||||
"format": "cpp",
|
|
||||||
"initializer_list": "cpp",
|
|
||||||
"iomanip": "cpp",
|
|
||||||
"iosfwd": "cpp",
|
|
||||||
"iostream": "cpp",
|
|
||||||
"istream": "cpp",
|
|
||||||
"limits": "cpp",
|
|
||||||
"new": "cpp",
|
|
||||||
"numbers": "cpp",
|
|
||||||
"ostream": "cpp",
|
|
||||||
"span": "cpp",
|
|
||||||
"sstream": "cpp",
|
|
||||||
"stdexcept": "cpp",
|
|
||||||
"streambuf": "cpp",
|
|
||||||
"cinttypes": "cpp",
|
|
||||||
"typeinfo": "cpp",
|
|
||||||
"variant": "cpp"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,123 +0,0 @@
|
||||||
# Basic Definitions for using the Clang/LLVM compiler suite sequentially
|
|
||||||
# requires setting of COMPILER=CLANG_
|
|
||||||
|
|
||||||
#CLANGPATH=//usr/lib/llvm-10/bin/
|
|
||||||
CC = ${CLANGPATH}clang
|
|
||||||
CXX = ${CLANGPATH}clang++
|
|
||||||
#CXX = ${CLANGPATH}clang++ -lomptarget -fopenmp-targets=nvptx64-nvidia-cuda --cuda-path=/opt/pgi/linux86-64/2017/cuda/8.0
|
|
||||||
#F77 = gfortran
|
|
||||||
LINKER = ${CXX}
|
|
||||||
|
|
||||||
#http://clang.llvm.org/docs/UsersManual.html#options-to-control-error-and-warning-messages
|
|
||||||
WARNINGS += -Weverything -Wno-c++98-compat -Wno-sign-conversion -Wno-date-time -Wno-shorten-64-to-32 -Wno-padded -ferror-limit=1
|
|
||||||
WARNINGS += -Wdocumentation -Wconversion -Wshadow -Wfloat-conversion -pedantic
|
|
||||||
#-fsyntax-only -Wdocumentation -Wconversion -Wshadow -Wfloat-conversion -pedantic
|
|
||||||
|
|
||||||
CXXFLAGS += -O3 -std=c++17 -ferror-limit=1 ${WARNINGS}
|
|
||||||
# don't use -Ofast
|
|
||||||
# -ftrapv
|
|
||||||
LINKFLAGS += -O3
|
|
||||||
|
|
||||||
# different libraries in Ubuntu or Manjaro
|
|
||||||
ifndef UBUNTU
|
|
||||||
UBUNTU=1
|
|
||||||
endif
|
|
||||||
|
|
||||||
# BLAS, LAPACK
|
|
||||||
LINKFLAGS += -llapack -lblas
|
|
||||||
# -lopenblas
|
|
||||||
ifeq ($(UBUNTU),1)
|
|
||||||
# ubuntu
|
|
||||||
else
|
|
||||||
# on archlinux
|
|
||||||
LINKFLAGS += -lcblas
|
|
||||||
endif
|
|
||||||
|
|
||||||
# interprocedural optimization
|
|
||||||
CXXFLAGS += -flto
|
|
||||||
LINKFLAGS += -flto
|
|
||||||
|
|
||||||
# very good check
|
|
||||||
# http://clang.llvm.org/extra/clang-tidy/
|
|
||||||
# good check, see: http://llvm.org/docs/CodingStandards.html#include-style
|
|
||||||
SWITCH_OFF=,-readability-magic-numbers,-readability-redundant-control-flow,-readability-redundant-member-init
|
|
||||||
SWITCH_OFF+=,-readability-redundant-member-init,-readability-isolate-declaration
|
|
||||||
#READABILITY=,readability*${SWITCH_OFF}
|
|
||||||
#TIDYFLAGS = -checks=llvm-*,-llvm-header-guard -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp"
|
|
||||||
TIDYFLAGS = -checks=llvm-*,-llvm-header-guard${READABILITY} -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp"
|
|
||||||
#TIDYFLAGS += -checks='modernize*
|
|
||||||
# ???
|
|
||||||
#TIDYFLAGS = -checks='cert*' -header-filter=.*
|
|
||||||
# MPI checks ??
|
|
||||||
#TIDYFLAGS = -checks='mpi*'
|
|
||||||
# ??
|
|
||||||
#TIDYFLAGS = -checks='performance*' -header-filter=.*
|
|
||||||
#TIDYFLAGS = -checks='portability-*' -header-filter=.*
|
|
||||||
#TIDYFLAGS = -checks='readability-*' -header-filter=.*
|
|
||||||
|
|
||||||
default: ${PROGRAM}
|
|
||||||
|
|
||||||
${PROGRAM}: ${OBJECTS}
|
|
||||||
$(LINKER) $^ ${LINKFLAGS} -o $@
|
|
||||||
|
|
||||||
clean:
|
|
||||||
@rm -f ${PROGRAM} ${OBJECTS}
|
|
||||||
|
|
||||||
clean_all:: clean
|
|
||||||
@rm -f *_ *~ *.bak *.log *.out *.tar
|
|
||||||
|
|
||||||
codecheck: tidy_check
|
|
||||||
tidy_check:
|
|
||||||
clang-tidy ${SOURCES} ${TIDYFLAGS} -- ${SOURCES}
|
|
||||||
# see also http://clang-developers.42468.n3.nabble.com/Error-while-trying-to-load-a-compilation-database-td4049722.html
|
|
||||||
|
|
||||||
run: clean ${PROGRAM}
|
|
||||||
# time ./${PROGRAM} ${PARAMS}
|
|
||||||
./${PROGRAM} ${PARAMS}
|
|
||||||
|
|
||||||
# tar the current directory
|
|
||||||
MY_DIR = `basename ${PWD}`
|
|
||||||
tar: clean_all
|
|
||||||
@echo "Tar the directory: " ${MY_DIR}
|
|
||||||
@cd .. ;\
|
|
||||||
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
|
|
||||||
cd ${MY_DIR}
|
|
||||||
# tar cf `basename ${PWD}`.tar *
|
|
||||||
|
|
||||||
doc:
|
|
||||||
doxygen Doxyfile
|
|
||||||
|
|
||||||
#########################################################################
|
|
||||||
|
|
||||||
.cpp.o:
|
|
||||||
$(CXX) -c $(CXXFLAGS) -o $@ $<
|
|
||||||
|
|
||||||
.c.o:
|
|
||||||
$(CC) -c $(CFLAGS) -o $@ $<
|
|
||||||
|
|
||||||
.f.o:
|
|
||||||
$(F77) -c $(FFLAGS) -o $@ $<
|
|
||||||
|
|
||||||
##################################################################################################
|
|
||||||
# some tools
|
|
||||||
# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags)
|
|
||||||
cache: ${PROGRAM}
|
|
||||||
valgrind --tool=callgrind --simulate-cache=yes ./$^ ${PARAMS}
|
|
||||||
# kcachegrind callgrind.out.<pid> &
|
|
||||||
kcachegrind `ls -1tr callgrind.out.* |tail -1`
|
|
||||||
|
|
||||||
# Check for wrong memory accesses, memory leaks, ...
|
|
||||||
# use smaller data sets
|
|
||||||
mem: ${PROGRAM}
|
|
||||||
valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^ ${PARAMS}
|
|
||||||
|
|
||||||
# Simple run time profiling of your code
|
|
||||||
# CXXFLAGS += -g -pg
|
|
||||||
# LINKFLAGS += -pg
|
|
||||||
prof: ${PROGRAM}
|
|
||||||
perf record ./$^ ${PARAMS}
|
|
||||||
perf report
|
|
||||||
# gprof -b ./$^ > gp.out
|
|
||||||
# kprof -f gp.out -p gprof &
|
|
||||||
|
|
||||||
codecheck: tidy_check
|
|
||||||
|
|
@ -1,130 +0,0 @@
|
||||||
# Basic Definitions for using GNU-compiler suite sequentially
|
|
||||||
# requires setting of COMPILER=GCC_
|
|
||||||
|
|
||||||
CC = gcc
|
|
||||||
CXX = g++
|
|
||||||
F77 = gfortran
|
|
||||||
LINKER = ${CXX}
|
|
||||||
|
|
||||||
# on mephisto:
|
|
||||||
#CXXFLAGS += -I/share/apps/atlas/include
|
|
||||||
#LINKFLAGS += -L/share/apps/atlas/lib
|
|
||||||
#LINKFLAGS += -lcblas -latlas
|
|
||||||
|
|
||||||
#LINKFLAGS += -lblas
|
|
||||||
# The <cblas.h> header must be wrapped in extern "C" so that g++ can resolve all symbols.
|
|
||||||
|
|
||||||
|
|
||||||
#WARNINGS = -pedantic -pedantic-errors -Wall -Wextra -Werror -Wconversion -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow
|
|
||||||
WARNINGS = -pedantic -Wall -Wextra -Wconversion -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow \
|
|
||||||
-Wredundant-decls -Winline -fmax-errors=1
|
|
||||||
# -Wunreachable-code
|
|
||||||
# -Wunreachable-code
|
|
||||||
CXXFLAGS += -ffast-math -O3 -march=native -std=c++17 ${WARNINGS}
|
|
||||||
#CXXFLAGS += -Ofast -funroll-all-loops -std=c++17 ${WARNINGS}
|
|
||||||
#-msse3
|
|
||||||
# -ftree-vectorizer-verbose=2 -DNDEBUG
|
|
||||||
# -ftree-vectorizer-verbose=5
|
|
||||||
# -ftree-vectorize -fdump-tree-vect-blocks=foo.dump -fdump-tree-pre=stderr
|
|
||||||
|
|
||||||
# CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details
|
|
||||||
# CFLAGS = -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2
|
|
||||||
# #CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
|
|
||||||
# FFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
|
|
||||||
# LFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
|
|
||||||
LINKFLAGS += -O3
|
|
||||||
|
|
||||||
# BLAS, LAPACK
|
|
||||||
OPENBLAS_DIR = /opt/openblas_GCCseq
|
|
||||||
#OPENBLAS_DIR = /opt/openblas_GCC
|
|
||||||
OPENBLAS_LIBDIR = ${OPENBLAS_DIR}/lib
|
|
||||||
OPENBLAS_INCDIR = ${OPENBLAS_DIR}/include
|
|
||||||
CXXFLAGS += -I${OPENBLAS_INCDIR}
|
|
||||||
LINKFLAGS += -L${OPENBLAS_LIBDIR} -lopenblas
|
|
||||||
|
|
||||||
# interprocedural optimization
|
|
||||||
CXXFLAGS += -flto
|
|
||||||
LINKFLAGS += -flto
|
|
||||||
|
|
||||||
# profiling tools
|
|
||||||
#CXXFLAGS += -pg
|
|
||||||
#LINKFLAGS += -pg
|
|
||||||
|
|
||||||
default: ${PROGRAM}
|
|
||||||
|
|
||||||
${PROGRAM}: ${OBJECTS}
|
|
||||||
$(LINKER) $^ ${LINKFLAGS} -o $@
|
|
||||||
|
|
||||||
clean:
|
|
||||||
@rm -f ${PROGRAM} ${OBJECTS}
|
|
||||||
|
|
||||||
# Extended clean-up: after "clean" has removed the program and its objects,
# also delete backup, log, output and archive files and the html directory
# (presumably generated documentation - confirm).
# "rm -rf" is used for the directory so that a missing html directory is not
# reported as an (ignored) error, matching the other GCC variant of this file.
# The leading "-" keeps make going even if a removal fails.
clean_all:: clean
	-@rm -f *_ *~ *.bak *.log *.out *.tar *.orig
	-@rm -rf html
|
|
||||||
|
|
||||||
run: clean ${PROGRAM}
|
|
||||||
# time ./${PROGRAM}
|
|
||||||
# ./${PROGRAM}
|
|
||||||
( export LD_LIBRARY_PATH=${OPENBLAS_LIBDIR}:${LD_LIBRARY_PATH} ; ./${PROGRAM} )
|
|
||||||
# or 'export LD_LIBRARY_PATH=/opt/openblas_GCCseq/lib:${LD_LIBRARY_PATH}' in your ~/.bashrc
|
|
||||||
|
|
||||||
# tar the current directory
|
|
||||||
MY_DIR = `basename ${PWD}`
|
|
||||||
tar:
|
|
||||||
@echo "Tar the directory: " ${MY_DIR}
|
|
||||||
@cd .. ;\
|
|
||||||
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
|
|
||||||
cd ${MY_DIR}
|
|
||||||
# tar cf `basename ${PWD}`.tar *
|
|
||||||
|
|
||||||
doc:
|
|
||||||
doxygen Doxyfile
|
|
||||||
|
|
||||||
#########################################################################
|
|
||||||
|
|
||||||
.cpp.o:
|
|
||||||
$(CXX) -c $(CXXFLAGS) -o $@ $<
|
|
||||||
|
|
||||||
.c.o:
|
|
||||||
$(CC) -c $(CFLAGS) -o $@ $<
|
|
||||||
|
|
||||||
.f.o:
|
|
||||||
$(F77) -c $(FFLAGS) -o $@ $<
|
|
||||||
|
|
||||||
##################################################################################################
|
|
||||||
# some tools
|
|
||||||
# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags)
|
|
||||||
cache: ${PROGRAM}
|
|
||||||
valgrind --tool=callgrind --simulate-cache=yes ./$^
|
|
||||||
# kcachegrind callgrind.out.<pid> &
|
|
||||||
kcachegrind `ls -1tr callgrind.out.* |tail -1`
|
|
||||||
|
|
||||||
# Check for wrong memory accesses, memory leaks, ...
|
|
||||||
# use smaller data sets
|
|
||||||
# no "-pg" in compile/link options
|
|
||||||
mem: ${PROGRAM}
|
|
||||||
valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
|
|
||||||
|
|
||||||
# Simple run time profiling of your code
|
|
||||||
# CXXFLAGS += -g -pg
|
|
||||||
# LINKFLAGS += -pg
|
|
||||||
prof: ${PROGRAM}
|
|
||||||
./$^
|
|
||||||
gprof -b ./$^ > gp.out
|
|
||||||
# kprof -f gp.out -p gprof &
|
|
||||||
|
|
||||||
#Trace your heap:
|
|
||||||
#> heaptrack ./main.GCC_
|
|
||||||
#> heaptrack_gui heaptrack.main.GCC_.<pid>.gz
|
|
||||||
heap: ${PROGRAM}
|
|
||||||
heaptrack ./$^ 11
|
|
||||||
heaptrack_gui `ls -1tr heaptrack.$^.* |tail -1` &
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
########################################################################
|
|
||||||
# get the detailed status of all optimization flags
|
|
||||||
info:
|
|
||||||
echo "detailed status of all optimization flags"
|
|
||||||
$(CXX) --version
|
|
||||||
$(CXX) -Q $(CXXFLAGS) --help=optimizers
|
|
||||||
|
|
@ -1,183 +0,0 @@
|
||||||
# Basic Definitions for using GNU-compiler suite sequentially
|
|
||||||
# requires setting of COMPILER=GCC_
|
|
||||||
|
|
||||||
CC = gcc
|
|
||||||
CXX = g++
|
|
||||||
F77 = gfortran
|
|
||||||
LINKER = ${CXX}
|
|
||||||
|
|
||||||
WARNINGS = -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow \
|
|
||||||
-Wredundant-decls -Winline -fmax-errors=1
|
|
||||||
# -Wunreachable-code
|
|
||||||
CXXFLAGS += -ffast-math -O1 -march=native -std=c++17 ${WARNINGS}
|
|
||||||
#CXXFLAGS += -Ofast -funroll-all-loops -std=c++17 ${WARNINGS}
|
|
||||||
#-msse3
|
|
||||||
# -ftree-vectorizer-verbose=2 -DNDEBUG
|
|
||||||
# -ftree-vectorizer-verbose=5
|
|
||||||
# -ftree-vectorize -fdump-tree-vect-blocks=foo.dump -fdump-tree-pre=stderr
|
|
||||||
|
|
||||||
# CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details
|
|
||||||
# CFLAGS = -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2
|
|
||||||
# #CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
|
|
||||||
# FFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
|
|
||||||
# LFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
|
|
||||||
LINKFLAGS += -O1
|
|
||||||
|
|
||||||
#architecture
|
|
||||||
#CPU = -march=znver2
|
|
||||||
CXXFLAGS += ${CPU}
|
|
||||||
LINKFLAGS += ${CPU}
|
|
||||||
|
|
||||||
# different libraries in Ubuntu or Manjaro
|
|
||||||
ifndef UBUNTU
|
|
||||||
UBUNTU=1
|
|
||||||
endif
|
|
||||||
|
|
||||||
# BLAS, LAPACK
|
|
||||||
ifeq ($(UBUNTU),1)
|
|
||||||
LINKFLAGS += -llapack -lblas
|
|
||||||
# -lopenblas
|
|
||||||
else
|
|
||||||
# on archlinux
|
|
||||||
LINKFLAGS += -llapack -lopenblas -lcblas
|
|
||||||
endif
|
|
||||||
|
|
||||||
# interprocedural optimization
|
|
||||||
CXXFLAGS += -flto
|
|
||||||
LINKFLAGS += -flto
|
|
||||||
|
|
||||||
# for debugging purposes (safe code)
|
|
||||||
# -fsanitize=leak # only one out the three can be used
|
|
||||||
# -fsanitize=address
|
|
||||||
# -fsanitize=thread
|
|
||||||
SANITARY = -fsanitize=address -fsanitize=undefined -fsanitize=null -fsanitize=return \
|
|
||||||
-fsanitize=bounds -fsanitize=alignment -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow \
|
|
||||||
-fsanitize=bool -fsanitize=enum -fsanitize=vptr
|
|
||||||
#CXXFLAGS += ${SANITARY}
|
|
||||||
#LINKFLAGS += ${SANITARY}
|
|
||||||
|
|
||||||
# profiling tools
|
|
||||||
#CXXFLAGS += -pg
|
|
||||||
#LINKFLAGS += -pg
|
|
||||||
|
|
||||||
|
|
||||||
default: ${PROGRAM}
|
|
||||||
|
|
||||||
${PROGRAM}: ${OBJECTS}
|
|
||||||
$(LINKER) $^ ${LINKFLAGS} -o $@
|
|
||||||
|
|
||||||
clean:
|
|
||||||
@rm -f ${PROGRAM} ${OBJECTS}
|
|
||||||
|
|
||||||
clean_all:: clean
|
|
||||||
-@rm -f *_ *~ *.bak *.log *.out *.tar *.orig *.optrpt
|
|
||||||
-@rm -rf html
|
|
||||||
|
|
||||||
run: clean ${PROGRAM}
|
|
||||||
#run: ${PROGRAM}
|
|
||||||
# time ./${PROGRAM} ${PARAMS}
|
|
||||||
./${PROGRAM} ${PARAMS}
|
|
||||||
|
|
||||||
# tar the current directory
|
|
||||||
MY_DIR = `basename ${PWD}`
|
|
||||||
tar: clean_all
|
|
||||||
@echo "Tar the directory: " ${MY_DIR}
|
|
||||||
@cd .. ;\
|
|
||||||
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
|
|
||||||
cd ${MY_DIR}
|
|
||||||
# tar cf `basename ${PWD}`.tar *
|
|
||||||
#find . -size +10M > large_files
|
|
||||||
#--exclude-from ${MY_DIR}/large_files
|
|
||||||
|
|
||||||
zip: clean
|
|
||||||
@echo "Zip the directory: " ${MY_DIR}
|
|
||||||
@cd .. ;\
|
|
||||||
zip -r ${MY_DIR}.zip ${MY_DIR} *default.mk ;\
|
|
||||||
cd ${MY_DIR}
|
|
||||||
|
|
||||||
doc:
|
|
||||||
doxygen Doxyfile
|
|
||||||
|
|
||||||
#########################################################################
|
|
||||||
.SUFFIXES: .f90
|
|
||||||
|
|
||||||
.cpp.o:
|
|
||||||
$(CXX) -c $(CXXFLAGS) -o $@ $<
|
|
||||||
# $(CXX) -c $(CXXFLAGS) -o $@ $< 2>&1 | tee -a $<.log
|
|
||||||
# $(CXX) -c $(CXXFLAGS) -o $@ $< 2>&1 | tee -a $(<:.cpp=.log)
|
|
||||||
|
|
||||||
.c.o:
|
|
||||||
$(CC) -c $(CFLAGS) -o $@ $<
|
|
||||||
|
|
||||||
.f.o:
|
|
||||||
$(F77) -c $(FFLAGS) -o $@ $<
|
|
||||||
|
|
||||||
.f90.o:
|
|
||||||
$(F77) -c $(FFLAGS) -o $@ $<
|
|
||||||
|
|
||||||
##################################################################################################
|
|
||||||
# some tools
|
|
||||||
# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags)
|
|
||||||
cache: ${PROGRAM}
|
|
||||||
valgrind --tool=callgrind --simulate-cache=yes ./$^ ${PARAMS}
|
|
||||||
# kcachegrind callgrind.out.<pid> &
|
|
||||||
kcachegrind `ls -1tr callgrind.out.* |tail -1`
|
|
||||||
|
|
||||||
# Check for wrong memory accesses, memory leaks, ...
|
|
||||||
# use smaller data sets
|
|
||||||
# no "-pg" in compile/link options
|
|
||||||
mem: ${PROGRAM}
|
|
||||||
valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^ ${PARAMS}
|
|
||||||
# Graphical interface
|
|
||||||
# valkyrie
|
|
||||||
|
|
||||||
# Simple run time profiling of your code
|
|
||||||
# CXXFLAGS += -g -pg
|
|
||||||
# LINKFLAGS += -pg
|
|
||||||
prof: ${PROGRAM}
|
|
||||||
perf record ./$^ ${PARAMS}
|
|
||||||
perf report
|
|
||||||
# gprof -b ./$^ > gp.out
|
|
||||||
# kprof -f gp.out -p gprof &
|
|
||||||
|
|
||||||
# perf in Ubuntu 20.04: https://www.howtoforge.com/how-to-install-perf-performance-analysis-tool-on-ubuntu-20-04/
|
|
||||||
# * install
|
|
||||||
# * sudo vi /etc/sysctl.conf
|
|
||||||
# add kernel.perf_event_paranoid = 0
|
|
||||||
|
|
||||||
#Trace your heap:
|
|
||||||
#> heaptrack ./main.GCC_
|
|
||||||
#> heaptrack_gui heaptrack.main.GCC_.<pid>.gz
|
|
||||||
heap: ${PROGRAM}
|
|
||||||
heaptrack ./$^ ${PARAMS}
|
|
||||||
heaptrack_gui `ls -1tr heaptrack.$^.* |tail -1` &
|
|
||||||
|
|
||||||
codecheck: $(SOURCES)
|
|
||||||
cppcheck --enable=all --inconclusive --std=c++17 --suppress=missingIncludeSystem $^
|
|
||||||
|
|
||||||
|
|
||||||
########################################################################
|
|
||||||
# get the detailed status of all optimization flags
|
|
||||||
info:
|
|
||||||
echo "detailed status of all optimization flags"
|
|
||||||
$(CXX) --version
|
|
||||||
$(CXX) -Q $(CXXFLAGS) --help=optimizers
|
|
||||||
lscpu
|
|
||||||
inxi -C
|
|
||||||
lstopo
|
|
||||||
|
|
||||||
# Excellent hardware info
|
|
||||||
# hardinfo
|
|
||||||
# Live monitoring of CPU frequency etc.
|
|
||||||
# sudo i7z
|
|
||||||
|
|
||||||
# Memory consumption
|
|
||||||
# vmstat -at -SM 3
|
|
||||||
# xfce4-taskmanager
|
|
||||||
|
|
||||||
|
|
||||||
# https://www.tecmint.com/check-linux-cpu-information/
|
|
||||||
#https://www.tecmint.com/monitor-cpu-and-gpu-temperature-in-ubuntu/
|
|
||||||
|
|
||||||
# Debugging:
|
|
||||||
# https://wiki.archlinux.org/index.php/Debugging
|
|
||||||
|
|
@ -1,137 +0,0 @@
|
||||||
# Basic Definitions for using INTEL compiler suite sequentially
|
|
||||||
# requires setting of COMPILER=ICC_
|
|
||||||
|
|
||||||
#BINDIR = /opt/intel/bin/
|
|
||||||
|
|
||||||
# special on my sony [GH]
|
|
||||||
#BINDIR = /opt/save.intel/bin/
|
|
||||||
# very special on my sony [GH]
|
|
||||||
# FIND_LIBS = -L /opt/save.intel/composer_xe_2013.1.117/mkl/lib/intel64/libmkl_intel_lp64.so
|
|
||||||
|
|
||||||
# Error with g++-4.8 using icpc 14.0.x:
|
|
||||||
# find directory wherein bits/c++config.h is located
|
|
||||||
# 'locate bits/c++config.h'
|
|
||||||
#FOUND_CONFIG = -I/usr/include/x86_64-linux-gnu/c++/4.8
|
|
||||||
|
|
||||||
|
|
||||||
CC = ${BINDIR}icc
|
|
||||||
CXX = ${BINDIR}icpc
|
|
||||||
F77 = ${BINDIR}ifort
|
|
||||||
LINKER = ${CXX}
|
|
||||||
|
|
||||||
|
|
||||||
WARNINGS = -Wall -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -wd2015,2012 -wn3
|
|
||||||
# -Winline -Wredundant-decls -Wunreachable-code
|
|
||||||
CXXFLAGS += -O3 -fargument-noalias -std=c++17 -DNDEBUG ${WARNINGS} -mkl ${FOUND_CONFIG}
|
|
||||||
# profiling tools
|
|
||||||
#CXXFLAGS += -pg
|
|
||||||
#LINKFLAGS += -pg
|
|
||||||
# -vec-report=3
|
|
||||||
# -qopt-report=5 -qopt-report-phase=vec
|
|
||||||
# -guide -parallel
|
|
||||||
# -guide-opts=string -guide-par[=n] -guide-vec[=n]
|
|
||||||
# -auto-p32 -simd
|
|
||||||
CXXFLAGS += -align
|
|
||||||
|
|
||||||
# use MKL by INTEL
|
|
||||||
# https://software.intel.com/content/www/us/en/develop/documentation/mkl-linux-developer-guide/top/linking-your-application-with-the-intel-math-kernel-library/linking-quick-start/using-the-mkl-compiler-option.html
|
|
||||||
# https://software.intel.com/content/www/us/en/develop/articles/intel-mkl-link-line-advisor.html
|
|
||||||
# LINKFLAGS += -L${BINDIR}../composer_xe_2013.1.117/mkl/lib/intel64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
|
|
||||||
#LINKFLAGS += -O3 -L/opt/intel/mkl/lib -mkl
|
|
||||||
LINKFLAGS += -O3 -mkl=sequential
|
|
||||||
|
|
||||||
# interprocedural optimization
|
|
||||||
CXXFLAGS += -ipo
|
|
||||||
LINKFLAGS += -ipo
|
|
||||||
|
|
||||||
# annotated assembler file
|
|
||||||
ANNOTED = -fsource-asm -S
|
|
||||||
|
|
||||||
default: ${PROGRAM}
|
|
||||||
|
|
||||||
${PROGRAM}: ${OBJECTS}
|
|
||||||
$(LINKER) $^ ${LINKFLAGS} -o $@
|
|
||||||
|
|
||||||
clean:
|
|
||||||
rm -f ${PROGRAM} ${OBJECTS}
|
|
||||||
|
|
||||||
clean_all:: clean
|
|
||||||
@rm -f *_ *~ *.bak *.log *.out *.tar
|
|
||||||
|
|
||||||
run: clean ${PROGRAM}
|
|
||||||
./${PROGRAM}
|
|
||||||
|
|
||||||
# tar the current directory
|
|
||||||
MY_DIR = `basename ${PWD}`
|
|
||||||
tar: clean_all
|
|
||||||
@echo "Tar the directory: " ${MY_DIR}
|
|
||||||
@cd .. ;\
|
|
||||||
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
|
|
||||||
cd ${MY_DIR}
|
|
||||||
# tar cf `basename ${PWD}`.tar *
|
|
||||||
|
|
||||||
doc:
|
|
||||||
doxygen Doxyfile
|
|
||||||
|
|
||||||
#########################################################################
|
|
||||||
|
|
||||||
.cpp.o:
|
|
||||||
$(CXX) -c $(CXXFLAGS) -o $@ $<
|
|
||||||
|
|
||||||
.c.o:
|
|
||||||
$(CC) -c $(CFLAGS) -o $@ $<
|
|
||||||
|
|
||||||
.f.o:
|
|
||||||
$(F77) -c $(FFLAGS) -o $@ $<
|
|
||||||
|
|
||||||
##################################################################################################
|
|
||||||
# # some tools
|
|
||||||
# # Cache behaviour (CXXFLAGS += -g tracks down to source lines)
|
|
||||||
# cache: ${PROGRAM}
|
|
||||||
# valgrind --tool=callgrind --simulate-cache=yes ./$^
|
|
||||||
# # kcachegrind callgrind.out.<pid> &
|
|
||||||
#
|
|
||||||
# # Check for wrong memory accesses, memory leaks, ...
|
|
||||||
# # use smaller data sets
|
|
||||||
# mem: ${PROGRAM}
|
|
||||||
# valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
|
|
||||||
#
|
|
||||||
# # Simple run time profiling of your code
|
|
||||||
# # CXXFLAGS += -g -pg
|
|
||||||
# # LINKFLAGS += -pg
|
|
||||||
# prof: ${PROGRAM}
|
|
||||||
# ./$^
|
|
||||||
# gprof -b ./$^ > gp.out
|
|
||||||
# # kprof -f gp.out -p gprof &
|
|
||||||
#
|
|
||||||
|
|
||||||
|
|
||||||
mem: inspector
|
|
||||||
prof: amplifier
|
|
||||||
cache: amplifier
|
|
||||||
|
|
||||||
gap_par_report:
|
|
||||||
${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt
|
|
||||||
|
|
||||||
# GUI for performance report
|
|
||||||
amplifier: ${PROGRAM}
|
|
||||||
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
|
|
||||||
# alternatively to the solution above:
|
|
||||||
# edit the file /etc/sysctl.d/10-ptrace.conf and set the variable kernel.yama.ptrace_scope to 0.
|
|
||||||
amplxe-gui &
|
|
||||||
|
|
||||||
# GUI for Memory and Thread analyzer (race condition)
|
|
||||||
inspector: ${PROGRAM}
|
|
||||||
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
|
|
||||||
inspxe-gui &
|
|
||||||
|
|
||||||
advisor:
|
|
||||||
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
|
|
||||||
advixe-gui &
|
|
||||||
|
|
||||||
icc-info:
|
|
||||||
icpc -# main.cpp
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,176 +0,0 @@
|
||||||
# Basic Definitions for using INTEL compiler suite sequentially
|
|
||||||
# requires setting of COMPILER=ONEAPI_
|
|
||||||
|
|
||||||
# https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onemkl/link-line-advisor.html
|
|
||||||
# requires
|
|
||||||
# source /opt/intel/oneapi/setvars.sh
|
|
||||||
# on AMD: export MKL_DEBUG_CPU_TYPE=5
|
|
||||||
|
|
||||||
#BINDIR = /opt/intel/oneapi/compiler/latest/linux/bin/
|
|
||||||
#MKL_ROOT = /opt/intel/oneapi/mkl/latest/
|
|
||||||
#export KMP_AFFINITY=verbose,compact
|
|
||||||
|
|
||||||
CC = ${BINDIR}icc
|
|
||||||
CXX = ${BINDIR}dpcpp
|
|
||||||
F77 = ${BINDIR}ifort
|
|
||||||
LINKER = ${CXX}
|
|
||||||
|
|
||||||
## Compiler flags
|
|
||||||
WARNINGS = -Wall -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -pedantic
|
|
||||||
WARNINGS += -Wpessimizing-move -Wredundant-move
|
|
||||||
#-wd2015,2012,2014 -wn3
|
|
||||||
# -Winline -Wredundant-decls -Wunreachable-code
|
|
||||||
# -qopt-subscript-in-range
|
|
||||||
# -vec-threshold0
|
|
||||||
|
|
||||||
CXXFLAGS += -O3 -std=c++17 ${WARNINGS}
|
|
||||||
#CXXFLAGS += -DMKL_ILP64 -I"${MKLROOT}/include"
|
|
||||||
#CXXFLAGS += -DMKL_ILP32 -I"${MKLROOT}/include"
|
|
||||||
LINKFLAGS += -O3
|
|
||||||
|
|
||||||
# interprocedural optimization
|
|
||||||
CXXFLAGS += -ipo
|
|
||||||
LINKFLAGS += -ipo
|
|
||||||
LINKFLAGS += -flto
|
|
||||||
|
|
||||||
# annotated Assembler file
|
|
||||||
ANNOTED = -fsource-asm -S
|
|
||||||
|
|
||||||
#architecture
|
|
||||||
CPU = -march=core-avx2
|
|
||||||
#CPU += -mtp=zen
|
|
||||||
# -xCORE-AVX2
|
|
||||||
# -axcode COMMON-AVX512 -axcode MIC-AVX512 -axcode CORE-AVX512 -axcode CORE-AVX2
|
|
||||||
CXXFLAGS += ${CPU}
|
|
||||||
LINKFLAGS += ${CPU}
|
|
||||||
|
|
||||||
# use MKL by INTEL
|
|
||||||
# https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onemkl/link-line-advisor.html
|
|
||||||
# sequential MKL
|
|
||||||
# use the 32 bit interface (LP64) instead of 64 bit interface (ILP64)
|
|
||||||
CXXFLAGS += -qmkl=sequential -UMKL_ILP64
|
|
||||||
LINKFLAGS += -O3 -qmkl=sequential -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
|
|
||||||
#LINKFLAGS += -O3 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
|
|
||||||
|
|
||||||
# shared libs: https://aur.archlinux.org/packages/intel-oneapi-compiler-static
|
|
||||||
# install intel-oneapi-compiler-static
|
|
||||||
# or
|
|
||||||
LINKFLAGS += -shared-intel
|
|
||||||
|
|
||||||
|
|
||||||
OPENMP = -qopenmp
|
|
||||||
CXXFLAGS += ${OPENMP}
|
|
||||||
LINKFLAGS += ${OPENMP}
|
|
||||||
|
|
||||||
|
|
||||||
# profiling tools
|
|
||||||
#CXXFLAGS += -pg
|
|
||||||
#LINKFLAGS += -pg
|
|
||||||
# -vec-report=3
|
|
||||||
# -qopt-report=5 -qopt-report-phase=vec -qopt-report-phase=openmp
|
|
||||||
# -guide -parallel
|
|
||||||
# -guide-opts=string -guide-par[=n] -guide-vec[=n]
|
|
||||||
# -auto-p32 -simd
|
|
||||||
|
|
||||||
# Reports: https://software.intel.com/en-us/articles/getting-the-most-out-of-your-intel-compiler-with-the-new-optimization-reports
|
|
||||||
#CXXFLAGS += -qopt-report=5 -qopt-report-phase=vec,par
|
|
||||||
#CXXFLAGS += -qopt-report=5 -qopt-report-phase=cg
|
|
||||||
# Redirect report from *.optrpt to stderr
|
|
||||||
# -qopt-report-file=stderr
|
|
||||||
# Guided parallelization
|
|
||||||
# -guide -parallel
|
|
||||||
# -guide-opts=string -guide-par[=n] -guide-vec[=n]
|
|
||||||
# -auto-p32 -simd
|
|
||||||
|
|
||||||
## run time checks
|
|
||||||
# https://www.intel.com/content/www/us/en/develop/documentation/fortran-compiler-oneapi-dev-guide-and-reference/top/compiler-reference/compiler-options/offload-openmp-and-parallel-processing-options/par-runtime-control-qpar-runtime-control.html
|
|
||||||
|
|
||||||
|
|
||||||
default: ${PROGRAM}
|
|
||||||
|
|
||||||
${PROGRAM}: ${OBJECTS}
|
|
||||||
$(LINKER) $^ ${LINKFLAGS} -o $@
|
|
||||||
|
|
||||||
clean:
|
|
||||||
rm -f ${PROGRAM} ${OBJECTS} *.optrpt
|
|
||||||
|
|
||||||
clean_all:: clean
|
|
||||||
@rm -f *_ *~ *.bak *.log *.out *.tar
|
|
||||||
|
|
||||||
run: clean ${PROGRAM}
|
|
||||||
./${PROGRAM}
|
|
||||||
|
|
||||||
# tar the current directory
|
|
||||||
# Name of the current directory, computed once by make itself instead of
# running `basename` through the shell each time the variable is used.
MY_DIR := $(notdir $(CURDIR))

# Pack this directory together with the shared *default.mk files into
# ../$(MY_DIR).tar.  `&&` stops tar from running in the wrong directory when
# the `cd` fails and lets a tar failure fail the target; the former trailing
# `cd` back was dropped because the recipe's shell ends right after it anyway.
.PHONY: tar
tar: clean_all
	@echo "Tar the directory: " ${MY_DIR}
	@cd .. && tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk
|
|
||||||
# tar cf `basename ${PWD}`.tar *
|
|
||||||
|
|
||||||
doc:
|
|
||||||
doxygen Doxyfile
|
|
||||||
|
|
||||||
#########################################################################
|
|
||||||
|
|
||||||
# Compile rules for C++, C and Fortran sources, written as pattern rules
# (the modern replacement for the old-fashioned .cpp.o/.c.o/.f.o suffix rules).
%.o: %.cpp
	$(CXX) -c $(CXXFLAGS) -o $@ $<

%.o: %.c
	$(CC) -c $(CFLAGS) -o $@ $<

%.o: %.f
	$(F77) -c $(FFLAGS) -o $@ $<
|
|
||||||
|
|
||||||
##################################################################################################
|
|
||||||
# some tools
|
|
||||||
# Cache behaviour (CXXFLAGS += -g tracks down to source lines)
|
|
||||||
# https://software.intel.com/content/www/us/en/develop/documentation/vtune-help/top/analyze-performance/microarchitecture-analysis-group/memory-access-analysis.html
|
|
||||||
|
|
||||||
mem: inspector
|
|
||||||
prof: vtune
|
|
||||||
cache: inspector
|
|
||||||
|
|
||||||
gap_par_report:
|
|
||||||
${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt
|
|
||||||
|
|
||||||
# GUI for performance report
|
|
||||||
amplifier: ${PROGRAM}
|
|
||||||
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
|
|
||||||
echo 0 | sudo tee /proc/sys/kernel/perf_event_paranoid
|
|
||||||
amplxe-gui &
|
|
||||||
|
|
||||||
# GUI for Memory and Thread analyzer (race condition)
|
|
||||||
inspector: ${PROGRAM}
|
|
||||||
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
|
|
||||||
# inspxe-gui &
|
|
||||||
vtune-gui ./${PROGRAM} &
|
|
||||||
|
|
||||||
# Intel Advisor: collect a survey of ${PROGRAM} and export it as CSV.
# The environment script must be read by the same shell that runs the advisor
# commands (each recipe line gets its own shell), so everything is chained
# into one command.  `.` is used instead of `source` because make's default
# /bin/sh is not guaranteed to know the bash-only spelling.
# NOTE(review): assumes ./out already exists for the report file - confirm.
.PHONY: advisor
advisor: ${PROGRAM}
	. /opt/intel/oneapi/advisor/2021.2.0/advixe-vars.sh && \
	advisor --collect=survey ./${PROGRAM} && \
	advisor --report=survey --project-dir=./ src:r=./ --format=csv --report-output=./out/survey.csv
# Alternatives kept for reference:
#	/opt/intel/oneapi/advisor/latest/bin64/advixe-gui &
#	advisor --collect=roofline ./${PROGRAM}
|
|
||||||
|
|
||||||
# VTune hotspot analysis of ${PROGRAM}; the text report is written to vtune.out.
.PHONY: vtune
vtune: ${PROGRAM}
	echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
# https://software.intel.com/en-us/articles/intel-advisor-2017-update-1-what-s-new
# The variable is set on the command itself: an `export` on a recipe line of
# its own ends with that line's shell and never reaches the following line.
	ADVIXE_EXPERIMENTAL=roofline vtune -collect hotspots ./${PROGRAM}
# NOTE(review): the report reads r000hs, presumably the result directory of a
# first collection run - confirm how repeated runs should be handled.
	vtune -report hotspots -r r000hs > vtune.out
#	vtune-gui ./${PROGRAM} &
|
|
||||||
|
|
||||||
icc-info:
|
|
||||||
icpc -# main.cpp
|
|
||||||
|
|
||||||
# MKL on AMD
|
|
||||||
# https://www.computerbase.de/2019-11/mkl-workaround-erhoeht-leistung-auf-amd-ryzen/
|
|
||||||
#
|
|
||||||
# https://sites.google.com/a/uci.edu/mingru-yang/programming/mkl-has-bad-performance-on-an-amd-cpu
|
|
||||||
# export MKL_DEBUG_CPU_TYPE=5
|
|
||||||
# export MKL_NUM_THREADS=1
|
|
||||||
# export MKL_DYNAMIC=false
|
|
||||||
# on Intel compiler
|
|
||||||
# http://publicclu2.blogspot.com/2013/05/intel-complier-suite-reference-card.html
|
|
||||||
|
|
@ -1,93 +0,0 @@
|
||||||
# Basic Definitions for using PGI-compiler suite sequentially
|
|
||||||
# requires setting of COMPILER=PGI_
|
|
||||||
# OPTIRUN = optirun
|
|
||||||
|
|
||||||
|
|
||||||
CC = pgcc
|
|
||||||
CXX = pgc++
|
|
||||||
F77 = pgfortran
|
|
||||||
LINKER = ${CXX}
|
|
||||||
|
|
||||||
# on mephisto:
|
|
||||||
#CXXFLAGS += -I/share/apps/atlas/include
|
|
||||||
#LINKFLAGS += -L/share/apps/atlas/lib
|
|
||||||
#LINKFLAGS += -lcblas -latlas
|
|
||||||
|
|
||||||
#LINKFLAGS += -lblas
|
|
||||||
# The <cblas.h> header must be wrapped in extern "C" so that g++ finds everything.
|
|
||||||
|
|
||||||
WARNINGS = -Minform=warn
|
|
||||||
# -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -W -Wfloat-equal -Wshadow -Wredundant-decls
|
|
||||||
# -pedantic -Wunreachable-code -Wextra -Winline
|
|
||||||
# -Wunreachable-code
|
|
||||||
|
|
||||||
#PGI_PROFILING = -Minfo=ccff,loop,vect,opt,intensity,mp,accel
|
|
||||||
PGI_PROFILING = -Minfo=ccff,accel,ipa,loop,lre,mp,opt,par,unified,vect,intensity
|
|
||||||
# -Minfo
|
|
||||||
# -Mprof=time
|
|
||||||
# -Mprof=lines
|
|
||||||
# take care with option -Msafeptr
|
|
||||||
CXXFLAGS += -O3 -std=c++17 ${WARNINGS}
|
|
||||||
#CXXFLAGS += -O3 -std=c++11 -DNDEBUG ${PGI_PROFILING} ${WARNINGS}
|
|
||||||
# -fastsse -fargument-noalias ${WARNINGS} -msse3 -vec-report=3
|
|
||||||
|
|
||||||
default: ${PROGRAM}
|
|
||||||
|
|
||||||
${PROGRAM}: ${OBJECTS}
|
|
||||||
$(LINKER) $^ ${LINKFLAGS} -o $@
|
|
||||||
|
|
||||||
clean:
|
|
||||||
@rm -f ${PROGRAM} ${OBJECTS}
|
|
||||||
|
|
||||||
clean_all:: clean
|
|
||||||
@rm -f *_ *~ *.bak *.log *.out *.tar
|
|
||||||
|
|
||||||
run: clean ${PROGRAM}
|
|
||||||
./${PROGRAM}
|
|
||||||
|
|
||||||
# tar the current directory
|
|
||||||
# Name of the current directory, computed once by make itself instead of
# running `basename` through the shell each time the variable is used.
MY_DIR := $(notdir $(CURDIR))

# Pack this directory together with the shared *default.mk files into
# ../$(MY_DIR).tar.  `&&` stops tar from running in the wrong directory when
# the `cd` fails and lets a tar failure fail the target; the former trailing
# `cd` back was dropped because the recipe's shell ends right after it anyway.
.PHONY: tar
tar: clean_all
	@echo "Tar the directory: " ${MY_DIR}
	@cd .. && tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk
|
|
||||||
# tar cf `basename ${PWD}`.tar *
|
|
||||||
|
|
||||||
doc:
|
|
||||||
doxygen Doxyfile
|
|
||||||
|
|
||||||
#########################################################################
|
|
||||||
|
|
||||||
# Compile rules for C++, C and Fortran sources, written as pattern rules
# (the modern replacement for the old-fashioned .cpp.o/.c.o/.f.o suffix rules).
%.o: %.cpp
	$(CXX) -c $(CXXFLAGS) -o $@ $<

%.o: %.c
	$(CC) -c $(CFLAGS) -o $@ $<

%.o: %.f
	$(F77) -c $(FFLAGS) -o $@ $<
|
|
||||||
|
|
||||||
##################################################################################################
|
|
||||||
# # some tools
|
|
||||||
# # Simple run time profiling of your code
|
|
||||||
# # CXXFLAGS += -g -pg
|
|
||||||
# # LINKFLAGS += -pg
|
|
||||||
|
|
||||||
|
|
||||||
# Profiling options PGI, see: pgcollect -help
|
|
||||||
# CPU_PROF = -allcache
|
|
||||||
CPU_PROF = -time
|
|
||||||
# GPU_PROF = -cuda=gmem,branch,cc13 -cudainit
|
|
||||||
#GPU_PROF = -cuda=branch:cc20
|
|
||||||
#
|
|
||||||
PROF_FILE = pgprof.out
|
|
||||||
|
|
||||||
cache: prof
|
|
||||||
|
|
||||||
prof: ${PROGRAM}
|
|
||||||
${OPTIRUN} ${BINDIR}pgcollect $(CPU_PROF) ./$^
|
|
||||||
${OPTIRUN} ${BINDIR}pgprof -exe ./$^ $(PROF_FILE) &
|
|
||||||
|
|
||||||
info:
|
|
||||||
pgaccelinfo -v
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,30 +0,0 @@
|
||||||
#
# Compiler selection.  GNU tools are the default; `?=` only assigns when
# COMPILER is not set yet, so both of the following really take effect:
#    export COMPILER=ICC_        (from the shell environment)
#    make COMPILER=ICC_          (on the make command line)
# Supported values: GCC_ (GNU), ICC_ (Intel), PGI_ (PGI)
COMPILER ?= GCC_

SOURCES = main.cpp mylib.cpp
OBJECTS = $(SOURCES:.cpp=.o)

PROGRAM = main.${COMPILER}

# The next two lines enable debug symbols (for debugging and detailed
# performance analysis); comment them out to build without them.
CXXFLAGS += -g
LINKFLAGS += -g
# do not use -pg with PGI compilers

# Compiler-specific settings and all build rules live one directory up.
include ../${COMPILER}default.mk
|
|
||||||
|
|
@ -1,124 +0,0 @@
|
||||||
#include "mylib.h"
|
|
||||||
#include <cassert>
|
|
||||||
#include <chrono> // timing
|
|
||||||
#include <cmath> // sqrt()
|
|
||||||
#include <cstdlib> // atoi()
|
|
||||||
#include <cstring> // strncmp()
|
|
||||||
#include <ctime>
|
|
||||||
#include <iostream>
|
|
||||||
#include <sstream>
|
|
||||||
using namespace std;
|
|
||||||
using namespace std::chrono; // timing
|
|
||||||
|
|
||||||
int main(int argc, char **argv)
|
|
||||||
{
|
|
||||||
int const NLOOPS = 50; // chose a value such that the benchmark runs at least 10 sec.
|
|
||||||
unsigned int N = 50000001;
|
|
||||||
//##########################################################################
|
|
||||||
// Read Paramater from command line (C++ style)
|
|
||||||
cout << "Checking command line parameters for: -n <number> " << endl;
|
|
||||||
for (int i = 1; i < argc; i++)
|
|
||||||
{
|
|
||||||
cout << " arg[" << i << "] = " << argv[i] << endl;
|
|
||||||
if (std::strncmp(argv[i], "-n", 2) == 0 && i + 1 < argc) // found "-n" followed by another parameter
|
|
||||||
{
|
|
||||||
N = static_cast<unsigned int>(atoi(argv[i + 1]));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
cout << "Corect call: " << argv[0] << " -n <number>\n";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
cout << "\nN = " << N << endl;
|
|
||||||
|
|
||||||
//##########################################################################
|
|
||||||
// Memory allocation
|
|
||||||
cout << "Memory allocation\n";
|
|
||||||
|
|
||||||
vector<double> x(N), y(N);
|
|
||||||
|
|
||||||
cout.precision(2);
|
|
||||||
cout << 2.0 * N *sizeof(x[0]) / 1024 / 1024 / 1024 << " GByte Memory allocated\n";
|
|
||||||
cout.precision(6);
|
|
||||||
|
|
||||||
//##########################################################################
|
|
||||||
// Data initialization
|
|
||||||
// Special: x_i = i+1; y_i = 1/x_i ==> <x,y> == N
|
|
||||||
for (unsigned int i = 0; i < N; ++i)
|
|
||||||
{
|
|
||||||
x[i] = i + 1;
|
|
||||||
y[i] = 1.0 / pow(x[i], 2);
|
|
||||||
}
|
|
||||||
|
|
||||||
//##########################################################################
|
|
||||||
cout << "\nStart Benchmarking Normal sum\n";
|
|
||||||
|
|
||||||
// Do calculation
|
|
||||||
auto t1 = system_clock::now(); // start timer
|
|
||||||
double sk1(0.0),ss(0.0);
|
|
||||||
for (int i = 0; i < NLOOPS; ++i)
|
|
||||||
{
|
|
||||||
sk1 = normal_sum(y);
|
|
||||||
ss += sk1; // prevents the optimizer from removing unused calculation results.
|
|
||||||
}
|
|
||||||
|
|
||||||
auto t2 = system_clock::now(); // stop timer
|
|
||||||
auto duration = duration_cast<microseconds>(t2 - t1); // duration in microseconds
|
|
||||||
double t_diff = static_cast<double>(duration.count()) / 1e6; // overall duration in seconds
|
|
||||||
t_diff = t_diff/NLOOPS;
|
|
||||||
|
|
||||||
|
|
||||||
// Print result
|
|
||||||
printf("\nSum = %.16f\n", sk1);
|
|
||||||
|
|
||||||
//##########################################################################
|
|
||||||
|
|
||||||
// Timings and Performance
|
|
||||||
cout << endl;
|
|
||||||
cout.precision(2);
|
|
||||||
cout << "Timing in sec. : " << t_diff << endl;
|
|
||||||
cout << "GFLOPS : " << 2.0 * N / t_diff / 1024 / 1024 / 1024 << endl;
|
|
||||||
cout << "GiByte/s : " << 2.0 * N / t_diff / 1024 / 1024 / 1024 * sizeof(x[0]) << endl;
|
|
||||||
|
|
||||||
//##########################################################################
|
|
||||||
|
|
||||||
cout << "\nStart Benchmarking Kahan summation\n";
|
|
||||||
|
|
||||||
// Do calculation
|
|
||||||
t1 = system_clock::now(); // start timer
|
|
||||||
double sk2(0.0),sss(0.0);
|
|
||||||
for (int i = 0; i < NLOOPS; ++i)
|
|
||||||
{
|
|
||||||
sk2 = Kahan_skalar(y);
|
|
||||||
sss += sk2; // prevents the optimizer from removing unused calculation results.
|
|
||||||
}
|
|
||||||
|
|
||||||
t2 = system_clock::now(); // stop timer
|
|
||||||
duration = duration_cast<microseconds>(t2 - t1); // duration in microseconds
|
|
||||||
t_diff = static_cast<double>(duration.count()) / 1e6; // overall duration in seconds
|
|
||||||
t_diff = t_diff/NLOOPS; // duration per loop seconds
|
|
||||||
// duration per loop seconds
|
|
||||||
|
|
||||||
// Print result
|
|
||||||
printf("\nSum = %.16f\n", sk2);
|
|
||||||
|
|
||||||
|
|
||||||
//##########################################################################
|
|
||||||
|
|
||||||
// Timings and Performance
|
|
||||||
cout << endl;
|
|
||||||
cout.precision(2);
|
|
||||||
cout << "Timing in sec. : " << t_diff << endl;
|
|
||||||
cout << "GFLOPS : " << 2.0 * N / t_diff / 1024 / 1024 / 1024 << endl;
|
|
||||||
cout << "GiByte/s : " << 2.0 * N / t_diff / 1024 / 1024 / 1024 * sizeof(x[0]) << endl;
|
|
||||||
|
|
||||||
//##########################################################################
|
|
||||||
|
|
||||||
// Print limit
|
|
||||||
printf("\nLimit = %.16f\n\n", pow(M_PI,2) / 6.0f);
|
|
||||||
|
|
||||||
//##########################################################################
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
} // memory for x and y will be deallocated by their destructors
|
|
||||||
|
|
@ -1,29 +0,0 @@
|
||||||
#include "mylib.h"
|
|
||||||
#include <cassert> // assert()
|
|
||||||
#include <cmath>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
// Kahan (compensated) summation of all entries of `input`.
// Alongside the running total, a correction term records the part of each
// addend that was lost to rounding and feeds it back into the next step.
// Returns the accumulated total; 0 for an empty vector.
long double Kahan_skalar(vector<double> const &input)
{
    long double total        = 0.0;
    long double compensation = 0.0;   // rounding error carried over from the previous addition

    for (double const value : input)
    {
        long double const corrected = value - compensation;
        long double const next      = total + corrected;
        compensation = (next - total) - corrected;
        total        = next;
    }
    return total;
}
|
|
||||||
|
|
||||||
// Plain running sum of all entries of `input`, added front to back into a
// long double accumulator.  Returns 0 for an empty vector.
long double normal_sum(vector<double> const &input)
{
    long double total = 0.0;
    for (double const value : input)
    {
        total += value;
    }
    return total;
}
|
|
||||||
|
|
@ -1,6 +0,0 @@
|
||||||
#pragma once
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
// Sums all entries of input using Kahan (compensated) summation and returns the total.
long double Kahan_skalar(std::vector<double> const &input);
|
|
||||||
|
|
||||||
// Sums all entries of input with a plain running sum (front to back) and returns the total.
long double normal_sum(std::vector<double> const &input);
|
|
||||||
File diff suppressed because it is too large
Load diff
30
ex1/code/Makefile
Normal file
30
ex1/code/Makefile
Normal file
|
|
@ -0,0 +1,30 @@
|
||||||
|
PROGRAM = main

SOURCES = main.cpp mylib.cpp
OBJECTS = ${SOURCES:.cpp=.o}
# One dependency file per object, written by the compiler (see DEPFLAGS).
DEPENDS = ${OBJECTS:.o=.d}

CXX = g++
LINKER = ${CXX}

WARNINGS = -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow \
           -Wredundant-decls -fmax-errors=1

# -MMD -MP: the compiler records the headers each object includes in a .d
# file, so editing a header rebuilds the objects that depend on it.
DEPFLAGS = -MMD -MP
CXXFLAGS = -g -flto -O3 -ffast-math -march=native ${WARNINGS}
LINKFLAGS = -g -flto -O3

# These are command names, not files: run them even if a file with the same
# name happens to exist.
.PHONY: all clean run

all: ${PROGRAM}

%.o: %.cpp
	${CXX} ${DEPFLAGS} ${CXXFLAGS} -c $< -o $@

${PROGRAM}: ${OBJECTS}
	$(LINKER) $^ ${LINKFLAGS} -o $@

clean:
	rm -f ${OBJECTS} ${DEPENDS} ${PROGRAM}

# run: clean ${PROGRAM}
run: ${PROGRAM}
	./${PROGRAM}

# Header dependencies from earlier builds; the leading '-' ignores the files
# that do not exist yet on the first build.
-include ${DEPENDS}
|
||||||
|
|
@ -1,3 +1,7 @@
|
||||||
|
// g++ *.cpp -o main
|
||||||
|
// g++ -g -ffast-math -O3 -march=native -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -Wredundant-decls -fmax-errors=1 *.cpp -o main
|
||||||
|
|
||||||
|
|
||||||
#include "mylib.h"
|
#include "mylib.h"
|
||||||
#include "timing.h"
|
#include "timing.h"
|
||||||
|
|
||||||
|
|
@ -12,6 +16,7 @@
|
||||||
#include <list>
|
#include <list>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
using namespace std::chrono; // timing
|
||||||
|
|
||||||
static void task_a() {
|
static void task_a() {
|
||||||
printf("\n\n-------------- Task A --------------\n\n");
|
printf("\n\n-------------- Task A --------------\n\n");
|
||||||
|
|
@ -98,11 +103,106 @@ static void task_c() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef __GNUC__
|
||||||
|
#pragma GCC push_options
|
||||||
|
#pragma GCC optimize("O1")
|
||||||
|
#endif
|
||||||
|
|
||||||
static void task_d() {
|
static void task_d() {
|
||||||
printf("\n\n-------------- Task D --------------\n\n");
|
printf("\n\n-------------- Task D --------------\n\n");
|
||||||
printf("See folder D.\n");
|
int const NLOOPS = 25; // chose a value such that the benchmark runs at least 10 sec.
|
||||||
|
unsigned int N = 50000001;
|
||||||
|
//##########################################################################
|
||||||
|
// Memory allocation
|
||||||
|
cout << "Memory allocation\n";
|
||||||
|
|
||||||
|
vector<double> x(N), y(N);
|
||||||
|
|
||||||
|
cout.precision(2);
|
||||||
|
cout << 2.0 * N *sizeof(x[0]) / 1024 / 1024 / 1024 << " GByte Memory allocated\n";
|
||||||
|
cout.precision(6);
|
||||||
|
|
||||||
|
//##########################################################################
|
||||||
|
// Data initialization
|
||||||
|
// Special: x_i = i+1; y_i = 1/x_i ==> <x,y> == N
|
||||||
|
for (unsigned int i = 0; i < N; ++i)
|
||||||
|
{
|
||||||
|
x[i] = i + 1;
|
||||||
|
y[i] = 1.0 / pow(x[i], 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
//##########################################################################
|
||||||
|
cout << "\nStart Benchmarking Normal sum\n";
|
||||||
|
|
||||||
|
// Do calculation
|
||||||
|
auto t1 = system_clock::now(); // start timer
|
||||||
|
double sk1(0.0),ss(0.0);
|
||||||
|
for (int i = 0; i < NLOOPS; ++i)
|
||||||
|
{
|
||||||
|
sk1 = normal_sum(y);
|
||||||
|
ss += sk1; // prevents the optimizer from removing unused calculation results.
|
||||||
|
}
|
||||||
|
|
||||||
|
auto t2 = system_clock::now(); // stop timer
|
||||||
|
auto duration = duration_cast<microseconds>(t2 - t1); // duration in microseconds
|
||||||
|
double t_diff = static_cast<double>(duration.count()) / 1e6; // overall duration in seconds
|
||||||
|
t_diff = t_diff/NLOOPS;
|
||||||
|
|
||||||
|
|
||||||
|
// Print result
|
||||||
|
printf("\nSum = %.16f\n", sk1);
|
||||||
|
|
||||||
|
//##########################################################################
|
||||||
|
|
||||||
|
// Timings and Performance
|
||||||
|
cout << endl;
|
||||||
|
cout.precision(2);
|
||||||
|
cout << "Timing in sec. : " << t_diff << endl;
|
||||||
|
cout << "GFLOPS : " << 2.0 * N / t_diff / 1024 / 1024 / 1024 << endl;
|
||||||
|
cout << "GiByte/s : " << 2.0 * N / t_diff / 1024 / 1024 / 1024 * sizeof(x[0]) << endl;
|
||||||
|
|
||||||
|
//##########################################################################
|
||||||
|
|
||||||
|
cout << "\nStart Benchmarking Kahan summation\n";
|
||||||
|
|
||||||
|
// Do calculation
|
||||||
|
t1 = system_clock::now(); // start timer
|
||||||
|
double sk2(0.0),sss(0.0);
|
||||||
|
for (int i = 0; i < NLOOPS; ++i)
|
||||||
|
{
|
||||||
|
sk2 = Kahan_skalar(y);
|
||||||
|
sss += sk2; // prevents the optimizer from removing unused calculation results.
|
||||||
|
}
|
||||||
|
|
||||||
|
t2 = system_clock::now(); // stop timer
|
||||||
|
duration = duration_cast<microseconds>(t2 - t1); // duration in microseconds
|
||||||
|
t_diff = static_cast<double>(duration.count()) / 1e6; // overall duration in seconds
|
||||||
|
t_diff = t_diff/NLOOPS; // duration per loop seconds
|
||||||
|
// duration per loop seconds
|
||||||
|
|
||||||
|
// Print result
|
||||||
|
printf("\nSum = %.16f\n", sk2);
|
||||||
|
|
||||||
|
|
||||||
|
//##########################################################################
|
||||||
|
|
||||||
|
// Timings and Performance
|
||||||
|
cout << endl;
|
||||||
|
cout.precision(2);
|
||||||
|
cout << "Timing in sec. : " << t_diff << endl;
|
||||||
|
cout << "GFLOPS : " << 2.0 * N / t_diff / 1024 / 1024 / 1024 << endl;
|
||||||
|
cout << "GiByte/s : " << 2.0 * N / t_diff / 1024 / 1024 / 1024 * sizeof(x[0]) << endl;
|
||||||
|
|
||||||
|
//##########################################################################
|
||||||
|
|
||||||
|
// Print limit
|
||||||
|
printf("\nLimit = %.16f\n\n", pow(M_PI,2) / 6.0f);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef __GNUC__
|
||||||
|
#pragma GCC pop_options
|
||||||
|
#endif
|
||||||
|
|
||||||
static void task_e() {
|
static void task_e() {
|
||||||
printf("\n\n-------------- Task E --------------\n\n");
|
printf("\n\n-------------- Task E --------------\n\n");
|
||||||
|
|
||||||
|
|
@ -126,6 +126,40 @@ double formula(int n)
|
||||||
return sum;
|
return sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// -------------- Task D --------------
|
||||||
|
|
||||||
|
#ifdef __GNUC__
|
||||||
|
#pragma GCC push_options
|
||||||
|
#pragma GCC optimize("O1")
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Kahan (compensated) summation of all entries of `input`.
// Alongside the running total, a correction term records the part of each
// addend that was lost to rounding and feeds it back into the next step.
// Returns the accumulated total; 0 for an empty vector.
long double Kahan_skalar(vector<double> const &input)
{
    long double total        = 0.0;
    long double compensation = 0.0;   // rounding error carried over from the previous addition

    for (double const value : input)
    {
        long double const corrected = value - compensation;
        long double const next      = total + corrected;
        compensation = (next - total) - corrected;
        total        = next;
    }
    return total;
}
|
||||||
|
|
||||||
|
// Plain running sum of all entries of `input`, added front to back into a
// long double accumulator.  Returns 0 for an empty vector.
long double normal_sum(vector<double> const &input)
{
    long double total = 0.0;
    for (double const value : input)
    {
        total += value;
    }
    return total;
}
|
||||||
|
|
||||||
|
#ifdef __GNUC__
|
||||||
|
#pragma GCC pop_options
|
||||||
|
#endif
|
||||||
|
|
||||||
// -------------- Task E --------------
|
// -------------- Task E --------------
|
||||||
|
|
||||||
void insert_into_vector(vector<int>& vec, int n) {
|
void insert_into_vector(vector<int>& vec, int n) {
|
||||||
|
|
@ -61,6 +61,12 @@ long int sum_of_spec(int n);
|
||||||
// Sums up all positive integers less or equal n which are multiples of 3 or of 5 (including or!) by inclusion-exclusion principle.
|
// Sums up all positive integers less or equal n which are multiples of 3 or of 5 (including or!) by inclusion-exclusion principle.
|
||||||
double formula(int n);
|
double formula(int n);
|
||||||
|
|
||||||
|
// -------------- Task D --------------
|
||||||
|
|
||||||
|
// Sums all entries of input using Kahan (compensated) summation and returns the total.
long double Kahan_skalar(std::vector<double> const &input);
|
||||||
|
|
||||||
|
// Sums all entries of input with a plain running sum (front to back) and returns the total.
long double normal_sum(std::vector<double> const &input);
|
||||||
|
|
||||||
// -------------- Task E --------------
|
// -------------- Task E --------------
|
||||||
|
|
||||||
// Inserts n random numbers into sorted vector v such that v remains sorted.
|
// Inserts n random numbers into sorted vector v such that v remains sorted.
|
||||||
104
ex1/ex1_results.txt
Normal file
104
ex1/ex1_results.txt
Normal file
|
|
@ -0,0 +1,104 @@
|
||||||
|
|
||||||
|
|
||||||
|
-------------- Task A --------------
|
||||||
|
|
||||||
|
means(1,4,16) = (7.000000, 4.000000, 2.285714)
|
||||||
|
means(2,3,5) = (3.333333, 3.107233, 2.903226)
|
||||||
|
means(1000,4000,16000) = (7000.000000, 4000.000000, 2285.714286)
|
||||||
|
means(4,8,15,16,23,42) = (18.000000, 13.965497, 10.499524)
|
||||||
|
|
||||||
|
|
||||||
|
-------------- Task B --------------
|
||||||
|
|
||||||
|
Minimum: 1.000000
|
||||||
|
Maximum: 1000.000000
|
||||||
|
Arithmetic: 498.184000
|
||||||
|
Geometric: 364.411859
|
||||||
|
Harmonic: 95.685690
|
||||||
|
Deviation: 287.905085
|
||||||
|
|
||||||
|
|
||||||
|
-------------- Task C --------------
|
||||||
|
|
||||||
|
n = 15
|
||||||
|
For-loop funtion: result = 60 | time = 0.018645 milliseconds
|
||||||
|
Formula funtion: result = 60 | time = 0.000038 milliseconds
|
||||||
|
n = 1001
|
||||||
|
For-loop funtion: result = 234168 | time = 1.219296 milliseconds
|
||||||
|
Formula funtion: result = 234168 | time = 0.000039 milliseconds
|
||||||
|
n = 1432987
|
||||||
|
For-loop funtion: result = 479139074204 | time = 1625.893479 milliseconds
|
||||||
|
Formula funtion: result = 479139074204 | time = 0.000106 milliseconds
|
||||||
|
|
||||||
|
|
||||||
|
-------------- Task D --------------
|
||||||
|
|
||||||
|
Memory allocation
|
||||||
|
0.75 GByte Memory allocated
|
||||||
|
|
||||||
|
Start Benchmarking Normal sum
|
||||||
|
|
||||||
|
Sum = 1.6449340468482272
|
||||||
|
|
||||||
|
Timing in sec. : 0.36
|
||||||
|
GFLOPS : 0.26
|
||||||
|
GiByte/s : 2.1
|
||||||
|
|
||||||
|
Start Benchmarking Kahan summation
|
||||||
|
|
||||||
|
Sum = 1.6449340468482272
|
||||||
|
|
||||||
|
Timing in sec. : 0.31
|
||||||
|
GFLOPS : 0.3
|
||||||
|
GiByte/s : 2.4
|
||||||
|
|
||||||
|
Limit = 1.6449340668482264
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-------------- Task E --------------
|
||||||
|
|
||||||
|
Vector insertion time for n = 100: 26 microseconds.
|
||||||
|
List insertion time for n = 100: 45 microseconds.
|
||||||
|
Vector insertion time for n = 1000: 121 microseconds.
|
||||||
|
List insertion time for n = 1000: 4059 microseconds.
|
||||||
|
Vector insertion time for n = 10000: 5932 microseconds.
|
||||||
|
List insertion time for n = 10000: 568042 microseconds.
|
||||||
|
|
||||||
|
|
||||||
|
-------------- Task F --------------
|
||||||
|
|
||||||
|
single_goldbach(k = 694) = 19
|
||||||
|
|
||||||
|
Decompositions for k = 694: 3 + 691, 11 + 683, 17 + 677, 41 + 653, 47 + 647, 53 + 641, 101 + 593, 107 + 587, 131 + 563, 137 + 557, 173 + 521, 191 + 503, 227 + 467, 233 + 461, 251 + 443, 263 + 431, 293 + 401, 311 + 383, 347 + 347,
|
||||||
|
|
||||||
|
NOTE: For n=2'000'000 it will take ~30 seconds.
|
||||||
|
count_goldbach(n = 10000): k = 9240, decompositions = 329, time elapsed: 1.055767 milliseconds
|
||||||
|
count_goldbach(n = 100000): k = 99330, decompositions = 2168, time elapsed: 42.025990 milliseconds
|
||||||
|
count_goldbach(n = 400000): k = 390390, decompositions = 7094, time elapsed: 530.372813 milliseconds
|
||||||
|
count_goldbach(n = 1000000): k = 990990, decompositions = 15594, time elapsed: 3973.183267 milliseconds
|
||||||
|
|
||||||
|
|
||||||
|
-------------- Task G --------------
|
||||||
|
|
||||||
|
M =
|
||||||
|
0.000045 0.000508 0.003346
|
||||||
|
0.000508 0.005754 0.037929
|
||||||
|
0.003346 0.037929 0.250000
|
||||||
|
0.006185 0.070104 0.462071
|
||||||
|
0.006648 0.075350 0.496654
|
||||||
|
|
||||||
|
u = 1 2 3
|
||||||
|
M * u = 0.011099 0.1258 0.8292 1.5326 1.6473
|
||||||
|
v = -1 2 -3 4 -5
|
||||||
|
M^T * v = -0.017568 -0.19912 -1.3125
|
||||||
|
|
||||||
|
Results for 3000x3000 matrix vector multiplication doing 100 loops
|
||||||
|
Time for initialization: 0.392896 seconds.
|
||||||
|
Time for Mult : 0.996829 seconds, 0.009968 per loop.
|
||||||
|
Time for MultT : 6.502881 seconds, 0.065029 per loop.
|
||||||
|
|
||||||
|
Results for 3000x3000 matrix vector multiplication doing 100 loops taking advantage of tensor product structure of the matrix
|
||||||
|
Time for initialization: 0.000088 seconds.
|
||||||
|
Time for Mult : 0.000164 seconds, 0.000002 per loop.
|
||||||
|
Time for MultT : 0.000197 seconds, 0.000002 per loop.
|
||||||
BIN
ex2/task_c.png
Normal file
BIN
ex2/task_c.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 55 KiB |
Loading…
Add table
Add a link
Reference in a new issue