ex1
This commit is contained in:
commit
1c6d7546ce
41 changed files with 7862 additions and 0 deletions
64
ex1/ABCEFG/.vscode/settings.json
vendored
Normal file
64
ex1/ABCEFG/.vscode/settings.json
vendored
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
{
|
||||
"files.associations": {
|
||||
"algorithm": "cpp",
|
||||
"format": "cpp",
|
||||
"iostream": "cpp",
|
||||
"ostream": "cpp",
|
||||
"array": "cpp",
|
||||
"atomic": "cpp",
|
||||
"bit": "cpp",
|
||||
"cctype": "cpp",
|
||||
"charconv": "cpp",
|
||||
"chrono": "cpp",
|
||||
"clocale": "cpp",
|
||||
"cmath": "cpp",
|
||||
"compare": "cpp",
|
||||
"concepts": "cpp",
|
||||
"cstdarg": "cpp",
|
||||
"cstddef": "cpp",
|
||||
"cstdint": "cpp",
|
||||
"cstdio": "cpp",
|
||||
"cstdlib": "cpp",
|
||||
"cstring": "cpp",
|
||||
"ctime": "cpp",
|
||||
"cwchar": "cpp",
|
||||
"cwctype": "cpp",
|
||||
"deque": "cpp",
|
||||
"list": "cpp",
|
||||
"string": "cpp",
|
||||
"unordered_map": "cpp",
|
||||
"vector": "cpp",
|
||||
"exception": "cpp",
|
||||
"functional": "cpp",
|
||||
"iterator": "cpp",
|
||||
"memory": "cpp",
|
||||
"memory_resource": "cpp",
|
||||
"numeric": "cpp",
|
||||
"optional": "cpp",
|
||||
"random": "cpp",
|
||||
"ratio": "cpp",
|
||||
"string_view": "cpp",
|
||||
"system_error": "cpp",
|
||||
"tuple": "cpp",
|
||||
"type_traits": "cpp",
|
||||
"utility": "cpp",
|
||||
"fstream": "cpp",
|
||||
"initializer_list": "cpp",
|
||||
"iomanip": "cpp",
|
||||
"iosfwd": "cpp",
|
||||
"istream": "cpp",
|
||||
"limits": "cpp",
|
||||
"new": "cpp",
|
||||
"numbers": "cpp",
|
||||
"span": "cpp",
|
||||
"sstream": "cpp",
|
||||
"stdexcept": "cpp",
|
||||
"streambuf": "cpp",
|
||||
"cinttypes": "cpp",
|
||||
"typeinfo": "cpp",
|
||||
"variant": "cpp",
|
||||
"thread": "cpp",
|
||||
"semaphore": "cpp",
|
||||
"stop_token": "cpp"
|
||||
}
|
||||
}
|
||||
123
ex1/ABCEFG/CLANG_default.mk
Normal file
123
ex1/ABCEFG/CLANG_default.mk
Normal file
|
|
@ -0,0 +1,123 @@
|
|||
# Basic Definitions for using the Clang/LLVM compiler suite sequentially
|
||||
# requires setting of COMPILER=CLANG_
|
||||
|
||||
#CLANGPATH=//usr/lib/llvm-10/bin/
|
||||
CC = ${CLANGPATH}clang
|
||||
CXX = ${CLANGPATH}clang++
|
||||
#CXX = ${CLANGPATH}clang++ -lomptarget -fopenmp-targets=nvptx64-nvidia-cuda --cuda-path=/opt/pgi/linux86-64/2017/cuda/8.0
|
||||
#F77 = gfortran
|
||||
LINKER = ${CXX}
|
||||
|
||||
#http://clang.llvm.org/docs/UsersManual.html#options-to-control-error-and-warning-messages
|
||||
WARNINGS += -Weverything -Wno-c++98-compat -Wno-sign-conversion -Wno-date-time -Wno-shorten-64-to-32 -Wno-padded -ferror-limit=1
|
||||
WARNINGS += -Wdocumentation -Wconversion -Wshadow -Wfloat-conversion -pedantic
|
||||
#-fsyntax-only -Wdocumentation -Wconversion -Wshadow -Wfloat-conversion -pedantic
|
||||
|
||||
CXXFLAGS += -O3 -std=c++17 -ferror-limit=1 ${WARNINGS}
|
||||
# don't use -Ofast
|
||||
# -ftrapv
|
||||
LINKFLAGS += -O3
|
||||
|
||||
# different libraries on Ubuntu or Manjaro
|
||||
ifndef UBUNTU
|
||||
UBUNTU=1
|
||||
endif
|
||||
|
||||
# BLAS, LAPACK
|
||||
LINKFLAGS += -llapack -lblas
|
||||
# -lopenblas
|
||||
ifeq ($(UBUNTU),1)
|
||||
# ubuntu
|
||||
else
|
||||
# on archlinux
|
||||
LINKFLAGS += -lcblas
|
||||
endif
|
||||
|
||||
# interprocedural optimization
|
||||
CXXFLAGS += -flto
|
||||
LINKFLAGS += -flto
|
||||
|
||||
# very good check
|
||||
# http://clang.llvm.org/extra/clang-tidy/
|
||||
# good check, see: http://llvm.org/docs/CodingStandards.html#include-style
|
||||
SWITCH_OFF=,-readability-magic-numbers,-readability-redundant-control-flow,-readability-redundant-member-init
|
||||
SWITCH_OFF+=,-readability-redundant-member-init,-readability-isolate-declaration
|
||||
#READABILITY=,readability*${SWITCH_OFF}
|
||||
#TIDYFLAGS = -checks=llvm-*,-llvm-header-guard -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp"
|
||||
TIDYFLAGS = -checks=llvm-*,-llvm-header-guard${READABILITY} -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp"
|
||||
#TIDYFLAGS += -checks='modernize*
|
||||
# ???
|
||||
#TIDYFLAGS = -checks='cert*' -header-filter=.*
|
||||
# MPI checks ??
|
||||
#TIDYFLAGS = -checks='mpi*'
|
||||
# ??
|
||||
#TIDYFLAGS = -checks='performance*' -header-filter=.*
|
||||
#TIDYFLAGS = -checks='portability-*' -header-filter=.*
|
||||
#TIDYFLAGS = -checks='readability-*' -header-filter=.*
|
||||
|
||||
default: ${PROGRAM}
|
||||
|
||||
${PROGRAM}: ${OBJECTS}
|
||||
$(LINKER) $^ ${LINKFLAGS} -o $@
|
||||
|
||||
clean:
|
||||
@rm -f ${PROGRAM} ${OBJECTS}
|
||||
|
||||
clean_all:: clean
|
||||
@rm -f *_ *~ *.bak *.log *.out *.tar
|
||||
|
||||
codecheck: tidy_check
|
||||
tidy_check:
|
||||
clang-tidy ${SOURCES} ${TIDYFLAGS} -- ${SOURCES}
|
||||
# see also http://clang-developers.42468.n3.nabble.com/Error-while-trying-to-load-a-compilation-database-td4049722.html
|
||||
|
||||
run: clean ${PROGRAM}
|
||||
# time ./${PROGRAM} ${PARAMS}
|
||||
./${PROGRAM} ${PARAMS}
|
||||
|
||||
# tar the current directory
|
||||
MY_DIR = `basename ${PWD}`
|
||||
tar: clean_all
|
||||
@echo "Tar the directory: " ${MY_DIR}
|
||||
@cd .. ;\
|
||||
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
|
||||
cd ${MY_DIR}
|
||||
# tar cf `basename ${PWD}`.tar *
|
||||
|
||||
doc:
|
||||
doxygen Doxyfile
|
||||
|
||||
#########################################################################
|
||||
|
||||
.cpp.o:
|
||||
$(CXX) -c $(CXXFLAGS) -o $@ $<
|
||||
|
||||
.c.o:
|
||||
$(CC) -c $(CFLAGS) -o $@ $<
|
||||
|
||||
.f.o:
|
||||
$(F77) -c $(FFLAGS) -o $@ $<
|
||||
|
||||
##################################################################################################
|
||||
# some tools
|
||||
# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags)
|
||||
cache: ${PROGRAM}
|
||||
valgrind --tool=callgrind --simulate-cache=yes ./$^ ${PARAMS}
|
||||
# kcachegrind callgrind.out.<pid> &
|
||||
kcachegrind `ls -1tr callgrind.out.* |tail -1`
|
||||
|
||||
# Check for wrong memory accesses, memory leaks, ...
|
||||
# use smaller data sets
|
||||
mem: ${PROGRAM}
|
||||
valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^ ${PARAMS}
|
||||
|
||||
# Simple run time profiling of your code
|
||||
# CXXFLAGS += -g -pg
|
||||
# LINKFLAGS += -pg
|
||||
prof: ${PROGRAM}
|
||||
perf record ./$^ ${PARAMS}
|
||||
perf report
|
||||
# gprof -b ./$^ > gp.out
|
||||
# kprof -f gp.out -p gprof &
|
||||
|
||||
codecheck: tidy_check
|
||||
130
ex1/ABCEFG/GCC_AMD32_default.mk
Normal file
130
ex1/ABCEFG/GCC_AMD32_default.mk
Normal file
|
|
@ -0,0 +1,130 @@
|
|||
# Basic Definitions for using GNU-compiler suite sequentially
|
||||
# requires setting of COMPILER=GCC_
|
||||
|
||||
CC = gcc
|
||||
CXX = g++
|
||||
F77 = gfortran
|
||||
LINKER = ${CXX}
|
||||
|
||||
# on mephisto:
|
||||
#CXXFLAGS += -I/share/apps/atlas/include
|
||||
#LINKFLAGS += -L/share/apps/atlas/lib
|
||||
#LINKFLAGS += -lcblas -latlas
|
||||
|
||||
#LINKFLAGS += -lblas
|
||||
# The <cblas.h> header must be wrapped in extern "C" so that g++ finds everything.
|
||||
|
||||
|
||||
#WARNINGS = -pedantic -pedantic-errors -Wall -Wextra -Werror -Wconversion -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow
|
||||
WARNINGS = -pedantic -Wall -Wextra -Wconversion -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow \
|
||||
-Wredundant-decls -Winline -fmax-errors=1
|
||||
# -Wunreachable-code
|
||||
# -Wunreachable-code
|
||||
CXXFLAGS += -ffast-math -O3 -march=native -std=c++17 ${WARNINGS}
|
||||
#CXXFLAGS += -Ofast -funroll-all-loops -std=c++17 ${WARNINGS}
|
||||
#-msse3
|
||||
# -ftree-vectorizer-verbose=2 -DNDEBUG
|
||||
# -ftree-vectorizer-verbose=5
|
||||
# -ftree-vectorize -fdump-tree-vect-blocks=foo.dump -fdump-tree-pre=stderr
|
||||
|
||||
# CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details
|
||||
# CFLAGS = -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2
|
||||
# #CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
|
||||
# FFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
|
||||
# LFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
|
||||
LINKFLAGS += -O3
|
||||
|
||||
# BLAS, LAPACK
|
||||
OPENBLAS_DIR = /opt/openblas_GCCseq
|
||||
#OPENBLAS_DIR = /opt/openblas_GCC
|
||||
OPENBLAS_LIBDIR = ${OPENBLAS_DIR}/lib
|
||||
OPENBLAS_INCDIR = ${OPENBLAS_DIR}/include
|
||||
CXXFLAGS += -I${OPENBLAS_INCDIR}
|
||||
LINKFLAGS += -L${OPENBLAS_LIBDIR} -lopenblas
|
||||
|
||||
# interprocedural optimization
|
||||
CXXFLAGS += -flto
|
||||
LINKFLAGS += -flto
|
||||
|
||||
# profiling tools
|
||||
#CXXFLAGS += -pg
|
||||
#LINKFLAGS += -pg
|
||||
|
||||
default: ${PROGRAM}
|
||||
|
||||
${PROGRAM}: ${OBJECTS}
|
||||
$(LINKER) $^ ${LINKFLAGS} -o $@
|
||||
|
||||
clean:
|
||||
@rm -f ${PROGRAM} ${OBJECTS}
|
||||
|
||||
clean_all:: clean
|
||||
-@rm -f *_ *~ *.bak *.log *.out *.tar *.orig
|
||||
-@rm -r html
|
||||
|
||||
run: clean ${PROGRAM}
|
||||
# time ./${PROGRAM}
|
||||
# ./${PROGRAM}
|
||||
( export LD_LIBRARY_PATH=${OPENBLAS_LIBDIR}:${LD_LIBRARY_PATH} ; ./${PROGRAM} )
|
||||
# or 'export LD_LIBRARY_PATH=/opt/openblas_gcc/lib:${LD_LIBRARY_PATH}' in your ~/.bashrc
|
||||
|
||||
# tar the current directory
|
||||
MY_DIR = `basename ${PWD}`
|
||||
tar:
|
||||
@echo "Tar the directory: " ${MY_DIR}
|
||||
@cd .. ;\
|
||||
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
|
||||
cd ${MY_DIR}
|
||||
# tar cf `basename ${PWD}`.tar *
|
||||
|
||||
doc:
|
||||
doxygen Doxyfile
|
||||
|
||||
#########################################################################
|
||||
|
||||
.cpp.o:
|
||||
$(CXX) -c $(CXXFLAGS) -o $@ $<
|
||||
|
||||
.c.o:
|
||||
$(CC) -c $(CFLAGS) -o $@ $<
|
||||
|
||||
.f.o:
|
||||
$(F77) -c $(FFLAGS) -o $@ $<
|
||||
|
||||
##################################################################################################
|
||||
# some tools
|
||||
# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags)
|
||||
cache: ${PROGRAM}
|
||||
valgrind --tool=callgrind --simulate-cache=yes ./$^
|
||||
# kcachegrind callgrind.out.<pid> &
|
||||
kcachegrind `ls -1tr callgrind.out.* |tail -1`
|
||||
|
||||
# Check for wrong memory accesses, memory leaks, ...
|
||||
# use smaller data sets
|
||||
# no "-pg" in compile/link options
|
||||
mem: ${PROGRAM}
|
||||
valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
|
||||
|
||||
# Simple run time profiling of your code
|
||||
# CXXFLAGS += -g -pg
|
||||
# LINKFLAGS += -pg
|
||||
prof: ${PROGRAM}
|
||||
./$^
|
||||
gprof -b ./$^ > gp.out
|
||||
# kprof -f gp.out -p gprof &
|
||||
|
||||
#Trace your heap:
|
||||
#> heaptrack ./main.GCC_
|
||||
#> heaptrack_gui heaptrack.main.GCC_.<pid>.gz
|
||||
heap: ${PROGRAM}
|
||||
heaptrack ./$^ 11
|
||||
heaptrack_gui `ls -1tr heaptrack.$^.* |tail -1` &
|
||||
|
||||
|
||||
|
||||
########################################################################
|
||||
# get the detailed status of all optimization flags
|
||||
info:
|
||||
echo "detailed status of all optimization flags"
|
||||
$(CXX) --version
|
||||
$(CXX) -Q $(CXXFLAGS) --help=optimizers
|
||||
183
ex1/ABCEFG/GCC_default.mk
Normal file
183
ex1/ABCEFG/GCC_default.mk
Normal file
|
|
@ -0,0 +1,183 @@
|
|||
# Basic Definitions for using GNU-compiler suite sequentially
|
||||
# requires setting of COMPILER=GCC_
|
||||
|
||||
CC = gcc
|
||||
CXX = g++
|
||||
F77 = gfortran
|
||||
LINKER = ${CXX}
|
||||
|
||||
WARNINGS = -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow \
|
||||
-Wredundant-decls
|
||||
# -Wunreachable-code
|
||||
CXXFLAGS += -ffast-math -O3 -march=native -std=c++17 ${WARNINGS}
|
||||
#CXXFLAGS += -Ofast -funroll-all-loops -std=c++17 ${WARNINGS}
|
||||
#-msse3
|
||||
# -ftree-vectorizer-verbose=2 -DNDEBUG
|
||||
# -ftree-vectorizer-verbose=5
|
||||
# -ftree-vectorize -fdump-tree-vect-blocks=foo.dump -fdump-tree-pre=stderr
|
||||
|
||||
# CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details
|
||||
# CFLAGS = -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2
|
||||
# #CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
|
||||
# FFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
|
||||
# LFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
|
||||
LINKFLAGS += -O3
|
||||
|
||||
#architecture
|
||||
#CPU = -march=znver2
|
||||
CXXFLAGS += ${CPU}
|
||||
LINKFLAGS += ${CPU}
|
||||
|
||||
# different libraries on Ubuntu or Manjaro
|
||||
ifndef UBUNTU
|
||||
UBUNTU=1
|
||||
endif
|
||||
|
||||
# BLAS, LAPACK
|
||||
ifeq ($(UBUNTU),1)
|
||||
LINKFLAGS += -llapack -lblas
|
||||
# -lopenblas
|
||||
else
|
||||
# on archlinux
|
||||
LINKFLAGS += -llapack -lopenblas -lcblas
|
||||
endif
|
||||
|
||||
# interprocedural optimization
|
||||
CXXFLAGS += -flto
|
||||
LINKFLAGS += -flto
|
||||
|
||||
# for debugging purposes (safe code)
|
||||
# -fsanitize=leak # only one out of the three can be used
|
||||
# -fsanitize=address
|
||||
# -fsanitize=thread
|
||||
SANITARY = -fsanitize=address -fsanitize=undefined -fsanitize=null -fsanitize=return \
|
||||
-fsanitize=bounds -fsanitize=alignment -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow \
|
||||
-fsanitize=bool -fsanitize=enum -fsanitize=vptr
|
||||
#CXXFLAGS += ${SANITARY}
|
||||
#LINKFLAGS += ${SANITARY}
|
||||
|
||||
# profiling tools
|
||||
#CXXFLAGS += -pg
|
||||
#LINKFLAGS += -pg
|
||||
|
||||
|
||||
default: ${PROGRAM}
|
||||
|
||||
${PROGRAM}: ${OBJECTS}
|
||||
$(LINKER) $^ ${LINKFLAGS} -o $@
|
||||
|
||||
clean:
|
||||
@rm -f ${PROGRAM} ${OBJECTS}
|
||||
|
||||
clean_all:: clean
|
||||
-@rm -f *_ *~ *.bak *.log *.out *.tar *.orig *.optrpt
|
||||
-@rm -rf html
|
||||
|
||||
run: clean ${PROGRAM}
|
||||
#run: ${PROGRAM}
|
||||
# time ./${PROGRAM} ${PARAMS}
|
||||
./${PROGRAM} ${PARAMS}
|
||||
|
||||
# tar the current directory
|
||||
MY_DIR = `basename ${PWD}`
|
||||
tar: clean_all
|
||||
@echo "Tar the directory: " ${MY_DIR}
|
||||
@cd .. ;\
|
||||
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
|
||||
cd ${MY_DIR}
|
||||
# tar cf `basename ${PWD}`.tar *
|
||||
#find . -size +10M > large_files
|
||||
#--exclude-from ${MY_DIR}/large_files
|
||||
|
||||
zip: clean
|
||||
@echo "Zip the directory: " ${MY_DIR}
|
||||
@cd .. ;\
|
||||
zip -r ${MY_DIR}.zip ${MY_DIR} *default.mk ;\
|
||||
cd ${MY_DIR}
|
||||
|
||||
doc:
|
||||
doxygen Doxyfile
|
||||
|
||||
#########################################################################
|
||||
.SUFFIXES: .f90
|
||||
|
||||
.cpp.o:
|
||||
$(CXX) -c $(CXXFLAGS) -o $@ $<
|
||||
# $(CXX) -c $(CXXFLAGS) -o $@ $< 2>&1 | tee -a $<.log
|
||||
# $(CXX) -c $(CXXFLAGS) -o $@ $< 2>&1 | tee -a $(<:.cpp=.log)
|
||||
|
||||
.c.o:
|
||||
$(CC) -c $(CFLAGS) -o $@ $<
|
||||
|
||||
.f.o:
|
||||
$(F77) -c $(FFLAGS) -o $@ $<
|
||||
|
||||
.f90.o:
|
||||
$(F77) -c $(FFLAGS) -o $@ $<
|
||||
|
||||
##################################################################################################
|
||||
# some tools
|
||||
# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags)
|
||||
cache: ${PROGRAM}
|
||||
valgrind --tool=callgrind --simulate-cache=yes ./$^ ${PARAMS}
|
||||
# kcachegrind callgrind.out.<pid> &
|
||||
kcachegrind `ls -1tr callgrind.out.* |tail -1`
|
||||
|
||||
# Check for wrong memory accesses, memory leaks, ...
|
||||
# use smaller data sets
|
||||
# no "-pg" in compile/link options
|
||||
mem: ${PROGRAM}
|
||||
valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^ ${PARAMS}
|
||||
# Graphical interface
|
||||
# valkyrie
|
||||
|
||||
# Simple run time profiling of your code
|
||||
# CXXFLAGS += -g -pg
|
||||
# LINKFLAGS += -pg
|
||||
prof: ${PROGRAM}
|
||||
perf record ./$^ ${PARAMS}
|
||||
perf report
|
||||
# gprof -b ./$^ > gp.out
|
||||
# kprof -f gp.out -p gprof &
|
||||
|
||||
# perf in Ubuntu 20.04: https://www.howtoforge.com/how-to-install-perf-performance-analysis-tool-on-ubuntu-20-04/
|
||||
# * install
|
||||
# * sudo vi /etc/sysctl.conf
|
||||
# add kernel.perf_event_paranoid = 0
|
||||
|
||||
#Trace your heap:
|
||||
#> heaptrack ./main.GCC_
|
||||
#> heaptrack_gui heaptrack.main.GCC_.<pid>.gz
|
||||
heap: ${PROGRAM}
|
||||
heaptrack ./$^ ${PARAMS}
|
||||
heaptrack_gui `ls -1tr heaptrack.$^.* |tail -1` &
|
||||
|
||||
codecheck: $(SOURCES)
|
||||
cppcheck --enable=all --inconclusive --std=c++17 --suppress=missingIncludeSystem $^
|
||||
|
||||
|
||||
########################################################################
|
||||
# get the detailed status of all optimization flags
|
||||
info:
|
||||
echo "detailed status of all optimization flags"
|
||||
$(CXX) --version
|
||||
$(CXX) -Q $(CXXFLAGS) --help=optimizers
|
||||
lscpu
|
||||
inxi -C
|
||||
lstopo
|
||||
|
||||
# Excellent hardware info
|
||||
# hardinfo
|
||||
# Live monitoring of CPU frequency etc.
|
||||
# sudo i7z
|
||||
|
||||
# Memory consumption
|
||||
# vmstat -at -SM 3
|
||||
# xfce4-taskmanager
|
||||
|
||||
|
||||
# https://www.tecmint.com/check-linux-cpu-information/
|
||||
#https://www.tecmint.com/monitor-cpu-and-gpu-temperature-in-ubuntu/
|
||||
|
||||
# Debugging:
|
||||
# https://wiki.archlinux.org/index.php/Debugging
|
||||
137
ex1/ABCEFG/ICC_default.mk
Normal file
137
ex1/ABCEFG/ICC_default.mk
Normal file
|
|
@ -0,0 +1,137 @@
|
|||
# Basic Definitions for using INTEL compiler suite sequentially
|
||||
# requires setting of COMPILER=ICC_
|
||||
|
||||
#BINDIR = /opt/intel/bin/
|
||||
|
||||
# special on my sony [GH]
|
||||
#BINDIR = /opt/save.intel/bin/
|
||||
# very special on my sony [GH]
|
||||
# FIND_LIBS = -L /opt/save.intel/composer_xe_2013.1.117/mkl/lib/intel64/libmkl_intel_lp64.so
|
||||
|
||||
# Error with g++-4.8 using icpc14.0,x:
|
||||
# find directory wherein bits/c++config.h is located
|
||||
# 'locate bits/c++config.h'
|
||||
#FOUND_CONFIG = -I/usr/include/x86_64-linux-gnu/c++/4.8
|
||||
|
||||
|
||||
CC = ${BINDIR}icc
|
||||
CXX = ${BINDIR}icpc
|
||||
F77 = ${BINDIR}ifort
|
||||
LINKER = ${CXX}
|
||||
|
||||
|
||||
WARNINGS = -Wall -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -wd2015,2012 -wn3
|
||||
# -Winline -Wredundant-decls -Wunreachable-code
|
||||
CXXFLAGS += -O3 -fargument-noalias -std=c++17 -DNDEBUG ${WARNINGS} -mkl ${FOUND_CONFIG}
|
||||
# profiling tools
|
||||
#CXXFLAGS += -pg
|
||||
#LINKFLAGS += -pg
|
||||
# -vec-report=3
|
||||
# -qopt-report=5 -qopt-report-phase=vec
|
||||
# -guide -parallel
|
||||
# -guide-opts=string -guide-par[=n] -guide-vec[=n]
|
||||
# -auto-p32 -simd
|
||||
CXXFLAGS += -align
|
||||
|
||||
# use MKL by INTEL
|
||||
# https://software.intel.com/content/www/us/en/develop/documentation/mkl-linux-developer-guide/top/linking-your-application-with-the-intel-math-kernel-library/linking-quick-start/using-the-mkl-compiler-option.html
|
||||
# https://software.intel.com/content/www/us/en/develop/articles/intel-mkl-link-line-advisor.html
|
||||
# LINKFLAGS += -L${BINDIR}../composer_xe_2013.1.117/mkl/lib/intel64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
|
||||
#LINKFLAGS += -O3 -L/opt/intel/mkl/lib -mkl
|
||||
LINKFLAGS += -O3 -mkl=sequential
|
||||
|
||||
# interprocedural optimization
|
||||
CXXFLAGS += -ipo
|
||||
LINKFLAGS += -ipo
|
||||
|
||||
# annotated assembler file
|
||||
ANNOTED = -fsource-asm -S
|
||||
|
||||
default: ${PROGRAM}
|
||||
|
||||
${PROGRAM}: ${OBJECTS}
|
||||
$(LINKER) $^ ${LINKFLAGS} -o $@
|
||||
|
||||
clean:
|
||||
rm -f ${PROGRAM} ${OBJECTS}
|
||||
|
||||
clean_all:: clean
|
||||
@rm -f *_ *~ *.bak *.log *.out *.tar
|
||||
|
||||
run: clean ${PROGRAM}
|
||||
./${PROGRAM}
|
||||
|
||||
# tar the current directory
|
||||
MY_DIR = `basename ${PWD}`
|
||||
tar: clean_all
|
||||
@echo "Tar the directory: " ${MY_DIR}
|
||||
@cd .. ;\
|
||||
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
|
||||
cd ${MY_DIR}
|
||||
# tar cf `basename ${PWD}`.tar *
|
||||
|
||||
doc:
|
||||
doxygen Doxyfile
|
||||
|
||||
#########################################################################
|
||||
|
||||
.cpp.o:
|
||||
$(CXX) -c $(CXXFLAGS) -o $@ $<
|
||||
|
||||
.c.o:
|
||||
$(CC) -c $(CFLAGS) -o $@ $<
|
||||
|
||||
.f.o:
|
||||
$(F77) -c $(FFLAGS) -o $@ $<
|
||||
|
||||
##################################################################################################
|
||||
# # some tools
|
||||
# # Cache behaviour (CXXFLAGS += -g tracks down to source lines)
|
||||
# cache: ${PROGRAM}
|
||||
# valgrind --tool=callgrind --simulate-cache=yes ./$^
|
||||
# # kcachegrind callgrind.out.<pid> &
|
||||
#
|
||||
# # Check for wrong memory accesses, memory leaks, ...
|
||||
# # use smaller data sets
|
||||
# mem: ${PROGRAM}
|
||||
# valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
|
||||
#
|
||||
# # Simple run time profiling of your code
|
||||
# # CXXFLAGS += -g -pg
|
||||
# # LINKFLAGS += -pg
|
||||
# prof: ${PROGRAM}
|
||||
# ./$^
|
||||
# gprof -b ./$^ > gp.out
|
||||
# # kprof -f gp.out -p gprof &
|
||||
#
|
||||
|
||||
|
||||
mem: inspector
|
||||
prof: amplifier
|
||||
cache: amplifier
|
||||
|
||||
gap_par_report:
|
||||
${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt
|
||||
|
||||
# GUI for performance report
|
||||
amplifier: ${PROGRAM}
|
||||
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
|
||||
# alternatively to the solution above:
|
||||
# edit the file /etc/sysctl.d/10-ptrace.conf and set the variable kernel.yama.ptrace_scope to 0.
|
||||
amplxe-gui &
|
||||
|
||||
# GUI for Memory and Thread analyzer (race condition)
|
||||
inspector: ${PROGRAM}
|
||||
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
|
||||
inspxe-gui &
|
||||
|
||||
advisor:
|
||||
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
|
||||
advixe-gui &
|
||||
|
||||
icc-info:
|
||||
icpc -# main.cpp
|
||||
|
||||
|
||||
|
||||
|
||||
176
ex1/ABCEFG/ONEAPI_default.mk
Normal file
176
ex1/ABCEFG/ONEAPI_default.mk
Normal file
|
|
@ -0,0 +1,176 @@
|
|||
# Basic Definitions for using INTEL compiler suite sequentially
|
||||
# requires setting of COMPILER=ONEAPI_
|
||||
|
||||
# https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onemkl/link-line-advisor.html
|
||||
# requires
|
||||
# source /opt/intel/oneapi/setvars.sh
|
||||
# on AMD: export MKL_DEBUG_CPU_TYPE=5
|
||||
|
||||
#BINDIR = /opt/intel/oneapi/compiler/latest/linux/bin/
|
||||
#MKL_ROOT = /opt/intel/oneapi/mkl/latest/
|
||||
#export KMP_AFFINITY=verbose,compact
|
||||
|
||||
CC = ${BINDIR}icc
|
||||
CXX = ${BINDIR}dpcpp
|
||||
F77 = ${BINDIR}ifort
|
||||
LINKER = ${CXX}
|
||||
|
||||
## Compiler flags
|
||||
WARNINGS = -Wall -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -pedantic
|
||||
WARNINGS += -Wpessimizing-move -Wredundant-move
|
||||
#-wd2015,2012,2014 -wn3
|
||||
# -Winline -Wredundant-decls -Wunreachable-code
|
||||
# -qopt-subscript-in-range
|
||||
# -vec-threshold0
|
||||
|
||||
CXXFLAGS += -O3 -std=c++17 ${WARNINGS}
|
||||
#CXXFLAGS += -DMKL_ILP64 -I"${MKLROOT}/include"
|
||||
#CXXFLAGS += -DMKL_ILP32 -I"${MKLROOT}/include"
|
||||
LINKFLAGS += -O3
|
||||
|
||||
# interprocedural optimization
|
||||
CXXFLAGS += -ipo
|
||||
LINKFLAGS += -ipo
|
||||
LINKFLAGS += -flto
|
||||
|
||||
# annotated Assembler file
|
||||
ANNOTED = -fsource-asm -S
|
||||
|
||||
#architecture
|
||||
CPU = -march=core-avx2
|
||||
#CPU += -mtp=zen
|
||||
# -xCORE-AVX2
|
||||
# -axcode COMMON-AVX512 -axcode MIC-AVX512 -axcode CORE-AVX512 -axcode CORE-AVX2
|
||||
CXXFLAGS += ${CPU}
|
||||
LINKFLAGS += ${CPU}
|
||||
|
||||
# use MKL by INTEL
|
||||
# https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onemkl/link-line-advisor.html
|
||||
# sequential MKL
|
||||
# use the 32-bit integer interface (LP64) instead of the 64-bit integer interface (ILP64)
|
||||
CXXFLAGS += -qmkl=sequential -UMKL_ILP64
|
||||
LINKFLAGS += -O3 -qmkl=sequential -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
|
||||
#LINKFLAGS += -O3 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
|
||||
|
||||
# shared libs: https://aur.archlinux.org/packages/intel-oneapi-compiler-static
|
||||
# install intel-oneapi-compiler-static
|
||||
# or
|
||||
LINKFLAGS += -shared-intel
|
||||
|
||||
|
||||
OPENMP = -qopenmp
|
||||
CXXFLAGS += ${OPENMP}
|
||||
LINKFLAGS += ${OPENMP}
|
||||
|
||||
|
||||
# profiling tools
|
||||
#CXXFLAGS += -pg
|
||||
#LINKFLAGS += -pg
|
||||
# -vec-report=3
|
||||
# -qopt-report=5 -qopt-report-phase=vec -qopt-report-phase=openmp
|
||||
# -guide -parallel
|
||||
# -guide-opts=string -guide-par[=n] -guide-vec[=n]
|
||||
# -auto-p32 -simd
|
||||
|
||||
# Reports: https://software.intel.com/en-us/articles/getting-the-most-out-of-your-intel-compiler-with-the-new-optimization-reports
|
||||
#CXXFLAGS += -qopt-report=5 -qopt-report-phase=vec,par
|
||||
#CXXFLAGS += -qopt-report=5 -qopt-report-phase=cg
|
||||
# Redirect report from *.optrpt to stderr
|
||||
# -qopt-report-file=stderr
|
||||
# Guided parallelization
|
||||
# -guide -parallel
|
||||
# -guide-opts=string -guide-par[=n] -guide-vec[=n]
|
||||
# -auto-p32 -simd
|
||||
|
||||
## run time checks
|
||||
# https://www.intel.com/content/www/us/en/develop/documentation/fortran-compiler-oneapi-dev-guide-and-reference/top/compiler-reference/compiler-options/offload-openmp-and-parallel-processing-options/par-runtime-control-qpar-runtime-control.html
|
||||
|
||||
|
||||
default: ${PROGRAM}
|
||||
|
||||
${PROGRAM}: ${OBJECTS}
|
||||
$(LINKER) $^ ${LINKFLAGS} -o $@
|
||||
|
||||
clean:
|
||||
rm -f ${PROGRAM} ${OBJECTS} *.optrpt
|
||||
|
||||
clean_all:: clean
|
||||
@rm -f *_ *~ *.bak *.log *.out *.tar
|
||||
|
||||
run: clean ${PROGRAM}
|
||||
./${PROGRAM}
|
||||
|
||||
# tar the current directory
|
||||
MY_DIR = `basename ${PWD}`
|
||||
tar: clean_all
|
||||
@echo "Tar the directory: " ${MY_DIR}
|
||||
@cd .. ;\
|
||||
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
|
||||
cd ${MY_DIR}
|
||||
# tar cf `basename ${PWD}`.tar *
|
||||
|
||||
doc:
|
||||
doxygen Doxyfile
|
||||
|
||||
#########################################################################
|
||||
|
||||
.cpp.o:
|
||||
$(CXX) -c $(CXXFLAGS) -o $@ $<
|
||||
|
||||
.c.o:
|
||||
$(CC) -c $(CFLAGS) -o $@ $<
|
||||
|
||||
.f.o:
|
||||
$(F77) -c $(FFLAGS) -o $@ $<
|
||||
|
||||
##################################################################################################
|
||||
# some tools
|
||||
# Cache behaviour (CXXFLAGS += -g tracks down to source lines)
|
||||
# https://software.intel.com/content/www/us/en/develop/documentation/vtune-help/top/analyze-performance/microarchitecture-analysis-group/memory-access-analysis.html
|
||||
|
||||
mem: inspector
|
||||
prof: vtune
|
||||
cache: inspector
|
||||
|
||||
gap_par_report:
|
||||
${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt
|
||||
|
||||
# GUI for performance report
|
||||
amplifier: ${PROGRAM}
|
||||
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
|
||||
echo 0 | sudo tee /proc/sys/kernel/perf_event_paranoid
|
||||
amplxe-gui &
|
||||
|
||||
# GUI for Memory and Thread analyzer (race condition)
|
||||
inspector: ${PROGRAM}
|
||||
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
|
||||
# inspxe-gui &
|
||||
vtune-gui ./${PROGRAM} &
|
||||
|
||||
advisor:
|
||||
source /opt/intel/oneapi/advisor/2021.2.0/advixe-vars.sh
|
||||
# /opt/intel/oneapi/advisor/latest/bin64/advixe-gui &
|
||||
advisor --collect=survey ./${PROGRAM}
|
||||
# advisor --collect=roofline ./${PROGRAM}
|
||||
advisor --report=survey --project-dir=./ src:r=./ --format=csv --report-output=./out/survey.csv
|
||||
|
||||
vtune:
|
||||
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
|
||||
# https://software.intel.com/en-us/articles/intel-advisor-2017-update-1-what-s-new
|
||||
export ADVIXE_EXPERIMENTAL=roofline
|
||||
vtune -collect hotspots ./${PROGRAM}
|
||||
vtune -report hotspots -r r000hs > vtune.out
|
||||
# vtune-gui ./${PROGRAM} &
|
||||
|
||||
icc-info:
|
||||
icpc -# main.cpp
|
||||
|
||||
# MKL on AMD
|
||||
# https://www.computerbase.de/2019-11/mkl-workaround-erhoeht-leistung-auf-amd-ryzen/
|
||||
#
|
||||
# https://sites.google.com/a/uci.edu/mingru-yang/programming/mkl-has-bad-performance-on-an-amd-cpu
|
||||
# export MKL_DEBUG_CPU_TYPE=5
|
||||
# export MKL_NUM_THREADS=1
|
||||
# export MKL_DYNAMIC=false
|
||||
# on Intel compiler
|
||||
# http://publicclu2.blogspot.com/2013/05/intel-complier-suite-reference-card.html
|
||||
93
ex1/ABCEFG/PGI_default.mk
Normal file
93
ex1/ABCEFG/PGI_default.mk
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
# Basic Definitions for using PGI-compiler suite sequentially
|
||||
# requires setting of COMPILER=PGI_
|
||||
# OPTIRUN = optirun
|
||||
|
||||
|
||||
CC = pgcc
|
||||
CXX = pgc++
|
||||
F77 = pgfortran
|
||||
LINKER = ${CXX}
|
||||
|
||||
# on mephisto:
|
||||
#CXXFLAGS += -I/share/apps/atlas/include
|
||||
#LINKFLAGS += -L/share/apps/atlas/lib
|
||||
#LINKFLAGS += -lcblas -latlas
|
||||
|
||||
#LINKFLAGS += -lblas
|
||||
# Der <cblas.h> Header muss mit extern "C" versehen werden, damit g++ alles findet.
|
||||
|
||||
WARNINGS = -Minform=warn
|
||||
# -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -W -Wfloat-equal -Wshadow -Wredundant-decls
|
||||
# -pedantic -Wunreachable-code -Wextra -Winline
|
||||
# -Wunreachable-code
|
||||
|
||||
#PGI_PROFILING = -Minfo=ccff,loop,vect,opt,intensity,mp,accel
|
||||
PGI_PROFILING = -Minfo=ccff,accel,ipa,loop,lre,mp,opt,par,unified,vect,intensity
|
||||
# -Minfo
|
||||
# -Mprof=time
|
||||
# -Mprof=lines
|
||||
# take care with option -Msafeptr
|
||||
CXXFLAGS += -O3 -std=c++17 ${WARNINGS}
|
||||
#CXXFLAGS += -O3 -std=c++11 -DNDEBUG ${PGI_PROFILING} ${WARNINGS}
|
||||
# -fastsse -fargument-noalias ${WARNINGS} -msse3 -vec-report=3
|
||||
|
||||
default: ${PROGRAM}
|
||||
|
||||
${PROGRAM}: ${OBJECTS}
|
||||
$(LINKER) $^ ${LINKFLAGS} -o $@
|
||||
|
||||
clean:
|
||||
@rm -f ${PROGRAM} ${OBJECTS}
|
||||
|
||||
clean_all:: clean
|
||||
@rm -f *_ *~ *.bak *.log *.out *.tar
|
||||
|
||||
run: clean ${PROGRAM}
|
||||
./${PROGRAM}
|
||||
|
||||
# tar the current directory
|
||||
MY_DIR = `basename ${PWD}`
|
||||
tar: clean_all
|
||||
@echo "Tar the directory: " ${MY_DIR}
|
||||
@cd .. ;\
|
||||
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
|
||||
cd ${MY_DIR}
|
||||
# tar cf `basename ${PWD}`.tar *
|
||||
|
||||
doc:
|
||||
doxygen Doxyfile
|
||||
|
||||
#########################################################################
|
||||
|
||||
.cpp.o:
|
||||
$(CXX) -c $(CXXFLAGS) -o $@ $<
|
||||
|
||||
.c.o:
|
||||
$(CC) -c $(CFLAGS) -o $@ $<
|
||||
|
||||
.f.o:
|
||||
$(F77) -c $(FFLAGS) -o $@ $<
|
||||
|
||||
##################################################################################################
|
||||
# # some tools
|
||||
# # Simple run time profiling of your code
|
||||
# # CXXFLAGS += -g -pg
|
||||
# # LINKFLAGS += -pg
|
||||
|
||||
|
||||
# Profiling options PGI, see: pgcollect -help
|
||||
# CPU_PROF = -allcache
|
||||
CPU_PROF = -time
|
||||
# GPU_PROF = -cuda=gmem,branch,cc13 -cudainit
|
||||
#GPU_PROF = -cuda=branch:cc20
|
||||
#
|
||||
PROF_FILE = pgprof.out
|
||||
|
||||
cache: prof
|
||||
|
||||
prof: ${PROGRAM}
|
||||
${OPTIRUN} ${BINDIR}pgcollect $(CPU_PROF) ./$^
|
||||
${OPTIRUN} ${BINDIR}pgprof -exe ./$^ $(PROF_FILE) &
|
||||
|
||||
info:
|
||||
pgaccelinfo -v
|
||||
BIN
ex1/ABCEFG/skalar_stl/.swo
Normal file
BIN
ex1/ABCEFG/skalar_stl/.swo
Normal file
Binary file not shown.
BIN
ex1/ABCEFG/skalar_stl/.swp
Normal file
BIN
ex1/ABCEFG/skalar_stl/.swp
Normal file
Binary file not shown.
63
ex1/ABCEFG/skalar_stl/.vscode/settings.json
vendored
Normal file
63
ex1/ABCEFG/skalar_stl/.vscode/settings.json
vendored
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
{
|
||||
"files.associations": {
|
||||
"iostream": "cpp",
|
||||
"array": "cpp",
|
||||
"atomic": "cpp",
|
||||
"bit": "cpp",
|
||||
"*.tcc": "cpp",
|
||||
"cctype": "cpp",
|
||||
"charconv": "cpp",
|
||||
"chrono": "cpp",
|
||||
"clocale": "cpp",
|
||||
"cmath": "cpp",
|
||||
"compare": "cpp",
|
||||
"complex": "cpp",
|
||||
"concepts": "cpp",
|
||||
"cstdarg": "cpp",
|
||||
"cstddef": "cpp",
|
||||
"cstdint": "cpp",
|
||||
"cstdio": "cpp",
|
||||
"cstdlib": "cpp",
|
||||
"cstring": "cpp",
|
||||
"ctime": "cpp",
|
||||
"cwchar": "cpp",
|
||||
"cwctype": "cpp",
|
||||
"deque": "cpp",
|
||||
"string": "cpp",
|
||||
"unordered_map": "cpp",
|
||||
"vector": "cpp",
|
||||
"exception": "cpp",
|
||||
"algorithm": "cpp",
|
||||
"functional": "cpp",
|
||||
"iterator": "cpp",
|
||||
"memory": "cpp",
|
||||
"memory_resource": "cpp",
|
||||
"numeric": "cpp",
|
||||
"optional": "cpp",
|
||||
"random": "cpp",
|
||||
"ratio": "cpp",
|
||||
"string_view": "cpp",
|
||||
"system_error": "cpp",
|
||||
"tuple": "cpp",
|
||||
"type_traits": "cpp",
|
||||
"utility": "cpp",
|
||||
"format": "cpp",
|
||||
"initializer_list": "cpp",
|
||||
"iomanip": "cpp",
|
||||
"iosfwd": "cpp",
|
||||
"istream": "cpp",
|
||||
"limits": "cpp",
|
||||
"new": "cpp",
|
||||
"numbers": "cpp",
|
||||
"ostream": "cpp",
|
||||
"span": "cpp",
|
||||
"sstream": "cpp",
|
||||
"stdexcept": "cpp",
|
||||
"streambuf": "cpp",
|
||||
"cinttypes": "cpp",
|
||||
"typeinfo": "cpp",
|
||||
"variant": "cpp",
|
||||
"cassert": "cpp",
|
||||
"fstream": "cpp"
|
||||
}
|
||||
}
|
||||
28
ex1/ABCEFG/skalar_stl/.vscode/tasks.json
vendored
Normal file
28
ex1/ABCEFG/skalar_stl/.vscode/tasks.json
vendored
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
{
|
||||
"tasks": [
|
||||
{
|
||||
"type": "cppbuild",
|
||||
"label": "C/C++: g++ build active file",
|
||||
"command": "/usr/bin/g++",
|
||||
"args": [
|
||||
"-fdiagnostics-color=always",
|
||||
"-g",
|
||||
"${file}",
|
||||
"-o",
|
||||
"${fileDirname}/${fileBasenameNoExtension}"
|
||||
],
|
||||
"options": {
|
||||
"cwd": "${fileDirname}"
|
||||
},
|
||||
"problemMatcher": [
|
||||
"$gcc"
|
||||
],
|
||||
"group": {
|
||||
"kind": "build",
|
||||
"isDefault": true
|
||||
},
|
||||
"detail": "Task generated by Debugger."
|
||||
}
|
||||
],
|
||||
"version": "2.0.0"
|
||||
}
|
||||
7
ex1/ABCEFG/skalar_stl/DenseMatrix.cpp
Normal file
7
ex1/ABCEFG/skalar_stl/DenseMatrix.cpp
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <cmath>
|
||||
#include <cassert>
|
||||
using namespace std;
|
||||
|
||||
4
ex1/ABCEFG/skalar_stl/DenseMatrix.h
Normal file
4
ex1/ABCEFG/skalar_stl/DenseMatrix.h
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
using namespace std;
|
||||
BIN
ex1/ABCEFG/skalar_stl/DenseMatrix.o
Normal file
BIN
ex1/ABCEFG/skalar_stl/DenseMatrix.o
Normal file
Binary file not shown.
30
ex1/ABCEFG/skalar_stl/Makefile
Normal file
30
ex1/ABCEFG/skalar_stl/Makefile
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
#
|
||||
# use GNU-Compiler tools
|
||||
COMPILER=GCC_
|
||||
# alternatively from the shell
|
||||
# export COMPILER=GCC_
|
||||
# or, alternatively from the shell
|
||||
# make COMPILER=GCC_
|
||||
|
||||
# use Intel compilers
|
||||
#COMPILER=ICC_
|
||||
|
||||
# use PGI compilers
|
||||
# COMPILER=PGI_
|
||||
|
||||
|
||||
SOURCES = main.cpp mylib.cpp
|
||||
OBJECTS = $(SOURCES:.cpp=.o)
|
||||
|
||||
PROGRAM = main.${COMPILER}
|
||||
|
||||
# uncomment the next two lines for debugging and detailed performance analysis
|
||||
CXXFLAGS += -g
|
||||
LINKFLAGS += -g
|
||||
# do not use -pg with PGI compilers
|
||||
|
||||
ifndef COMPILER
|
||||
COMPILER=GCC_
|
||||
endif
|
||||
|
||||
include ../${COMPILER}default.mk
|
||||
500
ex1/ABCEFG/skalar_stl/data_1.txt
Normal file
500
ex1/ABCEFG/skalar_stl/data_1.txt
Normal file
|
|
@ -0,0 +1,500 @@
|
|||
141
|
||||
261
|
||||
87
|
||||
430
|
||||
258
|
||||
298
|
||||
425
|
||||
120
|
||||
496
|
||||
707
|
||||
244
|
||||
786
|
||||
75
|
||||
394
|
||||
4
|
||||
221
|
||||
2
|
||||
190
|
||||
143
|
||||
269
|
||||
175
|
||||
139
|
||||
599
|
||||
902
|
||||
940
|
||||
222
|
||||
483
|
||||
377
|
||||
524
|
||||
265
|
||||
69
|
||||
437
|
||||
174
|
||||
27
|
||||
955
|
||||
431
|
||||
962
|
||||
763
|
||||
8
|
||||
681
|
||||
706
|
||||
646
|
||||
553
|
||||
219
|
||||
773
|
||||
229
|
||||
371
|
||||
891
|
||||
857
|
||||
403
|
||||
319
|
||||
609
|
||||
911
|
||||
910
|
||||
592
|
||||
333
|
||||
854
|
||||
443
|
||||
905
|
||||
34
|
||||
533
|
||||
717
|
||||
180
|
||||
337
|
||||
188
|
||||
322
|
||||
404
|
||||
549
|
||||
49
|
||||
553
|
||||
275
|
||||
242
|
||||
244
|
||||
155
|
||||
957
|
||||
936
|
||||
819
|
||||
729
|
||||
176
|
||||
361
|
||||
189
|
||||
2
|
||||
317
|
||||
700
|
||||
626
|
||||
544
|
||||
440
|
||||
288
|
||||
502
|
||||
762
|
||||
763
|
||||
577
|
||||
748
|
||||
646
|
||||
124
|
||||
505
|
||||
348
|
||||
93
|
||||
148
|
||||
199
|
||||
673
|
||||
432
|
||||
695
|
||||
257
|
||||
10
|
||||
533
|
||||
280
|
||||
947
|
||||
907
|
||||
393
|
||||
25
|
||||
672
|
||||
838
|
||||
972
|
||||
57
|
||||
451
|
||||
583
|
||||
687
|
||||
720
|
||||
651
|
||||
727
|
||||
374
|
||||
582
|
||||
117
|
||||
58
|
||||
980
|
||||
285
|
||||
595
|
||||
963
|
||||
186
|
||||
194
|
||||
342
|
||||
933
|
||||
391
|
||||
274
|
||||
152
|
||||
398
|
||||
375
|
||||
132
|
||||
436
|
||||
92
|
||||
615
|
||||
11
|
||||
574
|
||||
790
|
||||
236
|
||||
449
|
||||
570
|
||||
62
|
||||
497
|
||||
643
|
||||
222
|
||||
838
|
||||
972
|
||||
847
|
||||
506
|
||||
279
|
||||
747
|
||||
237
|
||||
958
|
||||
621
|
||||
601
|
||||
173
|
||||
91
|
||||
256
|
||||
859
|
||||
912
|
||||
700
|
||||
726
|
||||
230
|
||||
577
|
||||
811
|
||||
404
|
||||
989
|
||||
90
|
||||
321
|
||||
512
|
||||
61
|
||||
726
|
||||
557
|
||||
530
|
||||
830
|
||||
859
|
||||
790
|
||||
318
|
||||
453
|
||||
753
|
||||
110
|
||||
110
|
||||
270
|
||||
525
|
||||
973
|
||||
711
|
||||
312
|
||||
292
|
||||
851
|
||||
912
|
||||
640
|
||||
256
|
||||
89
|
||||
839
|
||||
585
|
||||
949
|
||||
62
|
||||
585
|
||||
286
|
||||
828
|
||||
191
|
||||
443
|
||||
394
|
||||
827
|
||||
677
|
||||
208
|
||||
319
|
||||
134
|
||||
672
|
||||
571
|
||||
170
|
||||
148
|
||||
477
|
||||
909
|
||||
553
|
||||
33
|
||||
54
|
||||
806
|
||||
452
|
||||
383
|
||||
790
|
||||
365
|
||||
533
|
||||
712
|
||||
872
|
||||
329
|
||||
651
|
||||
975
|
||||
76
|
||||
588
|
||||
414
|
||||
310
|
||||
264
|
||||
759
|
||||
996
|
||||
187
|
||||
782
|
||||
196
|
||||
993
|
||||
803
|
||||
425
|
||||
729
|
||||
499
|
||||
809
|
||||
357
|
||||
74
|
||||
591
|
||||
911
|
||||
194
|
||||
433
|
||||
750
|
||||
40
|
||||
947
|
||||
764
|
||||
559
|
||||
184
|
||||
498
|
||||
518
|
||||
995
|
||||
855
|
||||
963
|
||||
679
|
||||
404
|
||||
935
|
||||
480
|
||||
232
|
||||
397
|
||||
706
|
||||
559
|
||||
757
|
||||
996
|
||||
963
|
||||
536
|
||||
964
|
||||
116
|
||||
52
|
||||
305
|
||||
581
|
||||
531
|
||||
902
|
||||
541
|
||||
432
|
||||
543
|
||||
713
|
||||
17
|
||||
801
|
||||
143
|
||||
479
|
||||
257
|
||||
370
|
||||
662
|
||||
170
|
||||
279
|
||||
199
|
||||
196
|
||||
327
|
||||
881
|
||||
472
|
||||
404
|
||||
180
|
||||
969
|
||||
408
|
||||
845
|
||||
616
|
||||
377
|
||||
878
|
||||
785
|
||||
465
|
||||
814
|
||||
899
|
||||
430
|
||||
335
|
||||
597
|
||||
902
|
||||
703
|
||||
378
|
||||
735
|
||||
955
|
||||
543
|
||||
541
|
||||
312
|
||||
72
|
||||
182
|
||||
93
|
||||
464
|
||||
10
|
||||
916
|
||||
643
|
||||
2
|
||||
31
|
||||
209
|
||||
455
|
||||
128
|
||||
9
|
||||
728
|
||||
355
|
||||
781
|
||||
437
|
||||
437
|
||||
50
|
||||
50
|
||||
92
|
||||
595
|
||||
242
|
||||
842
|
||||
858
|
||||
964
|
||||
489
|
||||
221
|
||||
227
|
||||
537
|
||||
763
|
||||
348
|
||||
462
|
||||
640
|
||||
918
|
||||
162
|
||||
716
|
||||
578
|
||||
434
|
||||
885
|
||||
394
|
||||
179
|
||||
634
|
||||
625
|
||||
328
|
||||
803
|
||||
1000
|
||||
981
|
||||
128
|
||||
233
|
||||
24
|
||||
608
|
||||
111
|
||||
408
|
||||
885
|
||||
549
|
||||
370
|
||||
209
|
||||
441
|
||||
957
|
||||
125
|
||||
471
|
||||
857
|
||||
44
|
||||
692
|
||||
979
|
||||
284
|
||||
134
|
||||
686
|
||||
910
|
||||
611
|
||||
900
|
||||
194
|
||||
755
|
||||
347
|
||||
419
|
||||
156
|
||||
820
|
||||
625
|
||||
739
|
||||
806
|
||||
68
|
||||
951
|
||||
498
|
||||
756
|
||||
743
|
||||
832
|
||||
157
|
||||
458
|
||||
619
|
||||
933
|
||||
836
|
||||
896
|
||||
583
|
||||
583
|
||||
855
|
||||
35
|
||||
886
|
||||
408
|
||||
37
|
||||
747
|
||||
155
|
||||
144
|
||||
606
|
||||
255
|
||||
325
|
||||
402
|
||||
407
|
||||
387
|
||||
610
|
||||
167
|
||||
189
|
||||
95
|
||||
324
|
||||
770
|
||||
235
|
||||
741
|
||||
693
|
||||
825
|
||||
828
|
||||
294
|
||||
310
|
||||
524
|
||||
326
|
||||
832
|
||||
811
|
||||
557
|
||||
263
|
||||
681
|
||||
234
|
||||
457
|
||||
385
|
||||
539
|
||||
992
|
||||
756
|
||||
981
|
||||
235
|
||||
529
|
||||
52
|
||||
757
|
||||
602
|
||||
858
|
||||
989
|
||||
930
|
||||
410
|
||||
1
|
||||
541
|
||||
208
|
||||
220
|
||||
326
|
||||
96
|
||||
748
|
||||
749
|
||||
544
|
||||
339
|
||||
833
|
||||
553
|
||||
958
|
||||
893
|
||||
357
|
||||
547
|
||||
347
|
||||
623
|
||||
797
|
||||
746
|
||||
126
|
||||
823
|
||||
26
|
||||
415
|
||||
732
|
||||
782
|
||||
368
|
||||
BIN
ex1/ABCEFG/skalar_stl/main.GCC_
Executable file
BIN
ex1/ABCEFG/skalar_stl/main.GCC_
Executable file
Binary file not shown.
290
ex1/ABCEFG/skalar_stl/main.cpp
Normal file
290
ex1/ABCEFG/skalar_stl/main.cpp
Normal file
|
|
@ -0,0 +1,290 @@
|
|||
#include "mylib.h"
|
||||
#include "timing.h"
|
||||
#include "DenseMatrix.h"
|
||||
|
||||
#include <cassert> // assert
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <cmath>
|
||||
#include <tuple>
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
#include <fstream>
|
||||
#include <list>
|
||||
#include <stdexcept>
|
||||
using namespace std;
|
||||
|
||||
void task_a() {
|
||||
printf("\n\n-------------- Task A --------------\n\n");
|
||||
|
||||
auto [a,b,c] = means0(1,4,16);
|
||||
auto [d,e,f] = means0(2,3,5);
|
||||
auto [g,h,i] = means0(1000,4000,16000);
|
||||
printf("means(1,4,16) = (%f, %f, %f)\n", a, b, c);
|
||||
printf("means(2,3,5) = (%f, %f, %f)\n", d, e, f);
|
||||
printf("means(1000,4000,16000) = (%f, %f, %f)\n", g, h, i);
|
||||
|
||||
vector<double> v = {4,8,15,16,23,42};
|
||||
auto [j,k,l] = means(v);
|
||||
printf("means(4,8,15,16,23,42) = (%f, %f, %f)\n", j, k, l);
|
||||
|
||||
|
||||
}
|
||||
|
||||
void task_b() {
|
||||
printf("\n\n-------------- Task B --------------\n\n");
|
||||
|
||||
// Read vector
|
||||
vector<double> a;
|
||||
read_vector_from_file("data_1.txt", a);
|
||||
|
||||
// Print numbers
|
||||
// for (unsigned int k=0; k<a.size(); ++k)
|
||||
// {
|
||||
// cout << " " << a.at(k);
|
||||
// }
|
||||
// cout << endl;
|
||||
|
||||
// min and max
|
||||
auto min = min_element(a.begin(), a.end());
|
||||
auto max = max_element(a.begin(), a.end());
|
||||
printf("Minimum: %f\n", *min);
|
||||
printf("Maximum: %f\n", *max);
|
||||
|
||||
// means
|
||||
auto [x,y,z] = means(a);
|
||||
printf("Arithmetic: %f\n", x);
|
||||
printf("Geometric: %f\n", y);
|
||||
printf("Harmonic: %f\n", z);
|
||||
|
||||
// deviation
|
||||
double deviation;
|
||||
for (long unsigned int i=0; i<a.size(); i++){
|
||||
deviation += pow(x - a.at(i),2);
|
||||
}
|
||||
deviation = sqrt(deviation/a.size());
|
||||
printf("Deviation: %f\n", deviation);
|
||||
|
||||
// write results to file
|
||||
vector<double> b = {*min,*max,x,y,z,deviation};
|
||||
write_vector_to_file("out_1.txt", b);
|
||||
|
||||
}
|
||||
|
||||
void task_c() {
|
||||
printf("\n\n-------------- Task C --------------\n\n");
|
||||
|
||||
vector<int> n_values = {15, 1001, 1432987};
|
||||
|
||||
for (int n : n_values) {
|
||||
printf("n = %d\n", n);
|
||||
double sum = 0;
|
||||
double loops = 1000;
|
||||
|
||||
// Timing first function
|
||||
tic();
|
||||
for (int i=0; i<loops; i++){
|
||||
sum = sum_of_spec(n);
|
||||
}
|
||||
double sec1 = toc();
|
||||
printf("For-loop funtion: result = %.f | time = %f milliseconds\n", sum, sec1*1000);
|
||||
|
||||
// Timing second function
|
||||
tic();
|
||||
for (int i=0; i<loops; i++){
|
||||
sum = formula(n);
|
||||
}
|
||||
double sec2 = toc();
|
||||
printf("Formula funtion: result = %.f | time = %f milliseconds\n", sum, sec2*1000);
|
||||
}
|
||||
}
|
||||
|
||||
/// Task D: only prints a pointer to the separate folder holding that task.
void task_d() {
    // Header and hint are emitted with a single call; the two literals are
    // concatenated by the compiler.
    printf("\n\n-------------- Task D --------------\n\n"
           "See folder D.\n");
}
|
||||
|
||||
void task_e() {
|
||||
printf("\n\n-------------- Task E --------------\n\n");
|
||||
|
||||
for (int n : {100, 1000, 10000}) {
|
||||
vector<int> vec(n);
|
||||
list<int> lst(n);
|
||||
|
||||
// Initialize
|
||||
for (int i = 1; i < n+1; ++i) {
|
||||
vec.push_back(i);
|
||||
lst.push_back(i);
|
||||
}
|
||||
|
||||
// Insert into vector
|
||||
tic();
|
||||
insert_into_vector(vec, n);
|
||||
double sec1 = toc();
|
||||
printf("Vector insertion time for n = %d: %.f microseconds.\n", n, sec1*1000*1000);
|
||||
|
||||
// Insert into list
|
||||
tic();
|
||||
insert_into_list(lst, n);
|
||||
double sec2 = toc();
|
||||
printf("List insertion time for n = %d: %.f microseconds.\n", n, sec2*1000*1000);
|
||||
}
|
||||
}
|
||||
|
||||
void task_f() {
|
||||
printf("\n\n-------------- Task F --------------\n\n");
|
||||
|
||||
// single_goldbach(k)
|
||||
int k = 694;
|
||||
printf("single_goldbach(k = %d) = %d\n", k, single_goldbach(k));
|
||||
|
||||
// Prints decompositions
|
||||
print_decomps(k);
|
||||
|
||||
// count_goldbach(n)
|
||||
printf("\nNOTE: For n=2'000'000 it will take ~30 seconds.\n");
|
||||
for (int n : {10'000, 100'000, 400'000, 1'000'000}) { //, 2'000'000}) {
|
||||
tic();
|
||||
vector<int> counts = count_goldbach(n);
|
||||
double sec = toc();
|
||||
|
||||
auto max = max_element(counts.begin(), counts.end());
|
||||
printf("count_goldbach(n = %d): k = %ld, decompositions = %d, time elapsed: %f milliseconds\n", n, max-counts.begin(), *max, sec*1000);
|
||||
}
|
||||
|
||||
// Results
|
||||
// count_goldbach(n = 10'000): k = 9240, decompositions = 329, time elapsed: 1.235096 milliseconds
|
||||
// count_goldbach(n = 100'000): k = 99330, decompositions = 2168, time elapsed: 39.003922 milliseconds
|
||||
// count_goldbach(n = 400'000): k = 390390, decompositions = 7094, time elapsed: 497.282572 milliseconds
|
||||
// count_goldbach(n = 1'000'000): k = 990990, decompositions = 15594, time elapsed: 3236.044944 milliseconds
|
||||
// count_goldbach(n = 2'000'000): k = 1981980, decompositions = 27988, time elapsed: 29864.384370 milliseconds
|
||||
// count_goldbach(n = 10'000'000): k = 9699690, decompositions = 124180, time elapsed: 825392.110981 milliseconds
|
||||
|
||||
}
|
||||
|
||||
void task_g() {
|
||||
printf("\n\n-------------- Task G --------------\n\n");
|
||||
|
||||
DenseMatrix const M(5,3);
|
||||
|
||||
vector<double> const u{{1,2,3}};
|
||||
vector<double> f1 = M.Mult(u);
|
||||
|
||||
vector<double> const v{{-1,2,-3,4,-5}};
|
||||
vector<double> f2 = M.MultT(v);
|
||||
|
||||
|
||||
cout << "M = " << endl;
|
||||
M.print();
|
||||
cout << endl << "u = ";
|
||||
for (size_t i=0; i<u.size(); i++){ cout << u[i] << " ";}
|
||||
cout << endl << "M * u = ";
|
||||
for (size_t i=0; i<f1.size(); i++){cout << f1[i] << " ";}
|
||||
cout << endl << "v = ";
|
||||
for (size_t i=0; i<v.size(); i++){cout << v[i] << " ";}
|
||||
cout << endl << "M^T * v = ";
|
||||
for (size_t i=0; i<f2.size(); i++){cout << f2[i] << " ";}
|
||||
cout << endl << endl;
|
||||
|
||||
// #######################################################
|
||||
|
||||
int const NLOOPS = 100;
|
||||
int const n = 1000;
|
||||
|
||||
// Time initialization
|
||||
tic();
|
||||
DenseMatrix const M2(n,n);
|
||||
vector<double> w(n,0);
|
||||
for (int i=0; i<n; i++){w[i]=i+1;}
|
||||
double t1 = toc();
|
||||
|
||||
// Time Mult
|
||||
tic();
|
||||
vector<double> f3 = M2.Mult(w);
|
||||
for (int k=1; k<NLOOPS; ++k){
|
||||
f3 = M2.Mult(w);
|
||||
}
|
||||
double t2 = toc();
|
||||
|
||||
// Time MultT
|
||||
tic();
|
||||
vector<double> f4 = M2.MultT(w);
|
||||
for (int k=1; k<NLOOPS; ++k){
|
||||
f4 = M2.Mult(w);
|
||||
}
|
||||
double t3 = toc();
|
||||
|
||||
// Print results
|
||||
printf("Results for %dx%d matrix vector multiplication doing %d loops\n", n, n, NLOOPS);
|
||||
printf("Time for initialization: %f seconds.\n", t1);
|
||||
printf("Time for Mult : %f seconds, %f per loop.\n", t2, t2/NLOOPS);
|
||||
printf("Time for MultT : %f seconds, %f per loop.\n", t3, t3/NLOOPS);
|
||||
|
||||
// Check if resulting vectors are equal
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
double err = f3[i] - f4[i];
|
||||
if(abs(err) > 1e-4)
|
||||
{
|
||||
cout << "Resulting vectors are not equal" << endl;
|
||||
}
|
||||
}
|
||||
// ################################################
|
||||
|
||||
// Time initialization
|
||||
tic();
|
||||
vector<double> x(n,0);
|
||||
for (int i=0; i<n; i++){x[i] = sigmoid( (10.0*i)/(n-1) - 5 );}
|
||||
DenseMatrix2 M3(x,x);
|
||||
double t4 = toc();
|
||||
|
||||
// Time Mult
|
||||
tic();
|
||||
vector<double> f5 = M3.Mult(w);
|
||||
for (int k=1; k<NLOOPS; ++k){
|
||||
f5 = M3.Mult(w);
|
||||
}
|
||||
double t5 = toc();
|
||||
|
||||
// Time MultT
|
||||
tic();
|
||||
vector<double> f6 = M3.MultT(w);
|
||||
for (int k=1; k<NLOOPS; ++k){
|
||||
f6 = M3.Mult(w);
|
||||
}
|
||||
double t6 = toc();
|
||||
|
||||
// Print results
|
||||
printf("\nResults for %dx%d matrix vector multiplication doing %d loops taking advantage of tensor product structure of the matrix\n", n, n, NLOOPS);
|
||||
printf("Time for initialization: %f seconds.\n", t4);
|
||||
printf("Time for Mult : %f seconds, %f per loop.\n", t5, t5/NLOOPS);
|
||||
printf("Time for MultT : %f seconds, %f per loop.\n", t6, t6/NLOOPS);
|
||||
|
||||
// Check if resulting vectors are equal
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
double err = f5[i] - f6[i];
|
||||
if(abs(err) > 1e-4)
|
||||
{
|
||||
cout << "Resulting vectors are not equal" << endl;
|
||||
}
|
||||
}
|
||||
|
||||
printf("\nNOTE: difference in runtime noticable with n=10.000 (~30 seconds)\n");
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
int main(){
|
||||
|
||||
task_a();
|
||||
task_b();
|
||||
task_c();
|
||||
task_d();
|
||||
task_e();
|
||||
task_f();
|
||||
task_g();
|
||||
|
||||
return 0;
|
||||
};
|
||||
BIN
ex1/ABCEFG/skalar_stl/main.o
Normal file
BIN
ex1/ABCEFG/skalar_stl/main.o
Normal file
Binary file not shown.
73
ex1/ABCEFG/skalar_stl/mayer_primes.h
Normal file
73
ex1/ABCEFG/skalar_stl/mayer_primes.h
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
#pragma once
|
||||
|
||||
#include <cstring> //memset
|
||||
#include <vector>
|
||||
//using namespace std;
|
||||
|
||||
/** \brief Determines all prime numbers in interval [2, @p max].
|
||||
*
|
||||
* The sieve of Eratosthenes is used.
|
||||
*
|
||||
* The implementation originates from <a href="http://code.activestate.com/recipes/576559-fast-prime-generator/">Florian Mayer</a>.
|
||||
*
|
||||
* \param[in] max end of interval for the prime number search.
|
||||
* \return vector of prime numbers @f$2,3,5, ..., p<=max @f$.
|
||||
*
|
||||
* \copyright
|
||||
* Copyright (c) 2008 Florian Mayer (adapted by Gundolf Haase 2018)
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||
* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
// Sieve of Eratosthenes on a bit field (one bit per candidate number).
// Returns all primes p with 2 <= p <= max in ascending order; the result is
// empty when max < 2 (this includes negative bounds for signed T).
// Precondition: max is smaller than the largest value of T, because the outer
// loop counter is incremented once past max.
template <class T>
std::vector<T> get_primes(T max)
{
    std::vector<T> primes;
    if (max < 2) {
        return primes;
    }

    // The vector owns the bit field, so it is released on every exit path,
    // including an exception thrown by primes.push_back().
    // Fill sieve with 1
    std::vector<unsigned char> sieve(static_cast<std::size_t>(max / 8) + 1, 0xFF);

    for (T x = 2; x <= max; x++)
    {
        if (sieve[x / 8] & (0x01 << (x % 8))) {
            primes.push_back(x);
            // Is prime. Mark multiples 2x, 3x, ...
            // The bound is tested as "j <= max - x" BEFORE adding x, so the
            // running multiple can never exceed max (no overflow of T).
            for (T j = x; j <= max - x; )
            {
                j += x;
                sieve[j / 8] &= static_cast<unsigned char>(~(0x01 << (j % 8)));
            }
        }
    }
    return primes;
}
|
||||
|
||||
//---------------------------------------------------------------
|
||||
//int main() // by Florian Mayer
|
||||
//{g++ -O3 -std=c++14 -fopenmp main.cpp && ./a.out
|
||||
// vector<unsigned long> primes;
|
||||
// primes = get_primes(10000000);
|
||||
// // return 0;
|
||||
// // Print out result.
|
||||
// vector<unsigned long>::iterator it;
|
||||
// for(it=primes.begin(); it < primes.end(); it++)
|
||||
// cout << *it << " ";
|
||||
//
|
||||
// cout << endl;
|
||||
// return 0;
|
||||
//}
|
||||
312
ex1/ABCEFG/skalar_stl/mylib.cpp
Normal file
312
ex1/ABCEFG/skalar_stl/mylib.cpp
Normal file
|
|
@ -0,0 +1,312 @@
|
|||
#include "mylib.h"
|
||||
#include "mayer_primes.h"
|
||||
#include "DenseMatrix.h"
|
||||
|
||||
#include <cassert> // assert
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <cmath>
|
||||
#include <tuple>
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
#include <fstream>
|
||||
#include <stdexcept>
|
||||
#include <random>
|
||||
#include <list>
|
||||
using namespace std;
|
||||
|
||||
// -------------- Task A --------------
|
||||
|
||||
// Returns (arithmetic mean, geometric mean, harmonic mean) of a, b and c.
// The geometric mean is the real cube root of a*b*c; the harmonic mean divides
// by a, b and c, so a zero argument yields a non-finite or zero result
// according to IEEE arithmetic.
std::tuple<double, double, double> means0(double a, double b, double c){
    const double arith = (a + b + c) / 3.0;
    // std::cbrt instead of pow(x, 1.0f/3): the float exponent is only accurate
    // to about 7 digits, which showed up in the result.
    const double geo = std::cbrt(a * b * c);
    const double harm = 3.0 / (1.0 / a + 1.0 / b + 1.0 / c);
    return std::make_tuple(arith, geo, harm);
}
|
||||
|
||||
// Returns (arithmetic mean, geometric mean, harmonic mean) of all entries of v.
// The geometric mean is evaluated as exp(mean of log(x)), so it stays finite
// for long vectors whose plain product would exceed the range of double; it is
// therefore meant for positive entries (a zero entry gives 0, a negative entry
// gives NaN). For an empty vector all three results are NaN (0/0).
std::tuple<double, double, double> means(const std::vector<double>& v){
    const double n = static_cast<double>(v.size());
    double sum = 0.0;
    double log_sum = 0.0;
    double inv_sum = 0.0;

    for (const double value : v) {
        sum += value;
        log_sum += std::log(value);
        inv_sum += 1.0 / value;
    }

    const double arith = sum / n;
    const double geo = std::exp(log_sum / n);
    const double harm = n / inv_sum;
    return std::make_tuple(arith, geo, harm);
}
|
||||
|
||||
// -------------- Task B --------------
|
||||
|
||||
// Appends every double that can be extracted from istr to v.
// If extraction stops before end-of-file, a notice is printed; a stream in the
// "bad" state raises std::runtime_error, a mere format failure is reported and
// the stream state is reset. Finally the capacity of v is trimmed.
void fill_vector(std::istream& istr, std::vector<double>& v)
{
    // read values
    for (double value = 0; istr >> value; ) {
        v.push_back(value);
    }

    const bool stopped_before_eof = !istr.eof();
    if (stopped_before_eof)
    {   // error handling
        std::cout << " Error handling \n";
        if ( istr.bad() ) {
            throw std::runtime_error("Schwerer Fehler in istr");
        }
        if ( istr.fail() )           // try to clean up
        {
            std::cout << " Failed in reading all data.\n";
            istr.clear();
        }
    }

    v.shrink_to_fit();               // C++11
}
|
||||
|
||||
|
||||
void read_vector_from_file(const string& file_name, vector<double>& v)
|
||||
{
|
||||
ifstream fin(file_name); // Oeffne das File im ASCII-Modus
|
||||
if( fin.is_open() ) // File gefunden:
|
||||
{
|
||||
v.clear(); // Vektor leeren
|
||||
fill_vector(fin, v);
|
||||
}
|
||||
else // File nicht gefunden:
|
||||
{
|
||||
cout << "\nFile " << file_name << " has not been found.\n\n" ;
|
||||
assert( fin.is_open() && "File not found." ); // exeption handling for the poor programmer
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// Creates/overwrites file_name and writes every entry of v on its own line.
// If the file cannot be opened a message is printed and an assert fires.
void write_vector_to_file(const std::string& file_name, const std::vector<double>& v)
{
    std::ofstream output(file_name);          // open the file in ASCII mode

    if ( !output.is_open() )
    {
        std::cout << "\nFile " << file_name << " has not been opened.\n\n" ;
        assert( output.is_open() && "File not opened." );  // poor man's exception handling
        return;
    }

    for (const double value : v)
    {
        output << value << std::endl;
    }
}
|
||||
|
||||
// -------------- Task C --------------
|
||||
|
||||
// Brute force: adds up every integer i in [1, n] that is divisible by 3 or by 5
// and returns the total as double. Returns 0 for n < 1.
double sum_of_spec(int n)
{
    // 64-bit accumulator and counter: the total for n = 1432987 does not fit
    // into 32 bits, and the counter cannot overflow when n is INT_MAX.
    long long sum = 0;
    for (long long i = 1; i <= n; ++i) {
        if (i % 3 == 0 || i % 5 == 0) {
            sum += i;
        }
    }
    return static_cast<double>(sum);
}
|
||||
|
||||
// Closed form of sum_of_spec: S(3) + S(5) - S(15), where
// S(d) = d * k*(k+1)/2 with k = n / d (integer division) is the sum of all
// positive multiples of d that are <= n. Multiples of 15 are contained in both
// S(3) and S(5) and are therefore subtracted once. Returns 0 for n < 1.
double formula(int n)
{
    if (n <= 0) {
        return 0.0;     // no positive integers to add up
    }

    // Integer arithmetic throughout: a float quotient (n / 3.0f) cannot
    // represent n exactly once n exceeds 2^24 and then yields a wrong count.
    const auto sum_of_multiples = [n](long long d) {
        const long long k = n / d;
        return d * (k * (k + 1) / 2);
    };

    const long long sum = sum_of_multiples(3) + sum_of_multiples(5) - sum_of_multiples(15);
    return static_cast<double>(sum);
}
|
||||
|
||||
// -------------- Task E --------------
|
||||
|
||||
// Draws n random integers from [1, n] and inserts each one into vec in front of
// the first element that is not smaller (std::lower_bound), then asserts that
// vec is sorted. vec is expected to be sorted on entry.
void insert_into_vector(std::vector<int>& vec, int n) {
    std::random_device seed_source;                   // random device
    std::mt19937 engine(seed_source());               // seeded generator
    std::uniform_int_distribution<> pick(1, n);       // range of the values

    int remaining = n;
    while (remaining-- > 0) {
        const int value = pick(engine);
        vec.insert(std::lower_bound(vec.begin(), vec.end(), value), value);
    }

    assert(std::is_sorted(vec.begin(), vec.end()));
}
|
||||
|
||||
// Draws n random integers from [1, n] and inserts each one into lst in front of
// the first element that is not smaller (std::lower_bound), then asserts that
// lst is sorted. lst is expected to be sorted on entry.
void insert_into_list(std::list<int>& lst, int n) {
    std::random_device seed_source;
    std::mt19937 engine(seed_source());
    std::uniform_int_distribution<> pick(1, n);

    int remaining = n;
    while (remaining-- > 0) {
        const int value = pick(engine);
        lst.insert(std::lower_bound(lst.begin(), lst.end(), value), value);
    }

    assert(std::is_sorted(lst.begin(), lst.end()));
}
|
||||
|
||||
// -------------- Task F --------------
|
||||
|
||||
int single_goldbach(int k) {
|
||||
const vector<int> primes = get_primes(k);
|
||||
int count = 0;
|
||||
|
||||
for (size_t i = 0; i < primes.size(); i++) {
|
||||
for (size_t j = i; j < primes.size(); j++) {
|
||||
if (primes[i] + primes[j] == k) {
|
||||
count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
vector<int> count_goldbach(int n) {
|
||||
const vector<int> primes = get_primes(n);
|
||||
vector<int> counts(n+1);
|
||||
|
||||
for (size_t i = 1; i < primes.size(); i++) {
|
||||
for (size_t j = i; j < primes.size(); j++) {
|
||||
int sum = primes[i] + primes[j];
|
||||
if (sum <= n) {
|
||||
counts[sum]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return counts;
|
||||
}
|
||||
|
||||
|
||||
void print_decomps(int k) {
|
||||
const vector<int> primes = get_primes(k);
|
||||
cout << "\nDecompositions for k = " << k << ": ";
|
||||
|
||||
for (size_t i = 0; i < primes.size(); i++) {
|
||||
for (size_t j = i; j < primes.size(); j++) {
|
||||
if (primes[i] + primes[j] == k) {
|
||||
cout << primes[i] << " + " << primes[j] << ", ";
|
||||
}
|
||||
}
|
||||
}
|
||||
cout << endl;
|
||||
}
|
||||
|
||||
// -------------- Task G --------------
|
||||
|
||||
|
||||
// Logistic function 1 / (1 + e^(-x)).
double sigmoid(double x)
{
    const double decay = std::exp(-x);
    return 1.0 / (1.0 + decay);
}
|
||||
|
||||
// Member version of the logistic function 1 / (1 + e^(-x)); used by the
// constructor to compute the matrix entries.
double DenseMatrix::sigmoid(double x)
{
    const double decay = exp(-x);
    return 1.0 / (1.0 + decay);
}
|
||||
|
||||
// Builds an n x m matrix stored row by row. Entry (i, j) is
// sigmoid(x_i) * sigmoid(x_j) with x_k = 10*k/(max(n,m)-1) - 5.
DenseMatrix::DenseMatrix(int n, int m) : rows(n), cols(m), matrix(n*m)
{
    const int last_index = std::max(n, m) - 1;   // denominator of the sample points

    for (size_t r = 0; r < rows; r++)
    {
        const double x_row = 10.0*r/last_index - 5.0;
        for (size_t c = 0; c < cols; c++)
        {
            const double x_col = 10.0*c/last_index - 5.0;
            matrix[r*cols + c] = sigmoid(x_row) * sigmoid(x_col);
        }
    }
}
|
||||
|
||||
vector<double> DenseMatrix::Mult(const vector<double>& vec) const
|
||||
{
|
||||
assert(vec.size() == cols);
|
||||
vector<double> result(rows, 0.0);
|
||||
|
||||
for (size_t i = 0; i < rows; ++i)
|
||||
{
|
||||
for (size_t j = 0; j < cols; ++j)
|
||||
{
|
||||
result[i] += matrix[i*cols + j] * vec[j];
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
vector<double> DenseMatrix::MultT(const vector<double>& vec) const
|
||||
{
|
||||
assert(vec.size() == rows);
|
||||
vector<double> result(cols, 0.0);
|
||||
|
||||
for (size_t i = 0; i < cols; ++i)
|
||||
{
|
||||
for (size_t j = 0; j < rows; ++j)
|
||||
{
|
||||
result[i] += matrix[j*cols + i] * vec[j];
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void DenseMatrix::print() const {
|
||||
int count(0);
|
||||
cout.precision(5);
|
||||
for (double val : matrix) {
|
||||
printf("%.6f ", val);
|
||||
|
||||
count++;
|
||||
if(count%cols == 0){cout << endl;}
|
||||
}
|
||||
}
|
||||
|
||||
// #################################################
|
||||
|
||||
// Stores copies of the two vectors; the row count is the length of u and the
// column count the length of v. Mult/MultT below use them as the factors of
// the rank-one matrix u * v^T.
DenseMatrix2::DenseMatrix2(vector<double> const u, vector<double> const v)
    : rows(u.size()),
      cols(v.size()),
      u_(u),
      v_(v)
{
}
|
||||
|
||||
vector<double> DenseMatrix2::Mult(const vector<double>& vec) const
|
||||
{
|
||||
assert(vec.size() == cols);
|
||||
vector<double> result(rows, 0.0);
|
||||
|
||||
double scalar(0.0);
|
||||
for (size_t i = 0; i < vec.size(); i++)
|
||||
{
|
||||
scalar += v_[i] * vec[i];
|
||||
}
|
||||
for (size_t i = 0; i < u_.size(); i++)
|
||||
{
|
||||
result[i] = scalar*u_[i];
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
vector<double> DenseMatrix2::MultT(const vector<double>& vec) const
|
||||
{
|
||||
assert(vec.size() == rows);
|
||||
vector<double> result(cols, 0.0);
|
||||
|
||||
double scalar(0.0);
|
||||
for (size_t i = 0; i < u_.size(); i++)
|
||||
{
|
||||
scalar += u_[i] * vec[i];
|
||||
}
|
||||
for (size_t i = 0; i < v_.size(); i++)
|
||||
{
|
||||
result[i] = scalar*v_[i];
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
112
ex1/ABCEFG/skalar_stl/mylib.h
Normal file
112
ex1/ABCEFG/skalar_stl/mylib.h
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
#pragma once
|
||||
|
||||
#include <cassert> // assert
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <cmath>
|
||||
#include <tuple>
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
#include <fstream>
|
||||
#include <stdexcept>
|
||||
#include <list>
|
||||
using namespace std;
|
||||
|
||||
// -------------- Task A --------------
|
||||
|
||||
// Returns arithmetic, geometric and harmonic mean for 3 values a,b,c.
|
||||
tuple<double, double, double> means0(double a, double b, double c);
|
||||
|
||||
// Returns arithmetic, geometric and harmonic mean for a vector.
|
||||
tuple<double, double, double> means(const vector<double>& v);
|
||||
|
||||
// -------------- Task B --------------
|
||||
|
||||
/**
|
||||
This function opens the ASCII-file named @p file_name and reads the
|
||||
double data into the C++ vector @p v.
|
||||
If the file @p file_name does not exist then the code stops with an appropriate message.
|
||||
@param[in] file_name name of the ASCII-file
|
||||
@param[out] v C++ vector with double values
|
||||
*/
|
||||
|
||||
void read_vector_from_file(const string& file_name, vector<double>& v);
|
||||
|
||||
|
||||
/**
|
||||
This function opens the ASCII-file named @p file_name and overwrites its contents with the
|
||||
double data from the C++ vector @p v.
|
||||
If there are problems in opening/generating file @p file_name
|
||||
then the code stops with an appropriate message.
|
||||
@param[in] file_name name of the ASCII-file
|
||||
@param[in] v C++ vector with double values
|
||||
*/
|
||||
|
||||
void write_vector_to_file(const string& file_name, const vector<double>& v);
|
||||
|
||||
/**
|
||||
Fills the double-vector @p v with data from an input stream @p istr until this input stream
|
||||
ends regularly. The values are appended to @p v (existing entries are kept) and its memory grows automatically.
|
||||
@param[in] istr input stream
|
||||
@param[out] v C++ vector with double values
|
||||
@warning An exception is thrown in case of wrong data format or corrupted data.
|
||||
*/
|
||||
void fill_vector(istream& istr, vector<double>& v);
|
||||
|
||||
// -------------- Task C --------------
|
||||
|
||||
// Sums up all positive integers less or equal n which are multiples of 3 or of 5 (including or!) by brute force.
|
||||
double sum_of_spec(int n);
|
||||
|
||||
// Sums up all positive integers less or equal n which are multiples of 3 or of 5 (including or!) by inclusion-exclusion principle.
|
||||
double formula(int n);
|
||||
|
||||
// -------------- Task E --------------
|
||||
|
||||
// Inserts n random numbers into the sorted vector vec such that vec remains sorted.
|
||||
void insert_into_vector(vector<int>& vec, int n);
|
||||
|
||||
// Inserts n random numbers into the sorted list lst such that lst remains sorted.
|
||||
void insert_into_list(list<int>& lst, int n);
|
||||
|
||||
|
||||
// -------------- Task F --------------
|
||||
|
||||
// Counts number of possible decompositions with 2 primes that sum up to k.
|
||||
int single_goldbach(int k);
|
||||
|
||||
// Counts number of possible decompositions with 2 primes that sum up to k for all even numbers k \in {4,...,n}.
|
||||
vector<int> count_goldbach(int n);
|
||||
|
||||
// Prints all decompositions of k.
|
||||
void print_decomps(int k);
|
||||
|
||||
// -------------- Task G --------------
|
||||
|
||||
// Sigmoid function 1/(1+exp(-x))
|
||||
double sigmoid(double x);
|
||||
|
||||
/// Dense matrix whose entries are held in a single std::vector<double>.
/// NOTE(review): the storage order (row-major vs. column-major) and the way the
/// entries are initialised are decided in the implementation file — confirm there.
class DenseMatrix {
    // NOTE(review): private member with the same name and signature as the free
    // function sigmoid(double) declared earlier in this header — confirm that a
    // separate member function is really intended.
    double sigmoid(double x);

    std::size_t rows;              // number of rows
    std::size_t cols;              // number of columns
    std::vector<double> matrix;    // matrix entries

public:
    /// Constructor. Presumably @p n is the number of rows and @p m the number
    /// of columns — TODO confirm against the definition.
    DenseMatrix(int n, int m);

    /// Presumably the matrix-vector product with @p vec — confirm in the implementation.
    std::vector<double> Mult(const std::vector<double>& vec) const;

    /// Presumably the product of the transposed matrix with @p vec — confirm in the implementation.
    std::vector<double> MultT(const std::vector<double>& vec) const;

    /// Prints the matrix (output target and format are defined in the implementation).
    void print() const;
};
|
||||
|
||||
/// Matrix type that stores two vectors instead of a full table of entries.
/// NOTE(review): presumably the matrix is defined implicitly by u_ and v_
/// (e.g. as an outer product) — confirm against the implementation file.
class DenseMatrix2 {
    std::size_t rows;          // number of rows
    std::size_t cols;          // number of columns
    std::vector<double> u_;    // first generating vector
    std::vector<double> v_;    // second generating vector

public:
    /// Constructor taking the two vectors by value.
    DenseMatrix2(const std::vector<double> u, const std::vector<double> v);

    /// Presumably the matrix-vector product with @p vec — confirm in the implementation.
    std::vector<double> Mult(const std::vector<double>& vec) const;

    /// Presumably the product of the transposed matrix with @p vec — confirm in the implementation.
    std::vector<double> MultT(const std::vector<double>& vec) const;
};
|
||||
BIN
ex1/ABCEFG/skalar_stl/mylib.o
Normal file
BIN
ex1/ABCEFG/skalar_stl/mylib.o
Normal file
Binary file not shown.
6
ex1/ABCEFG/skalar_stl/out_1.txt
Normal file
6
ex1/ABCEFG/skalar_stl/out_1.txt
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
1
|
||||
1000
|
||||
498.184
|
||||
inf
|
||||
95.6857
|
||||
287.905
|
||||
51
ex1/ABCEFG/skalar_stl/timing.h
Normal file
51
ex1/ABCEFG/skalar_stl/timing.h
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
//
|
||||
// Gundolf Haase, Oct 18 2024
|
||||
//
|
||||
#pragma once
|
||||
#include <chrono> // timing
|
||||
#include <stack>
|
||||
|
||||
//using Clock = std::chrono::system_clock; //!< The wall clock timer chosen
|
||||
using Clock = std::chrono::high_resolution_clock;
|
||||
using TPoint= std::chrono::time_point<Clock>;
|
||||
|
||||
// [Galowicz, C++17 STL Cookbook, p. 29]
|
||||
inline
|
||||
std::stack<TPoint> MyStopWatch; //!< starting time of stopwatch
|
||||
|
||||
/** Starts stopwatch timer.
|
||||
* Use as @code tic(); myfunction(...) ; double tsec = toc(); @endcode
|
||||
*
|
||||
* The timing can be nested and the most recent time point is stored on top of the stack.
|
||||
*
|
||||
* @return recent time point
|
||||
* @see toc
|
||||
*/
|
||||
inline auto tic()
|
||||
{
|
||||
MyStopWatch.push(Clock::now());
|
||||
return MyStopWatch.top();
|
||||
}
|
||||
|
||||
/** Returns the elapsed time from stopwatch.
|
||||
*
|
||||
* The time point from top of the stack is used
|
||||
* if time point @p t_b is not passed as input parameter.
|
||||
* Use as @code tic(); myfunction(...) ; double tsec = toc(); @endcode
|
||||
* or as @code auto t_b = tic(); myfunction(...) ; double tsec = toc(t_b); @endcode
|
||||
* The last option is to be used in the case of
|
||||
* non-nested but overlapping time measurements.
|
||||
*
|
||||
* @param[in] t_b start time of some stop watch
|
||||
* @return elapsed time in seconds.
|
||||
*
|
||||
*/
|
||||
inline double toc(TPoint const &t_b = MyStopWatch.top())
|
||||
{
|
||||
// https://en.cppreference.com/w/cpp/chrono/treat_as_floating_point
|
||||
using Unit = std::chrono::seconds;
|
||||
using FpSeconds = std::chrono::duration<double, Unit::period>;
|
||||
auto t_e = Clock::now();
|
||||
MyStopWatch.pop();
|
||||
return FpSeconds(t_e-t_b).count();
|
||||
}
|
||||
60
ex1/D/.vscode/settings.json
vendored
Normal file
60
ex1/D/.vscode/settings.json
vendored
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
{
|
||||
"files.associations": {
|
||||
"array": "cpp",
|
||||
"atomic": "cpp",
|
||||
"bit": "cpp",
|
||||
"cctype": "cpp",
|
||||
"charconv": "cpp",
|
||||
"chrono": "cpp",
|
||||
"clocale": "cpp",
|
||||
"cmath": "cpp",
|
||||
"compare": "cpp",
|
||||
"complex": "cpp",
|
||||
"concepts": "cpp",
|
||||
"cstdarg": "cpp",
|
||||
"cstddef": "cpp",
|
||||
"cstdint": "cpp",
|
||||
"cstdio": "cpp",
|
||||
"cstdlib": "cpp",
|
||||
"cstring": "cpp",
|
||||
"ctime": "cpp",
|
||||
"cwchar": "cpp",
|
||||
"cwctype": "cpp",
|
||||
"deque": "cpp",
|
||||
"string": "cpp",
|
||||
"unordered_map": "cpp",
|
||||
"vector": "cpp",
|
||||
"exception": "cpp",
|
||||
"algorithm": "cpp",
|
||||
"functional": "cpp",
|
||||
"iterator": "cpp",
|
||||
"memory": "cpp",
|
||||
"memory_resource": "cpp",
|
||||
"numeric": "cpp",
|
||||
"optional": "cpp",
|
||||
"random": "cpp",
|
||||
"ratio": "cpp",
|
||||
"string_view": "cpp",
|
||||
"system_error": "cpp",
|
||||
"tuple": "cpp",
|
||||
"type_traits": "cpp",
|
||||
"utility": "cpp",
|
||||
"format": "cpp",
|
||||
"initializer_list": "cpp",
|
||||
"iomanip": "cpp",
|
||||
"iosfwd": "cpp",
|
||||
"iostream": "cpp",
|
||||
"istream": "cpp",
|
||||
"limits": "cpp",
|
||||
"new": "cpp",
|
||||
"numbers": "cpp",
|
||||
"ostream": "cpp",
|
||||
"span": "cpp",
|
||||
"sstream": "cpp",
|
||||
"stdexcept": "cpp",
|
||||
"streambuf": "cpp",
|
||||
"cinttypes": "cpp",
|
||||
"typeinfo": "cpp",
|
||||
"variant": "cpp"
|
||||
}
|
||||
}
|
||||
123
ex1/D/CLANG_default.mk
Normal file
123
ex1/D/CLANG_default.mk
Normal file
|
|
@ -0,0 +1,123 @@
|
|||
# Basic Definitions for using the Clang/LLVM compiler suite sequentially
|
||||
# requires setting of COMPILER=CLANG_
|
||||
|
||||
#CLANGPATH=//usr/lib/llvm-10/bin/
|
||||
CC = ${CLANGPATH}clang
|
||||
CXX = ${CLANGPATH}clang++
|
||||
#CXX = ${CLANGPATH}clang++ -lomptarget -fopenmp-targets=nvptx64-nvidia-cuda --cuda-path=/opt/pgi/linux86-64/2017/cuda/8.0
|
||||
#F77 = gfortran
|
||||
LINKER = ${CXX}
|
||||
|
||||
#http://clang.llvm.org/docs/UsersManual.html#options-to-control-error-and-warning-messages
|
||||
WARNINGS += -Weverything -Wno-c++98-compat -Wno-sign-conversion -Wno-date-time -Wno-shorten-64-to-32 -Wno-padded -ferror-limit=1
|
||||
WARNINGS += -Wdocumentation -Wconversion -Wshadow -Wfloat-conversion -pedantic
|
||||
#-fsyntax-only -Wdocumentation -Wconversion -Wshadow -Wfloat-conversion -pedantic
|
||||
|
||||
CXXFLAGS += -O3 -std=c++17 -ferror-limit=1 ${WARNINGS}
|
||||
# don't use -Ofast
|
||||
# -ftrapv
|
||||
LINKFLAGS += -O3
|
||||
|
||||
# different libraries in Ubuntu or Manjaro
|
||||
ifndef UBUNTU
|
||||
UBUNTU=1
|
||||
endif
|
||||
|
||||
# BLAS, LAPACK
|
||||
LINKFLAGS += -llapack -lblas
|
||||
# -lopenblas
|
||||
ifeq ($(UBUNTU),1)
|
||||
# ubuntu
|
||||
else
|
||||
# on archlinux
|
||||
LINKFLAGS += -lcblas
|
||||
endif
|
||||
|
||||
# interprocedural optimization
|
||||
CXXFLAGS += -flto
|
||||
LINKFLAGS += -flto
|
||||
|
||||
# very good check
|
||||
# http://clang.llvm.org/extra/clang-tidy/
|
||||
# good check, see: http://llvm.org/docs/CodingStandards.html#include-style
|
||||
SWITCH_OFF=,-readability-magic-numbers,-readability-redundant-control-flow,-readability-redundant-member-init
|
||||
SWITCH_OFF+=,-readability-redundant-member-init,-readability-isolate-declaration
|
||||
#READABILITY=,readability*${SWITCH_OFF}
|
||||
#TIDYFLAGS = -checks=llvm-*,-llvm-header-guard -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp"
|
||||
TIDYFLAGS = -checks=llvm-*,-llvm-header-guard${READABILITY} -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp"
|
||||
#TIDYFLAGS += -checks='modernize*
|
||||
# ???
|
||||
#TIDYFLAGS = -checks='cert*' -header-filter=.*
|
||||
# MPI checks ??
|
||||
#TIDYFLAGS = -checks='mpi*'
|
||||
# ??
|
||||
#TIDYFLAGS = -checks='performance*' -header-filter=.*
|
||||
#TIDYFLAGS = -checks='portability-*' -header-filter=.*
|
||||
#TIDYFLAGS = -checks='readability-*' -header-filter=.*
|
||||
|
||||
default: ${PROGRAM}
|
||||
|
||||
${PROGRAM}: ${OBJECTS}
|
||||
$(LINKER) $^ ${LINKFLAGS} -o $@
|
||||
|
||||
clean:
|
||||
@rm -f ${PROGRAM} ${OBJECTS}
|
||||
|
||||
clean_all:: clean
|
||||
@rm -f *_ *~ *.bak *.log *.out *.tar
|
||||
|
||||
codecheck: tidy_check
|
||||
tidy_check:
|
||||
clang-tidy ${SOURCES} ${TIDYFLAGS} -- ${SOURCES}
|
||||
# see also http://clang-developers.42468.n3.nabble.com/Error-while-trying-to-load-a-compilation-database-td4049722.html
|
||||
|
||||
run: clean ${PROGRAM}
|
||||
# time ./${PROGRAM} ${PARAMS}
|
||||
./${PROGRAM} ${PARAMS}
|
||||
|
||||
# tar the current directory
|
||||
MY_DIR = `basename ${PWD}`
|
||||
tar: clean_all
|
||||
@echo "Tar the directory: " ${MY_DIR}
|
||||
@cd .. ;\
|
||||
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
|
||||
cd ${MY_DIR}
|
||||
# tar cf `basename ${PWD}`.tar *
|
||||
|
||||
doc:
|
||||
doxygen Doxyfile
|
||||
|
||||
#########################################################################
|
||||
|
||||
.cpp.o:
|
||||
$(CXX) -c $(CXXFLAGS) -o $@ $<
|
||||
|
||||
.c.o:
|
||||
$(CC) -c $(CFLAGS) -o $@ $<
|
||||
|
||||
.f.o:
|
||||
$(F77) -c $(FFLAGS) -o $@ $<
|
||||
|
||||
##################################################################################################
|
||||
# some tools
|
||||
# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags)
|
||||
cache: ${PROGRAM}
|
||||
valgrind --tool=callgrind --simulate-cache=yes ./$^ ${PARAMS}
|
||||
# kcachegrind callgrind.out.<pid> &
|
||||
kcachegrind `ls -1tr callgrind.out.* |tail -1`
|
||||
|
||||
# Check for wrong memory accesses, memory leaks, ...
|
||||
# use smaller data sets
|
||||
mem: ${PROGRAM}
|
||||
valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^ ${PARAMS}
|
||||
|
||||
# Simple run time profiling of your code
|
||||
# CXXFLAGS += -g -pg
|
||||
# LINKFLAGS += -pg
|
||||
prof: ${PROGRAM}
|
||||
perf record ./$^ ${PARAMS}
|
||||
perf report
|
||||
# gprof -b ./$^ > gp.out
|
||||
# kprof -f gp.out -p gprof &
|
||||
|
||||
codecheck: tidy_check
|
||||
130
ex1/D/GCC_AMD32_default.mk
Normal file
130
ex1/D/GCC_AMD32_default.mk
Normal file
|
|
@ -0,0 +1,130 @@
|
|||
# Basic Definitions for using GNU-compiler suite sequentially
|
||||
# requires setting of COMPILER=GCC_
|
||||
|
||||
CC = gcc
|
||||
CXX = g++
|
||||
F77 = gfortran
|
||||
LINKER = ${CXX}
|
||||
|
||||
# on mephisto:
|
||||
#CXXFLAGS += -I/share/apps/atlas/include
|
||||
#LINKFLAGS += -L/share/apps/atlas/lib
|
||||
#LINKFLAGS += -lcblas -latlas
|
||||
|
||||
#LINKFLAGS += -lblas
|
||||
# The <cblas.h> header has to be wrapped in extern "C" so that g++ finds everything.
|
||||
|
||||
|
||||
#WARNINGS = -pedantic -pedantic-errors -Wall -Wextra -Werror -Wconversion -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow
|
||||
WARNINGS = -pedantic -Wall -Wextra -Wconversion -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow \
|
||||
-Wredundant-decls -Winline -fmax-errors=1
|
||||
# -Wunreachable-code
|
||||
# -Wunreachable-code
|
||||
CXXFLAGS += -ffast-math -O3 -march=native -std=c++17 ${WARNINGS}
|
||||
#CXXFLAGS += -Ofast -funroll-all-loops -std=c++17 ${WARNINGS}
|
||||
#-msse3
|
||||
# -ftree-vectorizer-verbose=2 -DNDEBUG
|
||||
# -ftree-vectorizer-verbose=5
|
||||
# -ftree-vectorize -fdump-tree-vect-blocks=foo.dump -fdump-tree-pre=stderr
|
||||
|
||||
# CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details
|
||||
# CFLAGS = -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2
|
||||
# #CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
|
||||
# FFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
|
||||
# LFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
|
||||
LINKFLAGS += -O3
|
||||
|
||||
# BLAS, LAPACK
|
||||
OPENBLAS_DIR = /opt/openblas_GCCseq
|
||||
#OPENBLAS_DIR = /opt/openblas_GCC
|
||||
OPENBLAS_LIBDIR = ${OPENBLAS_DIR}/lib
|
||||
OPENBLAS_INCDIR = ${OPENBLAS_DIR}/include
|
||||
CXXFLAGS += -I${OPENBLAS_INCDIR}
|
||||
LINKFLAGS += -L${OPENBLAS_LIBDIR} -lopenblas
|
||||
|
||||
# interprocedural optimization
|
||||
CXXFLAGS += -flto
|
||||
LINKFLAGS += -flto
|
||||
|
||||
# profiling tools
|
||||
#CXXFLAGS += -pg
|
||||
#LINKFLAGS += -pg
|
||||
|
||||
default: ${PROGRAM}
|
||||
|
||||
${PROGRAM}: ${OBJECTS}
|
||||
$(LINKER) $^ ${LINKFLAGS} -o $@
|
||||
|
||||
clean:
|
||||
@rm -f ${PROGRAM} ${OBJECTS}
|
||||
|
||||
clean_all:: clean
|
||||
-@rm -f *_ *~ *.bak *.log *.out *.tar *.orig
|
||||
-@rm -r html
|
||||
|
||||
run: clean ${PROGRAM}
|
||||
# time ./${PROGRAM}
|
||||
# ./${PROGRAM}
|
||||
( export LD_LIBRARY_PATH=${OPENBLAS_LIBDIR}:${LD_LIBRARY_PATH} ; ./${PROGRAM} )
|
||||
# or 'export LD_LIBRARY_PATH=/opt/openblas_gcc/lib:${LD_LIBRARY_PATH}' in your ~/.bashrc
|
||||
|
||||
# tar the current directory
|
||||
MY_DIR = `basename ${PWD}`
|
||||
tar:
|
||||
@echo "Tar the directory: " ${MY_DIR}
|
||||
@cd .. ;\
|
||||
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
|
||||
cd ${MY_DIR}
|
||||
# tar cf `basename ${PWD}`.tar *
|
||||
|
||||
doc:
|
||||
doxygen Doxyfile
|
||||
|
||||
#########################################################################
|
||||
|
||||
.cpp.o:
|
||||
$(CXX) -c $(CXXFLAGS) -o $@ $<
|
||||
|
||||
.c.o:
|
||||
$(CC) -c $(CFLAGS) -o $@ $<
|
||||
|
||||
.f.o:
|
||||
$(F77) -c $(FFLAGS) -o $@ $<
|
||||
|
||||
##################################################################################################
|
||||
# some tools
|
||||
# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags)
|
||||
cache: ${PROGRAM}
|
||||
valgrind --tool=callgrind --simulate-cache=yes ./$^
|
||||
# kcachegrind callgrind.out.<pid> &
|
||||
kcachegrind `ls -1tr callgrind.out.* |tail -1`
|
||||
|
||||
# Check for wrong memory accesses, memory leaks, ...
|
||||
# use smaller data sets
|
||||
# no "-pg" in compile/link options
|
||||
mem: ${PROGRAM}
|
||||
valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
|
||||
|
||||
# Simple run time profiling of your code
|
||||
# CXXFLAGS += -g -pg
|
||||
# LINKFLAGS += -pg
|
||||
prof: ${PROGRAM}
|
||||
./$^
|
||||
gprof -b ./$^ > gp.out
|
||||
# kprof -f gp.out -p gprof &
|
||||
|
||||
#Trace your heap:
|
||||
#> heaptrack ./main.GCC_
|
||||
#> heaptrack_gui heaptrack.main.GCC_.<pid>.gz
|
||||
heap: ${PROGRAM}
|
||||
heaptrack ./$^ 11
|
||||
heaptrack_gui `ls -1tr heaptrack.$^.* |tail -1` &
|
||||
|
||||
|
||||
|
||||
########################################################################
|
||||
# get the detailed status of all optimization flags
|
||||
info:
|
||||
echo "detailed status of all optimization flags"
|
||||
$(CXX) --version
|
||||
$(CXX) -Q $(CXXFLAGS) --help=optimizers
|
||||
183
ex1/D/GCC_default.mk
Normal file
183
ex1/D/GCC_default.mk
Normal file
|
|
@ -0,0 +1,183 @@
|
|||
# Basic Definitions for using GNU-compiler suite sequentially
|
||||
# requires setting of COMPILER=GCC_
|
||||
|
||||
CC = gcc
|
||||
CXX = g++
|
||||
F77 = gfortran
|
||||
LINKER = ${CXX}
|
||||
|
||||
WARNINGS = -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow \
|
||||
-Wredundant-decls -Winline -fmax-errors=1
|
||||
# -Wunreachable-code
|
||||
CXXFLAGS += -ffast-math -O1 -march=native -std=c++17 ${WARNINGS}
|
||||
#CXXFLAGS += -Ofast -funroll-all-loops -std=c++17 ${WARNINGS}
|
||||
#-msse3
|
||||
# -ftree-vectorizer-verbose=2 -DNDEBUG
|
||||
# -ftree-vectorizer-verbose=5
|
||||
# -ftree-vectorize -fdump-tree-vect-blocks=foo.dump -fdump-tree-pre=stderr
|
||||
|
||||
# CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details
|
||||
# CFLAGS = -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2
|
||||
# #CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
|
||||
# FFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
|
||||
# LFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
|
||||
LINKFLAGS += -O1
|
||||
|
||||
#architecture
|
||||
#CPU = -march=znver2
|
||||
CXXFLAGS += ${CPU}
|
||||
LINKFLAGS += ${CPU}
|
||||
|
||||
# different libraries in Ubuntu or Manjaro
|
||||
ifndef UBUNTU
|
||||
UBUNTU=1
|
||||
endif
|
||||
|
||||
# BLAS, LAPACK
|
||||
ifeq ($(UBUNTU),1)
|
||||
LINKFLAGS += -llapack -lblas
|
||||
# -lopenblas
|
||||
else
|
||||
# on archlinux
|
||||
LINKFLAGS += -llapack -lopenblas -lcblas
|
||||
endif
|
||||
|
||||
# interprocedural optimization
|
||||
CXXFLAGS += -flto
|
||||
LINKFLAGS += -flto
|
||||
|
||||
# for debugging purposes (safe code)
|
||||
# -fsanitize=leak # only one out the three can be used
|
||||
# -fsanitize=address
|
||||
# -fsanitize=thread
|
||||
SANITARY = -fsanitize=address -fsanitize=undefined -fsanitize=null -fsanitize=return \
|
||||
-fsanitize=bounds -fsanitize=alignment -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow \
|
||||
-fsanitize=bool -fsanitize=enum -fsanitize=vptr
|
||||
#CXXFLAGS += ${SANITARY}
|
||||
#LINKFLAGS += ${SANITARY}
|
||||
|
||||
# profiling tools
|
||||
#CXXFLAGS += -pg
|
||||
#LINKFLAGS += -pg
|
||||
|
||||
|
||||
default: ${PROGRAM}
|
||||
|
||||
${PROGRAM}: ${OBJECTS}
|
||||
$(LINKER) $^ ${LINKFLAGS} -o $@
|
||||
|
||||
clean:
|
||||
@rm -f ${PROGRAM} ${OBJECTS}
|
||||
|
||||
clean_all:: clean
|
||||
-@rm -f *_ *~ *.bak *.log *.out *.tar *.orig *.optrpt
|
||||
-@rm -rf html
|
||||
|
||||
run: clean ${PROGRAM}
|
||||
#run: ${PROGRAM}
|
||||
# time ./${PROGRAM} ${PARAMS}
|
||||
./${PROGRAM} ${PARAMS}
|
||||
|
||||
# tar the current directory
|
||||
MY_DIR = `basename ${PWD}`
|
||||
tar: clean_all
|
||||
@echo "Tar the directory: " ${MY_DIR}
|
||||
@cd .. ;\
|
||||
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
|
||||
cd ${MY_DIR}
|
||||
# tar cf `basename ${PWD}`.tar *
|
||||
#find . -size +10M > large_files
|
||||
#--exclude-from ${MY_DIR}/large_files
|
||||
|
||||
zip: clean
|
||||
@echo "Zip the directory: " ${MY_DIR}
|
||||
@cd .. ;\
|
||||
zip -r ${MY_DIR}.zip ${MY_DIR} *default.mk ;\
|
||||
cd ${MY_DIR}
|
||||
|
||||
doc:
|
||||
doxygen Doxyfile
|
||||
|
||||
#########################################################################
|
||||
.SUFFIXES: .f90
|
||||
|
||||
.cpp.o:
|
||||
$(CXX) -c $(CXXFLAGS) -o $@ $<
|
||||
# $(CXX) -c $(CXXFLAGS) -o $@ $< 2>&1 | tee -a $<.log
|
||||
# $(CXX) -c $(CXXFLAGS) -o $@ $< 2>&1 | tee -a $(<:.cpp=.log)
|
||||
|
||||
.c.o:
|
||||
$(CC) -c $(CFLAGS) -o $@ $<
|
||||
|
||||
.f.o:
|
||||
$(F77) -c $(FFLAGS) -o $@ $<
|
||||
|
||||
.f90.o:
|
||||
$(F77) -c $(FFLAGS) -o $@ $<
|
||||
|
||||
##################################################################################################
|
||||
# some tools
|
||||
# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags)
|
||||
cache: ${PROGRAM}
|
||||
valgrind --tool=callgrind --simulate-cache=yes ./$^ ${PARAMS}
|
||||
# kcachegrind callgrind.out.<pid> &
|
||||
kcachegrind `ls -1tr callgrind.out.* |tail -1`
|
||||
|
||||
# Check for wrong memory accesses, memory leaks, ...
|
||||
# use smaller data sets
|
||||
# no "-pg" in compile/link options
|
||||
mem: ${PROGRAM}
|
||||
valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^ ${PARAMS}
|
||||
# Graphical interface
|
||||
# valkyrie
|
||||
|
||||
# Simple run time profiling of your code
|
||||
# CXXFLAGS += -g -pg
|
||||
# LINKFLAGS += -pg
|
||||
prof: ${PROGRAM}
|
||||
perf record ./$^ ${PARAMS}
|
||||
perf report
|
||||
# gprof -b ./$^ > gp.out
|
||||
# kprof -f gp.out -p gprof &
|
||||
|
||||
# perf in Ubuntu 20.04: https://www.howtoforge.com/how-to-install-perf-performance-analysis-tool-on-ubuntu-20-04/
|
||||
# * install
|
||||
# * sudo vi /etc/sysctl.conf
|
||||
# add kernel.perf_event_paranoid = 0
|
||||
|
||||
#Trace your heap:
|
||||
#> heaptrack ./main.GCC_
|
||||
#> heaptrack_gui heaptrack.main.GCC_.<pid>.gz
|
||||
heap: ${PROGRAM}
|
||||
heaptrack ./$^ ${PARAMS}
|
||||
heaptrack_gui `ls -1tr heaptrack.$^.* |tail -1` &
|
||||
|
||||
codecheck: $(SOURCES)
|
||||
cppcheck --enable=all --inconclusive --std=c++17 --suppress=missingIncludeSystem $^
|
||||
|
||||
|
||||
########################################################################
|
||||
# get the detailed status of all optimization flags
|
||||
info:
|
||||
echo "detailed status of all optimization flags"
|
||||
$(CXX) --version
|
||||
$(CXX) -Q $(CXXFLAGS) --help=optimizers
|
||||
lscpu
|
||||
inxi -C
|
||||
lstopo
|
||||
|
||||
# Excellent hardware info
|
||||
# hardinfo
|
||||
# Live monitoring of CPU frequency etc.
|
||||
# sudo i7z
|
||||
|
||||
# Memory consumption
|
||||
# vmstat -at -SM 3
|
||||
# xfce4-taskmanager
|
||||
|
||||
|
||||
# https://www.tecmint.com/check-linux-cpu-information/
|
||||
#https://www.tecmint.com/monitor-cpu-and-gpu-temperature-in-ubuntu/
|
||||
|
||||
# Debugging:
|
||||
# https://wiki.archlinux.org/index.php/Debugging
|
||||
137
ex1/D/ICC_default.mk
Normal file
137
ex1/D/ICC_default.mk
Normal file
|
|
@ -0,0 +1,137 @@
|
|||
# Basic Definitions for using INTEL compiler suite sequentially
|
||||
# requires setting of COMPILER=ICC_
|
||||
|
||||
#BINDIR = /opt/intel/bin/
|
||||
|
||||
# special on my sony [GH]
|
||||
#BINDIR = /opt/save.intel/bin/
|
||||
# very special on my sony [GH]
|
||||
# FIND_LIBS = -L /opt/save.intel/composer_xe_2013.1.117/mkl/lib/intel64/libmkl_intel_lp64.so
|
||||
|
||||
# Error with g++-4.8 using icpc14.0,x:
|
||||
# find directory wherein bits/c++config.h is located
|
||||
# 'locate bits/c++config.h'
|
||||
#FOUND_CONFIG = -I/usr/include/x86_64-linux-gnu/c++/4.8
|
||||
|
||||
|
||||
CC = ${BINDIR}icc
|
||||
CXX = ${BINDIR}icpc
|
||||
F77 = ${BINDIR}ifort
|
||||
LINKER = ${CXX}
|
||||
|
||||
|
||||
WARNINGS = -Wall -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -wd2015,2012 -wn3
|
||||
# -Winline -Wredundant-decls -Wunreachable-code
|
||||
CXXFLAGS += -O3 -fargument-noalias -std=c++17 -DNDEBUG ${WARNINGS} -mkl ${FOUND_CONFIG}
|
||||
# profiling tools
|
||||
#CXXFLAGS += -pg
|
||||
#LINKFLAGS += -pg
|
||||
# -vec-report=3
|
||||
# -qopt-report=5 -qopt-report-phase=vec
|
||||
# -guide -parallel
|
||||
# -guide-opts=string -guide-par[=n] -guide-vec[=n]
|
||||
# -auto-p32 -simd
|
||||
CXXFLAGS += -align
|
||||
|
||||
# use MKL by INTEL
|
||||
# https://software.intel.com/content/www/us/en/develop/documentation/mkl-linux-developer-guide/top/linking-your-application-with-the-intel-math-kernel-library/linking-quick-start/using-the-mkl-compiler-option.html
|
||||
# https://software.intel.com/content/www/us/en/develop/articles/intel-mkl-link-line-advisor.html
|
||||
# LINKFLAGS += -L${BINDIR}../composer_xe_2013.1.117/mkl/lib/intel64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
|
||||
#LINKFLAGS += -O3 -L/opt/intel/mkl/lib -mkl
|
||||
LINKFLAGS += -O3 -mkl=sequential
|
||||
|
||||
# interprocedural optimization
|
||||
CXXFLAGS += -ipo
|
||||
LINKFLAGS += -ipo
|
||||
|
||||
# annotated assembler file
|
||||
ANNOTED = -fsource-asm -S
|
||||
|
||||
default: ${PROGRAM}
|
||||
|
||||
${PROGRAM}: ${OBJECTS}
|
||||
$(LINKER) $^ ${LINKFLAGS} -o $@
|
||||
|
||||
clean:
|
||||
rm -f ${PROGRAM} ${OBJECTS}
|
||||
|
||||
clean_all:: clean
|
||||
@rm -f *_ *~ *.bak *.log *.out *.tar
|
||||
|
||||
run: clean ${PROGRAM}
|
||||
./${PROGRAM}
|
||||
|
||||
# tar the current directory
|
||||
MY_DIR = `basename ${PWD}`
|
||||
tar: clean_all
|
||||
@echo "Tar the directory: " ${MY_DIR}
|
||||
@cd .. ;\
|
||||
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
|
||||
cd ${MY_DIR}
|
||||
# tar cf `basename ${PWD}`.tar *
|
||||
|
||||
doc:
|
||||
doxygen Doxyfile
|
||||
|
||||
#########################################################################
|
||||
|
||||
.cpp.o:
|
||||
$(CXX) -c $(CXXFLAGS) -o $@ $<
|
||||
|
||||
.c.o:
|
||||
$(CC) -c $(CFLAGS) -o $@ $<
|
||||
|
||||
.f.o:
|
||||
$(F77) -c $(FFLAGS) -o $@ $<
|
||||
|
||||
##################################################################################################
|
||||
# # some tools
|
||||
# # Cache behaviour (CXXFLAGS += -g tracks down to source lines)
|
||||
# cache: ${PROGRAM}
|
||||
# valgrind --tool=callgrind --simulate-cache=yes ./$^
|
||||
# # kcachegrind callgrind.out.<pid> &
|
||||
#
|
||||
# # Check for wrong memory accesses, memory leaks, ...
|
||||
# # use smaller data sets
|
||||
# mem: ${PROGRAM}
|
||||
# valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
|
||||
#
|
||||
# # Simple run time profiling of your code
|
||||
# # CXXFLAGS += -g -pg
|
||||
# # LINKFLAGS += -pg
|
||||
# prof: ${PROGRAM}
|
||||
# ./$^
|
||||
# gprof -b ./$^ > gp.out
|
||||
# # kprof -f gp.out -p gprof &
|
||||
#
|
||||
|
||||
|
||||
mem: inspector
|
||||
prof: amplifier
|
||||
cache: amplifier
|
||||
|
||||
gap_par_report:
|
||||
${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt
|
||||
|
||||
# GUI for performance report
|
||||
amplifier: ${PROGRAM}
|
||||
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
|
||||
# alternatively to the solution above:
|
||||
#edit file /etc/sysctl.d/10-ptrace.conf and set variable kernel.yama.ptrace_scope variable to 0 .
|
||||
amplxe-gui &
|
||||
|
||||
# GUI for Memory and Thread analyzer (race condition)
|
||||
inspector: ${PROGRAM}
|
||||
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
|
||||
inspxe-gui &
|
||||
|
||||
advisor:
|
||||
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
|
||||
advixe-gui &
|
||||
|
||||
icc-info:
|
||||
icpc -# main.cpp
|
||||
|
||||
|
||||
|
||||
|
||||
176
ex1/D/ONEAPI_default.mk
Normal file
176
ex1/D/ONEAPI_default.mk
Normal file
|
|
@ -0,0 +1,176 @@
|
|||
# Basic Definitions for using INTEL compiler suite sequentially
|
||||
# requires setting of COMPILER=ONEAPI_
|
||||
|
||||
# https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onemkl/link-line-advisor.html
|
||||
# requires
|
||||
# source /opt/intel/oneapi/setvars.sh
|
||||
# on AMD: export MKL_DEBUG_CPU_TYPE=5
|
||||
|
||||
#BINDIR = /opt/intel/oneapi/compiler/latest/linux/bin/
|
||||
#MKL_ROOT = /opt/intel/oneapi/mkl/latest/
|
||||
#export KMP_AFFINITY=verbose,compact
|
||||
|
||||
CC = ${BINDIR}icc
|
||||
CXX = ${BINDIR}dpcpp
|
||||
F77 = ${BINDIR}ifort
|
||||
LINKER = ${CXX}
|
||||
|
||||
## Compiler flags
|
||||
WARNINGS = -Wall -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -pedantic
|
||||
WARNINGS += -Wpessimizing-move -Wredundant-move
|
||||
#-wd2015,2012,2014 -wn3
|
||||
# -Winline -Wredundant-decls -Wunreachable-code
|
||||
# -qopt-subscript-in-range
|
||||
# -vec-threshold0
|
||||
|
||||
CXXFLAGS += -O3 -std=c++17 ${WARNINGS}
|
||||
#CXXFLAGS += -DMKL_ILP64 -I"${MKLROOT}/include"
|
||||
#CXXFLAGS += -DMKL_ILP32 -I"${MKLROOT}/include"
|
||||
LINKFLAGS += -O3
|
||||
|
||||
# interprocedural optimization
|
||||
CXXFLAGS += -ipo
|
||||
LINKFLAGS += -ipo
|
||||
LINKFLAGS += -flto
|
||||
|
||||
# annotated Assembler file
|
||||
ANNOTED = -fsource-asm -S
|
||||
|
||||
#architecture
|
||||
CPU = -march=core-avx2
|
||||
#CPU += -mtp=zen
|
||||
# -xCORE-AVX2
|
||||
# -axcode COMMON-AVX512 -axcode MIC-AVX512 -axcode CORE-AVX512 -axcode CORE-AVX2
|
||||
CXXFLAGS += ${CPU}
|
||||
LINKFLAGS += ${CPU}
|
||||
|
||||
# use MKL by INTEL
|
||||
# https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onemkl/link-line-advisor.html
|
||||
# sequential MKL
|
||||
# use the 32 bit interface (LP64) instead of 64 bit interface (ILP64)
|
||||
CXXFLAGS += -qmkl=sequential -UMKL_ILP64
|
||||
LINKFLAGS += -O3 -qmkl=sequential -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
|
||||
#LINKFLAGS += -O3 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
|
||||
|
||||
# shared libs: https://aur.archlinux.org/packages/intel-oneapi-compiler-static
|
||||
# install intel-oneapi-compiler-static
|
||||
# or
|
||||
LINKFLAGS += -shared-intel
|
||||
|
||||
|
||||
OPENMP = -qopenmp
|
||||
CXXFLAGS += ${OPENMP}
|
||||
LINKFLAGS += ${OPENMP}
|
||||
|
||||
|
||||
# profiling tools
|
||||
#CXXFLAGS += -pg
|
||||
#LINKFLAGS += -pg
|
||||
# -vec-report=3
|
||||
# -qopt-report=5 -qopt-report-phase=vec -qopt-report-phase=openmp
|
||||
# -guide -parallel
|
||||
# -guide-opts=string -guide-par[=n] -guide-vec[=n]
|
||||
# -auto-p32 -simd
|
||||
|
||||
# Reports: https://software.intel.com/en-us/articles/getting-the-most-out-of-your-intel-compiler-with-the-new-optimization-reports
|
||||
#CXXFLAGS += -qopt-report=5 -qopt-report-phase=vec,par
|
||||
#CXXFLAGS += -qopt-report=5 -qopt-report-phase=cg
|
||||
# Redirect report from *.optrpt to stderr
|
||||
# -qopt-report-file=stderr
|
||||
# Guided parallelization
|
||||
# -guide -parallel
|
||||
# -guide-opts=string -guide-par[=n] -guide-vec[=n]
|
||||
# -auto-p32 -simd
|
||||
|
||||
## run time checks
|
||||
# https://www.intel.com/content/www/us/en/develop/documentation/fortran-compiler-oneapi-dev-guide-and-reference/top/compiler-reference/compiler-options/offload-openmp-and-parallel-processing-options/par-runtime-control-qpar-runtime-control.html
|
||||
|
||||
|
||||
default: ${PROGRAM}
|
||||
|
||||
${PROGRAM}: ${OBJECTS}
|
||||
$(LINKER) $^ ${LINKFLAGS} -o $@
|
||||
|
||||
clean:
|
||||
rm -f ${PROGRAM} ${OBJECTS} *.optrpt
|
||||
|
||||
clean_all:: clean
|
||||
@rm -f *_ *~ *.bak *.log *.out *.tar
|
||||
|
||||
run: clean ${PROGRAM}
|
||||
./${PROGRAM}
|
||||
|
||||
# tar the current directory
|
||||
MY_DIR = `basename ${PWD}`
|
||||
tar: clean_all
|
||||
@echo "Tar the directory: " ${MY_DIR}
|
||||
@cd .. ;\
|
||||
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
|
||||
cd ${MY_DIR}
|
||||
# tar cf `basename ${PWD}`.tar *
|
||||
|
||||
doc:
|
||||
doxygen Doxyfile
|
||||
|
||||
#########################################################################
|
||||
|
||||
.cpp.o:
|
||||
$(CXX) -c $(CXXFLAGS) -o $@ $<
|
||||
|
||||
.c.o:
|
||||
$(CC) -c $(CFLAGS) -o $@ $<
|
||||
|
||||
.f.o:
|
||||
$(F77) -c $(FFLAGS) -o $@ $<
|
||||
|
||||
##################################################################################################
|
||||
# some tools
|
||||
# Cache behaviour (CXXFLAGS += -g tracks down to source lines)
|
||||
# https://software.intel.com/content/www/us/en/develop/documentation/vtune-help/top/analyze-performance/microarchitecture-analysis-group/memory-access-analysis.html
|
||||
|
||||
mem: inspector
|
||||
prof: vtune
|
||||
cache: inspector
|
||||
|
||||
gap_par_report:
|
||||
${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt
|
||||
|
||||
# GUI for performance report
|
||||
amplifier: ${PROGRAM}
|
||||
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
|
||||
echo 0 | sudo tee /proc/sys/kernel/perf_event_paranoid
|
||||
amplxe-gui &
|
||||
|
||||
# GUI for Memory and Thread analyzer (race condition)
|
||||
inspector: ${PROGRAM}
|
||||
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
|
||||
# inspxe-gui &
|
||||
vtune-gui ./${PROGRAM} &
|
||||
|
||||
# Intel Advisor: collect a survey for ${PROGRAM} and export the report as CSV.
# make executes every recipe line in a separate shell, therefore the environment
# script has to be sourced in the same shell invocation as the advisor calls
# ('.' is the POSIX spelling of 'source', which /bin/sh need not provide).
# Alternatives kept for reference:
#	/opt/intel/oneapi/advisor/latest/bin64/advixe-gui &
#	advisor --collect=roofline ./${PROGRAM}
advisor:
	. /opt/intel/oneapi/advisor/2021.2.0/advixe-vars.sh && \
	advisor --collect=survey ./${PROGRAM} && \
	advisor --report=survey --project-dir=./ src:r=./ --format=csv --report-output=./out/survey.csv
|
||||
|
||||
# Hotspot analysis with VTune; the textual report is written to vtune.out.
# https://software.intel.com/en-us/articles/intel-advisor-2017-update-1-what-s-new
# make executes every recipe line in a separate shell, so a stand-alone 'export'
# line never reaches the next command; the variable is therefore set directly
# on the command line of the collection run.
# NOTE(review): ADVIXE_EXPERIMENTAL looks like an Advisor setting — confirm that
# the vtune collection actually needs it.
vtune:
	echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
	ADVIXE_EXPERIMENTAL=roofline vtune -collect hotspots ./${PROGRAM}
	vtune -report hotspots -r r000hs > vtune.out
#	vtune-gui ./${PROGRAM} &
|
||||
|
||||
# Runs the Intel C++ driver icpc with the option -# on main.cpp.
icc-info:
	icpc -# main.cpp
|
||||
|
||||
# MKL on AMD
|
||||
# https://www.computerbase.de/2019-11/mkl-workaround-erhoeht-leistung-auf-amd-ryzen/
|
||||
#
|
||||
# https://sites.google.com/a/uci.edu/mingru-yang/programming/mkl-has-bad-performance-on-an-amd-cpu
|
||||
# export MKL_DEBUG_CPU_TYPE=5
|
||||
# export MKL_NUM_THREADS=1
|
||||
# export MKL_DYNAMIC=false
|
||||
# on Intel compiler
|
||||
# http://publicclu2.blogspot.com/2013/05/intel-complier-suite-reference-card.html
|
||||
93
ex1/D/PGI_default.mk
Normal file
93
ex1/D/PGI_default.mk
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
# Basic Definitions for using PGI-compiler suite sequentially
|
||||
# requires setting of COMPILER=PGI_
|
||||
# OPTIRUN = optirun
|
||||
|
||||
|
||||
# Compiler drivers of the PGI suite
CC     = pgcc
CXX    = pgc++
F77    = pgfortran
# The C++ driver is also used for linking.
LINKER = $(CXX)
|
||||
|
||||
# on mephisto:
|
||||
#CXXFLAGS += -I/share/apps/atlas/include
|
||||
#LINKFLAGS += -L/share/apps/atlas/lib
|
||||
#LINKFLAGS += -lcblas -latlas
|
||||
|
||||
#LINKFLAGS += -lblas
|
||||
# The <cblas.h> header must be wrapped in extern "C" so that g++ finds all symbols.
|
||||
|
||||
WARNINGS = -Minform=warn
|
||||
# -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -W -Wfloat-equal -Wshadow -Wredundant-decls
|
||||
# -pedantic -Wunreachable-code -Wextra -Winline
|
||||
# -Wunreachable-code
|
||||
|
||||
#PGI_PROFILING = -Minfo=ccff,loop,vect,opt,intensity,mp,accel
|
||||
# -Minfo categories for compiler feedback during performance analysis.
# In the visible part of this file the variable is only referenced by the
# commented-out CXXFLAGS alternative, not by the active CXXFLAGS line.
PGI_PROFILING = -Minfo=ccff,accel,ipa,loop,lre,mp,opt,par,unified,vect,intensity
|
||||
# -Minfo
|
||||
# -Mprof=time
|
||||
# -Mprof=lines
|
||||
# take care with option -Msafeptr
|
||||
# Active C++ compile flags: -O3, C++17 and the settings from ${WARNINGS}.
# '+=' keeps flags that were already set before this file is included.
CXXFLAGS += -O3 -std=c++17 ${WARNINGS}
|
||||
#CXXFLAGS += -O3 -std=c++11 -DNDEBUG ${PGI_PROFILING} ${WARNINGS}
|
||||
# -fastsse -fargument-noalias ${WARNINGS} -msse3 -vec-report=3
|
||||
|
||||
# These targets are commands, not files. Declaring them phony keeps them
# working even if a file named e.g. "clean" or "run" exists in the directory.
.PHONY: default clean clean_all run

# Default goal: build the executable.
default: ${PROGRAM}

# Link all object files into the executable.
${PROGRAM}: ${OBJECTS}
	$(LINKER) $^ ${LINKFLAGS} -o $@

# Remove the executable and the object files.
clean:
	@rm -f ${PROGRAM} ${OBJECTS}

# Like clean, and additionally remove files matching *_ *~ *.bak *.log *.out *.tar.
# Double-colon rule, so further clean_all:: recipes can be added elsewhere.
clean_all:: clean
	@rm -f *_ *~ *.bak *.log *.out *.tar

# Rebuild from scratch and start the program.
# NOTE(review): with `make -j` the prerequisites clean and ${PROGRAM} may be
# processed concurrently; run this target without -j.
run: clean ${PROGRAM}
	./${PROGRAM}
|
||||
|
||||
# tar the current directory
|
||||
# Name of the current directory without its path. $(CURDIR) is maintained by
# make itself, so it stays correct under `make -C <dir>`, where ${PWD} still
# names the directory make was started from.
MY_DIR = $(notdir $(CURDIR))

# Pack this directory and the shared *default.mk files into ../<dir>.tar.
# clean_all is a prerequisite, so build products and old archives are removed
# before the archive is written.
# '&&' makes sure tar is only started if changing to the parent directory
# succeeded; no 'cd' back is needed because each recipe line has its own shell.
tar: clean_all
	@echo "Tar the directory: " ${MY_DIR}
	@cd .. && tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk
|
||||
# tar cf `basename ${PWD}`.tar *
|
||||
|
||||
# Generate the source documentation by running doxygen on ./Doxyfile.
# Declared phony so that an existing file or directory named "doc" cannot
# make the target look up to date.
.PHONY: doc
doc:
	doxygen Doxyfile
|
||||
|
||||
#########################################################################
|
||||
|
||||
# Compile rules: one object file is built from the source file with the same
# stem. $@ is the object file, $< the source file.

# C++ sources
%.o: %.cpp
	$(CXX) -c $(CXXFLAGS) -o $@ $<

# C sources
%.o: %.c
	$(CC) -c $(CFLAGS) -o $@ $<

# Fortran sources (.f)
%.o: %.f
	$(F77) -c $(FFLAGS) -o $@ $<
|
||||
|
||||
##################################################################################################
|
||||
# # some tools
|
||||
# # Simple run time profiling of your code
|
||||
# # CXXFLAGS += -g -pg
|
||||
# # LINKFLAGS += -pg
|
||||
|
||||
|
||||
# Profiling options PGI, see: pgcollect -help
|
||||
# CPU_PROF = -allcache
|
||||
CPU_PROF = -time
|
||||
# GPU_PROF = -cuda=gmem,branch,cc13 -cudainit
|
||||
#GPU_PROF = -cuda=branch:cc20
|
||||
#
|
||||
PROF_FILE = pgprof.out
|
||||
|
||||
# "cache" is an alias for "prof".
cache: prof

# Builds the program if necessary, runs it under pgcollect with the options
# in $(CPU_PROF) and then starts pgprof on $(PROF_FILE) in the background.
# OPTIRUN and BINDIR act as optional prefixes; neither is assigned in the
# visible part of this file (OPTIRUN only in a commented-out line), so they
# expand to nothing unless set elsewhere.
prof: $(PROGRAM)
	$(OPTIRUN) $(BINDIR)pgcollect $(CPU_PROF) ./$<
	$(OPTIRUN) $(BINDIR)pgprof -exe ./$< $(PROF_FILE) &
|
||||
|
||||
# Runs pgaccelinfo with the -v option.
info:
	pgaccelinfo -v
|
||||
2563
ex1/D/skalar_stl/Doxyfile
Normal file
2563
ex1/D/skalar_stl/Doxyfile
Normal file
File diff suppressed because it is too large
Load diff
30
ex1/D/skalar_stl/Makefile
Normal file
30
ex1/D/skalar_stl/Makefile
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
#
# Compiler selection. Possible values:
#   GCC_   GNU compiler tools (default)
#   ICC_   Intel compilers
#   PGI_   PGI compilers
#
# '?=' assigns only if COMPILER has no value yet, so the choice can also be
# made from the shell
#   export COMPILER=ICC_
# or on the command line
#   make COMPILER=ICC_
COMPILER ?= GCC_

SOURCES = main.cpp mylib.cpp
OBJECTS = $(SOURCES:.cpp=.o)

PROGRAM = main.${COMPILER}

# Debug information for debugging and detailed performance analysis;
# comment out the next two lines to build without it.
CXXFLAGS += -g
LINKFLAGS += -g
# do not use -pg with PGI compilers

include ../${COMPILER}default.mk

# main.cpp and mylib.cpp both include mylib.h: recompile them when it changes.
# This rule must stay below the include so that it does not become the
# default goal of the makefile.
$(OBJECTS): mylib.h
|
||||
BIN
ex1/D/skalar_stl/main.GCC_
Executable file
BIN
ex1/D/skalar_stl/main.GCC_
Executable file
Binary file not shown.
124
ex1/D/skalar_stl/main.cpp
Normal file
124
ex1/D/skalar_stl/main.cpp
Normal file
|
|
@ -0,0 +1,124 @@
|
|||
#include "mylib.h"
|
||||
#include <cassert>
|
||||
#include <chrono> // timing
|
||||
#include <cmath> // sqrt()
|
||||
#include <cstdlib> // atoi()
|
||||
#include <cstring> // strncmp()
|
||||
#include <ctime>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
using namespace std;
|
||||
using namespace std::chrono; // timing
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int const NLOOPS = 50; // chose a value such that the benchmark runs at least 10 sec.
|
||||
unsigned int N = 50000001;
|
||||
//##########################################################################
|
||||
// Read Paramater from command line (C++ style)
|
||||
cout << "Checking command line parameters for: -n <number> " << endl;
|
||||
for (int i = 1; i < argc; i++)
|
||||
{
|
||||
cout << " arg[" << i << "] = " << argv[i] << endl;
|
||||
if (std::strncmp(argv[i], "-n", 2) == 0 && i + 1 < argc) // found "-n" followed by another parameter
|
||||
{
|
||||
N = static_cast<unsigned int>(atoi(argv[i + 1]));
|
||||
}
|
||||
else
|
||||
{
|
||||
cout << "Corect call: " << argv[0] << " -n <number>\n";
|
||||
}
|
||||
}
|
||||
|
||||
cout << "\nN = " << N << endl;
|
||||
|
||||
//##########################################################################
|
||||
// Memory allocation
|
||||
cout << "Memory allocation\n";
|
||||
|
||||
vector<double> x(N), y(N);
|
||||
|
||||
cout.precision(2);
|
||||
cout << 2.0 * N *sizeof(x[0]) / 1024 / 1024 / 1024 << " GByte Memory allocated\n";
|
||||
cout.precision(6);
|
||||
|
||||
//##########################################################################
|
||||
// Data initialization
|
||||
// Special: x_i = i+1; y_i = 1/x_i ==> <x,y> == N
|
||||
for (unsigned int i = 0; i < N; ++i)
|
||||
{
|
||||
x[i] = i + 1;
|
||||
y[i] = 1.0 / pow(x[i], 2);
|
||||
}
|
||||
|
||||
//##########################################################################
|
||||
cout << "\nStart Benchmarking Normal sum\n";
|
||||
|
||||
// Do calculation
|
||||
auto t1 = system_clock::now(); // start timer
|
||||
double sk1(0.0),ss(0.0);
|
||||
for (int i = 0; i < NLOOPS; ++i)
|
||||
{
|
||||
sk1 = normal_sum(y);
|
||||
ss += sk1; // prevents the optimizer from removing unused calculation results.
|
||||
}
|
||||
|
||||
auto t2 = system_clock::now(); // stop timer
|
||||
auto duration = duration_cast<microseconds>(t2 - t1); // duration in microseconds
|
||||
double t_diff = static_cast<double>(duration.count()) / 1e6; // overall duration in seconds
|
||||
t_diff = t_diff/NLOOPS;
|
||||
|
||||
|
||||
// Print result
|
||||
printf("\nSum = %.16f\n", sk1);
|
||||
|
||||
//##########################################################################
|
||||
|
||||
// Timings and Performance
|
||||
cout << endl;
|
||||
cout.precision(2);
|
||||
cout << "Timing in sec. : " << t_diff << endl;
|
||||
cout << "GFLOPS : " << 2.0 * N / t_diff / 1024 / 1024 / 1024 << endl;
|
||||
cout << "GiByte/s : " << 2.0 * N / t_diff / 1024 / 1024 / 1024 * sizeof(x[0]) << endl;
|
||||
|
||||
//##########################################################################
|
||||
|
||||
cout << "\nStart Benchmarking Kahan summation\n";
|
||||
|
||||
// Do calculation
|
||||
t1 = system_clock::now(); // start timer
|
||||
double sk2(0.0),sss(0.0);
|
||||
for (int i = 0; i < NLOOPS; ++i)
|
||||
{
|
||||
sk2 = Kahan_skalar(y);
|
||||
sss += sk2; // prevents the optimizer from removing unused calculation results.
|
||||
}
|
||||
|
||||
t2 = system_clock::now(); // stop timer
|
||||
duration = duration_cast<microseconds>(t2 - t1); // duration in microseconds
|
||||
t_diff = static_cast<double>(duration.count()) / 1e6; // overall duration in seconds
|
||||
t_diff = t_diff/NLOOPS; // duration per loop seconds
|
||||
// duration per loop seconds
|
||||
|
||||
// Print result
|
||||
printf("\nSum = %.16f\n", sk2);
|
||||
|
||||
|
||||
//##########################################################################
|
||||
|
||||
// Timings and Performance
|
||||
cout << endl;
|
||||
cout.precision(2);
|
||||
cout << "Timing in sec. : " << t_diff << endl;
|
||||
cout << "GFLOPS : " << 2.0 * N / t_diff / 1024 / 1024 / 1024 << endl;
|
||||
cout << "GiByte/s : " << 2.0 * N / t_diff / 1024 / 1024 / 1024 * sizeof(x[0]) << endl;
|
||||
|
||||
//##########################################################################
|
||||
|
||||
// Print limit
|
||||
printf("\nLimit = %.16f\n\n", pow(M_PI,2) / 6.0f);
|
||||
|
||||
//##########################################################################
|
||||
|
||||
return 0;
|
||||
} // memory for x and y will be deallocated by their destructors
|
||||
BIN
ex1/D/skalar_stl/main.o
Normal file
BIN
ex1/D/skalar_stl/main.o
Normal file
Binary file not shown.
29
ex1/D/skalar_stl/mylib.cpp
Normal file
29
ex1/D/skalar_stl/mylib.cpp
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
#include "mylib.h"
|
||||
#include <cassert> // assert()
|
||||
#include <cmath>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
/**
 * Sums the entries of a vector with Kahan (compensated) summation.
 *
 * Besides the running sum, the rounding error of each addition is tracked
 * and subtracted from the next entry before it is added.
 *
 * @param[in] input  values to be summed up
 * @return sum of all entries, accumulated in long double; 0 for an empty vector
 */
long double Kahan_skalar(std::vector<double> const &input)
{
    long double sum = 0.0L;
    long double compensation = 0.0L;   // rounding error of the previous addition

    for (double const value : input)
    {
        long double const corrected = value - compensation;   // entry minus the error made so far
        long double const next = sum + corrected;             // low-order bits of corrected may get lost here
        compensation = (next - sum) - corrected;              // recovers exactly what was lost (with sign)
        sum = next;
    }
    return sum;
}
|
||||
|
||||
/**
 * Sums the entries of a vector by plain left-to-right accumulation.
 *
 * @param[in] input  values to be summed up
 * @return sum of all entries, accumulated in long double; 0 for an empty vector
 */
long double normal_sum(std::vector<double> const &input)
{
    long double sum = 0.0L;
    for (double const value : input)
    {
        sum += value;
    }
    return sum;
}
|
||||
6
ex1/D/skalar_stl/mylib.h
Normal file
6
ex1/D/skalar_stl/mylib.h
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
#pragma once
|
||||
#include <vector>
|
||||
|
||||
/**
 * Sums the entries of @p input with Kahan (compensated) summation.
 *
 * @param[in] input  values to be summed up
 * @return sum of all entries, accumulated in long double
 */
long double Kahan_skalar(std::vector<double> const &input);
|
||||
|
||||
/**
 * Sums the entries of @p input by plain left-to-right accumulation.
 *
 * @param[in] input  values to be summed up
 * @return sum of all entries, accumulated in long double
 */
long double normal_sum(std::vector<double> const &input);
|
||||
BIN
ex1/D/skalar_stl/mylib.o
Normal file
BIN
ex1/D/skalar_stl/mylib.o
Normal file
Binary file not shown.
1826
ex1/D/skalar_stl/small_Doxyfile
Normal file
1826
ex1/D/skalar_stl/small_Doxyfile
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue