added subdomain support in Mesh class, CalculateLaplaceMult implementation

This commit is contained in:
jakob.schratter 2026-01-22 17:52:23 +01:00
commit 2e887c04bc
13 changed files with 4336 additions and 69057 deletions

131
CLANG_default.mk Normal file
View file

@ -0,0 +1,131 @@
# Basic definitions for using the Clang/LLVM compiler suite (OpenMP is enabled below)
# requires setting of COMPILER=CLANG_
# https://llvm.org/docs/CompileCudaWithLLVM.html
# https://llvm.org/docs/NVPTXUsage.html
#CLANGPATH=//usr/lib/llvm-10/bin/
CC = ${CLANGPATH}clang
CXX = ${CLANGPATH}clang++
#CXX = ${CLANGPATH}clang++ -lomptarget -fopenmp-targets=nvptx64-nvidia-cuda --cuda-path=/opt/pgi/linux86-64/2017/cuda/8.0
#F77 = gfortran
LINKER = ${CXX}
#http://clang.llvm.org/docs/UsersManual.html#options-to-control-error-and-warning-messages
WARNINGS += -pedantic -Weverything -Wdocumentation -Wconversion -Wshadow -Wfloat-conversion
WARNINGS += -Wno-c++98-compat -Wno-sign-conversion -Wno-date-time -Wno-shorten-64-to-32 -Wno-padded -ferror-limit=1
WARNINGS += -Wno-unsafe-buffer-usage
#-fsyntax-only -Wdocumentation -Wconversion -Wshadow -Wfloat-conversion -pedantic
CXXFLAGS += -O3 -std=c++17 -ferror-limit=1 ${WARNINGS}
# don't use -Ofast
# -ftrapv
LINKFLAGS += -O3
# different libraries in Ubuntu or Manjaro
ifndef UBUNTU
UBUNTU=1
endif
# BLAS, LAPACK
LINKFLAGS += -llapack -lblas
# -lopenblas
ifeq ($(UBUNTU),1)
# ubuntu
else
# on archlinux
LINKFLAGS += -lcblas
endif
# interprocedural optimization
CXXFLAGS += -flto
LINKFLAGS += -flto
#sudo apt install libomp-dev
# OpenMP
CXXFLAGS += -fopenmp
LINKFLAGS += -fopenmp
# very good check
# http://clang.llvm.org/extra/clang-tidy/
# good check, see: http://llvm.org/docs/CodingStandards.html#include-style
SWITCH_OFF=,-readability-magic-numbers,-readability-redundant-control-flow,-readability-redundant-member-init
SWITCH_OFF+=,-readability-redundant-member-init,-readability-isolate-declaration
#READABILITY=,readability*${SWITCH_OFF}
#TIDYFLAGS = -checks=llvm-*,-llvm-header-guard -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp"
TIDYFLAGS = -checks=llvm-*,-llvm-header-guard${READABILITY} -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp"
#TIDYFLAGS += -checks='modernize*
# ???
#TIDYFLAGS = -checks='cert*' -header-filter=.*
# MPI checks ??
#TIDYFLAGS = -checks='mpi*'
# ??
#TIDYFLAGS = -checks='performance*' -header-filter=.*
#TIDYFLAGS = -checks='portability-*' -header-filter=.*
#TIDYFLAGS = -checks='readability-*' -header-filter=.*
default: ${PROGRAM}
${PROGRAM}: ${OBJECTS}
$(LINKER) $^ ${LINKFLAGS} -o $@
clean:
@rm -f ${PROGRAM} ${OBJECTS}
clean_all:: clean
@rm -f *_ *~ *.bak *.log *.out *.tar
codecheck: tidy_check
tidy_check:
clang-tidy ${SOURCES} ${TIDYFLAGS} -- ${SOURCES}
# see also http://clang-developers.42468.n3.nabble.com/Error-while-trying-to-load-a-compilation-database-td4049722.html
run: clean ${PROGRAM}
# time ./${PROGRAM} ${PARAMS}
./${PROGRAM} ${PARAMS}
# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *
doc:
doxygen Doxyfile
#########################################################################
.cpp.o:
$(CXX) -c $(CXXFLAGS) -o $@ $<
.c.o:
$(CC) -c $(CFLAGS) -o $@ $<
.f.o:
$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# some tools
# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags)
cache: ${PROGRAM}
valgrind --tool=callgrind --simulate-cache=yes ./$^ ${PARAMS}
# kcachegrind callgrind.out.<pid> &
kcachegrind `ls -1tr callgrind.out.* |tail -1`
# Check for wrong memory accesses, memory leaks, ...
# use smaller data sets
mem: ${PROGRAM}
valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^ ${PARAMS}
# Simple run time profiling of your code
# CXXFLAGS += -g -pg
# LINKFLAGS += -pg
prof: ${PROGRAM}
perf record ./$^ ${PARAMS}
perf report
# gprof -b ./$^ > gp.out
# kprof -f gp.out -p gprof &
codecheck: tidy_check

212
GCCMKL_default.mk Normal file
View file

@ -0,0 +1,212 @@
# Basic definitions for using the GNU compiler suite with OpenMP and MKL
# requires setting of COMPILER=GCCMKL_
# install MKL in manjaro
# https://linux-packages.com/manjaro-linux/package/intel-mkl
# > sudo pacman -Sy
# > sudo pacman -S intel-mkl
ifeq ($(ONEAPI),1)
MKL_INCLUDE=/opt/intel/oneapi/mkl/2024.0/include
MKL_LIB=/opt/intel/oneapi/2024.0/lib
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/intel/oneapi/2024.0/lib
else
MKL_INCLUDE=/usr/include/mkl
MKL_LIB=/usr/lib/x86_64-linux-gnu/mkl
endif
CC = gcc
CXX = g++
F77 = gfortran
LINKER = ${CXX}
WARNINGS = -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow \
-Wredundant-decls -Winline -fmax-errors=1
# -Wunreachable-code
CXXFLAGS += -ffast-math -O3 -march=native -std=c++17 ${WARNINGS}
#CXXFLAGS += -Ofast -funroll-all-loops -std=c++17 ${WARNINGS}
#-msse3
# -ftree-vectorizer-verbose=2 -DNDEBUG
# -ftree-vectorizer-verbose=5
# -ftree-vectorize -fdump-tree-vect-blocks=foo.dump -fdump-tree-pre=stderr
# CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details
# CFLAGS = -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2
# #CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
# FFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
# LFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
LINKFLAGS += -O3
#architecture
CPU = -march=znver2
#CPU = -march=core-avx2
CXXFLAGS += ${CPU}
LINKFLAGS += ${CPU}
# MKL
#CXXFLAGS += -I/usr/include/mkl -DUSE_MKL -Wno-redundant-decls
CXXFLAGS += -I${MKL_INCLUDE} -DUSE_MKL -Wno-redundant-decls
#LINKFLAGS += -lmkl_intel_lp64 -lmkl_tbb_thread -ltbb -lmkl_core
#LINKFLAGS += -lmkl_intel_lp64 -lmkl_gnu_thread -lmkl_core -L/usr/lib/x86_64-linux-gnu/mkl
LINKFLAGS += -L${MKL_LIB} -lmkl_intel_lp64 -lmkl_gnu_thread -lmkl_core
#LINKFLAGS += -lmkl_intel_lp64 -lmkl_sequential -lmkl_core
# workaround for MKL slow down on AMD hardware
# https://danieldk.eu/Posts/2020-08-31-MKL-Zen.html
default: run
libfakeintel.so:
gcc -shared -fPIC -o libfakeintel.so fakeintel.c
echo "call: export LD_PRELOAD=./libfakeintel.so "
# different libraries in Ubuntu or Manjaro
#ifndef UBUNTU
#UBUNTU=1
#endif
## BLAS, LAPACK
#ifeq ($(UBUNTU),1)
#LINKFLAGS += -llapack -lblas
## -lopenblas
#else
## on archlinux
#LINKFLAGS += -llapack -lopenblas -lcblas
#endif
# interprocedural optimization
CXXFLAGS += -flto
LINKFLAGS += -flto
# for debugging purpose (save code)
# -fsanitize=leak # only one out the three can be used
# -fsanitize=address
# -fsanitize=thread
SANITARY = -fsanitize=address -fsanitize=undefined -fsanitize=null -fsanitize=return \
-fsanitize=bounds -fsanitize=alignment -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow \
-fsanitize=bool -fsanitize=enum -fsanitize=vptr
#CXXFLAGS += ${SANITARY}
#LINKFLAGS += ${SANITARY}
# OpenMP
CXXFLAGS += -fopenmp
LINKFLAGS += -fopenmp
default: ${PROGRAM}
${PROGRAM}: ${OBJECTS}
$(LINKER) $^ ${LINKFLAGS} -o $@
clean:
@rm -f ${PROGRAM} ${OBJECTS}
clean_all:: clean
-@rm -f *_ *~ *.bak *.log *.out *.tar *.orig *.optrpt
-@rm -rf html
run: clean ${PROGRAM} libfakeintel.so
#run: ${PROGRAM}
# time ./${PROGRAM} ${PARAMS}
./${PROGRAM} ${PARAMS}
# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *
#find . -size +10M > large_files
#--exclude-from ${MY_DIR}/large_files
zip: clean
@echo "Zip the directory: " ${MY_DIR}
@cd .. ;\
zip -r ${MY_DIR}.zip ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
doc:
doxygen Doxyfile
#########################################################################
.SUFFIXES: .f90
.cpp.o:
$(CXX) -c $(CXXFLAGS) -o $@ $<
# $(CXX) -c $(CXXFLAGS) -o $@ $< 2>&1 | tee -a $<.log
# $(CXX) -c $(CXXFLAGS) -o $@ $< 2>&1 | tee -a $(<:.cpp=.log)
.c.o:
$(CC) -c $(CFLAGS) -o $@ $<
.f.o:
$(F77) -c $(FFLAGS) -o $@ $<
.f90.o:
$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# some tools
# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags)
cache: ${PROGRAM}
valgrind --tool=callgrind --simulate-cache=yes ./$^ ${PARAMS}
# kcachegrind callgrind.out.<pid> &
kcachegrind `ls -1tr callgrind.out.* |tail -1`
# Check for wrong memory accesses, memory leaks, ...
# use smaller data sets
# no "-pg" in compile/link options
mem: ${PROGRAM}
valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^ ${PARAMS}
# Graphical interface
# valkyrie
# Simple run time profiling of your code
# CXXFLAGS += -g -pg
# LINKFLAGS += -pg
prof: ${PROGRAM}
perf record ./$^ ${PARAMS}
perf report
# gprof -b ./$^ > gp.out
# kprof -f gp.out -p gprof &
# perf in Ubuntu 20.04: https://www.howtoforge.com/how-to-install-perf-performance-analysis-tool-on-ubuntu-20-04/
# * install
# * sudo vi /etc/sysctl.conf
# add kernel.perf_event_paranoid = 0
#Trace your heap:
#> heaptrack ./main.GCC_
#> heaptrack_gui heaptrack.main.GCC_.<pid>.gz
heap: ${PROGRAM}
heaptrack ./$^ ${PARAMS} 11
heaptrack_gui `ls -1tr heaptrack.$^.* |tail -1` &
codecheck: $(SOURCES)
cppcheck --enable=all --inconclusive --std=c++17 --suppress=missingIncludeSystem $^
########################################################################
# get the detailed status of all optimization flags
info:
echo "detailed status of all optimization flags"
$(CXX) --version
$(CXX) -Q $(CXXFLAGS) --help=optimizers
lscpu
inxi -C
lstopo
# Excellent hardware info
# hardinfo
# Live monitoring of CPU frequency etc.
# sudo i7z
# Memory consumption
# vmstat -at -SM 3
# xfce4-taskmanager
# https://www.tecmint.com/check-linux-cpu-information/
#https://www.tecmint.com/monitor-cpu-and-gpu-temperature-in-ubuntu/
# Debugging:
# https://wiki.archlinux.org/index.php/Debugging

111
GCC_SINGLE_default.mk Normal file
View file

@ -0,0 +1,111 @@
# Basic definitions for using the GNU compiler suite sequentially (no OpenMP compile flag)
# requires setting of COMPILER=GCC_SINGLE_
CC = gcc
CXX = g++
F77 = gfortran
LINKER = ${CXX}
# on mephisto:
#CXXFLAGS += -I/share/apps/atlas/include
#LINKFLAGS += -L/share/apps/atlas/lib -L/usr/lib64/atlas
#LINKFLAGS += -latlas -lcblas
#LINKFLAGS += -lblas
# The <cblas.h> header must be wrapped in extern "C" so that g++ finds all symbols.
WARNINGS = -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow \
-Wredundant-decls -Winline -fmax-errors=1
# -Wunreachable-code
#CXXFLAGS += -std=c++17 -ffast-math -O3 -march=native -DNDEBUG ${WARNINGS}
CXXFLAGS += -std=c++17 -ffast-math -O3 -march=native ${WARNINGS}
# info on vectorization
#VECTORIZE = -ftree-vectorize -fdump-tree-vect-blocks=foo.dump
#-fdump-tree-pre=stderr
VECTORIZE = -ftree-vectorize -fopt-info -ftree-vectorizer-verbose=5
#CXXFLAGS += ${VECTORIZE}
# -funroll-all-loops -msse3
#GCC -march=knl -march=broadwell -march=haswell
# interprocedural optimization
#CXXFLAGS += -flto
LINKFLAGS += -flto
# for debugging purposes (safe code)
# -fsanitize=leak # only one out of the three can be used
# -fsanitize=address
# -fsanitize=thread
SANITARY = -fsanitize=address -fsanitize=undefined -fsanitize=null -fsanitize=return \
-fsanitize=bounds -fsanitize=alignment -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow \
-fsanitize=bool -fsanitize=enum -fsanitize=vptr
#CXXFLAGS += ${SANITARY}
#LINKFLAGS +=${SANITARY}
# OpenMP but no OpenMP in Single mode
#CXXFLAGS += -fopenmp
LINKFLAGS += -fopenmp
default: ${PROGRAM}
${PROGRAM}: ${OBJECTS}
$(LINKER) $^ ${LINKFLAGS} -o $@
clean:
@rm -f ${PROGRAM} ${OBJECTS}
clean_all:: clean
@rm -f *_ *~ *.bak *.log *.out *.tar *.orig *.optrpt
@rm -rf html
run: clean ${PROGRAM}
# time ./${PROGRAM}
./${PROGRAM}
# tar the current directory
MY_DIR = `basename ${PWD}`
tar:
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *
doc:
doxygen Doxyfile
#########################################################################
.cpp.o:
$(CXX) -c $(CXXFLAGS) -o $@ $<
.c.o:
$(CC) -c $(CFLAGS) -o $@ $<
.f.o:
$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# some tools
# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags)
cache: ${PROGRAM}
valgrind --tool=callgrind --simulate-cache=yes ./$^
# kcachegrind callgrind.out.<pid> &
kcachegrind `ls -1tr callgrind.out.* |tail -1`
# Check for wrong memory accesses, memory leaks, ...
# use smaller data sets
mem: ${PROGRAM}
valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
thread:${PROGRAM}
valgrind -v --tool=helgrind --log-file=$^.thread.out ./$^
# Simple run time profiling of your code
# CXXFLAGS += -g -pg
# LINKFLAGS += -pg
prof: ${PROGRAM}
./$^
gprof -b ./$^ > gp.out
# kprof -f gp.out -p gprof &

182
GCC_default.mk Normal file
View file

@ -0,0 +1,182 @@
# Basic definitions for using the GNU compiler suite with OpenMP
# requires setting of COMPILER=GCC_
CC = gcc
CXX = g++
F77 = gfortran
LINKER = ${CXX}
WARNINGS = -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow \
-Wredundant-decls -Winline -fmax-errors=1
# -Wunreachable-code
CXXFLAGS += -ffast-math -O3 -march=native -std=c++20 ${WARNINGS}
#CXXFLAGS += -Ofast -funroll-all-loops -std=c++17 ${WARNINGS}
#-msse3
# -ftree-vectorizer-verbose=2 -DNDEBUG
# -ftree-vectorizer-verbose=5
# -ftree-vectorize -fdump-tree-vect-blocks=foo.dump -fdump-tree-pre=stderr
# CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details
# CFLAGS = -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2
# #CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
# FFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
# LFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
LINKFLAGS += -O3
#architecture
#CPU = -march=znver2
CXXFLAGS += ${CPU}
LINKFLAGS += ${CPU}
# different libraries in Ubuntu or Manjaro
ifndef UBUNTU
UBUNTU=1
endif
# BLAS, LAPACK
ifeq ($(UBUNTU),1)
LINKFLAGS += -llapack -lblas
# -lopenblas
else
# on archlinux
LINKFLAGS += -llapack -lopenblas -lcblas
endif
# interprocedural optimization
CXXFLAGS += -flto
LINKFLAGS += -flto
# for debugging purpose (save code)
# -fsanitize=leak # only one out the three can be used
# -fsanitize=address
# -fsanitize=thread
SANITARY = -fsanitize=address -fsanitize=undefined -fsanitize=null -fsanitize=return \
-fsanitize=bounds -fsanitize=alignment -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow \
-fsanitize=bool -fsanitize=enum -fsanitize=vptr
#CXXFLAGS += ${SANITARY}
#LINKFLAGS += ${SANITARY}
# OpenMP
CXXFLAGS += -fopenmp
LINKFLAGS += -fopenmp
default: ${PROGRAM}
${PROGRAM}: ${OBJECTS}
$(LINKER) $^ ${LINKFLAGS} -o $@
clean:
@rm -f ${PROGRAM} ${OBJECTS}
clean_all:: clean
-@rm -f *_ *~ *.bak *.log *.out *.tar *.orig *.optrpt
-@rm -rf html
run: clean ${PROGRAM}
#run: ${PROGRAM}
# time ./${PROGRAM} ${PARAMS}
./${PROGRAM} ${PARAMS}
# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *
#find . -size +10M > large_files
#--exclude-from ${MY_DIR}/large_files
zip: clean
@echo "Zip the directory: " ${MY_DIR}
@cd .. ;\
zip -r ${MY_DIR}.zip ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
doc:
doxygen Doxyfile
#########################################################################
.SUFFIXES: .f90
.cpp.o:
$(CXX) -c $(CXXFLAGS) -o $@ $<
# $(CXX) -c $(CXXFLAGS) -o $@ $< 2>&1 | tee -a $<.log
# $(CXX) -c $(CXXFLAGS) -o $@ $< 2>&1 | tee -a $(<:.cpp=.log)
.c.o:
$(CC) -c $(CFLAGS) -o $@ $<
.f.o:
$(F77) -c $(FFLAGS) -o $@ $<
.f90.o:
$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# some tools
# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags)
cache: ${PROGRAM}
valgrind --tool=callgrind --simulate-cache=yes ./$^ ${PARAMS}
# kcachegrind callgrind.out.<pid> &
kcachegrind `ls -1tr callgrind.out.* |tail -1`
# Check for wrong memory accesses, memory leaks, ...
# use smaller data sets
# no "-pg" in compile/link options
mem: ${PROGRAM}
valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^ ${PARAMS}
# Graphical interface
# valkyrie
# Simple run time profiling of your code
# CXXFLAGS += -g -pg
# LINKFLAGS += -pg
prof: ${PROGRAM}
perf record ./$^ ${PARAMS}
perf report
# gprof -b ./$^ > gp.out
# kprof -f gp.out -p gprof &
# perf in Ubuntu 20.04: https://www.howtoforge.com/how-to-install-perf-performance-analysis-tool-on-ubuntu-20-04/
# * install
# * sudo vi /etc/sysctl.conf
# add kernel.perf_event_paranoid = 0
#Trace your heap:
#> heaptrack ./main.GCC_
#> heaptrack_gui heaptrack.main.GCC_.<pid>.gz
heap: ${PROGRAM}
heaptrack ./$^ ${PARAMS} 11
heaptrack_gui `ls -1tr heaptrack.$^.* |tail -1` &
# Static analysis of all sources with cppcheck.
# The include path is only emitted when CUDA_INC is set: with an empty CUDA_INC
# the former "-I${CUDA_INC}" left a dangling "-I" without a path on the command line.
codecheck: $(SOURCES)
	cppcheck --enable=all --inconclusive --std=c++17 $(addprefix -I,${CUDA_INC}) --suppress=missingIncludeSystem $^
########################################################################
# get the detailed status of all optimization flags
info:
echo "detailed status of all optimization flags"
$(CXX) --version
$(CXX) -Q $(CXXFLAGS) --help=optimizers
lscpu
inxi -C
lstopo
# Excellent hardware info
# hardinfo
# Live monitoring of CPU frequency etc.
# sudo i7z
# Memory consumption
# vmstat -at -SM 3
# xfce4-taskmanager
# https://www.tecmint.com/check-linux-cpu-information/
#https://www.tecmint.com/monitor-cpu-and-gpu-temperature-in-ubuntu/
# Debugging:
# https://wiki.archlinux.org/index.php/Debugging

38
Makefile Normal file
View file

@ -0,0 +1,38 @@
#DIRS=skalar skalar_stl jacobi generate_mesh jacobi_oo_stl
DIRS=skalar_stl jacobi_oo_stl mgrid demo_skalar
#
#WWW_ROOT=${HOME}/public_html/Lectures/Math2CPP/Codes/shm
WWW_ROOT=../../html/Codes/shm
# Recursive targets: run the corresponding target in every directory of DIRS.
# $(MAKE) -C is used instead of "cd dir; make ...; cd .." so that a missing
# directory cannot make the command run in (or climb out of) the wrong place,
# and so that command-line flags are forwarded to the sub-make.
.PHONY: clean doc tar zip
clean:
	@for i in ${DIRS}; do $(MAKE) -C $${i} clean_all; done
#	rm *.tar
doc:
	@for i in ${DIRS}; do $(MAKE) -C $${i} doc; done
tar:
	@for i in ${DIRS}; do $(MAKE) -C $${i} tar; done
zip:
	@for i in ${DIRS}; do $(MAKE) -C $${i} zip; done
www: clean tar zip doc
mkdir -p ${WWW_ROOT}
cp -up *_default.mk ${WWW_ROOT}
@for i in ${DIRS};\
do \
mv $${i}.tar $${i}.zip ${WWW_ROOT}; \
cp -r $${i} ${WWW_ROOT}; \
done
# delete large files
@rm -f ${WWW_ROOT}/mgrid/level_11_*.txt
# @for i in ${DIRS};\
# do \
# tar -czf $${i}.tgz $${i} *default*.mk; \
# mv $${i}.tgz ${WWW_ROOT}; \
# cp -r $${i} ${WWW_ROOT}; \
# find $${i} -name html -exec cp -r {} ${WWW_ROOT}/$${i} \; ; done

1021
generate_mesh/coffee_cup.txt Normal file

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -236,6 +236,7 @@ void Mesh::Export_scicomp(std::string const &basename) const
return; return;
} }
// subject to permutation: // subject to permutation:
// re-sort: _xc // re-sort: _xc
// _xc[2*k_new], _xc[2*k_new+1] with k_new = po2n[k] via old(_xc); // _xc[2*k_new], _xc[2*k_new+1] with k_new = po2n[k] via old(_xc);
@ -283,9 +284,10 @@ void Mesh::Visualize(vector<double> const &v) const
void Mesh::Visualize_matlab(vector<double> const &v) const void Mesh::Visualize_matlab(vector<double> const &v) const
{ {
// define external command // define external command
const string exec_m("matlab -nosplash < visualize_results.m"); // Matlab //const string exec_m("matlab -nosplash < visualize_results.m"); // Matlab
//const string exec_m("octave --no-window-system --no-gui visualize_results.m"); // Octave //const string exec_m("octave --no-window-system --no-gui visualize_results.m"); // Octave
//const string exec_m("flatpak run org.octave.Octave visualize_results.m"); // Octave (flatpak): desktop GH //const string exec_m("flatpak run org.octave.Octave visualize_results.m"); // Octave (flatpak): desktop GH
const string exec_m("octave visualize_results.m");
const string fname("uv.txt"); const string fname("uv.txt");
Write_ascii_matlab(fname, v); Write_ascii_matlab(fname, v);
@ -956,6 +958,45 @@ Mesh::Mesh(std::string const &fname)
//cout << " P E R M U T E D !" << endl; //cout << " P E R M U T E D !" << endl;
} }
/**
 * Constructs the mesh via the single-argument constructor and afterwards reads
 * the element-to-subdomain mapping from @p subdomain_filename.
 *
 * The mapping is stored in the class member ElementSubdomains. The former
 * file-scope variable of the same name was never used (inside the constructor
 * the name resolves to the member) and has been removed.
 *
 * @param[in] filename            mesh file name
 * @param[in] subdomain_filename  subdomain file name
 */
Mesh::Mesh(std::string const &filename, std::string const &subdomain_filename) : Mesh(filename)
{
    // Must run after the delegated constructor: ReadElementSubdomains()
    // compares the element count in the file against Nelems().
    ElementSubdomains = ReadElementSubdomains(subdomain_filename);
}
/**
 * Reads the element-to-subdomain mapping from the ASCII file @p dname.
 *
 * As consumed below, the file holds the number of elements followed by one
 * 1-based (Matlab) subdomain index per element. The indices are converted to
 * 0-based values. The element count in the file has to match Nelems().
 *
 * @param[in] dname file name
 * @return    0-based subdomain index for each element; entries that could not
 *            be read keep the initial value -1
 */
const vector<int> Mesh::ReadElementSubdomains(string const &dname) const
{
    ifstream ifs(dname);
    if (!(ifs.is_open() && ifs.good())) {
        cerr << "Mesh::ReadElementSubdomains: Error cannot open file " << dname << endl;
        assert(ifs.is_open());
    }

    int const OFFSET{1};             // Matlab to C indexing
    cout << "ASCI file " << dname << " opened" << endl;

    // Read the number of elements. Initialised so that a failed read on an
    // already broken stream never leaves an indeterminate value behind.
    int nelem{0};
    ifs >> nelem;
    bool const header_ok = static_cast<bool>(ifs) && nelem >= 0;
    if (!header_ok) {
        cerr << "Mesh::ReadElementSubdomains: Error cannot read number of elements from " << dname << endl;
        assert(header_ok);
        nelem = 0;                   // release builds: continue with an empty mapping
    }
    cout << nelem << " " << Nelems() << endl;
    assert( Nelems() == nelem);

    // Allocate memory
    vector<int> t2d(nelem, -1);

    // Read element mapping
    for (int k = 0; k < nelem; ++k) {
        int tmp{0};
        bool const entry_ok = static_cast<bool>(ifs >> tmp);
        if (!entry_ok) {
            cerr << "Mesh::ReadElementSubdomains: Error reading entry " << k << " of " << nelem
                 << " from " << dname << endl;
            assert(entry_ok);
            break;                   // remaining entries stay at -1
        }
        t2d[k] = tmp - OFFSET;
    }

    return t2d;
}
void Mesh::ReadVertexBasedMesh(std::string const &fname) void Mesh::ReadVertexBasedMesh(std::string const &fname)
{ {
ifstream ifs(fname); ifstream ifs(fname);

View file

@ -45,6 +45,16 @@ public:
*/ */
explicit Mesh(std::string const &fname); explicit Mesh(std::string const &fname);
/**
* Reads mesh data plus element-to-subdomain data from ASCII files.
*
* File format, see ascii_write_mesh.m
*
* @param[in] filename file name
* @param[in] subdomain_filename subdomain file name
*/
explicit Mesh(std::string const &filename, std::string const &subdomain_filename);
/** /**
* Reads mesh data from a binary file. * Reads mesh data from a binary file.
* *
@ -63,6 +73,8 @@ public:
return _nelem; return _nelem;
} }
/** /**
* Global number of vertices for each finite element. * Global number of vertices for each finite element.
* @return number of vertices per element. * @return number of vertices per element.
@ -422,6 +434,7 @@ public:
*/ */
void liftToQuadratic(); void liftToQuadratic();
protected: protected:
//public: //public:
void SetNelem(int nelem) void SetNelem(int nelem)
@ -522,7 +535,19 @@ public:
*/ */
[[nodiscard]] bool checkObtuseAngles() const; [[nodiscard]] bool checkObtuseAngles() const;
private:
std::vector<int> ElementSubdomains;
/**
* Reads the global triangle to subdomain mapping.
*
* @param[in] dname file name
*
* @see ascii_write_subdomains.m for the file format
*/
[[nodiscard]] const std::vector<int> ReadElementSubdomains(std::string const &dname) const;
/** /**
* Calculates the largest inner angle in element @p idx. * Calculates the largest inner angle in element @p idx.
* *

View file

@ -383,6 +383,87 @@ void FEM_Matrix::Derive_Matrix_Pattern_slow()
return; return;
} }
/**
 * Assembles the stiffness matrix of the Laplace operator for a mesh whose
 * elements belong to subdomains with individual conductivities.
 *
 * All matrix values and the first _nrows entries of @p f are reset to zero.
 * Each element matrix is then computed by CalcElemSpecific() with the
 * coefficient returned by Thermal_coefficient() and handed to AddElem_3().
 * No element load is computed here: a zero element load vector is passed on.
 *
 * @param[in,out] f (preallocated) rhs/load vector
 */
void FEM_Matrix::CalculateLaplaceMult(vector<double> &f)
{
    cout << "\n############ FEM_Matrix::CalculateLaplaceMult ";
    double tstart = omp_get_wtime();                  // OpenMP

    assert(_mesh.NdofsElement() == 3);               // only for triangular, linear elements
    //cout << _nnz << " vs. " << _id[_nrows] << " " << _nrows<< endl;
    assert(_nnz == _id[_nrows]);
    assert(static_cast<int>(f.size()) >= _nrows);

    // Reset ALL stored matrix values (there are _nnz of them, not just _nrows)
    for (auto &entry : _sk) {
        entry = 0.0;
    }
    for (int k = 0; k < _nrows; ++k) {
        f[k] = 0.0;
    }

    auto const nelem = _mesh.Nelems();
    auto const &ia = _mesh.GetConnectivity();
    auto const &xc = _mesh.GetCoords();
    // Bind by reference: no need to copy the whole mapping.
    // NOTE(review): ElementSubdomains is declared private in Mesh - confirm FEM_Matrix has access.
    auto const &sd_vec = _mesh.ElementSubdomains;
    // The mapping is only filled by the two-file Mesh constructor; guard against reading past its end.
    assert(static_cast<int>(sd_vec.size()) >= nelem);

    // Loop over all elements
    #pragma omp parallel for
    for (int i = 0; i < nelem; ++i) {
        // Per-iteration scratch storage, hence private to each thread.
        double ske[3][3];
        double fe[3] = {0.0, 0.0, 0.0};               // zero element load instead of uninitialised data
        double const lambda = Thermal_coefficient(sd_vec[i]);
        CalcElemSpecific(ia.data() + 3 * i, xc.data(), lambda, ske);
        //AddElem(ia.data()+3 * i, ske, fe, _id.data(), _ik.data(), _sk.data(), f.data()); // GH: deprecated
        AddElem_3(ia.data() + 3 * i, ske, fe, f);
    }

    double duration = omp_get_wtime() - tstart;       // OpenMP
    cout << "finished in " << duration << " sec. ########\n"; // ToDo: change to systemclock
    //Debug();
}
/**
 * Returns the thermal conductivity used for the given subdomain.
 *
 * @p subdomain is expected 0-based: Mesh::ReadElementSubdomains() already
 * converts the 1-based Matlab indices of the file, so no further offset is
 * applied here (the former extra "- 1" shifted every case by one and made
 * the last material unreachable).
 * All materials currently use the placeholder value 1.0.
 *
 * @param[in] subdomain 0-based subdomain index of an element
 * @return    thermal conductivity; 1.0 for unknown indices
 */
double FEM_Matrix::Thermal_coefficient(const int subdomain)
{
    double lambda = 0.0;

    switch (subdomain)
    {
        // outside
        case 0:
            lambda = 1.0;
            break;

        // ceramic
        case 1:
            lambda = 1.0;
            break;

        // water
        case 2:
            lambda = 1.0;
            break;

        // air
        case 3:
            lambda = 1.0;
            break;

        // unknown or unread (-1) subdomain
        default:
            lambda = 1.0;
            break;
    }

    return lambda;
}
void FEM_Matrix::CalculateLaplace(vector<double> &f) void FEM_Matrix::CalculateLaplace(vector<double> &f)
{ {
@ -686,6 +767,26 @@ void CalcElem(int const ial[3], double const xc[], double ske[3][3], double fe[3
} }
/**
 * Element stiffness matrix of one linear triangle, scaled by the
 * conductivity @p lambda.
 *
 * @param[in]  ial    node indices of the three element vertices
 * @param[in]  xc     node coordinates, node k at xc[2*k], xc[2*k+1]
 * @param[in]  lambda thermal conductivity of the element
 * @param[out] ske    symmetric 3x3 element stiffness matrix
 */
void CalcElemSpecific(int const ial[3], double const xc[], double const lambda, double ske[3][3])
{
    // Coordinates of the three vertices
    double const *const p1 = xc + 2 * ial[0];
    double const *const p2 = xc + 2 * ial[1];
    double const *const p3 = xc + 2 * ial[2];

    // Edge differences in the order (2-3), (3-1), (1-2)
    double const dx[3] = { p2[0] - p3[0], p3[0] - p1[0], p1[0] - p2[0] };
    double const dy[3] = { p2[1] - p3[1], p3[1] - p1[1], p1[1] - p2[1] };

    // Absolute value of the Jacobian determinant
    double const det = fabs(dx[2] * dy[1] - dx[1] * dy[2]);
    double const scale = lambda * 0.5 / det;

    // Fill the upper triangle and mirror it
    for (int r = 0; r < 3; ++r) {
        for (int c = r; c < 3; ++c) {
            double const entry = scale * (dy[c] * dy[r] + dx[c] * dx[r]);
            ske[r][c] = entry;
            ske[c][r] = entry;
        }
    }
}
void CalcElem_RHS(int const ial[3], double const xc[], double fe[3], void CalcElem_RHS(int const ial[3], double const xc[], double fe[3],
const std::function<double(double,double)> &func) const std::function<double(double,double)> &func)
{ {

View file

@ -340,6 +340,19 @@ class FEM_Matrix: public CRS_Matrix
void Derive_Matrix_Pattern_slow(); void Derive_Matrix_Pattern_slow();
/**
* Calculates the entries of f.e. stiffness matrix for the Laplace operator
* for multiple domains with different conductivities
* and load/rhs vector @p f.
* No memory is allocated.
*
* @param[in,out] f (preallocated) rhs/load vector
*/
void CalculateLaplaceMult(std::vector<double> &f);
double Thermal_coefficient(const int subdomain);
/** /**
* Calculates the entries of f.e. stiffness matrix for the Laplace operator * Calculates the entries of f.e. stiffness matrix for the Laplace operator
* and load/rhs vector @p f. * and load/rhs vector @p f.
@ -647,6 +660,17 @@ class BisectIntDirichlet: public BisectInterpolation
*/ */
void CalcElem(int const ial[3], double const xc[], double ske[3][3], double fe[3]); void CalcElem(int const ial[3], double const xc[], double ske[3][3], double fe[3]);
/**
* Calculates the element stiffness matrix @p ske
* of one triangular element with linear shape functions
* for specific thermal conductivity in subdomain
* @param[in] ial node indices of the three element vertices
* @param[in] xc vector of node coordinates with x(2*k,2*k+1) as coordinates of node k
* @param[in] lambda thermal conductivity of element
* @param[out] ske element stiffness matrix
*/
void CalcElemSpecific(int const ial[3], double const xc[], double const lambda, double ske[3][3]);
/** /**
* Calculates the element mass matrix @p ske. * Calculates the element mass matrix @p ske.
* of one triangular element with linear shape functions. * of one triangular element with linear shape functions.

View file

@ -23,9 +23,10 @@ int main(int argc, char **argv )
int nrefine = 0; int nrefine = 0;
if (argc > 1) nrefine = atoi(argv[1]); if (argc > 1) nrefine = atoi(argv[1]);
// generating the mesh
Mesh const mesh_c("../generate_mesh/coffee_cup.txt", "../generate_mesh/coffee_cup_sd.txt");
//Mesh const mesh_c("square_tiny.txt"); //Mesh const mesh_c("square_tiny.txt");
Mesh const mesh_c("square_100.txt");
//Mesh const mesh_c("square.txt");
bool ba = mesh_c.checkObtuseAngles(); bool ba = mesh_c.checkObtuseAngles();
if (ba) cout << "mesh corrected" << endl; if (ba) cout << "mesh corrected" << endl;
@ -34,33 +35,36 @@ int main(int argc, char **argv )
//mesh.Debug(); //mesh.Debug();
//mesh.DebugEdgeBased(); //mesh.DebugEdgeBased();
// Initializing FEM matrix !pattern! (only zero entries now)
FEM_Matrix SK(mesh); // CRS matrix FEM_Matrix SK(mesh); // CRS matrix
//SK.writeBinary("sparseMatrix.bin"); //SK.writeBinary("sparseMatrix.bin");
//SK.Debug(); //SK.Debug();
vector<double> uv(SK.Nrows(), 0.0); // temperature
// Initialize RHS
vector<double> fv(SK.Nrows(), 0.0); // r.h.s. vector<double> fv(SK.Nrows(), 0.0); // r.h.s.
SK.CalculateLaplace(fv); // matrix // Calculate Matrix entries
SK.CalculateRHS(fv, [](double x, double y) { // rhs SK.CalculateLaplaceMult(fv); // matrix
return std::sin(M_PI * 2.5 * y) * (M_PI * M_PI * 2.5 * 2.5 * x * x - 2);
}
);
//SK.CheckRowSum();
SK.CheckMatrix();
//SK.Debug(); //SK.Debug();
// Two ways to initialize the vector // Calculate RHS
//mesh.SetValues(uv,f_zero); // user function SK.CalculateRHS(fv, [](double x, double y) { // rhs
//mesh.SetValues(uv, [](double x, double y) -> double {return 0.0*x*y;} ); // lambda function return std::sin(M_PI * 2.5 * y) * (M_PI * M_PI * 2.5 * 2.5 * x * x - 2); });
//mesh.SetValues(uv, [](double x, double y) -> double {return 5e-3*(x+1)*(y+1);} ); // lambda function //SK.CheckRowSum();
// SK.CheckMatrix();
mesh.SetValues(uv, [](double x, double y) -> double {
return x *x * std::sin(2.5 * M_PI * y);
} );
// Initialize temperature
vector<double> uv(SK.Nrows(), 0.0); // temperature
mesh.SetValues(uv, [](double x, double y) -> double { return 18; } ); // initial temperature of every domain
// Apply BC
SK.ApplyDirichletBC(uv, fv); SK.ApplyDirichletBC(uv, fv);
// Solve
auto exact_sol(uv); auto exact_sol(uv);
//SK.Mult(fv,uv); //SK.Mult(fv,uv);
@ -73,6 +77,8 @@ int main(int argc, char **argv )
double t_diff = static_cast<double>(duration.count()) / 1e6; // overall duration in seconds double t_diff = static_cast<double>(duration.count()) / 1e6; // overall duration in seconds
cout << "JacobiSolve: timing in sec. : " << t_diff << endl; cout << "JacobiSolve: timing in sec. : " << t_diff << endl;
// Calculate error and visualize
auto [val, idx] = findLargestAbsError(exact_sol, uv, 1e+6, 100); auto [val, idx] = findLargestAbsError(exact_sol, uv, 1e+6, 100);
//mesh.Visualize(getAbsError(exact_sol, uv)); //mesh.Visualize(getAbsError(exact_sol, uv));

File diff suppressed because it is too large Load diff