dino.celebic 2025-12-26 20:26:31 +01:00
commit 2467b9ae03
44 changed files with 22631 additions and 0 deletions

@ -0,0 +1,154 @@
# Basic Definitions for using the GNU compiler suite with OpenMPI
# requires setting of COMPILER=GCC_
# start make as follows to avoid warnings caused by OpenMPI code
# make 2>&1 | grep -v openmpi
MPI_ROOT=/usr/bin/
CC = ${MPI_ROOT}mpicc
CXX = ${MPI_ROOT}mpicxx
F77 = ${MPI_ROOT}mpif77
LINKER = ${CXX}
# If your 'mpirun ...' reports an error like "... not enough slots ...", then use the option '--oversubscribe'
MPIRUN = ${MPI_ROOT}mpirun --oversubscribe -display-map
#MPIRUN = ${MPI_ROOT}mpiexec
# 2023, Oct 23: "WARNING: There is at least non-excluded one OpenFabrics device found,"
# solution according to https://github.com/open-mpi/ompi/issues/11063
MPIRUN += -mca btl ^openib
# KFU:sauron
CXXFLAGS += -I/software/boost/1_72_0/include
WARNINGS = -Wall -pedantic -Woverloaded-virtual -Wfloat-equal -Wshadow \
-Wredundant-decls -Wunreachable-code -Winline -fmax-errors=1
# WARNINGS += -Weffc++ -Wextra
# -Wno-pragmas
CXXFLAGS += -std=c++17 -ffast-math -O3 -march=native ${WARNINGS}
# -ftree-vectorizer-verbose=5 -DNDEBUG
# -ftree-vectorizer-verbose=2
# CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details
# CFLAGS = -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2
# info on vectorization
#VECTORIZE = -ftree-vectorize -fdump-tree-vect-blocks=foo.dump
#-fdump-tree-pre=stderr
VECTORIZE = -ftree-vectorize -fopt-info -ftree-vectorizer-verbose=5
#CXXFLAGS += ${VECTORIZE}
# -funroll-all-loops -msse3
#GCC -march=knl -march=broadwell -march=haswell
# for debugging purposes (safe code)
# -fsanitize=leak       # only one of the three (leak/address/thread) can be used
# -fsanitize=address
# -fsanitize=thread
SANITARY = -fsanitize=address -fsanitize=undefined -fsanitize=null -fsanitize=return \
-fsanitize=bounds -fsanitize=alignment -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow \
-fsanitize=bool -fsanitize=enum -fsanitize=vptr
#CXXFLAGS += ${SANITARY}
#LINKFLAGS +=${SANITARY}
# OpenMP
CXXFLAGS += -fopenmp
LINKFLAGS += -fopenmp
default: ${PROGRAM}
${PROGRAM}: ${OBJECTS}
$(LINKER) $^ ${LINKFLAGS} -o $@
@echo
@echo "Start with : $(MPIRUN) -np num_proc $(MPIFLAGS) $(PROGRAM)"
@echo
clean:
@rm -f ${PROGRAM} ${OBJECTS} gmon.out
clean_all:: clean
@rm -f *_ *~ *.bak *.log *.out *.tar *.orig
@rm -rf html latex
run: ${PROGRAM}
${MPIRUN} -np 6 ./$^
# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *
zip: clean
@echo "Zip the directory: " ${MY_DIR}
@cd .. ;\
zip -r ${MY_DIR}.zip ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
doc:
doxygen Doxyfile
#########################################################################
.cpp.o:
$(CXX) -c $(CXXFLAGS) -o $@ $<
# 2>&1 | grep -v openmpi
# special: get rid of compiler warnings generated by openmpi files
#.cpp.o:
# @$(CXX) -c $(CXXFLAGS) $< 2>/tmp/t.txt || grep -sv openmpi /tmp/t.txt
# |grep -sv openmpi
.c.o:
$(CC) -c $(CFLAGS) -o $@ $<
.f.o:
$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# some tools
# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags)
cache: ${PROGRAM}
valgrind --tool=callgrind --simulate-cache=yes ./$^
# kcachegrind callgrind.out.<pid> &
kcachegrind `ls -1tr callgrind.out.* |tail -1`
# Check for wrong memory accesses, memory leaks, ...
# use smaller data sets
# no "-pg" in compile/link options
mem: ${PROGRAM}
valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes mpirun -np 4 ./$^
# Graphical interface
# valkyrie
# Simple run time profiling of your code
# CXXFLAGS += -g -pg
# LINKFLAGS += -pg
prof: ${PROGRAM}
perf record ./$^
perf report
# gprof -b ./$^ > gp.out
# kprof -f gp.out -p gprof &
#Trace your heap:
#> heaptrack ./main.GCC_
#> heaptrack_gui heaptrack.main.GCC_.<pid>.gz
heap: ${PROGRAM}
heaptrack ./$^ 11
heaptrack_gui `ls -1tr heaptrack.$^.* |tail -1` &
codecheck: $(SOURCES)
cppcheck --enable=all --inconclusive --std=c++17 --suppress=missingIncludeSystem $^
########################################################################
# get the detailed status of all optimization flags
info:
echo "detailed status of all optimization flags"
$(CXX) --version
$(CXX) -Q $(CXXFLAGS) --help=optimizers
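#########################################################################
# Typical usage from a project Makefile (a sketch only; the variable values
# below are taken from the project Makefile later in this commit):
#   COMPILER = GCC_
#   MAIN     = main
#   SOURCES  = $(MAIN).cpp vdop.cpp geom.cpp par_geom.cpp
#   OBJECTS  = $(SOURCES:.cpp=.o)
#   PROGRAM  = $(MAIN).$(COMPILER)
#   include ../$(COMPILER)default.mk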

@ -0,0 +1,107 @@
# Basic Definitions for using INTEL-MPI with its compilers
# requires setting of COMPILER=ICC_NATIVE_
# MPI_ROOT should be defined by shell
# path to icpc is contained in $PATH
MPI_BIN = $(shell dirname `which icpc` | sed 's/bin\/intel64/mpi\/intel64\/bin/g')/
MPI_LIB = $(shell echo ${MPI_BIN} | sed 's/bin/lib/g')
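# e.g., if `which icpc` yields /opt/intel/bin/intel64/icpc (path only an
# illustration), then MPI_BIN becomes /opt/intel/mpi/intel64/bin/ and
# MPI_LIB becomes /opt/intel/mpi/intel64/lib/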
# The Intel-MPI wrappers use gcc by default !!
CC = ${MPI_BIN}mpicc -cc=icc
CXX = ${MPI_BIN}mpicxx -cxx=icpc
F77 = ${MPI_BIN}mpif77 -f77=ifort
LINKER = ${CXX}
MPIRUN = ${MPI_BIN}mpirun
WARNINGS = -Wall -Wextra -pedantic -Woverloaded-virtual -Wfloat-equal -Wshadow
# -Weffc++ -Wunreachable-code -Winline
CXXFLAGS += -O3 -fargument-noalias -DNDEBUG -std=c++17 ${WARNINGS} ${MPI_COMPILE_FLAGS}
CFLAGS += -O3 -fargument-noalias -DNDEBUG -Wall -Wextra -pedantic -Wfloat-equal \
-Wshadow ${MPI_COMPILE_FLAGS}
# -vec-report=3 -mkl
# -guide -parallel
# -guide-opts=string -guide-par[=n] -guide-vec[=n]
# -auto-p32 -simd
# use MKL by INTEL
LINKFLAGS += -mkl ${MPI_LINK_FLAGS}
default: ${PROGRAM}
${PROGRAM}: ${OBJECTS}
$(LINKER) $^ ${LINKFLAGS} -o $@
@echo
@echo "Start with : $(MPIRUN) -np num_proc $(MPIFLAGS) $(PROGRAM)"
@echo
clean:
rm -f ${PROGRAM} ${OBJECTS}
clean_all:: clean
@rm -f *_ *~ *.bak *.log *.out *.tar
run: ${PROGRAM}
(export LD_LIBRARY_PATH=${MPI_LIB}:${LD_LIBRARY_PATH} ;${MPIRUN} -np 4 ./$^ ${PROG_ARGS})
# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *
doc:
doxygen Doxyfile
#########################################################################
.cpp.o:
$(CXX) -c $(CXXFLAGS) -o $@ $<
.c.o:
$(CC) -c $(CFLAGS) -o $@ $<
.f.o:
$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# # some tools
# # Cache behaviour (CXXFLAGS += -g tracks down to source lines)
# cache: ${PROGRAM}
# valgrind --tool=callgrind --simulate-cache=yes ./$^
# # kcachegrind callgrind.out.<pid> &
#
# # Check for wrong memory accesses, memory leaks, ...
# # use smaller data sets
# mem: ${PROGRAM}
# valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
#
# # Simple run time profiling of your code
# # CXXFLAGS += -g -pg
# # LINKFLAGS += -pg
# prof: ${PROGRAM}
# ./$^
# gprof -b ./$^ > gp.out
# # kprof -f gp.out -p gprof &
#
mem: inspector
prof: amplifier
cache: amplifier
gap_par_report:
${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt
# GUI for performance report
amplifier: ${PROGRAM}
${BINDIR}../vtune_amplifier_xe_2013/bin64/amplxe-gui &
# GUI for Memory and Thread analyzer (race condition)
inspector: ${PROGRAM}
# http://askubuntu.com/questions/41629/after-upgrade-gdb-wont-attach-to-process
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
${BINDIR}../inspector_xe_2013/bin64/inspxe-gui &

@ -0,0 +1,112 @@
# Basic Definitions for using INTEL compilers with OpenMPI headers and libraries
# requires setting of COMPILER=ICC_
# MPI_ROOT should be defined by shell
MPI_ROOT=/usr
CC = icc
CXX = icpc
F77 = ifort
LINKER = ${CXX}
MPIRUN = ${MPI_ROOT}/bin/mpirun
# no difference whether C or C++ is used !! (always uses the options from mpicxx)
MPI_COMPILE_FLAGS = `${MPI_ROOT}/bin/mpicxx -showme:compile`
MPI_LINK_FLAGS = `${MPI_ROOT}/bin/mpicxx -showme:link`
# MPI_LINK_FLAGS = -pthread -L/usr/lib/openmpi/lib -lmpi_cxx -lmpi -lopen-rte -lopen-pal -ldl -Wl,--export-dynamic -lnsl -lutil -lm -ldl
WARNINGS = -Wall -Wextra -pedantic -Woverloaded-virtual -Wfloat-equal -Wshadow
# -Weffc++ -Wunreachable-code -Winline
CXXFLAGS += -O3 -std=c++17 -fargument-noalias -DNDEBUG ${WARNINGS} ${MPI_COMPILE_FLAGS}
CFLAGS += -O3 -fargument-noalias -DNDEBUG -Wall -Wextra -pedantic -Wfloat-equal \
-Wshadow ${MPI_COMPILE_FLAGS}
# -vec-report=3 -mkl
# -guide -parallel
# -guide-opts=string -guide-par[=n] -guide-vec[=n]
# -auto-p32 -simd
# use MKL by INTEL
LINKFLAGS += -mkl
# use MPI by Compiler
LINKFLAGS += ${MPI_LINK_FLAGS}
default: ${PROGRAM}
${PROGRAM}: ${OBJECTS}
$(LINKER) $^ ${LINKFLAGS} -o $@
@echo
@echo "Start with : $(MPIRUN) -np num_proc $(MPIFLAGS) $(PROGRAM)"
@echo
clean:
rm -f ${PROGRAM} ${OBJECTS}
clean_all:: clean
@rm -f *_ *~ *.bak *.log *.out *.tar
run: ${PROGRAM}
${MPIRUN} -np 4 ./$^
# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *
doc:
doxygen Doxyfile
#########################################################################
.cpp.o:
$(CXX) -c $(CXXFLAGS) -o $@ $<
.c.o:
$(CC) -c $(CFLAGS) -o $@ $<
.f.o:
$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# # some tools
# # Cache behaviour (CXXFLAGS += -g tracks down to source lines)
# cache: ${PROGRAM}
# valgrind --tool=callgrind --simulate-cache=yes ./$^
# # kcachegrind callgrind.out.<pid> &
#
# # Check for wrong memory accesses, memory leaks, ...
# # use smaller data sets
# mem: ${PROGRAM}
# valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
#
# # Simple run time profiling of your code
# # CXXFLAGS += -g -pg
# # LINKFLAGS += -pg
# prof: ${PROGRAM}
# ./$^
# gprof -b ./$^ > gp.out
# # kprof -f gp.out -p gprof &
#
mem: inspector
prof: amplifier
cache: amplifier
gap_par_report:
${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt
# GUI for performance report
amplifier: ${PROGRAM}
${BINDIR}../vtune_amplifier_xe_2013/bin64/amplxe-gui &
# GUI for Memory and Thread analyzer (race condition)
inspector: ${PROGRAM}
# http://askubuntu.com/questions/41629/after-upgrade-gdb-wont-attach-to-process
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
${BINDIR}../inspector_xe_2013/bin64/inspxe-gui &

@ -0,0 +1,128 @@
# Basic Definitions for using OpenMPI with CLANG compilers
# requires setting of COMPILER=OPENMPI_CLANG_
# Pass CLANG Compilers to the OpenMPI wrappers
# see: https://www.open-mpi.org/faq/?category=mpi-apps#override-wrappers-after-v1.0
EXPORT = export OMPI_CXX=clang++; export OMPI_CC=clang; export OMPI_mpifort=flang
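# The same can be checked from the shell, e.g. (a sketch):
#   OMPI_CXX=clang++ mpicxx -showme     # prints the compile command the wrapper would execute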
CC = mpicc
CXX = mpicxx
F77 = mpifort
LINKER = ${CXX}
MPIRUN = ${MPI_BIN}mpirun
#http://clang.llvm.org/docs/UsersManual.html#options-to-control-error-and-warning-messages
SILENCE_MPI = -Wno-weak-vtables -Wno-old-style-cast -Wno-cast-align -Wno-deprecated
SILENCE_MPI+= -Wno-sign-conversion -Wno-reserved-id-macro -Wno-c++98-compat-pedantic
SILENCE_MPI+= -Wno-zero-as-null-pointer-constant -Wno-source-uses-openmp
WARNINGS = -Weverything -Wno-c++98-compat -Wno-weak-vtables -ferror-limit=3 ${SILENCE_MPI}
#-fsyntax-only -Wdocumentation -Wconversion -Wshadow -Wfloat-conversion -pedantic
CXXFLAGS += -Ofast -std=c++17 ${WARNINGS}
#CXXFLAGS += -Ofast -std=c++17
# -ftrapv
#
CFLAGS += -Ofast -Weverything -ferror-limit=3 ${MPI_COMPILE_FLAGS}
# OpenMP
#CXXFLAGS += -fopenmp
#LINKFLAGS += -fopenmp
# tidy_check
SWITCH_OFF=,-readability-magic-numbers,-readability-redundant-control-flow,-readability-redundant-member-init
SWITCH_OFF+=,-readability-redundant-member-init,-readability-isolate-declaration
#READABILITY=,readability*${SWITCH_OFF}
#TIDYFLAGS = -checks=llvm-*,-llvm-header-guard -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp"
TIDYFLAGS = -checks=llvm-*,-llvm-header-guard${READABILITY} -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp"
#TIDYFLAGS += -checks='modernize*
MPI_COMPILE_FLAGS = `${MPI_BIN}mpicxx -showme:compile`
MPI_LINK_FLAGS = `${MPI_BIN}mpicxx -showme:link`
#TIDYFLAGS += ${MPI_COMPILE_FLAGS}
TIDYFLAGS += -extra-arg="-I/usr/lib/x86_64-linux-gnu/openmpi/include"
#check:
# echo ${MPI_COMPILE_FLAGS}
default: ${PROGRAM}
${PROGRAM}: ${OBJECTS}
@( ${EXPORT}; $(LINKER) $^ ${LINKFLAGS} -o $@ )
@echo
@echo "Start with : $(MPIRUN) -np num_proc $(MPIFLAGS) $(PROGRAM)"
@echo
clean:
rm -f ${PROGRAM} ${OBJECTS}
clean_all:: clean
@rm -f *_ *~ *.bak *.log *.out *.tar
codecheck: tidy_check
tidy_check:
clang-tidy ${SOURCES} ${TIDYFLAGS} -- ${SOURCES}
# see also http://clang-developers.42468.n3.nabble.com/Error-while-trying-to-load-a-compilation-database-td4049722.html
run: ${PROGRAM}
${MPIRUN} -np 4 ./$^ ${PROG_ARGS}
# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *
doc:
doxygen Doxyfile
#########################################################################
.cpp.o:
@( ${EXPORT}; $(CXX) -c $(CXXFLAGS) -o $@ $< )
.c.o:
@( ${EXPORT}; $(CC) -c $(CFLAGS) -o $@ $< )
.f.o:
$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# # some tools
# # Cache behaviour (CXXFLAGS += -g tracks down to source lines)
# cache: ${PROGRAM}
# valgrind --tool=callgrind --simulate-cache=yes ./$^
# # kcachegrind callgrind.out.<pid> &
#
# # Check for wrong memory accesses, memory leaks, ...
# # use smaller data sets
# mem: ${PROGRAM}
# valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
#
# # Simple run time profiling of your code
# # CXXFLAGS += -g -pg
# # LINKFLAGS += -pg
# prof: ${PROGRAM}
# ./$^
# gprof -b ./$^ > gp.out
# # kprof -f gp.out -p gprof &
#
mem: inspector
prof: amplifier
cache: amplifier
gap_par_report:
${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt
# GUI for performance report
amplifier: ${PROGRAM}
${BINDIR}../vtune_amplifier_xe_2013/bin64/amplxe-gui &
# GUI for Memory and Thread analyzer (race condition)
inspector: ${PROGRAM}
# http://askubuntu.com/questions/41629/after-upgrade-gdb-wont-attach-to-process
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
${BINDIR}../inspector_xe_2013/bin64/inspxe-gui &

@ -0,0 +1,107 @@
# Basic Definitions for using OpenMPI with Intel compilers
# requires setting of COMPILER=OPENMPI_ICC_
# Pass Intel Compilers to the OpenMPI wrappers
# see: https://www.open-mpi.org/faq/?category=mpi-apps#override-wrappers-after-v1.0
EXPORT = export OMPI_CXX=icpc; export OMPI_CC=icc; export OMPI_mpifort=ifort
CC = mpicc
CXX = mpicxx
F77 = mpifort
LINKER = ${CXX}
MPIRUN = ${MPI_BIN}mpirun
WARNINGS = -Wall -Wextra -pedantic -Woverloaded-virtual -Wfloat-equal -Wshadow
# -Weffc++ -Wunreachable-code -Winline
CXXFLAGS += -fast -fargument-noalias -DNDEBUG -std=c++17 ${WARNINGS}
CFLAGS += -O3 -fargument-noalias -DNDEBUG -Wall -Wextra -pedantic -Wfloat-equal -Wshadow
# -vec-report=3 -mkl
# -guide -parallel
# -guide-opts=string -guide-par[=n] -guide-vec[=n]
# -auto-p32 -simd
# use MKL by INTEL
LINKFLAGS += -O3 -mkl ${MPI_LINK_FLAGS}
# ipo: warning #11021: unresolved __GI_memset
# see: https://software.intel.com/en-us/articles/ipo-warning-11021-unresolved-symbols-referenced-a-dynamic-library
LINKFLAGS +=
default: ${PROGRAM}
${PROGRAM}: ${OBJECTS}
@( ${EXPORT}; $(LINKER) $^ ${LINKFLAGS} -o $@ )
@echo
@echo "Start with : $(MPIRUN) -np num_proc $(MPIFLAGS) $(PROGRAM)"
@echo
clean:
rm -f ${PROGRAM} ${OBJECTS}
clean_all:: clean
@rm -f *_ *~ *.bak *.log *.out *.tar
run: ${PROGRAM}
${MPIRUN} -np 4 ./$^ ${PROG_ARGS}
# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *
doc:
doxygen Doxyfile
#########################################################################
.cpp.o:
@( ${EXPORT}; $(CXX) -c $(CXXFLAGS) -o $@ $< )
.c.o:
@( ${EXPORT}; $(CC) -c $(CFLAGS) -o $@ $< )
.f.o:
$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# # some tools
# # Cache behaviour (CXXFLAGS += -g tracks down to source lines)
# cache: ${PROGRAM}
# valgrind --tool=callgrind --simulate-cache=yes ./$^
# # kcachegrind callgrind.out.<pid> &
#
# # Check for wrong memory accesses, memory leaks, ...
# # use smaller data sets
# mem: ${PROGRAM}
# valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
#
# # Simple run time profiling of your code
# # CXXFLAGS += -g -pg
# # LINKFLAGS += -pg
# prof: ${PROGRAM}
# ./$^
# gprof -b ./$^ > gp.out
# # kprof -f gp.out -p gprof &
#
mem: inspector
prof: amplifier
cache: amplifier
gap_par_report:
${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt
# GUI for performance report
amplifier: ${PROGRAM}
${BINDIR}../vtune_amplifier_xe_2013/bin64/amplxe-gui &
# GUI for Memory and Thread analyzer (race condition)
inspector: ${PROGRAM}
# http://askubuntu.com/questions/41629/after-upgrade-gdb-wont-attach-to-process
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
${BINDIR}../inspector_xe_2013/bin64/inspxe-gui &

@ -0,0 +1,125 @@
# Use the MPI-wrappers from the PGI compiler suite.
# requires setting of COMPILER=PGI_MPI_
#
# requires
# sudo apt install librdmacm1
# Details for run time information
# export PGI_ACC_TIME=1
# unset PGI_ACC_TIME
# export PGI_ACC_NOTIFY=1
# export PGI_ACC_NOTIFY=3
# unset PGI_ACC_NOTIFY
PGI_PATH = /opt/pgi/linux86-64/2019/bin
#ifeq "$(HOSTNAME)" "mephisto.uni-graz.at"
# # mephisto
# PGI_PATH = /share/apps/pgi/linux86-64/2016/bin
#endif
#MPI_ROOT=${PGI_PATH}mpi/mpich/bin/
MPI_ROOT= ${PGI_PATH}/../mpi/openmpi-3.1.3/bin/
MPIRUN = ${MPI_ROOT}mpirun
CC = ${MPI_ROOT}mpicc
CXX = ${MPI_ROOT}mpicxx
#F77 = ${MPI_ROOT}mpif77
ifndef LINKER
LINKER = ${CC}
endif
LINKER = ${CXX}
WARNINGS = -Minform=warn
PGI_PROFILING += -Minfo=loop,vect,opt,intensity,mp,accel
#PGI_PROFILING += -Mprof=lines Minfo=ccff
CXXFLAGS += -e3 -std=c++17 -fast ${PGI_PROFILING} ${WARNINGS} -Mnodepchk
CFLAGS += -fast ${PGI_PROFILING} ${WARNINGS} -Mnodepchk
#
# for OpenACC
# Target architecture (nvidia,host)
TA_ARCH = host
#TA_ARCH = nvidia,host
#TA_ARCH = -ta=nvidia:cc2+,cuda5.5,fastmath
#TA_ARCH = -acc -DNDEBUG -ta=nvidia:cc2+,cuda5.5,fastmath,keepgpu
#TA_ARCH = -acc -DNDEBUG -ta=nvidia:cc2+,fastmath,keepgpu
#,keepgpu
# CFLAGS = -O3 -ta=$(TA_ARCH)
#CFLAGS += -B -gopt $(TA_ARCH)
#CXXFLAGS += -B -gopt $(TA_ARCH)
# -Minfo=all
# libcudart.a is needed for direct CUDA calls
#LINKFLAGS = -gopt $(TA_ARCH) -L${BINDIR}../lib $(PGI_PROFILING)
# -lcudart
default: ${PROGRAM}
${PROGRAM}: ${OBJECTS}
$(LINKER) $^ ${LINKFLAGS} -o $@
clean:
rm -f ${PROGRAM} ${OBJECTS} *.gpu *gprof.out
clean_all:: clean
@rm -f *_ *~ *.bak *.log *.out *.tar
#run: clean ${PROGRAM}
run: ${PROGRAM}
${MPIRUN} -np 4 ${OPTIRUN} ./${PROGRAM}
# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *
doc:
doxygen Doxyfile
#########################################################################
.cpp.o:
$(CXX) -c $(CXXFLAGS) -o $@ $<
.c.o:
$(CC) -c $(CFLAGS) -o $@ $<
.f.o:
$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# # some tools
# # Simple run time profiling of your code
# # CXXFLAGS += -g -pg
# # LINKFLAGS += -pg
# Profiling options PGI, see: pgcollect -help
CPU_PROF = -allcache
GPU_PROF = -cuda=gmem,branch,cc13 -cudainit
#GPU_PROF = -cuda=branch:cc20
#
PROF_FILE = pgprof.out
prof: ${PROGRAM}
# ./$^
# $(CUDA_HOME)/bin/nvvp &
# export LD_LIBRARY_PATH=/state/partition1/apps/pgi/linux86-64/12.9/lib:$LD_LIBRARY_PATH
${OPTIRUN} ${BINDIR}pgcollect $(GPU_PROF) ./$^
${OPTIRUN} ${BINDIR}pgprof -exe ./$^ $(PROF_FILE) &
# Memory checker (slooooow!!!):
# see doc at /usr/local/cuda/doc/cuda-memcheck.pdf
# mem: ${PROGRAM}
# $(CUDA_HOME)memcheck ./$^

File diff suppressed because it is too large

@ -0,0 +1,54 @@
#
# use GNU-Compiler tools
COMPILER=GCC_
# COMPILER=GCC_SEQ_
# alternatively, set it from the shell:
# export COMPILER=GCC_
# or pass it directly on the make command line:
# make COMPILER=GCC_
MAIN = main
SOURCES = ${MAIN}.cpp vdop.cpp geom.cpp par_geom.cpp
OBJECTS = $(SOURCES:.cpp=.o)
PROGRAM = ${MAIN}.${COMPILER}
# uncomment the next two lines for debugging and detailed performance analysis
CXXFLAGS += -g
# -DNDEBUG
# -pg slows down the code on my laptop when using CLANG_
LINKFLAGS += -g
#-pg
#CXXFLAGS += -Q --help=optimizers
#CXXFLAGS += -fopt-info
include ../${COMPILER}default.mk
#############################################################################
# additional specific cleaning in this directory
clean_all::
@rm -f uv.txt
#############################################################################
# special testing
# NPROCS = 4
#
TFILE = t.dat
# TTMP = t.tmp
#
graph: $(PROGRAM)
# @rm -f $(TFILE).*
# the next two lines apply only to a sequential run
./$(PROGRAM)
@mv $(TFILE).000 $(TFILE)
# $(MPIRUN) $(MPIFLAGS) -np $(NPROCS) $(PROGRAM)
# @echo " "; echo "Manipulate data for graphics."; echo " "
# @cat $(TFILE).* > $(TTMP)
# @sort -b -k 2 $(TTMP) -o $(TTMP).1
# @sort -b -k 1 $(TTMP).1 -o $(TTMP).2
# @awk -f nl.awk $(TTMP).2 > $(TFILE)
# @rm -f $(TTMP).* $(TTMP) $(TFILE).*
#
-gnuplot jac.dem

@ -0,0 +1,43 @@
function [ xc, ia, v ] = ascii_read_meshvector( fname )
%
% Loads the 2D triangular mesh (coordinates, vertex connectivity)
% together with values on its vertices from an ASCII file.
% Matlab indexing is stored (starts with 1).
%
% The input file format is compatible
% with Mesh_2d_3_matlab:Write_ascii_matlab(..) in jacobi_oo_stl/geom.h
%
%
% IN: fname - filename
% OUT: xc - coordinates
% ia - mesh connectivity
% v - solution vector
DELIMETER = ' ';
fprintf('Read file %s\n',fname)
% Read mesh constants
nn = dlmread(fname,DELIMETER,[0 0 0 3]); %% row_1, col_1, row_2, col_2 in C indexing!!!
nnode = nn(1);
ndim = nn(2);
nelem = nn(3);
nvert = nn(4);
% Read coordinates
row_start = 0+1;
row_end = 0+nnode;
xc = dlmread(fname,DELIMETER,[row_start 0 row_end ndim-1]);
% Read connectivity
row_start = row_end+1;
row_end = row_end+nelem;
ia = dlmread(fname,DELIMETER,[row_start 0 row_end nvert-1]);
% Read solution
row_start = row_end+1;
row_end = row_end+nnode;
v = dlmread(fname,DELIMETER,[row_start 0 row_end 0]);
end
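% Example usage (a sketch; the file name 'square.txt' is an assumption,
% any file written in the format described above will do):
%   [xc, ia, v] = ascii_read_meshvector('square.txt');
%   trisurf(ia, xc(:,1), xc(:,2), v);    % quick visualization of the nodal values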

@ -0,0 +1,49 @@
function ascii_write_mesh( xc, ia, e, basename)
%
% Saves the 2D triangular mesh in the minimal way (only coordinates, vertex connectivity, minimal boundary edge info)
% in an ASCII file.
% Matlab indexing is stored (starts with 1).
%
% The output file format is compatible with Mesh_2d_3_matlab:Mesh_2d_3_matlab(std::string const &fname) in jacobi_oo_stl/geom.h
%
% IN:
% coordinates xc: [2][nnode]
% connectivity ia: [4][nelem] with t(4,:) are the subdomain numbers
% edges e: [7][nedges] boundary edges
% e([1,2],:) - start/end vertex of edge
% e([3,4],:) - start/end values
% e(5,:) - segment number
% e([6,7],:) - left/right subdomain
% basename: file name without extension
%
% Data have been generated via <https://de.mathworks.com/help/pde/ug/initmesh.html initmesh>.
%
fname = [basename, '.txt'];
nnode = int32(size(xc,2));
ndim = int32(size(xc,1));
nelem = int32(size(ia,2));
nvert_e = int32(3);
dlmwrite(fname,nnode,'delimiter','\t','precision',16) % number of nodes
dlmwrite(fname,ndim,'-append','delimiter','\t','precision',16) % space dimension
dlmwrite(fname,nelem,'-append','delimiter','\t','precision',16) % number of elements
dlmwrite(fname,nvert_e,'-append','delimiter','\t','precision',16) % number of vertices per element
% dlmwrite(fname,xc(:),'-append','delimiter','\t','precision',16) % coordinates
dlmwrite(fname,xc([1,2],:).','-append','delimiter','\t','precision',16) % coordinates
% no subdomain info transferred
tmp=int32(ia(1:3,:));
% dlmwrite(fname,tmp(:),'-append','delimiter','\t','precision',16) % connectivity in Matlab indexing
dlmwrite(fname,tmp(:,:).','-append','delimiter','\t','precision',16) % connectivity in Matlab indexing
% store only start and end point of boundary edges,
nbedges = size(e,2);
dlmwrite(fname,nbedges,'-append','delimiter','\t','precision',16) % number boundary edges
tmp=int32(e(1:2,:));
% dlmwrite(fname,tmp(:),'-append','delimiter','\t','precision',16) % boundary edges in Matlab indexing
dlmwrite(fname,tmp(:,:).','-append','delimiter','\t','precision',16) % boundary edges in Matlab indexing
end
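% Example usage (a sketch; assumes a PDE-toolbox mesh [p,e,t] generated via
% initmesh as mentioned above; the geometry 'lshapeg' is only an illustration):
%   [p, e, t] = initmesh('lshapeg');
%   ascii_write_mesh(p, t, e, 'lshape');     % writes lshape.txt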

@ -0,0 +1,51 @@
function ascii_write_subdomains( xc, ia, e, basename)
%
% Saves the 2D triangular mesh in the minimal way (only coordinates, vertex connectivity, minimal boundary edge info)
% in an ASCII file.
% Matlab indexing is stored (starts with 1).
%
% The output file format is compatible with Mesh_2d_3_matlab:Mesh_2d_3_matlab(std::string const &fname) in jacobi_oo_stl/geom.h
%
% IN:
% coordinates xc: [2][nnode]
% connectivity ia: [4][nelem] with t(4,:) are the subdomain numbers
% edges e: [7][nedges] boundary edges
% e([1,2],:) - start/end vertex of edge
% e([3,4],:) - start/end values
% e(5,:) - segment number
% e([6,7],:) - left/right subdomain
% basename: file name without extension
%
% Data have been generated via <https://de.mathworks.com/help/pde/ug/initmesh.html initmesh>.
%
fname = [basename, '_sd.txt'];
nnode = int32(size(xc,2));
ndim = int32(size(xc,1));
nelem = int32(size(ia,2));
nvert_e = int32(3);
% dlmwrite(fname,nnode,'delimiter','\t','precision',16) % number of nodes
% dlmwrite(fname,ndim,'-append','delimiter','\t','precision',16) % space dimension
% dlmwrite(fname,nelem,'-append','delimiter','\t','precision',16) % number of elements
dlmwrite(fname,nelem,'delimiter','\t','precision',16) % number of elements
% dlmwrite(fname,nvert_e,'-append','delimiter','\t','precision',16) % number of vertices per element
% % dlmwrite(fname,xc(:),'-append','delimiter','\t','precision',16) % coordinates
% dlmwrite(fname,xc([1,2],:).','-append','delimiter','\t','precision',16) % coordinates
% subdomain info
tmp=int32(ia(4,:));
% % dlmwrite(fname,tmp(:),'-append','delimiter','\t','precision',16) % connectivity in Matlab indexing
% dlmwrite(fname,tmp(:,:).','-append','delimiter','\t','precision',16) % connectivity in Matlab indexing
dlmwrite(fname,tmp(:,:).','-append','delimiter','\t') % connectivity in Matlab indexing
% % store only start and end point of boundary edges,
% nbedges = size(e,2);
% dlmwrite(fname,nbedges,'-append','delimiter','\t','precision',16) % number boundary edges
% tmp=int32(e(1:2,:));
% % dlmwrite(fname,tmp(:),'-append','delimiter','\t','precision',16) % boundary edges in Matlab indexing
% dlmwrite(fname,tmp(:,:).','-append','delimiter','\t','precision',16) % boundary edges in Matlab indexing
end
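% Example usage (a sketch; same [p,e,t] as for ascii_write_mesh, where t(4,:)
% already holds the subdomain numbers):
%   ascii_write_subdomains(p, t, e, 'square_4');   % writes square_4_sd.txt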

File diff suppressed because it is too large

@ -0,0 +1,712 @@
#ifndef GEOM_FILE
#define GEOM_FILE
#include <array>
#include <functional> // function; C++11
#include <iostream>
#include <memory> // shared_ptr
#include <string>
#include <vector>
/**
* Base class for finite element meshes.
*/
class Mesh
{
public:
/**
* Constructor initializing the members with default values.
*
* @param[in] ndim space dimensions (dimension for coordinates)
* @param[in] nvert_e number of vertices per element (dimension for connectivity)
* @param[in] ndof_e degrees of freedom per element (= @p nvert_e for linear elements)
* @param[in] nedge_e number of edges per element (= @p nvert_e for linear elements in 2D)
*/
explicit Mesh(int ndim, int nvert_e = 0, int ndof_e = 0, int nedge_e = 0);
__attribute__((noinline))
Mesh(Mesh const &) = default;
Mesh &operator=(Mesh const &) = delete;
/**
* Destructor.
*
* See clang warning on
* <a href="https://stackoverflow.com/questions/28786473/clang-no-out-of-line-virtual-method-definitions-pure-abstract-c-class/40550578">weak-vtables</a>.
*/
virtual ~Mesh();
/**
* Reads mesh data from an ASCII file.
*
* File format, see ascii_write_mesh.m
*
* @param[in] fname file name
*/
explicit Mesh(std::string const &fname);
/**
* Reads mesh data from an ASCII file.
*
* File format, see ascii_write_mesh.m
*
* @param[in] fname file name
*/
void ReadVertexBasedMesh(std::string const &fname);
/**
* Number of finite elements in (sub)domain.
* @return number of elements.
*/
int Nelems() const
{
return _nelem;
}
/**
* Global number of vertices for each finite element.
* @return number of vertices per element.
*/
int NverticesElements() const
{
return _nvert_e;
}
/**
* Global number of degrees of freedom (dof) for each finite element.
* @return degrees of freedom per element.
*/
int NdofsElement() const
{
return _ndof_e;
}
/**
* Number of vertices in mesh.
* @return number of vertices.
*/
int Nnodes() const
{
return _nnode;
}
/**
* Space dimension.
* @return number of dimensions.
*/
int Ndims() const
{
return _ndim;
}
/**
* (Re-)Allocates memory for the element connectivity and redefines the appropriate dimensions.
*
* @param[in] nelem number of elements
* @param[in] nvert_e number of vertices per element
*/
void Resize_Connectivity(int nelem, int nvert_e)
{
SetNelem(nelem); // number of elements
SetNverticesElement(nvert_e); // vertices per element
_ia.resize(nelem * nvert_e);
}
/**
* Read connectivity information (g1,g2,g3)_i.
* @return connectivity vector [nelems*ndofs].
*/
const std::vector<int> &GetConnectivity() const
{
return _ia;
}
/**
* Access/Change connectivity information (g1,g2,g3)_i.
* @return connectivity vector [nelems*ndofs].
*/
std::vector<int> &GetConnectivity()
{
return _ia;
}
/**
* (Re-)Allocates memory for the element connectivity and redefines the appropriate dimensions.
*
* @param[in] nnodes number of nodes
* @param[in] ndim space dimension
*/
void Resize_Coords(int nnodes, int ndim)
{
SetNnode(nnodes); // number of nodes
SetNdim(ndim); // space dimension
_xc.resize(nnodes * ndim);
}
/**
* Read coordinates of vertices (x,y)_i.
* @return coordinates vector [nnodes*2].
*/
const std::vector<double> &GetCoords() const
{
return _xc;
}
/**
* Access/Change coordinates of vertices (x,y)_i.
* @return coordinates vector [nnodes*2].
*/
std::vector<double> &GetCoords()
{
return _xc;
}
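// Example of the storage layout (a sketch; cf. GetCoordsInRectangle and
// GetConnectivityInRectangle below): for vertex k and linear triangle e
//   x_k = GetCoords()[2*k],  y_k = GetCoords()[2*k+1]
//   vertices of e: GetConnectivity()[3*e], GetConnectivity()[3*e+1], GetConnectivity()[3*e+2]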
/**
* Calculate values in vector @p v via function @p func(x,y)
* @param[in] v vector
* @param[in] func function of (x,y) returning a double value.
*/
void SetValues(std::vector<double> &v, const std::function<double(double, double)> &func) const;
void SetBoundaryValues(std::vector<double> &v, const std::function<double(double, double)> &func) const;
void SetDirchletValues(std::vector<double> &v, const std::function<double(double, double)> &func) const;
/**
* Prints the information for a finite element mesh
*/
void Debug() const;
/**
* Prints the edge based information for a finite element mesh
*/
void DebugEdgeBased() const;
/**
* Determines the indices of those vertices with Dirichlet boundary conditions
* @return index vector.
*/
virtual std::vector<int> Index_DirichletNodes() const;
virtual std::vector<int> Index_BoundaryNodes() const;
/**
* Write vector @p v together with its mesh information to an ASCII file @p fname.
*
* The data are written in C-style.
*
* @param[in] fname file name
* @param[in] v vector
*/
void Write_ascii_matlab(std::string const &fname, std::vector<double> const &v) const;
/**
* Exports the mesh information to ASCII files @p basename + {_coords|_elements}.txt.
*
* The data are written in C-style.
*
* @param[in] basename first part of file names
*/
void Export_scicomp(std::string const &basename) const;
/**
* Visualize @p v together with its mesh information via matlab or octave.
*
* Comment/uncomment those code lines in method Mesh::Visualize (geom.cpp)
* that are supported on your system.
*
* @param[in] v vector
*
* @warning matlab files ascii_read_meshvector.m visualize_results.m
* must be in the executing directory.
*/
void Visualize(std::vector<double> const &v) const;
/**
* Global number of edges.
* @return number of edges in mesh.
*/
int Nedges() const
{
return _nedge;
}
/**
* Global number of edges for each finite element.
* @return number of edges per element.
*/
int NedgesElements() const
{
return _nedge_e;
}
/**
* Read edge connectivity information (e1,e2,e3)_i.
* @return edge connectivity vector [nelems*_nedge_e].
*/
const std::vector<int> &GetEdgeConnectivity() const
{
return _ea;
}
/**
* Access/Change edge connectivity information (e1,e2,e3)_i.
* @return edge connectivity vector [nelems*_nedge_e].
*/
std::vector<int> &GetEdgeConnectivity()
{
return _ea;
}
/**
* Read edge information (v1,v2)_i.
* @return edge connectivity vector [_nedge*2].
*/
const std::vector<int> &GetEdges() const
{
return _edges;
}
/**
* Access/Change edge information (v1,v2)_i.
* @return edge connectivity vector [_nedge*2].
*/
std::vector<int> &GetEdges()
{
return _edges;
}
/**
* Determines all node to node connections from the vertex based mesh.
*
* @return vector[k][] containing all connections of vertex k, including to itself.
*/
std::vector<std::vector<int>> Node2NodeGraph() const
{
//// Check version 2 wrt. version 1
//auto v1=Node2NodeGraph_1();
//auto v2=Node2NodeGraph_2();
//if ( equal(v1.cbegin(),v1.cend(),v2.begin()) )
//{
//std::cout << "\nidentical Versions\n";
//}
//else
//{
//std::cout << "\nE R R O R in Versions\n";
//}
//return Node2NodeGraph_1();
return Node2NodeGraph_2(); // 2 times faster than version 1
}
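// Usage example (a sketch; 'mesh' denotes any Mesh instance):
//   auto const n2n = mesh.Node2NodeGraph();
//   for (int j : n2n[k]) { /* j runs over all vertices connected to vertex k, incl. k */ }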
/**
* Accesses the father-of-nodes relation.
*
* @return vector of length 0 because no relation available.
*
*/
virtual std::vector<int> const &GetFathersOfVertices() const
{
return _dummy;
}
/**
* Deletes all edge connectivity information (saves memory).
*/
void Del_EdgeConnectivity();
protected:
//public:
void SetNelem(int nelem)
{
_nelem = nelem;
}
void SetNverticesElement(int nvert)
{
_nvert_e = nvert;
}
void SetNdofsElement(int ndof)
{
_ndof_e = ndof;
}
void SetNnode(int nnode)
{
_nnode = nnode;
}
void SetNdim(int ndim)
{
_ndim = ndim;
}
void SetNedge(int nedge)
{
_nedge = nedge;
}
/**
* Reads vertex based mesh data from an ASCII file.
*
* File format, see ascii_write_mesh.m
*
* @param[in] fname file name
*/
void ReadVectexBasedMesh(std::string const &fname);
/**
* The vertex based mesh data are used to derive the edge based data.
*
* @warning Exactly 3 vertices, 3 edges per element are assumed (linear triangle in 2D)
*/
void DeriveEdgeFromVertexBased()
{
//DeriveEdgeFromVertexBased_slow();
//DeriveEdgeFromVertexBased_fast();
DeriveEdgeFromVertexBased_fast_2();
}
void DeriveEdgeFromVertexBased_slow();
void DeriveEdgeFromVertexBased_fast();
void DeriveEdgeFromVertexBased_fast_2();
/**
* The edge based mesh data are used to derive the vertex based data.
*
* @warning Exactly 3 vertices, 3 edges per element are assumed (linear triangle in 2D)
*/
void DeriveVertexFromEdgeBased();
/**
* Number of boundary edges of the (sub)domain.
* @return number of boundary edges.
*/
int Nnbedges() const
{
return static_cast<int>(_bedges.size());
}
/**
* Checks whether the array dimensions fit to their appropriate size parameters
* @return true if all array dimensions match their size parameters.
*/
virtual bool Check_array_dimensions() const;
/**
* Permutes the vertex information in an edge based mesh.
*
* @param[in] old2new new indices of original vertices.
*/
void PermuteVertices_EdgeBased(std::vector<int> const &old2new);
private:
/**
* Determines all node to node connections from the vertex based mesh.
*
* @return vector[k][] containing all connections of vertex k, including to itself.
*/
std::vector<std::vector<int>> Node2NodeGraph_1() const; // is correct
/**
* Determines all node to node connections from the vertex based mesh.
*
* Faster than @p Node2NodeGraph_1().
*
* @return vector[k][] containing all connections of vertex k, including to itself.
*/
std::vector<std::vector<int>> Node2NodeGraph_2() const; // is correct
//private:
protected:
int _nelem; //!< number elements
int _nvert_e; //!< number of vertices per element
int _ndof_e; //!< degrees of freedom (d.o.f.) per element
int _nnode; //!< number nodes/vertices
int _ndim; //!< space dimension of the problem (1, 2, or 3)
std::vector<int> _ia; //!< element connectivity
std::vector<double> _xc; //!< coordinates
protected:
// B.C.
std::vector<int> _bedges; //!< boundary edges [nbedges][2] storing start/end vertex
// 2020-01-08
std::vector<int> _sdedges; //!< boundary edges [nbedges][2] with left/right subdomain number
//private:
protected:
// edge based connectivity
int _nedge; //!< number of edges in mesh
int _nedge_e; //!< number of edges per element
std::vector<int> _edges; //!< edges of mesh (vertices ordered ascending)
std::vector<int> _ea; //!< edge based element connectivity
// B.C.
std::vector<int> _ebedges; //!< boundary edges [nbedges]
private:
const std::vector<int> _dummy; //!< empty dummy vector
};
// *********************************************************************
class RefinedMesh: public Mesh
{
public:
/**
* Constructs a refined mesh according to the marked elements in @p ibref.
*
* If the vector @p ibref has size 0 then all elements will be refined.
*
* @param[in] cmesh original coarse mesh to be refined.
* @param[in] ibref vector containing True/False regarding refinement for each element
*
*/
//explicit RefinedMesh(Mesh const &cmesh, std::vector<bool> const &ibref = std::vector<bool>(0));
RefinedMesh(Mesh const &cmesh, std::vector<bool> const &ibref);
//RefinedMesh(Mesh const &cmesh, std::vector<bool> const &ibref);
/**
* Constructs a refined mesh by regular refinement of all elements.
*
* @param[in] cmesh original coarse mesh to be refined.
*
*/
explicit RefinedMesh(Mesh const &cmesh)
: RefinedMesh(cmesh, std::vector<bool>(0))
{}
RefinedMesh(RefinedMesh const &) = delete;
//RefinedMesh(RefinedMesh const&&) = delete;
RefinedMesh &operator=(RefinedMesh const &) = delete;
//RefinedMesh& operator=(RefinedMesh const&&) = delete;
/**
* Destructor.
*/
virtual ~RefinedMesh() override;
/**
* Refines the mesh according to the marked elements.
*
* @param[in] ibref vector containing True/False regarding refinement for each element
*
* @return the refined mesh
*
*/
Mesh RefineElements(std::vector<bool> const &ibref);
/**
* Refines all elements in the actual mesh.
*
* @param[in] nref number of regular refinements to perform
*
*/
void RefineAllElements(int nref = 1);
/**
* Accesses the father-of-nodes relation.
*
* @return father-of-nodes relation [nnodes][2]
*
*/
std::vector<int> const &GetFathersOfVertices() const override
{
return _vfathers;
}
protected:
/**
* Checks whether the array dimensions fit to their appropriate size parameters
* @return true if all array dimensions match their size parameters.
*/
bool Check_array_dimensions() const override;
/**
* Permutes the vertex information in an edge based mesh.
*
* @param[in] old2new new indices of original vertices.
*/
void PermuteVertices_EdgeBased(std::vector<int> const &old2new);
private:
//Mesh const & _cmesh; //!< coarse mesh
std::vector<bool> const _ibref; //!< refinement info
int _nref; //!< number of regular refinements performed
std::vector<int> _vfathers; //!< stores the 2 fathers of each vertex (equal fathers denote original coarse vertex)
};
// *********************************************************************
class gMesh_Hierarchy
{
public:
/**
* Constructs mesh hierarchy of @p nlevel levels starting with coarse mesh @p cmesh.
* The coarse mesh @p cmesh will be @p nlevel-1 times geometrically refined.
*
* @param[in] cmesh initial coarse mesh
* @param[in] nlevel number levels in mesh hierarchy
*
*/
gMesh_Hierarchy(Mesh const &cmesh, int nlevel);
size_t size() const
{
return _gmesh.size();
}
/**
* Access to mesh @p lev from mesh hierarchy.
*
* @return mesh @p lev
* @warning An out_of_range exception might be thrown.
*
*/
Mesh const &operator[](int lev) const
{
return *_gmesh.at(lev);
}
/**
* Access to finest mesh in mesh hierarchy.
*
* @return finest mesh
*
*/
Mesh const &finest() const
{
return *_gmesh.back();
}
/**
* Access to coarsest mesh in mesh hierarchy.
*
* @return coarsest mesh
*
*/
Mesh const &coarsest() const
{
return *_gmesh.front();
}
private:
std::vector<std::shared_ptr<Mesh>> _gmesh; //!< mesh hierarchy from coarse ([0]) to fine.
};
// *********************************************************************
/**
* 2D finite element mesh of the square consisting of linear triangular elements.
*/
class Mesh_2d_3_square: public Mesh
{
public:
/**
* Generates the f.e. mesh for the unit square.
*
* @param[in] nx number of discretization intervals in x-direction
* @param[in] ny number of discretization intervals in y-direction
* @param[in] myid my MPI-rank / subdomain
* @param[in] procx number of ranks/subdomains in x-direction
* @param[in] procy number of processes in y-direction
*/
Mesh_2d_3_square(int nx, int ny, int myid = 0, int procx = 1, int procy = 1);
/**
* Destructor
*/
~Mesh_2d_3_square() override;
/**
* Set solution vector based on a tensor product grid in the rectangle.
* @param[in] u solution vector
*/
void SetU(std::vector<double> &u) const;
/**
* Set right hand side (rhs) vector on a tensor product grid in the rectangle.
* @param[in] f rhs vector
*/
void SetF(std::vector<double> &f) const;
/**
* Determines the indices of those vertices with Dirichlet boundary conditions
* @return index vector.
*/
std::vector<int> Index_DirichletNodes() const override;
std::vector<int> Index_BoundaryNodes() const override;
/**
* Stores the values of vector @p u of (sub)domain into a file @p name for further processing in gnuplot.
* The file stores row-wise the x- and y-coordinates together with the value from @p u .
* The domain [@p xl, @p xr] x [@p yb, @p yt] is discretized into @p nx x @p ny intervals.
*
* @param[in] name basename of file name (file name will be extended by the rank number)
* @param[in] u local vector
*
* @warning Assumes tensor product grid in unit square; numbered row-wise
* (as generated in class constructor).
* The output is provided for tensor product grid visualization
* ( similar to Matlab-surf() ).
*
* @see Mesh_2d_3_square
*/
void SaveVectorP(std::string const &name, std::vector<double> const &u) const;
// still to be implemented in this class:
// GetBound(), AddBound()
// or, better, a generalized way with indices and their appropriate ranks for MPI communication
private:
/**
* Determines the coordinates of the discretization nodes of the domain [@p xl, @p xr] x [@p yb, @p yt]
* which is discretized into @p nx x @p ny intervals.
* @param[in] nx number of discretization intervals in x-direction
* @param[in] ny number of discretization intervals in y-direction
* @param[in] xl x-coordinate of left boundary
* @param[in] xr x-coordinate of right boundary
* @param[in] yb y-coordinate of lower boundary
* @param[in] yt y-coordinate of upper boundary
* @param[out] xc coordinate vector of length 2n with x(2*k,2*k+1) as coordinates of node k
*/
void GetCoordsInRectangle(int nx, int ny, double xl, double xr, double yb, double yt,
double xc[]);
/**
* Determines the element connectivity of linear triangular elements of a FEM discretization
* of a rectangle using @p nx x @p ny equidistant intervals for discretization.
* @param[in] nx number of discretization intervals in x-direction
* @param[in] ny number of discretization intervals in y-direction
* @param[out] ia element connectivity matrix with ia(3*s,3*s+1,3*s+2) as node numbers of element s
*/
void GetConnectivityInRectangle(int nx, int ny, int ia[]);
private:
int _myid; //!< my MPI rank
int _procx; //!< number of MPI ranks in x-direction
int _procy; //!< number of MPI ranks in y-direction
std::array<int, 4> _neigh; //!< MPI ranks of neighbors (negative: no neighbor but b.c.)
int _color; //!< red/black coloring (checker board) of subdomains
double _xl; //!< x coordinate of lower left corner of square
double _xr; //!< x coordinate of lower right corner of square
double _yb; //!< y coordinate of lower left corner of square
double _yt; //!< y coordinate of upper right corner of square
int _nx; //!< number of intervals in x-direction
int _ny; //!< number of intervals in y-direction
};
// *********************************************************************
#endif

@ -0,0 +1,105 @@
// MPI code in C++.
// See [Gropp/Lusk/Skjellum, "Using MPI", p.33/41 etc.]
// and /opt/mpich/include/mpi2c++/comm.h for details
#include "geom.h"
#include "par_geom.h"
#include "vdop.h"
#include <cassert>
#include <cmath>
#include <iostream>
#include <mpi.h> // MPI
#include <omp.h> // OpenMP
using namespace std;
int main(int argc, char **argv )
{
MPI_Init(&argc, &argv);
MPI_Comm const icomm(MPI_COMM_WORLD);
omp_set_num_threads(1); // don't use OMP parallelization for a start
//
{
int np;
MPI_Comm_size(icomm, &np);
// assert(4 == np); // example is only provided for 4 MPI processes
}
// #####################################################################
// ---- Read the f.e. mesh and the mapping of elements to MPI processes
//Mesh const mesh_c("square_4.txt"); // Files square_4.txt and square_4_sd.txt are needed
ParMesh const mesh("square",icomm);
int const numprocs = mesh.NumProcs();
int const myrank = mesh.MyRank();
if ( 0 == myrank ) {
cout << "\n There are " << numprocs << " processes running.\n \n";
}
int const check_rank=1; // choose the MPI process you would like to check the mesh
//if ( check_rank == myrank ) mesh.Debug();
//if ( check_rank == myrank ) mesh.DebugEdgeBased();
// ##########################################################################
// ---- allocate local vectors and check scalar product and vector accumulation
if (check_rank==myrank) {printf("\n\n-------------- Task 9 --------------\n\n");}
if (check_rank==myrank) cout << "Mesh coordinates: " << mesh.GetCoords() << endl << endl;
MPI_Barrier(icomm);
vector<double> xl(mesh.Nnodes(), 1.0);
// for visualization I had to type in terminal:
// export LIBGL_ALWAYS_SOFTWARE=1
if (check_rank==myrank) mesh.Visualize(xl);
double ss = mesh.dscapr(xl,xl);
cout << myrank << " : scalar : " << ss << endl;
mesh.VecAccu(xl);
if (check_rank==myrank) mesh.Visualize(xl);
MPI_Barrier(icomm);
if (check_rank==myrank) {printf("\n\n-------------- Task 10 --------------\n\n");}
vector<int> y(mesh.Nnodes(), 1);
mesh.VecAccuInt(y);
if (check_rank==myrank) {
printf("Accumulated integer vector y:\n");
for (int i : y) {
cout << i << " ";
}
}
MPI_Barrier(icomm);
if (check_rank==myrank) {printf("\n\n-------------- Task 11 --------------\n\n");}
int global_nodes = mesh.GlobalNodes();
if (check_rank==myrank) cout << "Global nodes: " << global_nodes << endl;
MPI_Barrier(icomm);
if (check_rank==myrank) {printf("\n\n-------------- Task 12 --------------\n\n");}
// Set xl to 1s vector again
for (size_t k=0; k<xl.size(); ++k)
{
xl[k] = 1.0;
}
if (check_rank==myrank) mesh.Visualize(xl);
mesh.Average(xl);
if (check_rank==myrank) mesh.Visualize(xl);
// -------------- Task 13 --------------
// Should work with 2, 4 and 6 subdomains (change run target in GCC_default.mk)
// Check subdomains with different values for check_rank (0-5)
MPI_Finalize();
return 0;
}

@ -0,0 +1,626 @@
// see: http://llvm.org/docs/CodingStandards.html#include-style
#include "vdop.h"
//#include "geom.h"
#include "par_geom.h"
#include <algorithm>
#include <array>
#include <cassert>
#include <cmath>
#include <ctime> // contains clock()
#include <fstream>
#include <iostream>
#include <list>
#include <numeric> // accumulate()
#include <string>
#include <vector>
using namespace std;
ParMesh::ParMesh(int ndim, int nvert_e, int ndof_e, int nedge_e, MPI_Comm const &icomm)
: Mesh(ndim, nvert_e, ndof_e, nedge_e),
_icomm(icomm), _numprocs(-1), _myrank(-1),
_v_l2g(0), _t_l2g(0), _v_g2l{{}}, _t_g2l{{}}, _valence(0),
_sendbuf(0), _sendcounts(0), _sdispls(0),
_loc_itf(0), _gloc_itf(0), _buf2loc(0)
{
MPI_Comm_size(icomm, &_numprocs);
MPI_Comm_rank(icomm, &_myrank);
}
ParMesh::~ParMesh()
{}
ParMesh::ParMesh(std::string const &sname, MPI_Comm const &icomm)
: ParMesh(2, 3, 3, 3, icomm) // two dimensions, 3 vertices, 3 dofs, 3 edges per element
{
//const int numprocs = _icomm.Get_size();
const string NS = "_" + to_string(_numprocs);
const string fname = sname + NS + ".txt";
//cout << "############ " << fname << endl;
ReadVertexBasedMesh(fname);
cout << "\n End of sequential File read \n";
// ------------------------------------------------------------------------------
// Until this point a l l processes possess a l l mesh info in g l o b a l numbering
//
// Now, we have to select the data belonging to my_rank
// and we have to create the mapping local to global (l2g) and vice versa (g2l)
// ------------------------------------------------------------------------------
// save the global node mesh (maybe we need it later)
DeriveEdgeFromVertexBased(); // and even more
Mesh global_mesh(*this); // requires a l o t of memory
Del_EdgeConnectivity();
// read the subdomain info
const string dname = sname + NS + "_sd" + ".txt";
vector<int> t2d = ReadElementSubdomains(dname); // global mapping triangle to subdomain for all elements
//const int myrank = _icomm.Get_rank();
Transform_Local2Global_Vertex(_myrank, t2d); // Vertex based mesh: now in l o c a l indexing
DeriveEdgeFromVertexBased(); // Generate also the l o c a l edge based information
Generate_VectorAdd();
// Now we have to organize the MPI communication of vertices on the subdomain interfaces
return;
}
vector<int> ParMesh::ReadElementSubdomains(string const &dname)
{
ifstream ifs(dname);
if (!(ifs.is_open() && ifs.good())) {
cerr << "ParMesh::ReadElementSubdomain: Error cannot open file " << dname << endl;
assert(ifs.is_open());
}
int const OFFSET{1}; // Matlab to C indexing
cout << "ASCI file " << dname << " opened" << endl;
// Read some mesh constants
int nelem;
ifs >> nelem;
cout << nelem << " " << Nelems() << endl;
assert( Nelems() == nelem);
// Allocate memory
vector<int> t2d(nelem, -1);
// Read element mapping
for (int k = 0; k < nelem; ++k) {
int tmp;
ifs >> tmp;
//t2d[k] = tmp - OFFSET;
// 2020-01-08
t2d[k] = min(tmp, NumProcs()) - OFFSET;
}
return t2d;
}
void ParMesh::Transform_Local2Global_Vertex(int const myrank, vector<int> const &t2d)
{
// number of local elements
const int l_ne = count(t2d.cbegin(), t2d.cend(), myrank);
//cout << myrank << ":: " << lne << endl;
vector<int> l_ia(l_ne * NverticesElements(), -1); // local elements still with global vertex numbers
_t_l2g.resize(l_ne, -1);
int lk = 0;
for (size_t k = 0; k < t2d.size(); ++k) {
if (myrank == t2d[k]) {
//if (0==myrank)
//{
//cout << lk << " k " << t2d[k] << endl;
//}
l_ia[3 * lk ] = _ia[3 * k ];
l_ia[3 * lk + 1] = _ia[3 * k + 1];
l_ia[3 * lk + 2] = _ia[3 * k + 2]; // local elements still with global vertex numbers
_t_l2g[lk] = k; // elements: local to global mapping
_t_g2l[k] = lk; // global to local
++lk;
}
}
// Checks:
assert( count(l_ia.cbegin(), l_ia.cend(), -1) == 0 );
assert( count(_t_l2g.cbegin(), _t_l2g.cend(), -1) == 0 );
// Vertices: local to global mapping
auto tmp = l_ia;
sort(tmp.begin(), tmp.end());
auto ip = unique(tmp.begin(), tmp.end());
tmp.erase(ip, tmp.end());
_v_l2g = tmp; // Vertices: local to global mapping
for (size_t lkv = 0; lkv < _v_l2g.size(); ++lkv) {
_v_g2l[_v_l2g[lkv]] = lkv; // global to local
}
// Boundary edges
vector<int> l_bedges;
vector<int> l_sdedges;
for (size_t b = 0; b < _bedges.size(); b += 2) {
int const v1 = _bedges[b ]; // global vertex numbers
int const v2 = _bedges[b + 1];
try {
int const lv1 = _v_g2l.at(v1); // map[] would add that element
int const lv2 = _v_g2l.at(v2); // but at() throws an exception
l_bedges.push_back(lv1);
l_bedges.push_back(lv2); // Boundaries: already in local indexing
// 2020-01-08
l_sdedges.push_back(_sdedges[b ]);
l_sdedges.push_back(_sdedges[b+1]);
}
catch (std::out_of_range & err) {
//cerr << ".";
}
}
// number of local vertices
const int l_nn = _v_l2g.size();
vector<double> l_xc(Ndims()*l_nn);
for (int lkk = 0; lkk < l_nn; ++lkk) {
int k = _v_l2g.at(lkk);
l_xc[2 * lkk ] = _xc[2 * k ];
l_xc[2 * lkk + 1] = _xc[2 * k + 1];
}
// Now, we represent the vertex mesh in l o c a l numbering
// elements
for (size_t i = 0; i < l_ia.size(); ++i) {
l_ia[i] = _v_g2l.at(l_ia[i]); // element vertices: global to local
}
SetNelem(l_ne);
_ia = l_ia;
// boundary
_bedges = l_bedges;
_sdedges = l_sdedges;
// coordinates
SetNnode(l_nn);
_xc = l_xc;
return;
}
void ParMesh::Generate_VectorAdd()
{
// Some checks
int lnn = Nnodes(); // local number of vertices
assert(static_cast<int>(_v_l2g.size()) == lnn);
int ierr{-12345};
// ---- Determine global largest vertex index
int gidx_max{-1}; // global largest vertex index
int lmax = *max_element(_v_l2g.cbegin(), _v_l2g.cend());
MPI_Allreduce(&lmax, &gidx_max, 1, MPI_INT, MPI_MAX, _icomm);
int gidx_min{-1}; // global smallest vertex index
int lmin = *min_element(_v_l2g.cbegin(), _v_l2g.cend());
MPI_Allreduce(&lmin, &gidx_min, 1, MPI_INT, MPI_MIN, _icomm);
//cout << gidx_min << " " << gidx_max << endl;
assert(0 == gidx_min); // global indices have to start with 0
// ---- Determine for all global vertices the number of subdomains it belongs to
vector<int> global(gidx_max+1, 0); // global scalar array for vertices
for (auto const gidx : _v_l2g) global[gidx] = 1;
// https://www.mpi-forum.org/docs/mpi-2.2/mpi22-report/node109.htm
ierr = MPI_Allreduce(MPI_IN_PLACE, global.data(), global.size(), MPI_INT, MPI_SUM, _icomm);
//if (0 == MyRank()) cout << global << endl;
//MPI_Barrier(_icomm);
//cout << _xc[2*_v_g2l.at(2)] << " , " << _xc[2*_v_g2l.at(2)+1] << endl;
//MPI_Barrier(_icomm);
// now, global[] contains the number of subdomains a global vertex belongs to
if ( count(global.cbegin(), global.cend(), 0) > 0 )
cerr << "\n !!! Non-continuous global vertex indexing !!!\n";
// ---- Determine local interface vertices ( <==> global[] > 1 )
// _loc_itf, neigh_itf
//vector<int> loc_itf; // local indices of interface vertices on this MPI process
for (size_t lk = 0; lk < _v_l2g.size(); ++lk) {
int const gk = _v_l2g[lk]; // global index of local vertex lk
if ( global[gk] > 1 ) {
_loc_itf.push_back(lk); // local indices of interface vertices on this MPI process
}
}
//MPI_Barrier(_icomm);
//if (0 == MyRank()) cout << "\n..._loc_itf...\n" << _loc_itf << "\n......\n";
//MPI_Barrier(_icomm);
// ---- global indices of local interface vertices
//auto gloc_itf(_loc_itf);
_gloc_itf=_loc_itf;
for_each(_gloc_itf.begin(), _gloc_itf.end(), [this] (auto & v) -> void { v = _v_l2g[v];} );
//MPI_Barrier(_icomm);
//if (0 == MyRank()) cout << "\n..._gloc_itf...\n" << _gloc_itf << "\n......\n";
//DebugVector(_gloc_itf,"_gloc_itf");
// ---- Determine the global length of interfaces
vector<int> vnn(NumProcs(), -1); // number of interface vertices per MPI rank
int l_itf(_loc_itf.size()); // # local interface vertices
ierr = MPI_Allgather(&l_itf, 1, MPI_INT, vnn.data(), 1, MPI_INT, _icomm);
assert(0 == ierr);
//cout << vnn << endl;
// ---- Now we consider only the interface vertices
int snn = accumulate(vnn.cbegin(), vnn.cend(), 0); // required length of array for global interface indices
//cout << snn << " " << gnn << endl;
vector<int> dispnn(NumProcs(), 0) ; // displacement of interface vertices per MPI rank
partial_sum(vnn.cbegin(), vnn.cend() - 1, dispnn.begin() + 1);
//cout << dispnn << endl;
// ---- Get the global indices for all global interfaces
vector<int> g_itf(snn, -1); // collects all global indices of the global interfaces
// https://www.mpich.org/static//docs/v3.0.x/www3/MPI_Gatherv.html
ierr = MPI_Gatherv( _gloc_itf.data(), _gloc_itf.size(), MPI_INT,
g_itf.data(), vnn.data(), dispnn.data(), MPI_INT, 0, _icomm);
assert(0 == ierr);
// https://www.mpich.org/static/docs/v3.1/www3/MPI_Bcast.html
ierr = MPI_Bcast(g_itf.data(), g_itf.size(), MPI_INT, 0, _icomm);
assert(0 == ierr); // Now, each MPI rank has the all global indices of the global interfaces
//MPI_Barrier(_icomm);
//if (MyRank() == 0) cout << "\n...g_itf...\n" << g_itf << "\n......\n";
//MPI_Barrier(_icomm);
// ----- Determine all MPI ranks a local interface vertex belongs to
vector<vector<int>> neigh_itf(_loc_itf.size());// subdomains a local interface vertex belongs to
for (size_t lk = 0; lk < _loc_itf.size(); ++lk) {
const int gvert = _gloc_itf[lk]; // global index of local interface node lk
for (int rank = 0; rank < NumProcs(); ++rank) {
auto const startl = g_itf.cbegin() + dispnn[rank];
auto const endl = startl + vnn[rank];
if ( find( startl, endl, gvert) != endl) {
neigh_itf[lk].push_back(rank);
}
}
}
// ---- check the available info in _loc_itf[lk], _gloc_itf[lk], neigh_itf[lk]
//MPI_Barrier(_icomm);
////if (MyRank()==0) cout << "\n...neigh_itf ...\n" << neigh_itf << endl;
//if (MyRank() == 0) {
//for (size_t lk = 0; lk < _loc_itf.size(); ++lk ) {
//cout << lk << " : local idx " << _loc_itf[lk] << " , global idx " << _gloc_itf[lk];
//cout << " with MPI ranks " << neigh_itf[lk] << endl;
//}
//}
//MPI_Barrier(_icomm);
// ---- store the valence (i.e., the number of subdomains it belongs to) of all local vertices
_valence.resize(Nnodes(),1);
for (size_t lk = 0; lk < _loc_itf.size(); ++lk)
{
_valence[_loc_itf[lk]] = neigh_itf[lk].size();
}
//DebugVector(_valence,"_valence",_icomm);
// ---- We are going to use MPI_Alltoallv for data exchange on interfaces
// https://www.mpi-forum.org/docs/mpi-3.1/mpi31-report/node109.htm#Node109
// https://www.open-mpi.org/doc/v4.0/man3/MPI_Alltoallv.3.php
//int MPI_Alltoallv(const void* sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void* recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, MPI_Comm comm)
//
// MPI_Alltoallv needs:
// vector<double> sendbuf (MPI_IN_PLACE: used also as recvbuf)
// vector<int> sendcounts (the same as for recv)
// vector<int> sdispls (the same as for recv)
//
// We need to map the interface vertices onto the sendbuffer:
// vector<int> loc_itf local index of interface vertex lk
// vector<int> gloc_itf global index of interface vertex lk
// vector<int> buf2loc local indices of sendbuffer positions (the same as for recv)
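// Illustration (hypothetical numbers, not taken from a real mesh): assume rank 0 owns
// three interface vertices with local indices 4, 7, 9 whose neighbour sets are
// {0,1}, {0,1,2}, {0,2} (each set also contains the own rank). The loops below then yield
//    _sendcounts = [3, 2, 2],  _sdispls = [0, 3, 5],  nbuffer = 7,
//    _buf2loc    = [4, 7, 9,  4, 7,  7, 9]
// i.e., vertices 4,7,9 are exchanged with rank 0 itself, 4,7 with rank 1 and 7,9 with rank 2.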
// ---- Determine sendcounts[] and sdispls[] from neigh_itf[]
//vector<int> _sendcounts(NumProcs(), 0);
_sendcounts.resize(NumProcs(), 0);
for (size_t lk = 0; lk < _loc_itf.size(); ++lk ) {
auto const &kneigh = neigh_itf[lk];
for (size_t ns = 0; ns < kneigh.size(); ++ns) {
++_sendcounts[kneigh[ns]];
}
}
//if (MyRank() == 0) cout << "\n..._sendcounts ...\n" << _sendcounts << endl;
//vector<int> _sdispls(NumProcs(), 0);
_sdispls.resize(NumProcs(), 0);
partial_sum(_sendcounts.cbegin(), _sendcounts.cend() - 1, _sdispls.begin() + 1);
//vector<int> _sdispls(NumProcs()+1, 0);
//partial_sum(_sendcounts.cbegin(), _sendcounts.cend(), _sdispls.begin() + 1);
//if (MyRank() == 0) cout << "\n..._sdispls ...\n" << _sdispls << endl;
// ---- Determine size of buffer 'nbuffer' and mapping 'buf2loc'
int const nbuffer = accumulate(_sendcounts.cbegin(), _sendcounts.cend(), 0);
//vector<int> _buf2loc(nbuffer, -1);
_buf2loc.resize(nbuffer, -1);
int buf_idx = 0; // position in buffer
for (int rank = 0; rank < NumProcs(); ++rank) {
assert( buf_idx == _sdispls[rank]);
for (size_t lk = 0; lk < _loc_itf.size(); ++lk ) {
auto const &kneigh = neigh_itf[lk];
if (find(kneigh.cbegin(),kneigh.cend(),rank)!=kneigh.cend())
{
_buf2loc[buf_idx] = _loc_itf[lk];
++buf_idx;
}
}
}
//if (MyRank() == 0) cout << "\n...buf2loc ...\n" << buf2loc << endl;
//DebugVector(buf2loc,"buf2loc",_icomm);
// ---- Allocate send/recv buffer
//vector<double> _sendbuf(nbuffer,-1.0);
_sendbuf.resize(nbuffer,-1.0);
assert(CheckInterfaceExchange_InPlace());
cout << " Check of data exchange (InPlace) successful!\n";
assert(CheckInterfaceExchange());
cout << " Check of data exchange successful!\n";
assert(CheckInterfaceAdd_InPlace());
cout << " Check of data add (InPlace) successful!\n";
assert(CheckInterfaceAdd());
cout << " Check of data add successful!\n";
vector<double> x(Nnodes(),-1.0);
VecAccu(x);
cout << " VecAccu (InPlace) successful!\n";
return;
}
bool ParMesh::CheckInterfaceExchange_InPlace() const
{
vector<double> x(Nnodes(),-1.0);
copy(_v_l2g.cbegin(),_v_l2g.cend(),x.begin()); // init x with global vertex indices
for(size_t ls = 0; ls<_sendbuf.size(); ++ls)
{
_sendbuf[ls] = x[_buf2loc.at(ls)];
}
int ierr = MPI_Alltoallv(MPI_IN_PLACE, _sendcounts.data(), _sdispls.data(), MPI_DOUBLE,
_sendbuf.data(), _sendcounts.data(), _sdispls.data(), MPI_DOUBLE, _icomm);
assert(ierr==0);
//DebugVector(_sendbuf,"_sendbuf",_icomm);
vector<double> y(x);
for(size_t lk = 0; lk<_loc_itf.size(); ++lk) y[_loc_itf.at(lk)] = -1.0; // only for interface nodes
for(size_t ls = 0; ls<_sendbuf.size(); ++ls)
{
y[_buf2loc.at(ls)] = _sendbuf[ls];
}
double const eps=1e-10;
bool bv = equal(x.cbegin(),x.cend(),y.cbegin(),
[eps](double a, double b) -> bool
{ return std::abs(a-b)<eps*(1.0+0.5*(std::abs(a)+ std::abs(b))); }
);
return bv;
}
bool ParMesh::CheckInterfaceExchange() const
{
vector<double> x(Nnodes(),-1.0);
copy(_v_l2g.cbegin(),_v_l2g.cend(),x.begin()); // init x with global vertex indices
for(size_t ls = 0; ls<_sendbuf.size(); ++ls)
{
_sendbuf[ls] = x[_buf2loc.at(ls)];
}
vector<double> recvbuf(_sendbuf.size());
int ierr = MPI_Alltoallv(_sendbuf.data(), _sendcounts.data(), _sdispls.data(), MPI_DOUBLE,
recvbuf.data(), _sendcounts.data(), _sdispls.data(), MPI_DOUBLE, _icomm);
//DebugVector(_sendbuf,"_sendbuf",_icomm);
//DebugVector(recvbuf,"recvbuf",_icomm);
assert(ierr==0);
vector<double> y(x);
for(size_t lk = 0; lk<_loc_itf.size(); ++lk) y[_loc_itf.at(lk)] = -1.0; // only for interface nodes
for(size_t ls = 0; ls<recvbuf.size(); ++ls)
{
y[_buf2loc.at(ls)] = recvbuf[ls];
}
//cout << "WRONG : " << count(y.cbegin(),y.cend(), -1.0) << endl;
double const eps=1e-10;
bool bv = equal(x.cbegin(),x.cend(),y.cbegin(),
[eps](double a, double b) -> bool
{ return std::abs(a-b)<eps*(1.0+0.5*(std::abs(a)+ std::abs(b))); }
);
return bv;
}
bool ParMesh::CheckInterfaceAdd_InPlace() const
{
vector<double> x(Nnodes(),-1.0);
for (size_t i=0; i<x.size(); ++i)
{
x[i] = _xc[2*i]+_xc[2*i+1]; // init x with coordinate values
}
for(size_t ls = 0; ls<_sendbuf.size(); ++ls)
{
_sendbuf[ls] = x[_buf2loc.at(ls)];
}
int ierr = MPI_Alltoallv(MPI_IN_PLACE, _sendcounts.data(), _sdispls.data(), MPI_DOUBLE,
_sendbuf.data(), _sendcounts.data(), _sdispls.data(), MPI_DOUBLE, _icomm);
assert(ierr==0);
//DebugVector(_sendbuf,"_sendbuf",_icomm);
vector<double> y(x);
for(size_t lk = 0; lk<_loc_itf.size(); ++lk) y[_loc_itf.at(lk)] = 0.0; // only for interface nodes
for(size_t ls = 0; ls<_sendbuf.size(); ++ls)
{
y[_buf2loc.at(ls)] += _sendbuf[ls];
}
MPI_Barrier(_icomm);
//DebugVector(x,"x",_icomm);
//DebugVector(y,"y",_icomm);
for (size_t i= 0; i<y.size(); ++i) y[i]/=_valence[i]; // divide by valence
double const eps=1e-10;
bool bv = equal(x.cbegin(),x.cend(),y.cbegin(),
[eps](double a, double b) -> bool
{ return std::abs(a-b)<eps*(1.0+0.5*(std::abs(a)+ std::abs(b))); }
);
return bv;
}
bool ParMesh::CheckInterfaceAdd() const
{
vector<double> x(Nnodes(),-1.0);
for (size_t i=0; i<x.size(); ++i)
{
//x[i] = _xc[2*i]+_xc[2*i+1]; // init x with coordinate values
x[i] = _v_l2g[i];
}
for(size_t ls = 0; ls<_sendbuf.size(); ++ls)
{
_sendbuf[ls] = x[_buf2loc.at(ls)];
}
vector<double> recvbuf(_sendbuf.size());
int ierr = MPI_Alltoallv(_sendbuf.data(), _sendcounts.data(), _sdispls.data(), MPI_DOUBLE,
recvbuf.data(), _sendcounts.data(), _sdispls.data(), MPI_DOUBLE, _icomm);
//DebugVector(_sendbuf,"_sendbuf",_icomm);
//DebugVector(recvbuf,"recvbuf",_icomm);
assert(ierr==0);
vector<double> y(x);
for(size_t lk = 0; lk<_loc_itf.size(); ++lk) y[_loc_itf.at(lk)] = 0.0; // only for interface nodes
for(size_t ls = 0; ls<recvbuf.size(); ++ls)
{
//if (0==MyRank()) cout << ls << ": " << _buf2loc.at(ls) << " " << y[_buf2loc.at(ls)] << "("<< x[_buf2loc.at(ls)] << ")" << " " << recvbuf[ls] << " (" << _sendbuf[ls] << ")" << endl;
y[_buf2loc.at(ls)] += recvbuf[ls];
}
MPI_Barrier(_icomm);
//DebugVector(x,"x",_icomm);
//DebugVector(y,"y",_icomm);
for (size_t i= 0; i<y.size(); ++i) y[i]/=_valence[i]; // divide by valence
double const eps=1e-10;
bool bv = equal(x.cbegin(),x.cend(),y.cbegin(),
[eps](double a, double b) -> bool
{ return std::abs(a-b)<eps*(1.0+0.5*(std::abs(a)+ std::abs(b))); }
);
return bv;
}
// ----------
void ParMesh::VecAccu(std::vector<double> &w) const
{
for(size_t ls = 0; ls<_sendbuf.size(); ++ls)
{
_sendbuf[ls] = w[_buf2loc.at(ls)];
}
int ierr = MPI_Alltoallv(MPI_IN_PLACE, _sendcounts.data(), _sdispls.data(), MPI_DOUBLE,
_sendbuf.data(), _sendcounts.data(), _sdispls.data(), MPI_DOUBLE, _icomm);
assert(ierr==0);
//DebugVector(_sendbuf,"_sendbuf",_icomm);
for(size_t lk = 0; lk<_loc_itf.size(); ++lk) w[_loc_itf.at(lk)] = 0.0; // only for interface nodes
for(size_t ls = 0; ls<_sendbuf.size(); ++ls)
{
w[_buf2loc.at(ls)] += _sendbuf[ls];
}
return;
}
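// Illustration (hypothetical numbers): for a vertex shared by ranks 0 and 1 that holds the
// local values 3.0 and 4.0, VecAccu() overwrites the entry on both ranks with the
// accumulated value 7.0; entries of non-interface vertices remain unchanged.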
// ##########################################################################
// ##########################################################################
// ---- EX10 ----
void ParMesh::VecAccuInt(std::vector<int> &w) const
{
// the int values are transported via the double send buffer (exact for |value| < 2^53)
for(size_t ls = 0; ls<_sendbuf.size(); ++ls)
{
_sendbuf[ls] = static_cast<double>(w[_buf2loc.at(ls)]);
}
int ierr = MPI_Alltoallv(MPI_IN_PLACE, _sendcounts.data(), _sdispls.data(), MPI_DOUBLE,
_sendbuf.data(), _sendcounts.data(), _sdispls.data(), MPI_DOUBLE, _icomm);
assert(ierr==0);
//DebugVector(_sendbuf,"_sendbuf",_icomm);
for(size_t lk = 0; lk<_loc_itf.size(); ++lk) w[_loc_itf.at(lk)] = 0; // only for interface nodes
for(size_t ls = 0; ls<_sendbuf.size(); ++ls)
{
w[_buf2loc.at(ls)] += static_cast<int>(_sendbuf[ls]);
}
return;
}
// ---- EX11 ----
int ParMesh::GlobalNodes() const
{
// Each global node contributes 1/valence from every subdomain it belongs to,
// so the weighted local sums add up to the global number of nodes.
double local_count = 0.0; // double accumulator avoids truncating the 1/valence terms
for (int i=0; i<Nnodes(); ++i) {
local_count += 1.0 / _valence[i];
}
double global_nodes = 0.0;
MPI_Allreduce(&local_count, &global_nodes, 1, MPI_DOUBLE, MPI_SUM, _icomm);
return static_cast<int>(global_nodes + 0.5); // round to the nearest integer
}
// ---- EX12 ----
void ParMesh::Average(std::vector<double> &w) const
{
for(size_t ls = 0; ls<_sendbuf.size(); ++ls)
{
_sendbuf[ls] = w[_buf2loc.at(ls)];
}
int ierr = MPI_Alltoallv(MPI_IN_PLACE, _sendcounts.data(), _sdispls.data(), MPI_DOUBLE,
_sendbuf.data(), _sendcounts.data(), _sdispls.data(), MPI_DOUBLE, _icomm);
assert(ierr==0);
//DebugVector(_sendbuf,"_sendbuf",_icomm);
for(size_t lk = 0; lk<_loc_itf.size(); ++lk) w[_loc_itf.at(lk)] = 0.0; // only for interface nodes
for(size_t ls = 0; ls<_sendbuf.size(); ++ls)
{
w[_buf2loc.at(ls)] += _sendbuf[ls];
}
// Divide each interface node's value by its valence
for(size_t lk = 0; lk<_loc_itf.size(); ++lk) w[_loc_itf.at(lk)] /= _valence[_loc_itf.at(lk)];
}
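// Usage note (a sketch, not part of the original file; 'mesh' and 'r' are hypothetical names):
// for an additively distributed vector r (each rank holds only its local contribution) and an
// accumulated copy w, the pairing <w,r> summed over all ranks gives the global inner product:
//     std::vector<double> w(r);             // r : locally assembled (distributed) vector
//     mesh.VecAccu(w);                      // w : accumulated version of r
//     double const s = mesh.dscapr(w, r);   // globally correct inner product <r,r>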

View file

@ -0,0 +1,150 @@
#ifndef PAR_GEOM_FILE
#define PAR_GEOM_FILE
#include "geom.h"
#include "vdop.h"
#include <array>
#include <functional> // function; C++11
#include <iostream>
#include <map>
#include <memory> // shared_ptr
#include <mpi.h> // MPI
#include <string>
#include <vector>
class ParMesh: public Mesh
{
public:
/**
* Constructor initializing the members with default values.
*
* @param[in] ndim space dimensions (dimension for coordinates)
* @param[in] nvert_e number of vertices per element (dimension for connectivity)
* @param[in] ndof_e degrees of freedom per element (= @p nvert_e for linear elements)
* @param[in] nedge_e number of edges per element (= @p nvert_e for linear elements in 2D)
* @param[in] icomm MPI communicator
*/
explicit ParMesh(int ndim, int nvert_e = 0, int ndof_e = 0, int nedge_e = 0, MPI_Comm const &icomm = MPI_COMM_WORLD);
ParMesh(ParMesh const &) = default;
ParMesh &operator=(ParMesh const &) = delete;
/**
* Destructor.
*
* See clang warning on
* <a href="https://stackoverflow.com/questions/28786473/clang-no-out-of-line-virtual-method-definitions-pure-abstract-c-class/40550578">weak-vtables</a>.
*/
virtual ~ParMesh();
/**
* Reads mesh data from a file.
*
* @param[in] sname suffix of file name
* @param[in] icomm MPI communicator
* @see ascii_write_mesh.m for the file format.
*/
explicit ParMesh(std::string const &sname, MPI_Comm const &icomm = MPI_COMM_WORLD);
void VecAccu(std::vector<double> &w) const;
void VecAccuInt(std::vector<int> &w) const;
int GlobalNodes() const;
void Average(std::vector<double> &w) const;
/** Inner product
* @param[in] x vector
* @param[in] y vector
* @return resulting Euclidean inner product <x,y>
*/
double dscapr(std::vector<double> const &x, std::vector<double> const &y) const
{
return par_scalar(x, y, _icomm);
}
private:
/**
* Reads the global triangle to subdomain mapping.
*
* @param[in] dname file name
*
* @see ascii_write_subdomains.m for the file format
*/
std::vector<int> ReadElementSubdomains(std::string const &dname);
/**
* Transforms the global mesh into the local mesh of subdomain @p myrank and establishes the local-to-global vertex mapping.
*
* @param[in] myrank MPI rank of this process
* @param[in] t2d global mapping triangle to subdomain for all elements (vertex based)
*/
void Transform_Local2Global_Vertex(int myrank, std::vector<int> const &t2d);
/**
* Generates the data structures for accumulating vector data on subdomain interfaces (valences, send counts, displacements and the buffer-to-local mapping used in MPI_Alltoallv).
*/
void Generate_VectorAdd();
bool CheckInterfaceExchange_InPlace() const;
bool CheckInterfaceExchange() const;
bool CheckInterfaceAdd_InPlace() const;
bool CheckInterfaceAdd() const;
public:
/** MPI rank of the calling process in communication group.
*
* @return MPI rank of the calling process
*/
int MyRank() const
{
return _myrank;
}
/** Number of MPI processes in communication group.
*
* @return Number of MPI processes
*/
int NumProcs() const
{
return _numprocs;
}
/** Returns the MPI communicator in use.
* @return MPI communicator
*/
MPI_Comm GetCommunicator() const
{
return _icomm;
}
private:
// Don't use &_icomm ==> Error
MPI_Comm const _icomm; //!< MPI communicator for the group of processes
int _numprocs; //!< number of MPI processes
int _myrank; //!< my MPI rank
std::vector<int> _v_l2g; //!< vertices: local to global mapping
std::vector<int> _t_l2g; //!< triangles: local to global mapping
std::map<int, int> _v_g2l; //!< vertices: global to local mapping
std::map<int, int> _t_g2l; //!< triangles: global to local mapping
//std::vector<int> e_l2g; //!< edges: local to global mapping
std::vector<int> _valence; //!< valence of local vertices, i.e. number of subdomains they belong to
// MPI_Alltoallv needs:
mutable std::vector<double> _sendbuf; //!< send buffer a n d receiving buffer (MPI_IN_PLACE)
std::vector<int> _sendcounts; //!< number of data to send to each MPI rank (the same as for recv)
std::vector<int> _sdispls; //!< offset of data to send to each MPI rank wrt. _sendbuf (the same as for recv)
//
// We need to map the interface vertices onto the sendbuffer:
std::vector<int> _loc_itf; //!< local index of interface vertex lk
std::vector<int> _gloc_itf; //!< global index of interface vertex lk
std::vector<int> _buf2loc; //!< local indices of sendbuffer positions (the same as for recv)
};
#endif
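For orientation, a minimal driver using this interface could look as follows. This is only a sketch, not part of the repository: the file suffix "square" is a placeholder, and it assumes that Nnodes() from the Mesh base class is publicly accessible.

#include "par_geom.h"
#include <iostream>
#include <vector>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    {
        ParMesh const mesh("square", MPI_COMM_WORLD);   // one subdomain per MPI rank
        std::vector<double> u(mesh.Nnodes(), 1.0);      // nodal vector on the local subdomain
        mesh.VecAccu(u);                                // interface entries now hold their valence
        double const s = mesh.dscapr(u, u);             // global inner product
        if (mesh.MyRank() == 0) {
            std::cout << "global nodes: " << mesh.GlobalNodes()
                      << ",  <u,u> = " << s << std::endl;
        }
    }
    MPI_Finalize();
    return 0;
}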

View file

@ -0,0 +1,71 @@
% Square:
% flatpak run org.octave.Octave <filename>
% or
% octave --no-window-system --no-gui -qf <filename>
clear all
clc
% %% L-shape
% g=[2 0 2 0 0 1 0; % #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 2 2 0 1 1 0;
% 2 2 1 1 0.5 1 0;
% 2 1 1 0.5 2 1 0;
% 2 1 0 2 2 1 0;
% 2 0 0 2 0 1 0]';
%% square
% g=[2 0 1 0 0 1 0; % #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 1 1 0 1 1 0;
% 2 1 0 1 1 1 0;
% 2 0 0 1 0 1 0]';
%% 2 squares
g=[2 0 1 0 0 1 0; % 1 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
2 1 1 0 1 1 2;
2 1 0 1 1 1 0;
2 0 0 1 0 1 0;
2 1 2 0 0 2 0; % 2 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
2 2 2 0 1 2 0;
2 2 1 1 1 2 0
]';
% %% 4 squares
% g=[2 0 1 0 0 1 0; % 1 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 1 1 0 1 1 2;
% 2 1 0 1 1 1 3;
% 2 0 0 1 0 1 0;
% 2 1 2 0 0 2 0; % 2 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 2 2 0 1 2 0;
% 2 2 1 1 1 2 4;
% % 2 1 1 1 0 2 1;
% % 2 0 1 1 1 3 1; % 3 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 1 1 1 2 3 4;
% 2 1 0 2 2 3 0;
% 2 0 0 2 1 3 0;
% % 2 1 2 1 1 4 2; % 4 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 2 2 1 2 4 0;
% 2 2 1 2 2 4 0
% % 2 1 1 2 1 4 3
% ]';
[p,e,t] = initmesh(g,'hmax',0.1);
pdemesh(p,e,t)
%% GH
% output from <https://de.mathworks.com/help/pde/ug/initmesh.html initmesh>
%
% coordinates p: [2][nnode]
% connectivity t: [4][nelem] with t(4,:) are the subdomain numbers
% edges e: [7][nedges] boundary edges
% e([1,2],:) - start/end vertex of edge
% e([3,4],:) - start/end values
% e(5,:) - segment number
% e([6,7],:) - left/right subdomain
ascii_write_mesh( p, t, e, mfilename);
ascii_write_subdomains( p, t, e, mfilename);
% tmp=t(1:3,:)

File diff suppressed because it is too large

View file

@ -0,0 +1,653 @@
652
1
2
1
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
1
2
2
1
1
2
2
2
2
2
2
2
2
1
1
1
2
1
1
1
1
2
2
2
2
1
1
1
1
2
2
2
2
1
1
1
1
1
1
1
1
1
1
1
1
2
2
2
2
2
2
2
2
2
2
1
1
2
2
1
1
1
1
1
1
2
2
1
1
2
2
2
2
2
2
2
2
1
1
1
1
1
1
1
1
2
2
2
2
1
1
1
1
1
1
2
2
2
2
1
1
2
2
2
2
2
2
2
2
1
1
1
1
1
1
1
1
2
2
1
1
1
1
2
2
2
2
1
1
1
1
1
1
2
2
2
2
2
2
1
1
1
1
1
1
2
2
1
1
2
2
1
1
2
2
2
2
2
2
2
2
2
2
1
1
1
1
1
1
1
1
1
1
2
2
2
2
2
2
1
1
2
2
1
1
1
1
2
2
2
2
2
2
2
2
2
2
1
1
2
2
2
2
1
1
1
1
1
1
1
1
2
2
2
2
1
1
2
2
2
2
1
1
1
1
1
1
1
1
2
2
2
2
2
2
2
2
2
2
1
1
1
1
1
1
2
2
2
2
2
2
1
1
1
1
1
1
1
1
2
2
2
2
1
1
1
1
1
1
2
2
2
2
2
2
2
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
2
2
2
2
1
1
1
1
1
1
1
1
2
2
2
2
2
2
1
1
2
2
1
1
2
2
1
1
2
2
2
2
2
2
1
1
2
2
1
1
1
1
1
1
1
1
2
2
2
2
1
1
1
1
1
1
1
1
1
1
2
2
2
2
2
2
2
2
2
2
1
1
2
2
2
2
2
2
2
2
1
1
1
1
1
1
2
2
2
2
2
2
2
2
2
2
1
1
1
1
1
1
1
1
1
1
2
2
2
2
2
2
1
1
2
2
2
2
2
2
2
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
2
2
1
1
1
1
1
1
2
2
2
2
2
2
1
1
1
1
2
2
1
1
2
2
1
1
1
1
1
1
2
2
2
2
2
2
1
1
2
2
2
2
2
2
2
2
1
1
1
1
1
1
1
1
2
2
2
2
2
2
2
2
1
1
1
1
1
1
2
2
2
2
2
2
1
1
2
2
2
2
1
1
1
1
1
1
1
1
2
2
2
2
2
2
1
1
1
1
2
2
1
1
2
2
2
2
2
2
2
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
1
1
1
1
1
1
2
2
2
2
1
1
2
2
2
2
2
1
2
1

View file

@ -0,0 +1,71 @@
% Square:
% flatpak run org.octave.Octave <filename>
% or
% octave --no-window-system --no-gui -qf <filename>
clear all
clc
% %% L-shape
% g=[2 0 2 0 0 1 0; % #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 2 2 0 1 1 0;
% 2 2 1 1 0.5 1 0;
% 2 1 1 0.5 2 1 0;
% 2 1 0 2 2 1 0;
% 2 0 0 2 0 1 0]';
%% square
% g=[2 0 1 0 0 1 0; % #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 1 1 0 1 1 0;
% 2 1 0 1 1 1 0;
% 2 0 0 1 0 1 0]';
% %% 2 squares
% g=[2 0 1 0 0 1 0; % 1 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 1 1 0 1 1 2;
% 2 1 0 1 1 1 0;
% 2 0 0 1 0 1 0;
% 2 1 2 0 0 2 0; % 2 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 2 2 0 1 2 0;
% 2 2 1 1 1 2 0
% ]';
%% 4 squares
g=[2 0 1 0 0 1 0; % 1 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
2 1 1 0 1 1 2;
2 1 0 1 1 1 3;
2 0 0 1 0 1 0;
2 1 2 0 0 2 0; % 2 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
2 2 2 0 1 2 0;
2 2 1 1 1 2 4;
% 2 1 1 1 0 2 1;
% 2 0 1 1 1 3 1; % 3 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
2 1 1 1 2 3 4;
2 1 0 2 2 3 0;
2 0 0 2 1 3 0;
% 2 1 2 1 1 4 2; % 4 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
2 2 2 1 2 4 0;
2 2 1 2 2 4 0
% 2 1 1 2 1 4 3
]';
[p,e,t] = initmesh(g,'hmax',0.1);
pdemesh(p,e,t)
%% GH
% output from <https://de.mathworks.com/help/pde/ug/initmesh.html initmesh>
%
% coordinates p: [2][nnode]
% connectivity t: [4][nelem] with t(4,:) are the subdomain numbers
% edges e: [7][nedges] boundary edges
% e([1,2],:) - start/end vertex of edge
% e([3,4],:) - start/end values
% e(5,:) - segment number
% e([6,7],:) - left/right subdomain
ascii_write_mesh( p, t, e, mfilename);
ascii_write_subdomains( p, t, e, mfilename);
% tmp=t(1:3,:)

Binary file not shown.

File diff suppressed because it is too large

File diff suppressed because it is too large

View file

@ -0,0 +1,98 @@
% Square:
% flatpak run org.octave.Octave <filename>
% or
% octave --no-window-system --no-gui -qf <filename>
clear all
clc
% %% L-shape
% g=[2 0 2 0 0 1 0; % #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 2 2 0 1 1 0;
% 2 2 1 1 0.5 1 0;
% 2 1 1 0.5 2 1 0;
% 2 1 0 2 2 1 0;
% 2 0 0 2 0 1 0]';
%% square
% g=[2 0 1 0 0 1 0; % #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 1 1 0 1 1 0;
% 2 1 0 1 1 1 0;
% 2 0 0 1 0 1 0]';
% %% 2 squares
% g=[2 0 1 0 0 1 0; % 1 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 1 1 0 1 1 2;
% 2 1 0 1 1 1 0;
% 2 0 0 1 0 1 0;
% 2 1 2 0 0 2 0; % 2 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 2 2 0 1 2 0;
% 2 2 1 1 1 2 0
% ]';
% %% 4 squares
% g=[2 0 1 0 0 1 0; % 1 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 1 1 0 1 1 2;
% 2 1 0 1 1 1 3;
% 2 0 0 1 0 1 0;
% 2 1 2 0 0 2 0; % 2 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 2 2 0 1 2 0;
% 2 2 1 1 1 2 4;
% % 2 1 1 1 0 2 1;
% % 2 0 1 1 1 3 1; % 3 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 1 1 1 2 3 4;
% 2 1 0 2 2 3 0;
% 2 0 0 2 1 3 0;
% % 2 1 2 1 1 4 2; % 4 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 2 2 1 2 4 0;
% 2 2 1 2 2 4 0
% % 2 1 1 2 1 4 3
% ]';
%% 6 squares
g=[2 0 1 0 0 1 0; % 1 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
2 1 1 0 1 1 2;
2 1 0 1 1 1 3;
2 0 0 1 0 1 0;
2 1 2 0 0 2 0; % 2 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
2 2 2 0 1 2 5;
2 2 1 1 1 2 4;
% 2 1 1 1 0 2 1;
% 2 0 1 1 1 3 1; % 3 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
2 1 1 1 2 3 4;
2 1 0 2 2 3 0;
2 0 0 2 1 3 0;
% 2 1 2 1 1 4 2; % 4 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
2 2 2 1 2 4 6;
2 2 1 2 2 4 0;
% 2 1 1 2 1 4 3;
2 2 3 0 0 5 0; % 5 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
2 3 3 0 1 5 0;
2 3 2 1 1 5 6;
% 2 2 2 1 0 5 2;
% 2 2 3 1 1 6 5; % 6 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
2 3 3 1 2 6 0;
2 3 2 2 2 6 0
% 2 2 2 2 1 6 4
]';
[p,e,t] = initmesh(g,'hmax',0.1);
pdemesh(p,e,t)
%% GH
% output from <https://de.mathworks.com/help/pde/ug/initmesh.html initmesh>
%
% coordinates p: [2][nnode]
% connectivity t: [4][nelem] with t(4,:) are the subdomain numbers
% edges e: [7][nedges] boundary edges
% e([1,2],:) - start/end vertex of edge
% e([3,4],:) - start/end values
% e(5,:) - segment number
% e([6,7],:) - left/right subdomain
ascii_write_mesh( p, t, e, mfilename);
ascii_write_subdomains( p, t, e, mfilename);
% tmp=t(1:3,:)

File diff suppressed because it is too large

File diff suppressed because it is too large

View file

@ -0,0 +1,704 @@
188 2 327 3
1 0
1 1
2 0
2 1
1 0.1
1 0.2
1 0.3
1 0.4
1 0.5
1 0.6
1 0.7
1 0.8
1 0.9
1.1 0
1.2 0
1.3 0
1.4 0
1.5 0
1.6 0
1.7 0
1.8 0
1.9 0
2 0.1
2 0.2
2 0.3
2 0.4
2 0.5
2 0.6
2 0.7
2 0.8
2 0.9
1.9 1
1.8 1
1.7 1
1.6 1
1.5 1
1.4 1
1.3 1
1.2 1
1.1 1
1.49824 0.488715
1.05193 0.0620153
1.96625 0.0483577
1.04793 0.955298
1.95161 0.955198
1.73603 0.345854
1.28857 0.282229
1.36738 0.727982
1.64625 0.722546
1.91195 0.84478
1.08323 0.845761
1.85001 0.0860639
1.52152 0.243093
1.22787 0.537745
1.67462 0.537899
1.81754 0.691116
1.16397 0.184115
1.49817 0.807652
1.74693 0.844955
1.84393 0.263468
1.24154 0.844531
1.08066 0.749219
1.76054 0.083432
1.50008 0.655952
1.84529 0.488219
1.17462 0.387723
1.35549 0.162537
1.44621 0.349021
1.60832 0.424124
1.6628 0.210825
1.133 0.617724
1.58119 0.13085
1.61703 0.869587
1.38903 0.856845
1.73 0.670898
1.91197 0.644767
1.16231 0.798105
1.14015 0.0810055
1.07152 0.14153
1.82658 0.899363
1.90413 0.172802
1.16585 0.900048
1.91126 0.746232
1.37648 0.580317
1.32975 0.43121
1.23911 0.676272
1.09469 0.495657
1.38535 0.261717
1.57154 0.56268
1.82457 0.587265
1.6065 0.316664
1.82091 0.349209
1.13118 0.291188
1.25373 0.0995169
1.4471 0.089654
1.64761 0.140153
1.80397 0.168364
1.82574 0.797144
1.69201 0.442852
1.52834 0.406743
1.464 0.903223
1.41795 0.780259
1.54963 0.730766
1.92222 0.448553
1.56289 0.928378
1.31213 0.892653
1.07541 0.655465
1.73457 0.92605
1.67051 0.812302
1.93201 0.256721
1.32454 0.794509
1.64979 0.933767
1.08946 0.92638
1.93126 0.0917278
1.90811 0.925959
1.54511 0.0706685
1.40439 0.48925
1.91756 0.544695
1.26613 0.200964
1.73971 0.756545
1.74585 0.247972
1.21468 0.316416
1.91124 0.347099
1.28928 0.613103
1.07981 0.375561
1.53607 0.314626
1.42573 0.699103
1.58547 0.659664
1.16172 0.695142
1.76076 0.51266
1.24656 0.452978
1.46284 0.568585
1.46297 0.192615
1.56643 0.474578
1.05798 0.441083
1.35115 0.0700935
1.60594 0.183437
1.35214 0.350198
1.07574 0.57016
1.7861 0.425088
1.71861 0.156597
1.24404 0.927617
1.58815 0.801244
1.2391 0.752451
1.1502 0.552118
1.64984 0.364251
1.07328 0.22312
1.47839 0.735193
1.37306 0.930749
1.68388 0.877557
1.54616 0.862383
1.77797 0.635848
1.62727 0.0716422
1.44232 0.837419
1.69071 0.0900998
1.85764 0.411619
1.85866 0.956351
1.96709 0.142936
1.13894 0.956686
1.31226 0.5173
1.47028 0.271985
1.26747 0.369611
1.65816 0.624881
1.36475 0.659896
1.20042 0.615405
1.32361 0.226305
1.45486 0.428791
1.59228 0.249245
1.62324 0.488742
1.7872 0.3017
1.66598 0.289144
1.7382 0.594826
1.21269 0.254385
1.16824 0.476568
1.58631 0.365552
1.11512 0.435004
1.54649 0.18839
1.30407 0.704311
1.39725 0.409587
1.51629 0.133942
1.42593 0.633055
1 0.95
1.05 1
2 0.95
1 0.05
2 0.05
2 0.15
1.95 1
22 3 43
81 24 110
6 5 79
43 3 186
7 6 147
8 7 125
9 8 135
10 9 139
107 10 139
11 10 107
12 11 62
114 23 158
94 16 136
95 18 116
87 9 135
42 14 78
13 12 51
16 17 136
17 18 95
18 19 116
20 21 63
21 22 52
52 22 114
43 23 114
110 25 123
184 4 188
26 27 104
104 27 118
24 25 110
121 46 171
30 31 50
50 31 115
45 32 115
80 33 108
115 32 157
44 40 183
33 34 108
44 13 113
108 34 112
36 37 101
101 37 149
1 14 42
14 15 78
158 23 187
37 38 149
34 35 112
42 5 185
28 29 76
106 38 142
78 15 94
117 41 132
51 12 62
92 46 170
100 41 167
164 48 178
31 45 115
35 36 105
82 39 159
62 11 107
22 43 114
25 26 123
45 31 184
97 60 121
119 47 173
88 47 166
102 48 127
89 41 134
76 29 83
29 30 83
109 49 120
5 42 79
122 47 162
127 48 164
75 49 163
103 49 143
103 64 128
163 55 172
21 52 63
39 40 159
106 61 111
131 54 174
131 66 162
63 52 97
81 60 97
99 55 169
101 58 151
30 50 83
99 46 140
124 54 160
93 7 147
126 53 168
93 57 173
102 74 111
168 53 177
104 65 156
133 53 161
116 19 153
138 68 179
40 44 113
77 62 129
88 67 133
153 20 155
102 58 154
75 56 120
89 64 132
38 39 142
109 59 150
76 56 90
83 50 98
90 56 152
51 62 77
107 71 129
15 16 94
94 67 119
78 57 79
42 78 79
98 59 120
112 73 150
20 63 155
92 60 123
77 61 82
51 77 82
80 59 98
56 76 83
132 64 181
117 84 160
88 68 138
41 89 132
111 61 144
145 54 165
93 66 125
129 71 165
119 67 166
100 68 126
100 69 134
89 55 163
56 75 152
27 28 118
99 69 146
96 70 137
168 70 171
90 65 118
125 66 176
79 57 147
95 67 136
57 78 94
19 20 153
116 72 180
121 70 141
133 95 180
146 91 171
52 81 97
50 80 98
56 83 98
134 69 169
46 92 140
146 69 175
126 68 161
143 73 151
61 77 144
124 84 164
101 74 154
152 75 172
103 58 148
28 76 118
140 92 156
105 101 151
36 101 105
82 61 142
106 74 149
129 86 144
139 87 145
105 73 112
59 80 108
49 75 120
109 73 143
60 81 110
123 104 156
48 102 111
74 106 111
35 105 112
59 108 150
13 51 113
51 82 113
81 52 114
80 50 115
32 33 157
95 116 180
96 72 153
131 85 160
117 85 179
76 90 118
65 104 118
138 85 162
57 94 119
56 98 120
59 109 120
121 60 170
70 96 141
66 93 122
122 93 173
26 104 123
60 110 123
85 117 160
144 86 178
174 87 176
7 93 125
137 70 168
126 91 175
127 64 148
58 102 148
64 89 128
49 103 128
124 86 165
62 107 129
65 90 130
55 99 130
145 87 174
66 122 162
84 117 132
164 84 181
68 88 161
67 95 133
55 89 169
41 100 134
135 125 176
8 125 135
67 94 136
17 95 136
70 121 171
72 96 137
167 117 179
47 88 138
9 87 139
71 107 139
130 99 140
65 130 140
63 97 141
97 121 141
39 82 142
61 106 142
58 103 143
49 109 143
48 111 178
77 129 144
54 124 165
71 139 145
69 100 175
46 99 146
6 79 147
57 93 147
64 103 148
102 127 148
74 101 149
38 106 149
73 109 150
108 112 150
73 105 151
58 143 151
130 90 172
55 130 172
141 96 155
72 116 153
58 101 154
74 102 154
63 141 155
96 153 155
92 123 156
65 140 156
33 80 157
80 115 157
24 81 158
81 114 158
113 82 159
40 113 159
84 124 160
54 131 160
53 126 161
88 133 161
85 131 162
47 138 162
128 89 163
49 128 163
86 124 178
64 127 181
86 129 165
71 145 165
67 88 166
47 119 166
68 100 167
41 117 167
91 126 168
53 133 177
69 99 169
89 134 169
60 92 170
46 121 170
46 146 171
91 168 171
90 152 172
75 163 172
57 119 173
47 122 173
66 131 174
54 145 174
100 126 175
91 146 175
87 135 176
66 174 176
72 137 177
137 168 177
111 144 178
124 164 178
85 138 179
68 167 179
177 133 180
72 177 180
84 132 181
127 164 181
13 44 182
182 44 183
2 182 183
1 42 185
23 43 186
24 158 187
32 45 188
45 184 188
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1

View file

@ -0,0 +1,135 @@
#include "vdop.h"
#include <cassert> // assert()
#include <cmath>
#include <iostream>
#include <vector>
using namespace std;
void vddiv(vector<double> & x, vector<double> const& y,
vector<double> const& z)
{
assert( x.size()==y.size() && y.size()==z.size() );
size_t n = x.size();
#pragma omp parallel for
for (size_t k = 0; k < n; ++k)
{
x[k] = y[k] / z[k];
}
return;
}
//******************************************************************************
void vdaxpy(std::vector<double> & x, std::vector<double> const& y,
double alpha, std::vector<double> const& z )
{
assert( x.size()==y.size() && y.size()==z.size() );
size_t n = x.size();
#pragma omp parallel for
for (size_t k = 0; k < n; ++k)
{
x[k] = y[k] + alpha * z[k];
}
return;
}
//******************************************************************************
double dscapr(std::vector<double> const& x, std::vector<double> const& y)
{
assert( x.size()==y.size());
size_t n = x.size();
double s = 0.0;
//#pragma omp parallel for reduction(+:s)
for (size_t k = 0; k < n; ++k)
{
s += x[k] * y[k];
}
return s;
}
//******************************************************************************
//void DebugVector(vector<double> const &v)
//{
//cout << "\nVector (nnode = " << v.size() << ")\n";
//for (size_t j = 0; j < v.size(); ++j)
//{
//cout.setf(ios::right, ios::adjustfield);
//cout << v[j] << " ";
//}
//cout << endl;
//return;
//}
//******************************************************************************
bool CompareVectors(std::vector<double> const& x, int const n, double const y[], double const eps)
{
bool bn = (static_cast<int>(x.size())==n);
if (!bn)
{
cout << "######### Error: " << "number of elements" << endl;
}
//bool bv = equal(x.cbegin(),x.cend(),y);
bool bv = equal(x.cbegin(),x.cend(),y,
[eps](double a, double b) -> bool
{ return std::abs(a-b)<eps*(1.0+0.5*(std::abs(a)+ std::abs(b))); }
);
if (!bv)
{
assert(static_cast<int>(x.size())==n);
cout << "######### Error: " << "values" << endl;
}
return bn && bv;
}
//******************************************************************************
double par_scalar(vector<double> const &x, vector<double> const &y, MPI_Comm const& icomm)
{
const double s = dscapr(x,y);
double sg;
MPI_Allreduce(&s,&sg,1,MPI_DOUBLE,MPI_SUM,icomm);
return(sg);
}
//******************************************************************************
void ExchangeAll(vector<double> const &xin, vector<double> &yout, MPI_Comm const &icomm)
{
int myrank, numprocs,ierr(-1);
MPI_Comm_rank(icomm, &myrank); // my MPI-rank
MPI_Comm_size(icomm, &numprocs);
int const N=xin.size();
int const sendcount = N/numprocs; // equal sized chunks
assert(sendcount*numprocs==N); // does N really split into equal sized chunks?
assert(xin.size()==yout.size());
auto sendbuf = xin.data();
auto recvbuf = yout.data();
ierr = MPI_Alltoall(sendbuf, sendcount, MPI_DOUBLE,
recvbuf, sendcount, MPI_DOUBLE, icomm);
assert(0==ierr);
return;
}
//******************************************************************************
void ExchangeAllInPlace(vector<double> &xin, MPI_Comm const &icomm)
{
int myrank, numprocs,ierr(-1);
MPI_Comm_rank(icomm, &myrank); // my MPI-rank
MPI_Comm_size(icomm, &numprocs);
int const N=xin.size();
int const sendcount = N/numprocs; // equal sized chunks
assert(sendcount*numprocs==N); // does N really split into equal sized chunks?
auto sendbuf = xin.data();
ierr = MPI_Alltoall(MPI_IN_PLACE, sendcount, MPI_DOUBLE,
sendbuf, sendcount, MPI_DOUBLE, icomm);
assert(0==ierr);
return;
}

View file

@ -0,0 +1,166 @@
#ifndef VDOP_FILE
#define VDOP_FILE
#include <iostream>
#include <mpi.h> // MPI
#include <string>
#include <vector>
/** @brief Element-wise vector division x_k = y_k/z_k.
*
* @param[out] x target vector
* @param[in] y source vector
* @param[in] z source vector
*
*/
void vddiv(std::vector<double> &x, std::vector<double> const &y,
std::vector<double> const &z);
/** @brief Element-wise daxpy operation x(k) = y(k) + alpha*z(k).
*
* @param[out] x target vector
* @param[in] y source vector
* @param[in] alpha scalar
* @param[in] z source vector
*
*/
void vdaxpy(std::vector<double> &x, std::vector<double> const &y,
double alpha, std::vector<double> const &z );
/** @brief Calculates the Euclidean inner product of two vectors.
*
* @param[in] x vector
* @param[in] y vector
* @return Euclidean inner product @f$\langle x,y \rangle@f$
*
*/
double dscapr(std::vector<double> const &x, std::vector<double> const &y);
inline
double L2_scapr(std::vector<double> const &x, std::vector<double> const &y)
{
return dscapr(x, y) / x.size();
}
/** Parallel inner product
@param[in] x vector
@param[in] y vector
@param[in] icomm MPI communicator
@return resulting Euclidean inner product <x,y>
*/
double par_scalar(std::vector<double> const &x, std::vector<double> const &y,
MPI_Comm const& icomm=MPI_COMM_WORLD);
/* ReadIn : Input and broadcast of an integer */
inline
int ReadIn(std::string const &ss = std::string(), MPI_Comm const &icomm = MPI_COMM_WORLD)
{
MPI_Barrier(icomm);
int myrank; /* my rank number */
MPI_Comm_rank(icomm, &myrank);
int id;
if (myrank == 0) {
std::cout << "\n\n " << ss << " : Which process do you want to debug ? \n";
std::cin >> id;
}
MPI_Bcast(&id, 1, MPI_INT, 0, icomm);
return id;
}
/**
* Print entries of a vector to standard output.
*
* @param[in] v vector values
* @param[in] ss string containing the vector name
* @param[in] icomm communicator group for MPI
*
*/
//void DebugVector(std::vector<double> const &v);
template <class T>
void DebugVector(std::vector<T> const &v, std::string const &ss = std::string(), MPI_Comm const &icomm = MPI_COMM_WORLD)
{
MPI_Barrier(icomm);
int numprocs; /* # processes */
MPI_Comm_size(icomm, &numprocs);
int myrank; /* my rank number */
MPI_Comm_rank(icomm, &myrank);
int readid = ReadIn(ss, icomm); /* Read readid */
while ( (0 <= readid) && (readid < numprocs) ) {
if (myrank == readid) {
std::cout << "\n\n process " << readid;
std::cout << "\n .... " << ss << " (nnode = " << v.size() << ")\n";
for (size_t j = 0; j < v.size(); ++j) {
std::cout.setf(std::ios::right, std::ios::adjustfield);
std::cout << v[j] << " ";
}
std::cout << std::endl;
fflush(stdout);
}
readid = ReadIn(ss, icomm); /* Read readid */
}
MPI_Barrier(icomm);
return;
}
/** @brief Compares an STL vector with POD vector.
*
* The accuracy criteria @f$ |x_k-y_k| < \varepsilon \left({1+0.5(|x_k|+|y_k|)}\right) @f$
* follows the book by
* <a href="https://www.springer.com/la/book/9783319446592">Stoyan/Baran</a>, p.8.
*
* @param[in] x STL vector
* @param[in] n length of POD vector
* @param[in] y POD vector
* @param[in] eps relative accuracy criteria (default := 0.0).
* @return true iff pairwise vector elements are relatively close to each other.
*
*/
bool CompareVectors(std::vector<double> const &x, int n, double const y[], double const eps = 0.0);
/** Output operator for vector
* @param[in,out] s output stream, e.g. @p cout
* @param[in] v vector
*
* @return output stream
*/
template <class T>
std::ostream &operator<<(std::ostream &s, std::vector<T> const &v)
{
for (auto vp : v) {
s << vp << " ";
}
return s;
}
/** Exchanges equal-sized partitions of vector @p xin with all MPI processes.
* The received data are returned in vector @p yout .
*
* @param[in] xin input vector
* @param[out] yout output vector
* @param[in] icomm MPI communicator
*
*/
void ExchangeAll(std::vector<double> const &xin, std::vector<double> &yout, MPI_Comm const &icomm = MPI_COMM_WORLD);
/** Exchanges equal-sized partitions of vector @p xin with all MPI processes.
* The received data are returned in vector @p xin .
*
* @param[in,out] xin input/output vector
* @param[in] icomm MPI communicator
*
*/
void ExchangeAllInPlace(std::vector<double> &xin, MPI_Comm const &icomm = MPI_COMM_WORLD);
#endif
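A minimal usage sketch of the parallel routines declared above (only an illustration, assuming the program is linked against vdop.cpp and the vector length is divisible by the number of processes):

#include "vdop.h"
#include <iostream>
#include <vector>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    int myrank, numprocs;
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);

    std::vector<double> x(4 * numprocs, 1.0), y(4 * numprocs, 2.0); // length divisible by #procs
    double const s = par_scalar(x, y);     // global inner product: 8 * numprocs * numprocs

    std::vector<double> z(x.size());
    ExchangeAll(x, z);                     // every rank exchanges equal-sized chunks of x

    if (0 == myrank) {
        std::cout << "par_scalar = " << s << std::endl;
    }
    MPI_Finalize();
    return 0;
}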

View file

@ -0,0 +1,20 @@
%% Visualize results
%
% flatpak run org.octave.Octave <filename>
% or
% octave --no-window-system --no-gui -qf <filename>
%
% or
% matlab -nosplash < <filename>
clear all
clc
%%
fname = 'uv.txt';
[xc,ia,v] = ascii_read_meshvector(fname);
h = trisurf(ia, xc(:,1), xc(:,2), v);
waitfor(h) % wait for closing the figure