dino.celebic 2025-12-26 20:26:31 +01:00
commit 2467b9ae03
44 changed files with 22631 additions and 0 deletions

@ -0,0 +1,154 @@
# Basic Definitions for using the GNU compiler suite with OpenMPI
# requires setting of COMPILER=GCC_
# start make as follows to avoid warnings caused by OpenMPI code
# make 2>&1 | grep -v openmpi
MPI_ROOT=/usr/bin/
CC = ${MPI_ROOT}mpicc
CXX = ${MPI_ROOT}mpicxx
F77 = ${MPI_ROOT}mpif77
LINKER = ${CXX}
# If your 'mpirun ...' reports an error like "... not enough slots ...", then use the option '--oversubscribe'
MPIRUN = ${MPI_ROOT}mpirun --oversubscribe -display-map
#MPIRUN = ${MPI_ROOT}mpiexec
# 2023, Oct 23: "WARNING: There is at least non-excluded one OpenFabrics device found,"
# solution according to https://github.com/open-mpi/ompi/issues/11063
MPIRUN += -mca btl ^openib
# KFU:sauron
CXXFLAGS += -I/software/boost/1_72_0/include
WARNINGS = -Wall -pedantic -Woverloaded-virtual -Wfloat-equal -Wshadow \
-Wredundant-decls -Wunreachable-code -Winline -fmax-errors=1
# WARNINGS += -Weffc++ -Wextra
# -Wno-pragmas
CXXFLAGS += -std=c++17 -ffast-math -O3 -march=native ${WARNINGS}
# -ftree-vectorizer-verbose=5 -DNDEBUG
# -ftree-vectorizer-verbose=2
# CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details
# CFLAGS = -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2
# info on vectorization
#VECTORIZE = -ftree-vectorize -fdump-tree-vect-blocks=foo.dump
#-fdump-tree-pre=stderr
VECTORIZE = -ftree-vectorize -fopt-info -ftree-vectorizer-verbose=5
#CXXFLAGS += ${VECTORIZE}
# -funroll-all-loops -msse3
#GCC -march=knl -march=broadwell -march=haswell
# for debugging purposes (safe code)
# -fsanitize=leak       # only one of the three (leak/address/thread) can be used
# -fsanitize=address
# -fsanitize=thread
SANITARY = -fsanitize=address -fsanitize=undefined -fsanitize=null -fsanitize=return \
-fsanitize=bounds -fsanitize=alignment -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow \
-fsanitize=bool -fsanitize=enum -fsanitize=vptr
#CXXFLAGS += ${SANITARY}
#LINKFLAGS +=${SANITARY}
# OpenMP
CXXFLAGS += -fopenmp
LINKFLAGS += -fopenmp
default: ${PROGRAM}
${PROGRAM}: ${OBJECTS}
$(LINKER) $^ ${LINKFLAGS} -o $@
@echo
@echo "Start with : $(MPIRUN) -np num_proc $(MPIFLAGS) $(PROGRAM)"
@echo
clean:
@rm -f ${PROGRAM} ${OBJECTS} gmon.out
clean_all:: clean
@rm -f *_ *~ *.bak *.log *.out *.tar *.orig
@rm -rf html latex
run: ${PROGRAM}
${MPIRUN} -np 6 ./$^
# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *
zip: clean
@echo "Zip the directory: " ${MY_DIR}
@cd .. ;\
zip -r ${MY_DIR}.zip ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
doc:
doxygen Doxyfile
#########################################################################
.cpp.o:
$(CXX) -c $(CXXFLAGS) -o $@ $<
# 2>&1 | grep -v openmpi
# special: get rid of compiler warnings generated by openmpi files
#.cpp.o:
# @$(CXX) -c $(CXXFLAGS) $< 2>/tmp/t.txt || grep -sv openmpi /tmp/t.txt
# |grep -sv openmpi
.c.o:
$(CC) -c $(CFLAGS) -o $@ $<
.f.o:
$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# some tools
# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags)
cache: ${PROGRAM}
valgrind --tool=callgrind --simulate-cache=yes ./$^
# kcachegrind callgrind.out.<pid> &
kcachegrind `ls -1tr callgrind.out.* |tail -1`
# Check for wrong memory accesses, memory leaks, ...
# use smaller data sets
# no "-pg" in compile/link options
mem: ${PROGRAM}
valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes mpirun -np 4 ./$^
# Graphical interface
# valkyrie
# Simple run time profiling of your code
# CXXFLAGS += -g -pg
# LINKFLAGS += -pg
prof: ${PROGRAM}
perf record ./$^
perf report
# gprof -b ./$^ > gp.out
# kprof -f gp.out -p gprof &
#Trace your heap:
#> heaptrack ./main.GCC_
#> heaptrack_gui heaptrack.main.GCC_.<pid>.gz
heap: ${PROGRAM}
heaptrack ./$^ 11
heaptrack_gui `ls -1tr heaptrack.$^.* |tail -1` &
codecheck: $(SOURCES)
cppcheck --enable=all --inconclusive --std=c++17 --suppress=missingIncludeSystem $^
########################################################################
# get the detailed status of all optimization flags
info:
echo "detailed status of all optimization flags"
$(CXX) --version
$(CXX) -Q $(CXXFLAGS) --help=optimizers
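#########################################################################
# Typical usage from a project Makefile (a sketch only; the variable values
# below are taken from the project Makefile later in this commit):
#   COMPILER = GCC_
#   MAIN     = main
#   SOURCES  = $(MAIN).cpp vdop.cpp geom.cpp par_geom.cpp
#   OBJECTS  = $(SOURCES:.cpp=.o)
#   PROGRAM  = $(MAIN).$(COMPILER)
#   include ../$(COMPILER)default.mk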

@ -0,0 +1,107 @@
# Basic Definitions for using INTEL-MPI with its compilers
# requires setting of COMPILER=ICC_NATIVE_
# MPI_ROOT should be defined by shell
# path to icpc is contained in $PATH
MPI_BIN = $(shell dirname `which icpc` | sed 's/bin\/intel64/mpi\/intel64\/bin/g')/
MPI_LIB = $(shell echo ${MPI_BIN} | sed 's/bin/lib/g')
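# e.g., if `which icpc` yields /opt/intel/bin/intel64/icpc (path only an
# illustration), then MPI_BIN becomes /opt/intel/mpi/intel64/bin/ and
# MPI_LIB becomes /opt/intel/mpi/intel64/lib/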
# The Intel-MPI wrappers use gcc by default !!
CC = ${MPI_BIN}mpicc -cc=icc
CXX = ${MPI_BIN}mpicxx -cxx=icpc
F77 = ${MPI_BIN}mpif77 -f77=ifort
LINKER = ${CXX}
MPIRUN = ${MPI_BIN}mpirun
WARNINGS = -Wall -Wextra -pedantic -Woverloaded-virtual -Wfloat-equal -Wshadow
# -Weffc++ -Wunreachable-code -Winline
CXXFLAGS += -O3 -fargument-noalias -DNDEBUG -std=c++17 ${WARNINGS} ${MPI_COMPILE_FLAGS}
CFLAGS += -O3 -fargument-noalias -DNDEBUG -Wall -Wextra -pedantic -Wfloat-equal \
-Wshadow ${MPI_COMPILE_FLAGS}
# -vec-report=3 -mkl
# -guide -parallel
# -guide-opts=string -guide-par[=n] -guide-vec[=n]
# -auto-p32 -simd
# use MKL by INTEL
LINKFLAGS += -mkl ${MPI_LINK_FLAGS}
default: ${PROGRAM}
${PROGRAM}: ${OBJECTS}
$(LINKER) $^ ${LINKFLAGS} -o $@
@echo
@echo "Start with : $(MPIRUN) -np num_proc $(MPIFLAGS) $(PROGRAM)"
@echo
clean:
rm -f ${PROGRAM} ${OBJECTS}
clean_all:: clean
@rm -f *_ *~ *.bak *.log *.out *.tar
run: ${PROGRAM}
(export LD_LIBRARY_PATH=${MPI_LIB}:${LD_LIBRARY_PATH} ;${MPIRUN} -np 4 ./$^ ${PROG_ARGS})
# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *
doc:
doxygen Doxyfile
#########################################################################
.cpp.o:
$(CXX) -c $(CXXFLAGS) -o $@ $<
.c.o:
$(CC) -c $(CFLAGS) -o $@ $<
.f.o:
$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# # some tools
# # Cache behaviour (CXXFLAGS += -g tracks down to source lines)
# cache: ${PROGRAM}
# valgrind --tool=callgrind --simulate-cache=yes ./$^
# # kcachegrind callgrind.out.<pid> &
#
# # Check for wrong memory accesses, memory leaks, ...
# # use smaller data sets
# mem: ${PROGRAM}
# valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
#
# # Simple run time profiling of your code
# # CXXFLAGS += -g -pg
# # LINKFLAGS += -pg
# prof: ${PROGRAM}
# ./$^
# gprof -b ./$^ > gp.out
# # kprof -f gp.out -p gprof &
#
mem: inspector
prof: amplifier
cache: amplifier
gap_par_report:
${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt
# GUI for performance report
amplifier: ${PROGRAM}
${BINDIR}../vtune_amplifier_xe_2013/bin64/amplxe-gui &
# GUI for Memory and Thread analyzer (race condition)
inspector: ${PROGRAM}
# http://askubuntu.com/questions/41629/after-upgrade-gdb-wont-attach-to-process
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
${BINDIR}../inspector_xe_2013/bin64/inspxe-gui &

@ -0,0 +1,112 @@
# Basic Definitions for using INTEL compilers with OpenMPI headers and libraries
# requires setting of COMPILER=ICC_
# MPI_ROOT should be defined by shell
MPI_ROOT=/usr
CC = icc
CXX = icpc
F77 = ifort
LINKER = ${CXX}
MPIRUN = ${MPI_ROOT}/bin/mpirun
# no difference whether C or C++ is used !! (always uses the options from mpicxx)
MPI_COMPILE_FLAGS = `${MPI_ROOT}/bin/mpicxx -showme:compile`
MPI_LINK_FLAGS = `${MPI_ROOT}/bin/mpicxx -showme:link`
# MPI_LINK_FLAGS = -pthread -L/usr/lib/openmpi/lib -lmpi_cxx -lmpi -lopen-rte -lopen-pal -ldl -Wl,--export-dynamic -lnsl -lutil -lm -ldl
WARNINGS = -Wall -Wextra -pedantic -Woverloaded-virtual -Wfloat-equal -Wshadow
# -Weffc++ -Wunreachable-code -Winline
CXXFLAGS += -O3 -std=c++17 -fargument-noalias -DNDEBUG ${WARNINGS} ${MPI_COMPILE_FLAGS}
CFLAGS += -O3 -fargument-noalias -DNDEBUG -Wall -Wextra -pedantic -Wfloat-equal \
-Wshadow ${MPI_COMPILE_FLAGS}
# -vec-report=3 -mkl
# -guide -parallel
# -guide-opts=string -guide-par[=n] -guide-vec[=n]
# -auto-p32 -simd
# use MKL by INTEL
LINKFLAGS += -mkl
# use MPI by Compiler
LINKFLAGS += ${MPI_LINK_FLAGS}
default: ${PROGRAM}
${PROGRAM}: ${OBJECTS}
$(LINKER) $^ ${LINKFLAGS} -o $@
@echo
@echo "Start with : $(MPIRUN) -np num_proc $(MPIFLAGS) $(PROGRAM)"
@echo
clean:
rm -f ${PROGRAM} ${OBJECTS}
clean_all:: clean
@rm -f *_ *~ *.bak *.log *.out *.tar
run: ${PROGRAM}
${MPIRUN} -np 4 ./$^
# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *
doc:
doxygen Doxyfile
#########################################################################
.cpp.o:
$(CXX) -c $(CXXFLAGS) -o $@ $<
.c.o:
$(CC) -c $(CFLAGS) -o $@ $<
.f.o:
$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# # some tools
# # Cache behaviour (CXXFLAGS += -g tracks down to source lines)
# cache: ${PROGRAM}
# valgrind --tool=callgrind --simulate-cache=yes ./$^
# # kcachegrind callgrind.out.<pid> &
#
# # Check for wrong memory accesses, memory leaks, ...
# # use smaller data sets
# mem: ${PROGRAM}
# valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
#
# # Simple run time profiling of your code
# # CXXFLAGS += -g -pg
# # LINKFLAGS += -pg
# prof: ${PROGRAM}
# ./$^
# gprof -b ./$^ > gp.out
# # kprof -f gp.out -p gprof &
#
mem: inspector
prof: amplifier
cache: amplifier
gap_par_report:
${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt
# GUI for performance report
amplifier: ${PROGRAM}
${BINDIR}../vtune_amplifier_xe_2013/bin64/amplxe-gui &
# GUI for Memory and Thread analyzer (race condition)
inspector: ${PROGRAM}
# http://askubuntu.com/questions/41629/after-upgrade-gdb-wont-attach-to-process
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
${BINDIR}../inspector_xe_2013/bin64/inspxe-gui &

@ -0,0 +1,128 @@
# Basic Definitions for using OpenMPI with CLANG compilers
# requires setting of COMPILER=OPENMPI_CLANG_
# Pass CLANG Compilers to the OpenMPI wrappers
# see: https://www.open-mpi.org/faq/?category=mpi-apps#override-wrappers-after-v1.0
EXPORT = export OMPI_CXX=clang++; export OMPI_CC=clang; export OMPI_mpifort=flang
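# The same can be checked from the shell, e.g. (a sketch):
#   OMPI_CXX=clang++ mpicxx -showme     # prints the compile command the wrapper would execute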
CC = mpicc
CXX = mpicxx
F77 = mpifort
LINKER = ${CXX}
MPIRUN = ${MPI_BIN}mpirun
#http://clang.llvm.org/docs/UsersManual.html#options-to-control-error-and-warning-messages
SILENCE_MPI = -Wno-weak-vtables -Wno-old-style-cast -Wno-cast-align -Wno-deprecated
SILENCE_MPI+= -Wno-sign-conversion -Wno-reserved-id-macro -Wno-c++98-compat-pedantic
SILENCE_MPI+= -Wno-zero-as-null-pointer-constant -Wno-source-uses-openmp
WARNINGS = -Weverything -Wno-c++98-compat -Wno-weak-vtables -ferror-limit=3 ${SILENCE_MPI}
#-fsyntax-only -Wdocumentation -Wconversion -Wshadow -Wfloat-conversion -pedantic
CXXFLAGS += -Ofast -std=c++17 ${WARNINGS}
#CXXFLAGS += -Ofast -std=c++17
# -ftrapv
#
CFLAGS += -Ofast -Weverything -ferror-limit=3 ${MPI_COMPILE_FLAGS}
# OpenMP
#CXXFLAGS += -fopenmp
#LINKFLAGS += -fopenmp
# tidy_check
SWITCH_OFF=,-readability-magic-numbers,-readability-redundant-control-flow,-readability-redundant-member-init
SWITCH_OFF+=,-readability-redundant-member-init,-readability-isolate-declaration
#READABILITY=,readability*${SWITCH_OFF}
#TIDYFLAGS = -checks=llvm-*,-llvm-header-guard -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp"
TIDYFLAGS = -checks=llvm-*,-llvm-header-guard${READABILITY} -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp"
#TIDYFLAGS += -checks='modernize*
MPI_COMPILE_FLAGS = `${MPI_BIN}mpicxx -showme:compile`
MPI_LINK_FLAGS = `${MPI_BIN}mpicxx -showme:link`
#TIDYFLAGS += ${MPI_COMPILE_FLAGS}
TIDYFLAGS += -extra-arg="-I/usr/lib/x86_64-linux-gnu/openmpi/include"
#check:
# echo ${MPI_COMPILE_FLAGS}
default: ${PROGRAM}
${PROGRAM}: ${OBJECTS}
@( ${EXPORT}; $(LINKER) $^ ${LINKFLAGS} -o $@ )
@echo
@echo "Start with : $(MPIRUN) -np num_proc $(MPIFLAGS) $(PROGRAM)"
@echo
clean:
rm -f ${PROGRAM} ${OBJECTS}
clean_all:: clean
@rm -f *_ *~ *.bak *.log *.out *.tar
codecheck: tidy_check
tidy_check:
clang-tidy ${SOURCES} ${TIDYFLAGS} -- ${SOURCES}
# see also http://clang-developers.42468.n3.nabble.com/Error-while-trying-to-load-a-compilation-database-td4049722.html
run: ${PROGRAM}
${MPIRUN} -np 4 ./$^ ${PROG_ARGS}
# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *
doc:
doxygen Doxyfile
#########################################################################
.cpp.o:
@( ${EXPORT}; $(CXX) -c $(CXXFLAGS) -o $@ $< )
.c.o:
@( ${EXPORT}; $(CC) -c $(CFLAGS) -o $@ $< )
.f.o:
$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# # some tools
# # Cache behaviour (CXXFLAGS += -g tracks down to source lines)
# cache: ${PROGRAM}
# valgrind --tool=callgrind --simulate-cache=yes ./$^
# # kcachegrind callgrind.out.<pid> &
#
# # Check for wrong memory accesses, memory leaks, ...
# # use smaller data sets
# mem: ${PROGRAM}
# valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
#
# # Simple run time profiling of your code
# # CXXFLAGS += -g -pg
# # LINKFLAGS += -pg
# prof: ${PROGRAM}
# ./$^
# gprof -b ./$^ > gp.out
# # kprof -f gp.out -p gprof &
#
mem: inspector
prof: amplifier
cache: amplifier
gap_par_report:
${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt
# GUI for performance report
amplifier: ${PROGRAM}
${BINDIR}../vtune_amplifier_xe_2013/bin64/amplxe-gui &
# GUI for Memory and Thread analyzer (race condition)
inspector: ${PROGRAM}
# http://askubuntu.com/questions/41629/after-upgrade-gdb-wont-attach-to-process
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
${BINDIR}../inspector_xe_2013/bin64/inspxe-gui &

@ -0,0 +1,107 @@
# Basic Definitions for using OpenMPI with Intel compilers
# requires setting of COMPILER=OPENMPI_ICC_
# Pass Intel Compilers to the OpenMPI wrappers
# see: https://www.open-mpi.org/faq/?category=mpi-apps#override-wrappers-after-v1.0
EXPORT = export OMPI_CXX=icpc; export OMPI_CC=icc; export OMPI_mpifort=ifort
CC = mpicc
CXX = mpicxx
F77 = mpifort
LINKER = ${CXX}
MPIRUN = ${MPI_BIN}mpirun
WARNINGS = -Wall -Wextra -pedantic -Woverloaded-virtual -Wfloat-equal -Wshadow
# -Weffc++ -Wunreachable-code -Winline
CXXFLAGS += -fast -fargument-noalias -DNDEBUG -std=c++17 ${WARNINGS}
CFLAGS += -O3 -fargument-noalias -DNDEBUG -Wall -Wextra -pedantic -Wfloat-equal -Wshadow
# -vec-report=3 -mkl
# -guide -parallel
# -guide-opts=string -guide-par[=n] -guide-vec[=n]
# -auto-p32 -simd
# use MKL by INTEL
LINKFLAGS += -O3 -mkl ${MPI_LINK_FLAGS}
# ipo: warning #11021: unresolved __GI_memset
# see: https://software.intel.com/en-us/articles/ipo-warning-11021-unresolved-symbols-referenced-a-dynamic-library
LINKFLAGS +=
default: ${PROGRAM}
${PROGRAM}: ${OBJECTS}
@( ${EXPORT}; $(LINKER) $^ ${LINKFLAGS} -o $@ )
@echo
@echo "Start with : $(MPIRUN) -np num_proc $(MPIFLAGS) $(PROGRAM)"
@echo
clean:
rm -f ${PROGRAM} ${OBJECTS}
clean_all:: clean
@rm -f *_ *~ *.bak *.log *.out *.tar
run: ${PROGRAM}
${MPIRUN} -np 4 ./$^ ${PROG_ARGS}
# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *
doc:
doxygen Doxyfile
#########################################################################
.cpp.o:
@( ${EXPORT}; $(CXX) -c $(CXXFLAGS) -o $@ $< )
.c.o:
@( ${EXPORT}; $(CC) -c $(CFLAGS) -o $@ $< )
.f.o:
$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# # some tools
# # Cache behaviour (CXXFLAGS += -g tracks down to source lines)
# cache: ${PROGRAM}
# valgrind --tool=callgrind --simulate-cache=yes ./$^
# # kcachegrind callgrind.out.<pid> &
#
# # Check for wrong memory accesses, memory leaks, ...
# # use smaller data sets
# mem: ${PROGRAM}
# valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
#
# # Simple run time profiling of your code
# # CXXFLAGS += -g -pg
# # LINKFLAGS += -pg
# prof: ${PROGRAM}
# ./$^
# gprof -b ./$^ > gp.out
# # kprof -f gp.out -p gprof &
#
mem: inspector
prof: amplifier
cache: amplifier
gap_par_report:
${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt
# GUI for performance report
amplifier: ${PROGRAM}
${BINDIR}../vtune_amplifier_xe_2013/bin64/amplxe-gui &
# GUI for Memory and Thread analyzer (race condition)
inspector: ${PROGRAM}
# http://askubuntu.com/questions/41629/after-upgrade-gdb-wont-attach-to-process
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
${BINDIR}../inspector_xe_2013/bin64/inspxe-gui &

@ -0,0 +1,125 @@
# Use the MPI-wrappers from the PGI compiler suite.
# requires setting of COMPILER=PGI_MPI_
#
# requires
# sudo apt install librdmacm1
# Details for run time information
# export PGI_ACC_TIME=1
# unset PGI_ACC_TIME
# export PGI_ACC_NOTIFY=1
# export PGI_ACC_NOTIFY=3
# unset PGI_ACC_NOTIFY
PGI_PATH = /opt/pgi/linux86-64/2019/bin
#ifeq "$(HOSTNAME)" "mephisto.uni-graz.at"
# # mephisto
# PGI_PATH = /share/apps/pgi/linux86-64/2016/bin
#endif
#MPI_ROOT=${PGI_PATH}mpi/mpich/bin/
MPI_ROOT= ${PGI_PATH}/../mpi/openmpi-3.1.3/bin/
MPIRUN = ${MPI_ROOT}mpirun
CC = ${MPI_ROOT}mpicc
CXX = ${MPI_ROOT}mpicxx
#F77 = ${MPI_ROOT}mpif77
ifndef LINKER
LINKER = ${CC}
endif
LINKER = ${CXX}
WARNINGS = -Minform=warn
PGI_PROFILING += -Minfo=loop,vect,opt,intensity,mp,accel
#PGI_PROFILING += -Mprof=lines Minfo=ccff
CXXFLAGS += -e3 -std=c++17 -fast ${PGI_PROFILING} ${WARNINGS} -Mnodepchk
CFLAGS += -fast ${PGI_PROFILING} ${WARNINGS} -Mnodepchk
#
# for OpenACC
# Target architecture (nvidia,host)
TA_ARCH = host
#TA_ARCH = nvidia,host
#TA_ARCH = -ta=nvidia:cc2+,cuda5.5,fastmath
#TA_ARCH = -acc -DNDEBUG -ta=nvidia:cc2+,cuda5.5,fastmath,keepgpu
#TA_ARCH = -acc -DNDEBUG -ta=nvidia:cc2+,fastmath,keepgpu
#,keepgpu
# CFLAGS = -O3 -ta=$(TA_ARCH)
#CFLAGS += -B -gopt $(TA_ARCH)
#CXXFLAGS += -B -gopt $(TA_ARCH)
# -Minfo=all
# libcudart.a is needed for direct CUDA calls
#LINKFLAGS = -gopt $(TA_ARCH) -L${BINDIR}../lib $(PGI_PROFILING)
# -lcudart
default: ${PROGRAM}
${PROGRAM}: ${OBJECTS}
$(LINKER) $^ ${LINKFLAGS} -o $@
clean:
rm -f ${PROGRAM} ${OBJECTS} *.gpu *gprof.out
clean_all:: clean
@rm -f *_ *~ *.bak *.log *.out *.tar
#run: clean ${PROGRAM}
run: ${PROGRAM}
${MPIRUN} -np 4 ${OPTIRUN} ./${PROGRAM}
# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *
doc:
doxygen Doxyfile
#########################################################################
.cpp.o:
$(CXX) -c $(CXXFLAGS) -o $@ $<
.c.o:
$(CC) -c $(CFLAGS) -o $@ $<
.f.o:
$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# # some tools
# # Simple run time profiling of your code
# # CXXFLAGS += -g -pg
# # LINKFLAGS += -pg
# Profiling options PGI, see: pgcollect -help
CPU_PROF = -allcache
GPU_PROF = -cuda=gmem,branch,cc13 -cudainit
#GPU_PROF = -cuda=branch:cc20
#
PROF_FILE = pgprof.out
prof: ${PROGRAM}
# ./$^
# $(CUDA_HOME)/bin/nvvp &
# export LD_LIBRARY_PATH=/state/partition1/apps/pgi/linux86-64/12.9/lib:$LD_LIBRARY_PATH
${OPTIRUN} ${BINDIR}pgcollect $(GPU_PROF) ./$^
${OPTIRUN} ${BINDIR}pgprof -exe ./$^ $(PROF_FILE) &
# Memory checker (slooooow!!!):
# see doc at /usr/local/cuda/doc/cuda-memcheck.pdf
# mem: ${PROGRAM}
# $(CUDA_HOME)memcheck ./$^

File diff suppressed because it is too large

@ -0,0 +1,54 @@
#
# use GNU-Compiler tools
COMPILER=GCC_
# COMPILER=GCC_SEQ_
# alternatively, set it from the shell:
# export COMPILER=GCC_
# or pass it directly on the make command line:
# make COMPILER=GCC_
MAIN = main
SOURCES = ${MAIN}.cpp vdop.cpp geom.cpp par_geom.cpp
OBJECTS = $(SOURCES:.cpp=.o)
PROGRAM = ${MAIN}.${COMPILER}
# uncomment the next two lines for debugging and detailed performance analysis
CXXFLAGS += -g
# -DNDEBUG
# -pg slows down the code on my laptop when using CLANG_
LINKFLAGS += -g
#-pg
#CXXFLAGS += -Q --help=optimizers
#CXXFLAGS += -fopt-info
include ../${COMPILER}default.mk
#############################################################################
# additional specific cleaning in this directory
clean_all::
@rm -f uv.txt
#############################################################################
# special testing
# NPROCS = 4
#
TFILE = t.dat
# TTMP = t.tmp
#
graph: $(PROGRAM)
# @rm -f $(TFILE).*
# the next two lines apply only to a sequential run
./$(PROGRAM)
@mv $(TFILE).000 $(TFILE)
# $(MPIRUN) $(MPIFLAGS) -np $(NPROCS) $(PROGRAM)
# @echo " "; echo "Manipulate data for graphics."; echo " "
# @cat $(TFILE).* > $(TTMP)
# @sort -b -k 2 $(TTMP) -o $(TTMP).1
# @sort -b -k 1 $(TTMP).1 -o $(TTMP).2
# @awk -f nl.awk $(TTMP).2 > $(TFILE)
# @rm -f $(TTMP).* $(TTMP) $(TFILE).*
#
-gnuplot jac.dem

@ -0,0 +1,43 @@
function [ xc, ia, v ] = ascii_read_meshvector( fname )
%
% Loads the 2D triangular mesh (coordinates, vertex connectivity)
% together with values on its vertices from an ASCII file.
% Matlab indexing is stored (starts with 1).
%
% The input file format is compatible
% with Mesh_2d_3_matlab:Write_ascii_matlab(..) in jacobi_oo_stl/geom.h
%
%
% IN: fname - filename
% OUT: xc - coordinates
% ia - mesh connectivity
% v - solution vector
DELIMETER = ' ';
fprintf('Read file %s\n',fname)
% Read mesh constants
nn = dlmread(fname,DELIMETER,[0 0 0 3]); %% row_1, col_1, row_2, col_2 in C indexing!!!
nnode = nn(1);
ndim = nn(2);
nelem = nn(3);
nvert = nn(4);
% Read coordinates
row_start = 0+1;
row_end = 0+nnode;
xc = dlmread(fname,DELIMETER,[row_start 0 row_end ndim-1]);
% Read connectivity
row_start = row_end+1;
row_end = row_end+nelem;
ia = dlmread(fname,DELIMETER,[row_start 0 row_end nvert-1]);
% Read solution
row_start = row_end+1;
row_end = row_end+nnode;
v = dlmread(fname,DELIMETER,[row_start 0 row_end 0]);
end
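% Example usage (a sketch; the file name 'square.txt' is an assumption,
% any file written in the format described above will do):
%   [xc, ia, v] = ascii_read_meshvector('square.txt');
%   trisurf(ia, xc(:,1), xc(:,2), v);    % quick visualization of the nodal values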

@ -0,0 +1,49 @@
function ascii_write_mesh( xc, ia, e, basename)
%
% Saves the 2D triangular mesh in the minimal way (only coordinates, vertex connectivity, minimal boundary edge info)
% in an ASCII file.
% Matlab indexing is stored (starts with 1).
%
% The output file format is compatible with Mesh_2d_3_matlab:Mesh_2d_3_matlab(std::string const &fname) in jacobi_oo_stl/geom.h
%
% IN:
% coordinates xc: [2][nnode]
% connectivity ia: [4][nelem] with t(4,:) are the subdomain numbers
% edges e: [7][nedges] boundary edges
% e([1,2],:) - start/end vertex of edge
% e([3,4],:) - start/end values
% e(5,:) - segment number
% e([6,7],:) - left/right subdomain
% basename: file name without extension
%
% Data have been generated via <https://de.mathworks.com/help/pde/ug/initmesh.html initmesh>.
%
fname = [basename, '.txt'];
nnode = int32(size(xc,2));
ndim = int32(size(xc,1));
nelem = int32(size(ia,2));
nvert_e = int32(3);
dlmwrite(fname,nnode,'delimiter','\t','precision',16) % number of nodes
dlmwrite(fname,ndim,'-append','delimiter','\t','precision',16) % space dimension
dlmwrite(fname,nelem,'-append','delimiter','\t','precision',16) % number of elements
dlmwrite(fname,nvert_e,'-append','delimiter','\t','precision',16) % number of vertices per element
% dlmwrite(fname,xc(:),'-append','delimiter','\t','precision',16) % coordinates
dlmwrite(fname,xc([1,2],:).','-append','delimiter','\t','precision',16) % coordinates
% no subdomain info transferred
tmp=int32(ia(1:3,:));
% dlmwrite(fname,tmp(:),'-append','delimiter','\t','precision',16) % connectivity in Matlab indexing
dlmwrite(fname,tmp(:,:).','-append','delimiter','\t','precision',16) % connectivity in Matlab indexing
% store only start and end point of boundary edges,
nbedges = size(e,2);
dlmwrite(fname,nbedges,'-append','delimiter','\t','precision',16) % number boundary edges
tmp=int32(e(1:2,:));
% dlmwrite(fname,tmp(:),'-append','delimiter','\t','precision',16) % boundary edges in Matlab indexing
dlmwrite(fname,tmp(:,:).','-append','delimiter','\t','precision',16) % boundary edges in Matlab indexing
end
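% Example usage (a sketch; assumes a PDE-toolbox mesh [p,e,t] generated via
% initmesh as mentioned above; the geometry 'lshapeg' is only an illustration):
%   [p, e, t] = initmesh('lshapeg');
%   ascii_write_mesh(p, t, e, 'lshape');     % writes lshape.txt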

@ -0,0 +1,51 @@
function ascii_write_subdomains( xc, ia, e, basename)
%
% Saves the 2D triangular mesh in the minimal way (only coordinates, vertex connectivity, minimal boundary edge info)
% in an ASCII file.
% Matlab indexing is stored (starts with 1).
%
% The output file format is compatible with Mesh_2d_3_matlab:Mesh_2d_3_matlab(std::string const &fname) in jacobi_oo_stl/geom.h
%
% IN:
% coordinates xc: [2][nnode]
% connectivity ia: [4][nelem] with t(4,:) are the subdomain numbers
% edges e: [7][nedges] boundary edges
% e([1,2],:) - start/end vertex of edge
% e([3,4],:) - start/end values
% e(5,:) - segment number
% e([6,7],:) - left/right subdomain
% basename: file name without extension
%
% Data have been generated via <https://de.mathworks.com/help/pde/ug/initmesh.html initmesh>.
%
fname = [basename, '_sd.txt'];
nnode = int32(size(xc,2));
ndim = int32(size(xc,1));
nelem = int32(size(ia,2));
nvert_e = int32(3);
% dlmwrite(fname,nnode,'delimiter','\t','precision',16) % number of nodes
% dlmwrite(fname,ndim,'-append','delimiter','\t','precision',16) % space dimension
% dlmwrite(fname,nelem,'-append','delimiter','\t','precision',16) % number of elements
dlmwrite(fname,nelem,'delimiter','\t','precision',16) % number of elements
% dlmwrite(fname,nvert_e,'-append','delimiter','\t','precision',16) % number of vertices per element
% % dlmwrite(fname,xc(:),'-append','delimiter','\t','precision',16) % coordinates
% dlmwrite(fname,xc([1,2],:).','-append','delimiter','\t','precision',16) % coordinates
% subdomain info
tmp=int32(ia(4,:));
% % dlmwrite(fname,tmp(:),'-append','delimiter','\t','precision',16) % connectivity in Matlab indexing
% dlmwrite(fname,tmp(:,:).','-append','delimiter','\t','precision',16) % connectivity in Matlab indexing
dlmwrite(fname,tmp(:,:).','-append','delimiter','\t') % connectivity in Matlab indexing
% % store only start and end point of boundary edges,
% nbedges = size(e,2);
% dlmwrite(fname,nbedges,'-append','delimiter','\t','precision',16) % number boundary edges
% tmp=int32(e(1:2,:));
% % dlmwrite(fname,tmp(:),'-append','delimiter','\t','precision',16) % boundary edges in Matlab indexing
% dlmwrite(fname,tmp(:,:).','-append','delimiter','\t','precision',16) % boundary edges in Matlab indexing
end
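% Example usage (a sketch; same [p,e,t] as for ascii_write_mesh, where t(4,:)
% already holds the subdomain numbers):
%   ascii_write_subdomains(p, t, e, 'square_4');   % writes square_4_sd.txt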

File diff suppressed because it is too large

@ -0,0 +1,712 @@
#ifndef GEOM_FILE
#define GEOM_FILE
#include <array>
#include <functional> // function; C++11
#include <iostream>
#include <memory> // shared_ptr
#include <string>
#include <vector>
/**
* Base class for finite element meshes.
*/
class Mesh
{
public:
/**
* Constructor initializing the members with default values.
*
* @param[in] ndim space dimensions (dimension for coordinates)
* @param[in] nvert_e number of vertices per element (dimension for connectivity)
* @param[in] ndof_e degrees of freedom per element (= @p nvert_e for linear elements)
* @param[in] nedge_e number of edges per element (= @p nvert_e for linear elements in 2D)
*/
explicit Mesh(int ndim, int nvert_e = 0, int ndof_e = 0, int nedge_e = 0);
__attribute__((noinline))
Mesh(Mesh const &) = default;
Mesh &operator=(Mesh const &) = delete;
/**
* Destructor.
*
* See clang warning on
* <a href="https://stackoverflow.com/questions/28786473/clang-no-out-of-line-virtual-method-definitions-pure-abstract-c-class/40550578">weak-vtables</a>.
*/
virtual ~Mesh();
/**
* Reads mesh data from an ASCII file.
*
* File format, see ascii_write_mesh.m
*
* @param[in] fname file name
*/
explicit Mesh(std::string const &fname);
/**
* Reads mesh data from an ASCII file.
*
* File format, see ascii_write_mesh.m
*
* @param[in] fname file name
*/
void ReadVertexBasedMesh(std::string const &fname);
/**
* Number of finite elements in (sub)domain.
* @return number of elements.
*/
int Nelems() const
{
return _nelem;
}
/**
* Global number of vertices for each finite element.
* @return number of vertices per element.
*/
int NverticesElements() const
{
return _nvert_e;
}
/**
* Global number of degrees of freedom (dof) for each finite element.
* @return degrees of freedom per element.
*/
int NdofsElement() const
{
return _ndof_e;
}
/**
* Number of vertices in mesh.
* @return number of vertices.
*/
int Nnodes() const
{
return _nnode;
}
/**
* Space dimension.
* @return number of dimensions.
*/
int Ndims() const
{
return _ndim;
}
/**
* (Re-)Allocates memory for the element connectivity and redefines the appropriate dimensions.
*
* @param[in] nelem number of elements
* @param[in] nvert_e number of vertices per element
*/
void Resize_Connectivity(int nelem, int nvert_e)
{
SetNelem(nelem); // number of elements
SetNverticesElement(nvert_e); // vertices per element
_ia.resize(nelem * nvert_e);
}
/**
* Read connectivity information (g1,g2,g3)_i.
* @return connectivity vector [nelems*ndofs].
*/
const std::vector<int> &GetConnectivity() const
{
return _ia;
}
/**
* Access/Change connectivity information (g1,g2,g3)_i.
* @return connectivity vector [nelems*ndofs].
*/
std::vector<int> &GetConnectivity()
{
return _ia;
}
/**
* (Re-)Allocates memory for the element connectivity and redefines the appropriate dimensions.
*
* @param[in] nnodes number of nodes
* @param[in] ndim space dimension
*/
void Resize_Coords(int nnodes, int ndim)
{
SetNnode(nnodes); // number of nodes
SetNdim(ndim); // space dimension
_xc.resize(nnodes * ndim);
}
/**
* Read coordinates of vertices (x,y)_i.
* @return coordinates vector [nnodes*2].
*/
const std::vector<double> &GetCoords() const
{
return _xc;
}
/**
* Access/Change coordinates of vertices (x,y)_i.
* @return coordinates vector [nnodes*2].
*/
std::vector<double> &GetCoords()
{
return _xc;
}
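// Example of the storage layout (a sketch; cf. GetCoordsInRectangle and
// GetConnectivityInRectangle below): for vertex k and linear triangle e
//   x_k = GetCoords()[2*k],  y_k = GetCoords()[2*k+1]
//   vertices of e: GetConnectivity()[3*e], GetConnectivity()[3*e+1], GetConnectivity()[3*e+2]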
/**
* Calculate values in vector @p v via function @p func(x,y)
* @param[in] v vector
* @param[in] func function of (x,y) returning a double value.
*/
void SetValues(std::vector<double> &v, const std::function<double(double, double)> &func) const;
void SetBoundaryValues(std::vector<double> &v, const std::function<double(double, double)> &func) const;
void SetDirchletValues(std::vector<double> &v, const std::function<double(double, double)> &func) const;
/**
* Prints the information for a finite element mesh
*/
void Debug() const;
/**
* Prints the edge based information for a finite element mesh
*/
void DebugEdgeBased() const;
/**
* Determines the indices of those vertices with Dirichlet boundary conditions
* @return index vector.
*/
virtual std::vector<int> Index_DirichletNodes() const;
virtual std::vector<int> Index_BoundaryNodes() const;
/**
* Write vector @p v together with its mesh information to an ASCII file @p fname.
*
* The data are written in C-style.
*
* @param[in] fname file name
* @param[in] v vector
*/
void Write_ascii_matlab(std::string const &fname, std::vector<double> const &v) const;
/**
* Exports the mesh information to ASCII files @p basename + {_coords|_elements}.txt.
*
* The data are written in C-style.
*
* @param[in] basename first part of file names
*/
void Export_scicomp(std::string const &basename) const;
/**
* Visualize @p v together with its mesh information via matlab or octave.
*
* Comment/uncomment those code lines in method Mesh::Visualize (geom.cpp)
* that are supported on your system.
*
* @param[in] v vector
*
* @warning matlab files ascii_read_meshvector.m visualize_results.m
* must be in the executing directory.
*/
void Visualize(std::vector<double> const &v) const;
/**
* Global number of edges.
* @return number of edges in mesh.
*/
int Nedges() const
{
return _nedge;
}
/**
* Global number of edges for each finite element.
* @return number of edges per element.
*/
int NedgesElements() const
{
return _nedge_e;
}
/**
* Read edge connectivity information (e1,e2,e3)_i.
* @return edge connectivity vector [nelems*_nedge_e].
*/
const std::vector<int> &GetEdgeConnectivity() const
{
return _ea;
}
/**
* Access/Change edge connectivity information (e1,e2,e3)_i.
* @return edge connectivity vector [nelems*_nedge_e].
*/
std::vector<int> &GetEdgeConnectivity()
{
return _ea;
}
/**
* Read edge information (v1,v2)_i.
* @return edge connectivity vector [_nedge*2].
*/
const std::vector<int> &GetEdges() const
{
return _edges;
}
/**
* Access/Change edge information (v1,v2)_i.
* @return edge connectivity vector [_nedge*2].
*/
std::vector<int> &GetEdges()
{
return _edges;
}
/**
* Determines all node to node connections from the vertex based mesh.
*
* @return vector[k][] containing all connections of vertex k, including to itself.
*/
std::vector<std::vector<int>> Node2NodeGraph() const
{
//// Check version 2 wrt. version 1
//auto v1=Node2NodeGraph_1();
//auto v2=Node2NodeGraph_2();
//if ( equal(v1.cbegin(),v1.cend(),v2.begin()) )
//{
//std::cout << "\nidentical Versions\n";
//}
//else
//{
//std::cout << "\nE R R O R in Versions\n";
//}
//return Node2NodeGraph_1();
return Node2NodeGraph_2(); // 2 times faster than version 1
}
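// Usage example (a sketch; 'mesh' denotes any Mesh instance):
//   auto const n2n = mesh.Node2NodeGraph();
//   for (int j : n2n[k]) { /* j runs over all vertices connected to vertex k, incl. k */ }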
/**
* Accesses the father-of-nodes relation.
*
* @return vector of length 0 because no relation available.
*
*/
virtual std::vector<int> const &GetFathersOfVertices() const
{
return _dummy;
}
/**
* Deletes all edge connectivity information (saves memory).
*/
void Del_EdgeConnectivity();
protected:
//public:
void SetNelem(int nelem)
{
_nelem = nelem;
}
void SetNverticesElement(int nvert)
{
_nvert_e = nvert;
}
void SetNdofsElement(int ndof)
{
_ndof_e = ndof;
}
void SetNnode(int nnode)
{
_nnode = nnode;
}
void SetNdim(int ndim)
{
_ndim = ndim;
}
void SetNedge(int nedge)
{
_nedge = nedge;
}
/**
* Reads vertex based mesh data from an ASCII file.
*
* File format, see ascii_write_mesh.m
*
* @param[in] fname file name
*/
void ReadVectexBasedMesh(std::string const &fname);
/**
* The vertex based mesh data are used to derive the edge based data.
*
* @warning Exactly 3 vertices, 3 edges per element are assumed (linear triangle in 2D)
*/
void DeriveEdgeFromVertexBased()
{
//DeriveEdgeFromVertexBased_slow();
//DeriveEdgeFromVertexBased_fast();
DeriveEdgeFromVertexBased_fast_2();
}
void DeriveEdgeFromVertexBased_slow();
void DeriveEdgeFromVertexBased_fast();
void DeriveEdgeFromVertexBased_fast_2();
/**
* The edge based mesh data are used to derive the vertex based data.
*
* @warning Exactly 3 vertices, 3 edges per element are assumed (linear triangle in 2D)
*/
void DeriveVertexFromEdgeBased();
/**
* Number of boundary edges of the (sub)domain.
* @return number of boundary edges.
*/
int Nnbedges() const
{
return static_cast<int>(_bedges.size());
}
/**
* Checks whether the array dimensions fit to their appropriate size parameters
* @return true if all array dimensions match their size parameters.
*/
virtual bool Check_array_dimensions() const;
/**
* Permutes the vertex information in an edge based mesh.
*
* @param[in] old2new new indices of original vertices.
*/
void PermuteVertices_EdgeBased(std::vector<int> const &old2new);
private:
/**
* Determines all node to node connections from the vertex based mesh.
*
* @return vector[k][] containing all connections of vertex k, including to itself.
*/
std::vector<std::vector<int>> Node2NodeGraph_1() const; // is correct
/**
* Determines all node to node connections from the vertex based mesh.
*
* Faster than @p Node2NodeGraph_1().
*
* @return vector[k][] containing all connections of vertex k, including to itself.
*/
std::vector<std::vector<int>> Node2NodeGraph_2() const; // is correct
//private:
protected:
int _nelem; //!< number elements
int _nvert_e; //!< number of vertices per element
int _ndof_e; //!< degrees of freedom (d.o.f.) per element
int _nnode; //!< number nodes/vertices
int _ndim; //!< space dimension of the problem (1, 2, or 3)
std::vector<int> _ia; //!< element connectivity
std::vector<double> _xc; //!< coordinates
protected:
// B.C.
std::vector<int> _bedges; //!< boundary edges [nbedges][2] storing start/end vertex
// 2020-01-08
std::vector<int> _sdedges; //!< boundary edges [nbedges][2] with left/right subdomain number
//private:
protected:
// edge based connectivity
int _nedge; //!< number of edges in mesh
int _nedge_e; //!< number of edges per element
std::vector<int> _edges; //!< edges of mesh (vertices ordered ascending)
std::vector<int> _ea; //!< edge based element connectivity
// B.C.
std::vector<int> _ebedges; //!< boundary edges [nbedges]
private:
const std::vector<int> _dummy; //!< empty dummy vector
};
// *********************************************************************
class RefinedMesh: public Mesh
{
public:
/**
* Constructs a refined mesh according to the marked elements in @p ibref.
*
* If the vector @p ibref has size 0 then all elements will be refined.
*
* @param[in] cmesh original coarse mesh to be refined.
* @param[in] ibref vector containing True/False regarding refinement for each element
*
*/
//explicit RefinedMesh(Mesh const &cmesh, std::vector<bool> const &ibref = std::vector<bool>(0));
RefinedMesh(Mesh const &cmesh, std::vector<bool> const &ibref);
//RefinedMesh(Mesh const &cmesh, std::vector<bool> const &ibref);
/**
* Constructs a refined mesh by regular refinement of all elements.
*
* @param[in] cmesh original coarse mesh to be refined.
*
*/
explicit RefinedMesh(Mesh const &cmesh)
: RefinedMesh(cmesh, std::vector<bool>(0))
{}
RefinedMesh(RefinedMesh const &) = delete;
//RefinedMesh(RefinedMesh const&&) = delete;
RefinedMesh &operator=(RefinedMesh const &) = delete;
//RefinedMesh& operator=(RefinedMesh const&&) = delete;
/**
* Destructor.
*/
virtual ~RefinedMesh() override;
/**
* Refines the mesh according to the marked elements.
*
* @param[in] ibref vector containing True/False regarding refinement for each element
*
* @return the refined mesh
*
*/
Mesh RefineElements(std::vector<bool> const &ibref);
/**
* Refines all elements in the actual mesh.
*
* @param[in] nref number of regular refinements to perform
*
*/
void RefineAllElements(int nref = 1);
/**
* Accesses the father-of-nodes relation.
*
* @return father-of-nodes relation [nnodes][2]
*
*/
std::vector<int> const &GetFathersOfVertices() const override
{
return _vfathers;
}
protected:
/**
* Checks whether the array dimensions fit to their appropriate size parameters
* @return true if all array dimensions match their size parameters.
*/
bool Check_array_dimensions() const override;
/**
* Permutes the vertex information in an edge based mesh.
*
* @param[in] old2new new indices of original vertices.
*/
void PermuteVertices_EdgeBased(std::vector<int> const &old2new);
private:
//Mesh const & _cmesh; //!< coarse mesh
std::vector<bool> const _ibref; //!< refinement info
int _nref; //!< number of regular refinements performed
std::vector<int> _vfathers; //!< stores the 2 fathers of each vertex (equal fathers denote original coarse vertex)
};
// *********************************************************************
class gMesh_Hierarchy
{
public:
/**
* Constructs mesh hierarchy of @p nlevel levels starting with coarse mesh @p cmesh.
* The coarse mesh @p cmesh will be @p nlevel-1 times geometrically refined.
*
* @param[in] cmesh initial coarse mesh
* @param[in] nlevel number levels in mesh hierarchy
*
*/
gMesh_Hierarchy(Mesh const &cmesh, int nlevel);
size_t size() const
{
return _gmesh.size();
}
/**
* Access to mesh @p lev from mesh hierarchy.
*
* @return mesh @p lev
* @warning An out_of_range exception might be thrown.
*
*/
Mesh const &operator[](int lev) const
{
return *_gmesh.at(lev);
}
/**
* Access to finest mesh in mesh hierarchy.
*
* @return finest mesh
*
*/
Mesh const &finest() const
{
return *_gmesh.back();
}
/**
* Access to coarsest mesh in mesh hierarchy.
*
* @return coarsest mesh
*
*/
Mesh const &coarsest() const
{
return *_gmesh.front();
}
private:
std::vector<std::shared_ptr<Mesh>> _gmesh; //!< mesh hierarchy from coarse ([0]) to fine.
};
// *********************************************************************
/**
* 2D finite element mesh of the square consisting of linear triangular elements.
*/
class Mesh_2d_3_square: public Mesh
{
public:
/**
* Generates the f.e. mesh for the unit square.
*
* @param[in] nx number of discretization intervals in x-direction
* @param[in] ny number of discretization intervals in y-direction
* @param[in] myid my MPI-rank / subdomain
* @param[in] procx number of ranks/subdomains in x-direction
* @param[in] procy number of processes in y-direction
*/
Mesh_2d_3_square(int nx, int ny, int myid = 0, int procx = 1, int procy = 1);
/**
* Destructor
*/
~Mesh_2d_3_square() override;
/**
* Set solution vector based on a tensor product grid in the rectangle.
* @param[in] u solution vector
*/
void SetU(std::vector<double> &u) const;
/**
* Set right hand side (rhs) vector on a tensor product grid in the rectangle.
* @param[in] f rhs vector
*/
void SetF(std::vector<double> &f) const;
/**
* Determines the indices of those vertices with Dirichlet boundary conditions
* @return index vector.
*/
std::vector<int> Index_DirichletNodes() const override;
std::vector<int> Index_BoundaryNodes() const override;
/**
* Stores the values of vector @p u of (sub)domain into a file @p name for further processing in gnuplot.
* The file stores row-wise the x- and y-coordinates together with the value from @p u .
* The domain [@p xl, @p xr] x [@p yb, @p yt] is discretized into @p nx x @p ny intervals.
*
* @param[in] name basename of file name (file name will be extended by the rank number)
* @param[in] u local vector
*
* @warning Assumes tensor product grid in unit square; numbered row-wise
* (as generated in class constructor).
* The output is provided for tensor product grid visualization
* ( similar to Matlab-surf() ).
*
* @see Mesh_2d_3_square
*/
void SaveVectorP(std::string const &name, std::vector<double> const &u) const;
// still to be implemented in this class:
// GetBound(), AddBound()
// or, better, a generalized way with indices and their appropriate ranks for MPI communication
private:
/**
* Determines the coordinates of the discretization nodes of the domain [@p xl, @p xr] x [@p yb, @p yt]
* which is discretized into @p nx x @p ny intervals.
* @param[in] nx number of discretization intervals in x-direction
* @param[in] ny number of discretization intervals in y-direction
* @param[in] xl x-coordinate of left boundary
* @param[in] xr x-coordinate of right boundary
* @param[in] yb y-coordinate of lower boundary
* @param[in] yt y-coordinate of upper boundary
* @param[out] xc coordinate vector of length 2n with x(2*k,2*k+1) as coordinates of node k
*/
void GetCoordsInRectangle(int nx, int ny, double xl, double xr, double yb, double yt,
double xc[]);
/**
* Determines the element connectivity of linear triangular elements of a FEM discretization
* of a rectangle using @p nx x @p ny equidistant intervals for discretization.
* @param[in] nx number of discretization intervals in x-direction
* @param[in] ny number of discretization intervals in y-direction
* @param[out] ia element connectivity matrix with ia(3*s,3*s+1,3*s+2) as node numbers of element s
*/
void GetConnectivityInRectangle(int nx, int ny, int ia[]);
private:
int _myid; //!< my MPI rank
int _procx; //!< number of MPI ranks in x-direction
int _procy; //!< number of MPI ranks in y-direction
std::array<int, 4> _neigh; //!< MPI ranks of neighbors (negative: no neighbor but b.c.)
int _color; //!< red/black coloring (checker board) of subdomains
double _xl; //!< x coordinate of lower left corner of square
double _xr; //!< x coordinate of lower right corner of square
double _yb; //!< y coordinate of lower left corner of square
double _yt; //!< y coordinate of upper right corner of square
int _nx; //!< number of intervals in x-direction
int _ny; //!< number of intervals in y-direction
};
// *********************************************************************
#endif

@ -0,0 +1,105 @@
// MPI code in C++.
// See [Gropp/Lusk/Skjellum, "Using MPI", p.33/41 etc.]
// and /opt/mpich/include/mpi2c++/comm.h for details
#include "geom.h"
#include "par_geom.h"
#include "vdop.h"
#include <cassert>
#include <cmath>
#include <iostream>
#include <mpi.h> // MPI
#include <omp.h> // OpenMP
using namespace std;
int main(int argc, char **argv )
{
MPI_Init(&argc, &argv);
MPI_Comm const icomm(MPI_COMM_WORLD);
omp_set_num_threads(1); // don't use OMP parallelization for a start
//
{
int np;
MPI_Comm_size(icomm, &np);
// assert(4 == np); // example is only provided for 4 MPI processes
}
// #####################################################################
// ---- Read the f.e. mesh and the mapping of elements to MPI processes
//Mesh const mesh_c("square_4.txt"); // Files square_4.txt and square_4_sd.txt are needed
ParMesh const mesh("square",icomm);
int const numprocs = mesh.NumProcs();
int const myrank = mesh.MyRank();
if ( 0 == myrank ) {
cout << "\n There are " << numprocs << " processes running.\n \n";
}
int const check_rank=1; // choose the MPI process you would like to check the mesh
//if ( check_rank == myrank ) mesh.Debug();
//if ( check_rank == myrank ) mesh.DebugEdgeBased();
// ##########################################################################
// ---- allocate local vectors and check scalar product and vector accumulation
if (check_rank==myrank) {printf("\n\n-------------- Task 9 --------------\n\n");}
if (check_rank==myrank) cout << "Mesh coordinates: " << mesh.GetCoords() << endl << endl;
MPI_Barrier(icomm);
vector<double> xl(mesh.Nnodes(), 1.0);
// for visualization I had to type in terminal:
// export LIBGL_ALWAYS_SOFTWARE=1
if (check_rank==myrank) mesh.Visualize(xl);
double ss = mesh.dscapr(xl,xl);
cout << myrank << " : scalar : " << ss << endl;
mesh.VecAccu(xl);
if (check_rank==myrank) mesh.Visualize(xl);
MPI_Barrier(icomm);
if (check_rank==myrank) {printf("\n\n-------------- Task 10 --------------\n\n");}
vector<int> y(mesh.Nnodes(), 1);
mesh.VecAccuInt(y);
if (check_rank==myrank) {
printf("Accumulated integer vector y:\n");
for (int i : y) {
cout << i << " ";
}
}
MPI_Barrier(icomm);
if (check_rank==myrank) {printf("\n\n-------------- Task 11 --------------\n\n");}
int global_nodes = mesh.GlobalNodes();
if (check_rank==myrank) cout << "Global nodes: " << global_nodes << endl;
MPI_Barrier(icomm);
if (check_rank==myrank) {printf("\n\n-------------- Task 12 --------------\n\n");}
// Set xl to 1s vector again
for (size_t k=0; k<xl.size(); ++k)
{
xl[k] = 1.0;
}
if (check_rank==myrank) mesh.Visualize(xl);
mesh.Average(xl);
if (check_rank==myrank) mesh.Visualize(xl);
// -------------- Task 13 --------------
// Should work with 2, 4 and 6 subdomains (change run target in GCC_default.mk)
// Check subdomains with different values for check_rank (0-5)
MPI_Finalize();
return 0;
}

@ -0,0 +1,626 @@
// see: http://llvm.org/docs/CodingStandards.html#include-style
#include "vdop.h"
//#include "geom.h"
#include "par_geom.h"
#include <algorithm>
#include <array>
#include <cassert>
#include <cmath>
#include <ctime> // contains clock()
#include <fstream>
#include <iostream>
#include <list>
#include <numeric> // accumulate()
#include <string>
#include <vector>
using namespace std;
ParMesh::ParMesh(int ndim, int nvert_e, int ndof_e, int nedge_e, MPI_Comm const &icomm)
: Mesh(ndim, nvert_e, ndof_e, nedge_e),
_icomm(icomm), _numprocs(-1), _myrank(-1),
_v_l2g(0), _t_l2g(0), _v_g2l{{}}, _t_g2l{{}}, _valence(0),
_sendbuf(0), _sendcounts(0), _sdispls(0),
_loc_itf(0), _gloc_itf(0), _buf2loc(0)
{
MPI_Comm_size(icomm, &_numprocs);
MPI_Comm_rank(icomm, &_myrank);
}
ParMesh::~ParMesh()
{}
ParMesh::ParMesh(std::string const &sname, MPI_Comm const &icomm)
: ParMesh(2, 3, 3, 3, icomm) // two dimensions, 3 vertices, 3 dofs, 3 edges per element
{
//const int numprocs = _icomm.Get_size();
const string NS = "_" + to_string(_numprocs);
const string fname = sname + NS + ".txt";
//cout << "############ " << fname << endl;
ReadVertexBasedMesh(fname);
cout << "\n End of sequential File read \n";
// ------------------------------------------------------------------------------
// Until this point a l l processes possess a l l mesh info in g l o b a l numbering
//
// Now, we have to select the data belonging to my_rank
// and we have to create the mapping local to global (l2g) and vice versa (g2l)
// ------------------------------------------------------------------------------
// save the global node mesh (maybe we need it later)
DeriveEdgeFromVertexBased(); // and even more
Mesh global_mesh(*this); // requires a l o t of memory
Del_EdgeConnectivity();
// read the subdomain info
const string dname = sname + NS + "_sd" + ".txt";
vector<int> t2d = ReadElementSubdomains(dname); // global mapping triangle to subdomain for all elements
//const int myrank = _icomm.Get_rank();
Transform_Local2Global_Vertex(_myrank, t2d); // Vertex based mesh: now in l o c a l indexing
DeriveEdgeFromVertexBased(); // Generate also the l o c a l edge based information
Generate_VectorAdd();
// Now we have to organize the MPI communication of vertices on the subdomain interfaces
return;
}
vector<int> ParMesh::ReadElementSubdomains(string const &dname)
{
ifstream ifs(dname);
if (!(ifs.is_open() && ifs.good())) {
cerr << "ParMesh::ReadElementSubdomain: Error cannot open file " << dname << endl;
assert(ifs.is_open());
}
int const OFFSET{1}; // Matlab to C indexing
cout << "ASCI file " << dname << " opened" << endl;
// Read some mesh constants
int nelem;
ifs >> nelem;
cout << nelem << " " << Nelems() << endl;
assert( Nelems() == nelem);
// Allocate memory
vector<int> t2d(nelem, -1);
// Read element mapping
for (int k = 0; k < nelem; ++k) {
int tmp;
ifs >> tmp;
//t2d[k] = tmp - OFFSET;
// 2020-01-08
t2d[k] = min(tmp, NumProcs()) - OFFSET;
}
return t2d;
}
void ParMesh::Transform_Local2Global_Vertex(int const myrank, vector<int> const &t2d)
{
// number of local elements
const int l_ne = count(t2d.cbegin(), t2d.cend(), myrank);
//cout << myrank << ":: " << lne << endl;
vector<int> l_ia(l_ne * NverticesElements(), -1); // local elements still with global vertex numbers
_t_l2g.resize(l_ne, -1);
int lk = 0;
for (size_t k = 0; k < t2d.size(); ++k) {
if (myrank == t2d[k]) {
//if (0==myrank)
//{
//cout << lk << " k " << t2d[k] << endl;
//}
l_ia[3 * lk ] = _ia[3 * k ];
l_ia[3 * lk + 1] = _ia[3 * k + 1];
l_ia[3 * lk + 2] = _ia[3 * k + 2]; // local elements still with global vertex numbers
_t_l2g[lk] = k; // elements: local to global mapping
_t_g2l[k] = lk; // global to local
++lk;
}
}
// Checks:
assert( count(l_ia.cbegin(), l_ia.cend(), -1) == 0 );
assert( count(_t_l2g.cbegin(), _t_l2g.cend(), -1) == 0 );
// Vertices: local to global mapping
auto tmp = l_ia;
sort(tmp.begin(), tmp.end());
auto ip = unique(tmp.begin(), tmp.end());
tmp.erase(ip, tmp.end());
_v_l2g = tmp; // Vertices: local to global mapping
for (size_t lkv = 0; lkv < _v_l2g.size(); ++lkv) {
_v_g2l[_v_l2g[lkv]] = lkv; // global to local
}
// Boundary edges
vector<int> l_bedges;
vector<int> l_sdedges;
for (size_t b = 0; b < _bedges.size(); b += 2) {
int const v1 = _bedges[b ]; // global vertex numbers
int const v2 = _bedges[b + 1];
try {
int const lv1 = _v_g2l.at(v1); // map[] would add that element
int const lv2 = _v_g2l.at(v2); // but at() throws an exception
l_bedges.push_back(lv1);
l_bedges.push_back(lv2); // Boundaries: already in local indexing
// 2020-01-08
l_sdedges.push_back(_sdedges[b ]);
l_sdedges.push_back(_sdedges[b+1]);
}
catch (std::out_of_range & err) {
//cerr << ".";
}
}
// number of local vertices
const int l_nn = _v_l2g.size();
vector<double> l_xc(Ndims()*l_nn);
for (int lkk = 0; lkk < l_nn; ++lkk) {
int k = _v_l2g.at(lkk);
l_xc[2 * lkk ] = _xc[2 * k ];
l_xc[2 * lkk + 1] = _xc[2 * k + 1];
}
// Now, we represent the vertex mesh in l o c a l numbering
// elements
for (size_t i = 0; i < l_ia.size(); ++i) {
l_ia[i] = _v_g2l.at(l_ia[i]); // element vertices: global to local
}
SetNelem(l_ne);
_ia = l_ia;
// boundary
_bedges = l_bedges;
_sdedges = l_sdedges;
// coordinates
SetNnode(l_nn);
_xc = l_xc;
return;
}
void ParMesh::Generate_VectorAdd()
{
// Some checks
int lnn = Nnodes(); // local number of vertices
assert(static_cast<int>(_v_l2g.size()) == lnn);
int ierr{-12345};
// ---- Determine global largest vertex index
int gidx_max{-1}; // global largest vertex index
int lmax = *max_element(_v_l2g.cbegin(), _v_l2g.cend());
MPI_Allreduce(&lmax, &gidx_max, 1, MPI_INT, MPI_MAX, _icomm);
int gidx_min{-1}; // global smallest vertex index
int lmin = *min_element(_v_l2g.cbegin(), _v_l2g.cend());
MPI_Allreduce(&lmin, &gidx_min, 1, MPI_INT, MPI_MIN, _icomm);
//cout << gidx_min << " " << gidx_max << endl;
assert(0 == gidx_min); // global indices have to start with 0
// ---- Determine for all global vertices the number of subdomains it belongs to
vector<int> global(gidx_max+1, 0); // global scalar array for vertices
for (auto const gidx : _v_l2g) global[gidx] = 1;
// https://www.mpi-forum.org/docs/mpi-2.2/mpi22-report/node109.htm
ierr = MPI_Allreduce(MPI_IN_PLACE, global.data(), global.size(), MPI_INT, MPI_SUM, _icomm);
//if (0 == MyRank()) cout << global << endl;
//MPI_Barrier(_icomm);
//cout << _xc[2*_v_g2l.at(2)] << " , " << _xc[2*_v_g2l.at(2)+1] << endl;
//MPI_Barrier(_icomm);
// now, global[] contains the number of subdomains a global vertex belongs to
if ( count(global.cbegin(), global.cend(), 0) > 0 )
cerr << "\n !!! Non-continuous global vertex indexing !!!\n";
// ---- Determine local interface vertices ( <==> global[] > 1 )
// _loc_itf, neigh_itf
//vector<int> loc_itf; // local indices of interface vertices on this MPI process
for (size_t lk = 0; lk < _v_l2g.size(); ++lk) {
int const gk = _v_l2g[lk]; // global index of local vertex lk
if ( global[gk] > 1 ) {
_loc_itf.push_back(lk); // local indices of interface vertices on this MPI process
}
}
//MPI_Barrier(_icomm);
//if (0 == MyRank()) cout << "\n..._loc_itf...\n" << _loc_itf << "\n......\n";
//MPI_Barrier(_icomm);
// ---- global indices of local interface vertices
//auto gloc_itf(_loc_itf);
_gloc_itf=_loc_itf;
for_each(_gloc_itf.begin(), _gloc_itf.end(), [this] (auto & v) -> void { v = _v_l2g[v];} );
//MPI_Barrier(_icomm);
//if (0 == MyRank()) cout << "\n..._gloc_itf...\n" << _gloc_itf << "\n......\n";
//DebugVector(_gloc_itf,"_gloc_itf");
// ---- Determine the global length of interfaces
vector<int> vnn(NumProcs(), -1); // number of interface vertices per MPI rank
int l_itf(_loc_itf.size()); // # local interface vertices
ierr = MPI_Allgather(&l_itf, 1, MPI_INT, vnn.data(), 1, MPI_INT, _icomm);
assert(0 == ierr);
//cout << vnn << endl;
// ---- Now we consider only the interface vertices
int snn = accumulate(vnn.cbegin(), vnn.cend(), 0); // required length of array for global interface indices
//cout << snn << " " << gnn << endl;
vector<int> dispnn(NumProcs(), 0) ; // displacement of interface vertices per MPI rank
partial_sum(vnn.cbegin(), vnn.cend() - 1, dispnn.begin() + 1);
//cout << dispnn << endl;
// ---- Get the global indices for all global interfaces
vector<int> g_itf(snn, -1); // collects all global indices of the global interfaces
// https://www.mpich.org/static//docs/v3.0.x/www3/MPI_Gatherv.html
ierr = MPI_Gatherv( _gloc_itf.data(), _gloc_itf.size(), MPI_INT,
g_itf.data(), vnn.data(), dispnn.data(), MPI_INT, 0, _icomm);
assert(0 == ierr);
// https://www.mpich.org/static/docs/v3.1/www3/MPI_Bcast.html
ierr = MPI_Bcast(g_itf.data(), g_itf.size(), MPI_INT, 0, _icomm);
assert(0 == ierr); // Now, each MPI rank has the all global indices of the global interfaces
//MPI_Barrier(_icomm);
//if (MyRank() == 0) cout << "\n...g_itf...\n" << g_itf << "\n......\n";
//MPI_Barrier(_icomm);
// ----- Determine all MPI ranks a local interface vertex belongs to
vector<vector<int>> neigh_itf(_loc_itf.size());// subdomains a local interface vertex belongs to
for (size_t lk = 0; lk < _loc_itf.size(); ++lk) {
const int gvert = _gloc_itf[lk]; // global index of local interface node lk
for (int rank = 0; rank < NumProcs(); ++rank) {
auto const startl = g_itf.cbegin() + dispnn[rank];
auto const endl = startl + vnn[rank];
if ( find( startl, endl, gvert) != endl) {
neigh_itf[lk].push_back(rank);
}
}
}
// ---- check the available info in _loc_itf[lk], _gloc_itf[lk], neigh_itf[lk]
//MPI_Barrier(_icomm);
////if (MyRank()==0) cout << "\n...neigh_itf ...\n" << neigh_itf << endl;
//if (MyRank() == 0) {
//for (size_t lk = 0; lk < _loc_itf.size(); ++lk ) {
//cout << lk << " : local idx " << _loc_itf[lk] << " , global idx " << _gloc_itf[lk];
//cout << " with MPI ranks " << neigh_itf[lk] << endl;
//}
//}
//MPI_Barrier(_icomm);
// ---- store the valence (i.e., the number of subdomains it belongs to) of all local vertices
_valence.resize(Nnodes(),1);
for (size_t lk = 0; lk < _loc_itf.size(); ++lk)
{
_valence[_loc_itf[lk]] = neigh_itf[lk].size();
}
//DebugVector(_valence,"_valence",_icomm);
// ---- We are going to use MPI_Alltoallv for data exchange on interfaces
// https://www.mpi-forum.org/docs/mpi-3.1/mpi31-report/node109.htm#Node109
// https://www.open-mpi.org/doc/v4.0/man3/MPI_Alltoallv.3.php
//int MPI_Alltoallv(const void* sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void* recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, MPI_Comm comm)
//
// MPI_Alltoallv needs:
// vector<double> sendbuf (MPI_IN_PLACE: used also as recvbuf)
// vector<int> sendcounts (the same as for recv)
// vector<int> sdispls (the same as for recv)
//
// We need to map the interface vertices onto the sendbuffer:
// vector<int> loc_itf local index of interface vertex lk
// vector<int> gloc_itf global index of interface vertex lk
// vector<int> buf2loc local indices of sendbuffer positions (the same as for recv)
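// Illustration (hypothetical numbers, not taken from a real mesh): assume rank 0 owns
// three interface vertices with local indices 4, 7, 9 whose neighbour sets are
// {0,1}, {0,1,2}, {0,2} (each set also contains the own rank). The loops below then yield
//    _sendcounts = [3, 2, 2],  _sdispls = [0, 3, 5],  nbuffer = 7,
//    _buf2loc    = [4, 7, 9,  4, 7,  7, 9]
// i.e., vertices 4,7,9 are exchanged with rank 0 itself, 4,7 with rank 1 and 7,9 with rank 2.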
// ---- Determine sendcounts[] and sdispls[] from neigh_itf[]
//vector<int> _sendcounts(NumProcs(), 0);
_sendcounts.resize(NumProcs(), 0);
for (size_t lk = 0; lk < _loc_itf.size(); ++lk ) {
auto const &kneigh = neigh_itf[lk];
for (size_t ns = 0; ns < kneigh.size(); ++ns) {
++_sendcounts[kneigh[ns]];
}
}
//if (MyRank() == 0) cout << "\n..._sendcounts ...\n" << _sendcounts << endl;
//vector<int> _sdispls(NumProcs(), 0);
_sdispls.resize(NumProcs(), 0);
partial_sum(_sendcounts.cbegin(), _sendcounts.cend() - 1, _sdispls.begin() + 1);
//vector<int> _sdispls(NumProcs()+1, 0);
//partial_sum(_sendcounts.cbegin(), _sendcounts.cend(), _sdispls.begin() + 1);
//if (MyRank() == 0) cout << "\n..._sdispls ...\n" << _sdispls << endl;
// ---- Determine size of buffer 'nbuffer' and mapping 'buf2loc'
int const nbuffer = accumulate(_sendcounts.cbegin(), _sendcounts.cend(), 0);
//vector<int> _buf2loc(nbuffer, -1);
_buf2loc.resize(nbuffer, -1);
int buf_idx = 0; // position in buffer
for (int rank = 0; rank < NumProcs(); ++rank) {
assert( buf_idx == _sdispls[rank]);
for (size_t lk = 0; lk < _loc_itf.size(); ++lk ) {
auto const &kneigh = neigh_itf[lk];
if (find(kneigh.cbegin(),kneigh.cend(),rank)!=kneigh.cend())
{
_buf2loc[buf_idx] = _loc_itf[lk];
++buf_idx;
}
}
}
//if (MyRank() == 0) cout << "\n...buf2loc ...\n" << buf2loc << endl;
//DebugVector(buf2loc,"buf2loc",_icomm);
// ---- Allocate send/recv buffer
//vector<double> _sendbuf(nbuffer,-1.0);
_sendbuf.resize(nbuffer,-1.0);
assert(CheckInterfaceExchange_InPlace());
cout << " Check of data exchange (InPlace) successful!\n";
assert(CheckInterfaceExchange());
cout << " Check of data exchange successful!\n";
assert(CheckInterfaceAdd_InPlace());
cout << " Check of data add (InPlace) successful!\n";
assert(CheckInterfaceAdd());
cout << " Check of data add successful!\n";
vector<double> x(Nnodes(),-1.0);
VecAccu(x);
cout << " VecAccu (InPlace) successful!\n";
return;
}
bool ParMesh::CheckInterfaceExchange_InPlace() const
{
vector<double> x(Nnodes(),-1.0);
copy(_v_l2g.cbegin(),_v_l2g.cend(),x.begin()); // init x with global vertex indices
for(size_t ls = 0; ls<_sendbuf.size(); ++ls)
{
_sendbuf[ls] = x[_buf2loc.at(ls)];
}
int ierr = MPI_Alltoallv(MPI_IN_PLACE, _sendcounts.data(), _sdispls.data(), MPI_DOUBLE,
_sendbuf.data(), _sendcounts.data(), _sdispls.data(), MPI_DOUBLE, _icomm);
assert(ierr==0);
//DebugVector(_sendbuf,"_sendbuf",_icomm);
vector<double> y(x);
for(size_t lk = 0; lk<_loc_itf.size(); ++lk) y[_loc_itf.at(lk)] = -1.0; // only for interface nodes
for(size_t ls = 0; ls<_sendbuf.size(); ++ls)
{
y[_buf2loc.at(ls)] = _sendbuf[ls];
}
double const eps=1e-10;
bool bv = equal(x.cbegin(),x.cend(),y.cbegin(),
[eps](double a, double b) -> bool
{ return std::abs(a-b)<eps*(1.0+0.5*(std::abs(a)+ std::abs(b))); }
);
return bv;
}
bool ParMesh::CheckInterfaceExchange() const
{
vector<double> x(Nnodes(),-1.0);
copy(_v_l2g.cbegin(),_v_l2g.cend(),x.begin()); // init x with global vertex indices
for(size_t ls = 0; ls<_sendbuf.size(); ++ls)
{
_sendbuf[ls] = x[_buf2loc.at(ls)];
}
vector<double> recvbuf(_sendbuf.size());
int ierr = MPI_Alltoallv(_sendbuf.data(), _sendcounts.data(), _sdispls.data(), MPI_DOUBLE,
recvbuf.data(), _sendcounts.data(), _sdispls.data(), MPI_DOUBLE, _icomm);
//DebugVector(_sendbuf,"_sendbuf",_icomm);
//DebugVector(recvbuf,"recvbuf",_icomm);
assert(ierr==0);
vector<double> y(x);
for(size_t lk = 0; lk<_loc_itf.size(); ++lk) y[_loc_itf.at(lk)] = -1.0; // only for interface nodes
for(size_t ls = 0; ls<recvbuf.size(); ++ls)
{
y[_buf2loc.at(ls)] = recvbuf[ls];
}
//cout << "WRONG : " << count(y.cbegin(),y.cend(), -1.0) << endl;
double const eps=1e-10;
bool bv = equal(x.cbegin(),x.cend(),y.cbegin(),
[eps](double a, double b) -> bool
{ return std::abs(a-b)<eps*(1.0+0.5*(std::abs(a)+ std::abs(b))); }
);
return bv;
}
bool ParMesh::CheckInterfaceAdd_InPlace() const
{
vector<double> x(Nnodes(),-1.0);
for (size_t i=0; i<x.size(); ++i)
{
x[i] = _xc[2*i]+_xc[2*i+1]; // init x with coordinate values
}
for(size_t ls = 0; ls<_sendbuf.size(); ++ls)
{
_sendbuf[ls] = x[_buf2loc.at(ls)];
}
int ierr = MPI_Alltoallv(MPI_IN_PLACE, _sendcounts.data(), _sdispls.data(), MPI_DOUBLE,
_sendbuf.data(), _sendcounts.data(), _sdispls.data(), MPI_DOUBLE, _icomm);
assert(ierr==0);
//DebugVector(_sendbuf,"_sendbuf",_icomm);
vector<double> y(x);
for(size_t lk = 0; lk<_loc_itf.size(); ++lk) y[_loc_itf.at(lk)] = 0.0; // only for interface nodes
for(size_t ls = 0; ls<_sendbuf.size(); ++ls)
{
y[_buf2loc.at(ls)] += _sendbuf[ls];
}
MPI_Barrier(_icomm);
//DebugVector(x,"x",_icomm);
//DebugVector(y,"y",_icomm);
for (size_t i= 0; i<y.size(); ++i) y[i]/=_valence[i]; // divide by valence
double const eps=1e-10;
bool bv = equal(x.cbegin(),x.cend(),y.cbegin(),
[eps](double a, double b) -> bool
{ return std::abs(a-b)<eps*(1.0+0.5*(std::abs(a)+ std::abs(b))); }
);
return bv;
}
bool ParMesh::CheckInterfaceAdd() const
{
vector<double> x(Nnodes(),-1.0);
for (size_t i=0; i<x.size(); ++i)
{
//x[i] = _xc[2*i]+_xc[2*i+1]; // init x with coordinate values
x[i] = _v_l2g[i];
}
for(size_t ls = 0; ls<_sendbuf.size(); ++ls)
{
_sendbuf[ls] = x[_buf2loc.at(ls)];
}
vector<double> recvbuf(_sendbuf.size());
int ierr = MPI_Alltoallv(_sendbuf.data(), _sendcounts.data(), _sdispls.data(), MPI_DOUBLE,
recvbuf.data(), _sendcounts.data(), _sdispls.data(), MPI_DOUBLE, _icomm);
//DebugVector(_sendbuf,"_sendbuf",_icomm);
//DebugVector(recvbuf,"recvbuf",_icomm);
assert(ierr==0);
vector<double> y(x);
for(size_t lk = 0; lk<_loc_itf.size(); ++lk) y[_loc_itf.at(lk)] = 0.0; // only for interface nodes
for(size_t ls = 0; ls<recvbuf.size(); ++ls)
{
//if (0==MyRank()) cout << ls << ": " << _buf2loc.at(ls) << " " << y[_buf2loc.at(ls)] << "("<< x[_buf2loc.at(ls)] << ")" << " " << recvbuf[ls] << " (" << _sendbuf[ls] << ")" << endl;
y[_buf2loc.at(ls)] += recvbuf[ls];
}
MPI_Barrier(_icomm);
//DebugVector(x,"x",_icomm);
//DebugVector(y,"y",_icomm);
for (size_t i= 0; i<y.size(); ++i) y[i]/=_valence[i]; // divide by valence
double const eps=1e-10;
bool bv = equal(x.cbegin(),x.cend(),y.cbegin(),
[eps](double a, double b) -> bool
{ return std::abs(a-b)<eps*(1.0+0.5*(std::abs(a)+ std::abs(b))); }
);
return bv;
}
// ----------
void ParMesh::VecAccu(std::vector<double> &w) const
{
for(size_t ls = 0; ls<_sendbuf.size(); ++ls)
{
_sendbuf[ls] = w[_buf2loc.at(ls)];
}
int ierr = MPI_Alltoallv(MPI_IN_PLACE, _sendcounts.data(), _sdispls.data(), MPI_DOUBLE,
_sendbuf.data(), _sendcounts.data(), _sdispls.data(), MPI_DOUBLE, _icomm);
assert(ierr==0);
//DebugVector(_sendbuf,"_sendbuf",_icomm);
for(size_t lk = 0; lk<_loc_itf.size(); ++lk) w[_loc_itf.at(lk)] = 0.0; // only for interface nodes
for(size_t ls = 0; ls<_sendbuf.size(); ++ls)
{
w[_buf2loc.at(ls)] += _sendbuf[ls];
}
return;
}
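// Illustration (hypothetical numbers): for a vertex shared by ranks 0 and 1 that holds the
// local values 3.0 and 4.0, VecAccu() overwrites the entry on both ranks with the
// accumulated value 7.0; entries of non-interface vertices remain unchanged.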
// ##########################################################################
// ##########################################################################
// ---- EX10 ----
void ParMesh::VecAccuInt(std::vector<int> &w) const
{
// the int values are transported via the double send buffer (exact for |value| < 2^53)
for(size_t ls = 0; ls<_sendbuf.size(); ++ls)
{
_sendbuf[ls] = static_cast<double>(w[_buf2loc.at(ls)]);
}
int ierr = MPI_Alltoallv(MPI_IN_PLACE, _sendcounts.data(), _sdispls.data(), MPI_DOUBLE,
_sendbuf.data(), _sendcounts.data(), _sdispls.data(), MPI_DOUBLE, _icomm);
assert(ierr==0);
//DebugVector(_sendbuf,"_sendbuf",_icomm);
for(size_t lk = 0; lk<_loc_itf.size(); ++lk) w[_loc_itf.at(lk)] = 0; // only for interface nodes
for(size_t ls = 0; ls<_sendbuf.size(); ++ls)
{
w[_buf2loc.at(ls)] += static_cast<int>(_sendbuf[ls]);
}
return;
}
// ---- EX11 ----
int ParMesh::GlobalNodes() const
{
// Each global node contributes 1/valence from every subdomain it belongs to,
// so the weighted local sums add up to the global number of nodes.
double local_count = 0.0; // double accumulator avoids truncating the 1/valence terms
for (int i=0; i<Nnodes(); ++i) {
local_count += 1.0 / _valence[i];
}
double global_nodes = 0.0;
MPI_Allreduce(&local_count, &global_nodes, 1, MPI_DOUBLE, MPI_SUM, _icomm);
return static_cast<int>(global_nodes + 0.5); // round to the nearest integer
}
// ---- EX12 ----
void ParMesh::Average(std::vector<double> &w) const
{
for(size_t ls = 0; ls<_sendbuf.size(); ++ls)
{
_sendbuf[ls] = w[_buf2loc.at(ls)];
}
int ierr = MPI_Alltoallv(MPI_IN_PLACE, _sendcounts.data(), _sdispls.data(), MPI_DOUBLE,
_sendbuf.data(), _sendcounts.data(), _sdispls.data(), MPI_DOUBLE, _icomm);
assert(ierr==0);
//DebugVector(_sendbuf,"_sendbuf",_icomm);
for(size_t lk = 0; lk<_loc_itf.size(); ++lk) w[_loc_itf.at(lk)] = 0.0; // only for interface nodes
for(size_t ls = 0; ls<_sendbuf.size(); ++ls)
{
w[_buf2loc.at(ls)] += _sendbuf[ls];
}
// Divide each interface node's value by its valence
for(size_t lk = 0; lk<_loc_itf.size(); ++lk) w[_loc_itf.at(lk)] /= _valence[_loc_itf.at(lk)];
}
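// Usage note (a sketch, not part of the original file; 'mesh' and 'r' are hypothetical names):
// for an additively distributed vector r (each rank holds only its local contribution) and an
// accumulated copy w, the pairing <w,r> summed over all ranks gives the global inner product:
//     std::vector<double> w(r);             // r : locally assembled (distributed) vector
//     mesh.VecAccu(w);                      // w : accumulated version of r
//     double const s = mesh.dscapr(w, r);   // globally correct inner product <r,r>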

View file

@ -0,0 +1,150 @@
#ifndef PAR_GEOM_FILE
#define PAR_GEOM_FILE
#include "geom.h"
#include "vdop.h"
#include <array>
#include <functional> // function; C++11
#include <iostream>
#include <map>
#include <memory> // shared_ptr
#include <mpi.h> // MPI
#include <string>
#include <vector>
class ParMesh: public Mesh
{
public:
/**
* Constructor initializing the members with default values.
*
* @param[in] ndim space dimensions (dimension for coordinates)
* @param[in] nvert_e number of vertices per element (dimension for connectivity)
* @param[in] ndof_e degrees of freedom per element (= @p nvert_e for linear elements)
* @param[in] nedge_e number of edges per element (= @p nvert_e for linear elements in 2D)
* @param[in] icomm MPI communicator
*/
explicit ParMesh(int ndim, int nvert_e = 0, int ndof_e = 0, int nedge_e = 0, MPI_Comm const &icomm = MPI_COMM_WORLD);
ParMesh(ParMesh const &) = default;
ParMesh &operator=(ParMesh const &) = delete;
/**
* Destructor.
*
* See clang warning on
* <a href="https://stackoverflow.com/questions/28786473/clang-no-out-of-line-virtual-method-definitions-pure-abstract-c-class/40550578">weak-vtables</a>.
*/
virtual ~ParMesh();
/**
* Reads mesh data from a file.
*
* @param[in] sname suffix of file name
* @param[in] icomm MPI communicator
* @see ascii_write_mesh.m for the file format.
*/
explicit ParMesh(std::string const &sname, MPI_Comm const &icomm = MPI_COMM_WORLD);
void VecAccu(std::vector<double> &w) const;
void VecAccuInt(std::vector<int> &w) const;
int GlobalNodes() const;
void Average(std::vector<double> &w) const;
/** Inner product
* @param[in] x vector
* @param[in] y vector
* @return resulting Euclidean inner product <x,y>
*/
double dscapr(std::vector<double> const &x, std::vector<double> const &y) const
{
return par_scalar(x, y, _icomm);
}
private:
/**
* Reads the global triangle to subdomain mapping.
*
* @param[in] dname file name
*
* @see ascii_write_subdomains.m for the file format
*/
std::vector<int> ReadElementSubdomains(std::string const &dname);
/**
* Transforms the global mesh into the local mesh of subdomain @p myrank and establishes the local-to-global vertex mapping.
*
* @param[in] myrank MPI rank of this process
* @param[in] t2d global mapping triangle to subdomain for all elements (vertex based)
*/
void Transform_Local2Global_Vertex(int myrank, std::vector<int> const &t2d);
/**
* Generates the data structures for accumulating vector data on subdomain interfaces (valences, send counts, displacements and the buffer-to-local mapping used in MPI_Alltoallv).
*/
void Generate_VectorAdd();
bool CheckInterfaceExchange_InPlace() const;
bool CheckInterfaceExchange() const;
bool CheckInterfaceAdd_InPlace() const;
bool CheckInterfaceAdd() const;
public:
/** MPI rank of the calling process in communication group.
*
* @return MPI rank of the calling process
*/
int MyRank() const
{
return _myrank;
}
/** Number of MPI processes in communication group.
*
* @return Number of MPI processes
*/
int NumProcs() const
{
return _numprocs;
}
/** Returns the MPI communicator in use.
* @return MPI communicator
*/
MPI_Comm GetCommunicator() const
{
return _icomm;
}
private:
// Don't use &_icomm ==> Error
MPI_Comm const _icomm; //!< MPI communicator for the group of processes
int _numprocs; //!< number of MPI processes
int _myrank; //!< my MPI rank
std::vector<int> _v_l2g; //!< vertices: local to global mapping
std::vector<int> _t_l2g; //!< triangles: local to global mapping
std::map<int, int> _v_g2l; //!< vertices: global to local mapping
std::map<int, int> _t_g2l; //!< triangles: global to local mapping
//std::vector<int> e_l2g; //!< edges: local to global mapping
std::vector<int> _valence; //!< valence of local vertices, i.e. number of subdomains they belong to
// MPI_Alltoallv needs:
mutable std::vector<double> _sendbuf; //!< send buffer a n d receiving buffer (MPI_IN_PLACE)
std::vector<int> _sendcounts; //!< number of data to send to each MPI rank (the same as for recv)
std::vector<int> _sdispls; //!< offset of data to send to each MPI rank wrt. _sendbuf (the same as for recv)
//
// We need to map the interface vertices onto the sendbuffer:
std::vector<int> _loc_itf; //!< local index of interface vertex lk
std::vector<int> _gloc_itf; //!< global index of interface vertex lk
std::vector<int> _buf2loc; //!< local indices of sendbuffer positions (the same as for recv)
};
#endif
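For orientation, a minimal driver using this interface could look as follows. This is only a sketch, not part of the repository: the file suffix "square" is a placeholder, and it assumes that Nnodes() from the Mesh base class is publicly accessible.

#include "par_geom.h"
#include <iostream>
#include <vector>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    {
        ParMesh const mesh("square", MPI_COMM_WORLD);   // one subdomain per MPI rank
        std::vector<double> u(mesh.Nnodes(), 1.0);      // nodal vector on the local subdomain
        mesh.VecAccu(u);                                // interface entries now hold their valence
        double const s = mesh.dscapr(u, u);             // global inner product
        if (mesh.MyRank() == 0) {
            std::cout << "global nodes: " << mesh.GlobalNodes()
                      << ",  <u,u> = " << s << std::endl;
        }
    }
    MPI_Finalize();
    return 0;
}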

View file

@ -0,0 +1,71 @@
% Square:
% flatpak run org.octave.Octave <filename>
% or
% octave --no-window-system --no-gui -qf <filename>
clear all
clc
% %% L-shape
% g=[2 0 2 0 0 1 0; % #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 2 2 0 1 1 0;
% 2 2 1 1 0.5 1 0;
% 2 1 1 0.5 2 1 0;
% 2 1 0 2 2 1 0;
% 2 0 0 2 0 1 0]';
%% square
% g=[2 0 1 0 0 1 0; % #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 1 1 0 1 1 0;
% 2 1 0 1 1 1 0;
% 2 0 0 1 0 1 0]';
%% 2 squares
g=[2 0 1 0 0 1 0; % 1 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
2 1 1 0 1 1 2;
2 1 0 1 1 1 0;
2 0 0 1 0 1 0;
2 1 2 0 0 2 0; % 2 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
2 2 2 0 1 2 0;
2 2 1 1 1 2 0
]';
% %% 4 squares
% g=[2 0 1 0 0 1 0; % 1 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 1 1 0 1 1 2;
% 2 1 0 1 1 1 3;
% 2 0 0 1 0 1 0;
% 2 1 2 0 0 2 0; % 2 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 2 2 0 1 2 0;
% 2 2 1 1 1 2 4;
% % 2 1 1 1 0 2 1;
% % 2 0 1 1 1 3 1; % 3 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 1 1 1 2 3 4;
% 2 1 0 2 2 3 0;
% 2 0 0 2 1 3 0;
% % 2 1 2 1 1 4 2; % 4 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 2 2 1 2 4 0;
% 2 2 1 2 2 4 0
% % 2 1 1 2 1 4 3
% ]';
[p,e,t] = initmesh(g,'hmax',0.1);
pdemesh(p,e,t)
%% GH
% output from <https://de.mathworks.com/help/pde/ug/initmesh.html initmesh>
%
% coordinates p: [2][nnode]
% connectivity t: [4][nelem] with t(4,:) are the subdomain numbers
% edges e: [7][nedges] boundary edges
% e([1,2],:) - start/end vertex of edge
% e([3,4],:) - start/end values
% e(5,:) - segment number
% e([6,7],:) - left/right subdomain
ascii_write_mesh( p, t, e, mfilename);
ascii_write_subdomains( p, t, e, mfilename);
% tmp=t(1:3,:)

File diff suppressed because it is too large

View file

@ -0,0 +1,653 @@
652
1
2
1
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
1
2
2
1
1
2
2
2
2
2
2
2
2
1
1
1
2
1
1
1
1
2
2
2
2
1
1
1
1
2
2
2
2
1
1
1
1
1
1
1
1
1
1
1
1
2
2
2
2
2
2
2
2
2
2
1
1
2
2
1
1
1
1
1
1
2
2
1
1
2
2
2
2
2
2
2
2
1
1
1
1
1
1
1
1
2
2
2
2
1
1
1
1
1
1
2
2
2
2
1
1
2
2
2
2
2
2
2
2
1
1
1
1
1
1
1
1
2
2
1
1
1
1
2
2
2
2
1
1
1
1
1
1
2
2
2
2
2
2
1
1
1
1
1
1
2
2
1
1
2
2
1
1
2
2
2
2
2
2
2
2
2
2
1
1
1
1
1
1
1
1
1
1
2
2
2
2
2
2
1
1
2
2
1
1
1
1
2
2
2
2
2
2
2
2
2
2
1
1
2
2
2
2
1
1
1
1
1
1
1
1
2
2
2
2
1
1
2
2
2
2
1
1
1
1
1
1
1
1
2
2
2
2
2
2
2
2
2
2
1
1
1
1
1
1
2
2
2
2
2
2
1
1
1
1
1
1
1
1
2
2
2
2
1
1
1
1
1
1
2
2
2
2
2
2
2
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
2
2
2
2
1
1
1
1
1
1
1
1
2
2
2
2
2
2
1
1
2
2
1
1
2
2
1
1
2
2
2
2
2
2
1
1
2
2
1
1
1
1
1
1
1
1
2
2
2
2
1
1
1
1
1
1
1
1
1
1
2
2
2
2
2
2
2
2
2
2
1
1
2
2
2
2
2
2
2
2
1
1
1
1
1
1
2
2
2
2
2
2
2
2
2
2
1
1
1
1
1
1
1
1
1
1
2
2
2
2
2
2
1
1
2
2
2
2
2
2
2
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
2
2
1
1
1
1
1
1
2
2
2
2
2
2
1
1
1
1
2
2
1
1
2
2
1
1
1
1
1
1
2
2
2
2
2
2
1
1
2
2
2
2
2
2
2
2
1
1
1
1
1
1
1
1
2
2
2
2
2
2
2
2
1
1
1
1
1
1
2
2
2
2
2
2
1
1
2
2
2
2
1
1
1
1
1
1
1
1
2
2
2
2
2
2
1
1
1
1
2
2
1
1
2
2
2
2
2
2
2
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
1
1
1
1
1
1
2
2
2
2
1
1
2
2
2
2
2
1
2
1

View file

@ -0,0 +1,71 @@
% Square:
% flatpak run org.octave.Octave <filename>
% or
% octave --no-window-system --no-gui -qf <filename>
clear all
clc
% %% L-shape
% g=[2 0 2 0 0 1 0; % #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 2 2 0 1 1 0;
% 2 2 1 1 0.5 1 0;
% 2 1 1 0.5 2 1 0;
% 2 1 0 2 2 1 0;
% 2 0 0 2 0 1 0]';
%% square
% g=[2 0 1 0 0 1 0; % #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 1 1 0 1 1 0;
% 2 1 0 1 1 1 0;
% 2 0 0 1 0 1 0]';
% %% 2 squares
% g=[2 0 1 0 0 1 0; % 1 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 1 1 0 1 1 2;
% 2 1 0 1 1 1 0;
% 2 0 0 1 0 1 0;
% 2 1 2 0 0 2 0; % 2 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 2 2 0 1 2 0;
% 2 2 1 1 1 2 0
% ]';
%% 4 squares
g=[2 0 1 0 0 1 0; % 1 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
2 1 1 0 1 1 2;
2 1 0 1 1 1 3;
2 0 0 1 0 1 0;
2 1 2 0 0 2 0; % 2 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
2 2 2 0 1 2 0;
2 2 1 1 1 2 4;
% 2 1 1 1 0 2 1;
% 2 0 1 1 1 3 1; % 3 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
2 1 1 1 2 3 4;
2 1 0 2 2 3 0;
2 0 0 2 1 3 0;
% 2 1 2 1 1 4 2; % 4 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
2 2 2 1 2 4 0;
2 2 1 2 2 4 0
% 2 1 1 2 1 4 3
]';
[p,e,t] = initmesh(g,'hmax',0.1);
pdemesh(p,e,t)
%% GH
% output from <https://de.mathworks.com/help/pde/ug/initmesh.html initmesh>
%
% coordinates p: [2][nnode]
% connectivity t: [4][nelem] with t(4,:) are the subdomain numbers
% edges e: [7][nedges] boundary edges
% e([1,2],:) - start/end vertex of edge
% e([3,4],:) - start/end values
% e(5,:) - segment number
% e([6,7],:) - left/right subdomain
ascii_write_mesh( p, t, e, mfilename);
ascii_write_subdomains( p, t, e, mfilename);
% tmp=t(1:3,:)

Binary file not shown.

File diff suppressed because it is too large

File diff suppressed because it is too large

View file

@ -0,0 +1,98 @@
% Square:
% flatpak run org.octave.Octave <filename>
% or
% octave --no-window-system --no-gui -qf <filename>
clear all
clc
% %% L-shape
% g=[2 0 2 0 0 1 0; % #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 2 2 0 1 1 0;
% 2 2 1 1 0.5 1 0;
% 2 1 1 0.5 2 1 0;
% 2 1 0 2 2 1 0;
% 2 0 0 2 0 1 0]';
%% square
% g=[2 0 1 0 0 1 0; % #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 1 1 0 1 1 0;
% 2 1 0 1 1 1 0;
% 2 0 0 1 0 1 0]';
% %% 2 squares
% g=[2 0 1 0 0 1 0; % 1 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 1 1 0 1 1 2;
% 2 1 0 1 1 1 0;
% 2 0 0 1 0 1 0;
% 2 1 2 0 0 2 0; % 2 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 2 2 0 1 2 0;
% 2 2 1 1 1 2 0
% ]';
% %% 4 squares
% g=[2 0 1 0 0 1 0; % 1 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 1 1 0 1 1 2;
% 2 1 0 1 1 1 3;
% 2 0 0 1 0 1 0;
% 2 1 2 0 0 2 0; % 2 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 2 2 0 1 2 0;
% 2 2 1 1 1 2 4;
% % 2 1 1 1 0 2 1;
% % 2 0 1 1 1 3 1; % 3 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 1 1 1 2 3 4;
% 2 1 0 2 2 3 0;
% 2 0 0 2 1 3 0;
% % 2 1 2 1 1 4 2; % 4 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 2 2 1 2 4 0;
% 2 2 1 2 2 4 0
% % 2 1 1 2 1 4 3
% ]';
%% 6 squares
g=[2 0 1 0 0 1 0; % 1 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
2 1 1 0 1 1 2;
2 1 0 1 1 1 3;
2 0 0 1 0 1 0;
2 1 2 0 0 2 0; % 2 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
2 2 2 0 1 2 5;
2 2 1 1 1 2 4;
% 2 1 1 1 0 2 1;
% 2 0 1 1 1 3 1; % 3 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
2 1 1 1 2 3 4;
2 1 0 2 2 3 0;
2 0 0 2 1 3 0;
% 2 1 2 1 1 4 2; % 4 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
2 2 2 1 2 4 6;
2 2 1 2 2 4 0;
% 2 1 1 2 1 4 3;
2 2 3 0 0 5 0; % 5 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
2 3 3 0 1 5 0;
2 3 2 1 1 5 6;
% 2 2 2 1 0 5 2;
% 2 2 3 1 1 6 5; % 6 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
2 3 3 1 2 6 0;
2 3 2 2 2 6 0
% 2 2 2 2 1 6 4
]';
[p,e,t] = initmesh(g,'hmax',0.1);
pdemesh(p,e,t)
%% GH
% output from <https://de.mathworks.com/help/pde/ug/initmesh.html initmesh>
%
% coordinates p: [2][nnode]
% connectivity t: [4][nelem] with t(4,:) are the subdomain numbers
% edges e: [7][nedges] boundary edges
% e([1,2],:) - start/end vertex of edge
% e([3,4],:) - start/end values
% e(5,:) - segment number
% e([6,7],:) - left/right subdomain
ascii_write_mesh( p, t, e, mfilename);
ascii_write_subdomains( p, t, e, mfilename);
% tmp=t(1:3,:)

File diff suppressed because it is too large

File diff suppressed because it is too large

View file

@ -0,0 +1,704 @@
188 2 327 3
1 0
1 1
2 0
2 1
1 0.1
1 0.2
1 0.3
1 0.4
1 0.5
1 0.6
1 0.7
1 0.8
1 0.9
1.1 0
1.2 0
1.3 0
1.4 0
1.5 0
1.6 0
1.7 0
1.8 0
1.9 0
2 0.1
2 0.2
2 0.3
2 0.4
2 0.5
2 0.6
2 0.7
2 0.8
2 0.9
1.9 1
1.8 1
1.7 1
1.6 1
1.5 1
1.4 1
1.3 1
1.2 1
1.1 1
1.49824 0.488715
1.05193 0.0620153
1.96625 0.0483577
1.04793 0.955298
1.95161 0.955198
1.73603 0.345854
1.28857 0.282229
1.36738 0.727982
1.64625 0.722546
1.91195 0.84478
1.08323 0.845761
1.85001 0.0860639
1.52152 0.243093
1.22787 0.537745
1.67462 0.537899
1.81754 0.691116
1.16397 0.184115
1.49817 0.807652
1.74693 0.844955
1.84393 0.263468
1.24154 0.844531
1.08066 0.749219
1.76054 0.083432
1.50008 0.655952
1.84529 0.488219
1.17462 0.387723
1.35549 0.162537
1.44621 0.349021
1.60832 0.424124
1.6628 0.210825
1.133 0.617724
1.58119 0.13085
1.61703 0.869587
1.38903 0.856845
1.73 0.670898
1.91197 0.644767
1.16231 0.798105
1.14015 0.0810055
1.07152 0.14153
1.82658 0.899363
1.90413 0.172802
1.16585 0.900048
1.91126 0.746232
1.37648 0.580317
1.32975 0.43121
1.23911 0.676272
1.09469 0.495657
1.38535 0.261717
1.57154 0.56268
1.82457 0.587265
1.6065 0.316664
1.82091 0.349209
1.13118 0.291188
1.25373 0.0995169
1.4471 0.089654
1.64761 0.140153
1.80397 0.168364
1.82574 0.797144
1.69201 0.442852
1.52834 0.406743
1.464 0.903223
1.41795 0.780259
1.54963 0.730766
1.92222 0.448553
1.56289 0.928378
1.31213 0.892653
1.07541 0.655465
1.73457 0.92605
1.67051 0.812302
1.93201 0.256721
1.32454 0.794509
1.64979 0.933767
1.08946 0.92638
1.93126 0.0917278
1.90811 0.925959
1.54511 0.0706685
1.40439 0.48925
1.91756 0.544695
1.26613 0.200964
1.73971 0.756545
1.74585 0.247972
1.21468 0.316416
1.91124 0.347099
1.28928 0.613103
1.07981 0.375561
1.53607 0.314626
1.42573 0.699103
1.58547 0.659664
1.16172 0.695142
1.76076 0.51266
1.24656 0.452978
1.46284 0.568585
1.46297 0.192615
1.56643 0.474578
1.05798 0.441083
1.35115 0.0700935
1.60594 0.183437
1.35214 0.350198
1.07574 0.57016
1.7861 0.425088
1.71861 0.156597
1.24404 0.927617
1.58815 0.801244
1.2391 0.752451
1.1502 0.552118
1.64984 0.364251
1.07328 0.22312
1.47839 0.735193
1.37306 0.930749
1.68388 0.877557
1.54616 0.862383
1.77797 0.635848
1.62727 0.0716422
1.44232 0.837419
1.69071 0.0900998
1.85764 0.411619
1.85866 0.956351
1.96709 0.142936
1.13894 0.956686
1.31226 0.5173
1.47028 0.271985
1.26747 0.369611
1.65816 0.624881
1.36475 0.659896
1.20042 0.615405
1.32361 0.226305
1.45486 0.428791
1.59228 0.249245
1.62324 0.488742
1.7872 0.3017
1.66598 0.289144
1.7382 0.594826
1.21269 0.254385
1.16824 0.476568
1.58631 0.365552
1.11512 0.435004
1.54649 0.18839
1.30407 0.704311
1.39725 0.409587
1.51629 0.133942
1.42593 0.633055
1 0.95
1.05 1
2 0.95
1 0.05
2 0.05
2 0.15
1.95 1
22 3 43
81 24 110
6 5 79
43 3 186
7 6 147
8 7 125
9 8 135
10 9 139
107 10 139
11 10 107
12 11 62
114 23 158
94 16 136
95 18 116
87 9 135
42 14 78
13 12 51
16 17 136
17 18 95
18 19 116
20 21 63
21 22 52
52 22 114
43 23 114
110 25 123
184 4 188
26 27 104
104 27 118
24 25 110
121 46 171
30 31 50
50 31 115
45 32 115
80 33 108
115 32 157
44 40 183
33 34 108
44 13 113
108 34 112
36 37 101
101 37 149
1 14 42
14 15 78
158 23 187
37 38 149
34 35 112
42 5 185
28 29 76
106 38 142
78 15 94
117 41 132
51 12 62
92 46 170
100 41 167
164 48 178
31 45 115
35 36 105
82 39 159
62 11 107
22 43 114
25 26 123
45 31 184
97 60 121
119 47 173
88 47 166
102 48 127
89 41 134
76 29 83
29 30 83
109 49 120
5 42 79
122 47 162
127 48 164
75 49 163
103 49 143
103 64 128
163 55 172
21 52 63
39 40 159
106 61 111
131 54 174
131 66 162
63 52 97
81 60 97
99 55 169
101 58 151
30 50 83
99 46 140
124 54 160
93 7 147
126 53 168
93 57 173
102 74 111
168 53 177
104 65 156
133 53 161
116 19 153
138 68 179
40 44 113
77 62 129
88 67 133
153 20 155
102 58 154
75 56 120
89 64 132
38 39 142
109 59 150
76 56 90
83 50 98
90 56 152
51 62 77
107 71 129
15 16 94
94 67 119
78 57 79
42 78 79
98 59 120
112 73 150
20 63 155
92 60 123
77 61 82
51 77 82
80 59 98
56 76 83
132 64 181
117 84 160
88 68 138
41 89 132
111 61 144
145 54 165
93 66 125
129 71 165
119 67 166
100 68 126
100 69 134
89 55 163
56 75 152
27 28 118
99 69 146
96 70 137
168 70 171
90 65 118
125 66 176
79 57 147
95 67 136
57 78 94
19 20 153
116 72 180
121 70 141
133 95 180
146 91 171
52 81 97
50 80 98
56 83 98
134 69 169
46 92 140
146 69 175
126 68 161
143 73 151
61 77 144
124 84 164
101 74 154
152 75 172
103 58 148
28 76 118
140 92 156
105 101 151
36 101 105
82 61 142
106 74 149
129 86 144
139 87 145
105 73 112
59 80 108
49 75 120
109 73 143
60 81 110
123 104 156
48 102 111
74 106 111
35 105 112
59 108 150
13 51 113
51 82 113
81 52 114
80 50 115
32 33 157
95 116 180
96 72 153
131 85 160
117 85 179
76 90 118
65 104 118
138 85 162
57 94 119
56 98 120
59 109 120
121 60 170
70 96 141
66 93 122
122 93 173
26 104 123
60 110 123
85 117 160
144 86 178
174 87 176
7 93 125
137 70 168
126 91 175
127 64 148
58 102 148
64 89 128
49 103 128
124 86 165
62 107 129
65 90 130
55 99 130
145 87 174
66 122 162
84 117 132
164 84 181
68 88 161
67 95 133
55 89 169
41 100 134
135 125 176
8 125 135
67 94 136
17 95 136
70 121 171
72 96 137
167 117 179
47 88 138
9 87 139
71 107 139
130 99 140
65 130 140
63 97 141
97 121 141
39 82 142
61 106 142
58 103 143
49 109 143
48 111 178
77 129 144
54 124 165
71 139 145
69 100 175
46 99 146
6 79 147
57 93 147
64 103 148
102 127 148
74 101 149
38 106 149
73 109 150
108 112 150
73 105 151
58 143 151
130 90 172
55 130 172
141 96 155
72 116 153
58 101 154
74 102 154
63 141 155
96 153 155
92 123 156
65 140 156
33 80 157
80 115 157
24 81 158
81 114 158
113 82 159
40 113 159
84 124 160
54 131 160
53 126 161
88 133 161
85 131 162
47 138 162
128 89 163
49 128 163
86 124 178
64 127 181
86 129 165
71 145 165
67 88 166
47 119 166
68 100 167
41 117 167
91 126 168
53 133 177
69 99 169
89 134 169
60 92 170
46 121 170
46 146 171
91 168 171
90 152 172
75 163 172
57 119 173
47 122 173
66 131 174
54 145 174
100 126 175
91 146 175
87 135 176
66 174 176
72 137 177
137 168 177
111 144 178
124 164 178
85 138 179
68 167 179
177 133 180
72 177 180
84 132 181
127 164 181
13 44 182
182 44 183
2 182 183
1 42 185
23 43 186
24 158 187
32 45 188
45 184 188
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1

View file

@ -0,0 +1,135 @@
#include "vdop.h"
#include <cassert> // assert()
#include <cmath>
#include <iostream>
#include <vector>
using namespace std;
void vddiv(vector<double> & x, vector<double> const& y,
vector<double> const& z)
{
assert( x.size()==y.size() && y.size()==z.size() );
size_t n = x.size();
#pragma omp parallel for
for (size_t k = 0; k < n; ++k)
{
x[k] = y[k] / z[k];
}
return;
}
//******************************************************************************
void vdaxpy(std::vector<double> & x, std::vector<double> const& y,
double alpha, std::vector<double> const& z )
{
assert( x.size()==y.size() && y.size()==z.size() );
size_t n = x.size();
#pragma omp parallel for
for (size_t k = 0; k < n; ++k)
{
x[k] = y[k] + alpha * z[k];
}
return;
}
//******************************************************************************
double dscapr(std::vector<double> const& x, std::vector<double> const& y)
{
assert( x.size()==y.size());
size_t n = x.size();
double s = 0.0;
//#pragma omp parallel for reduction(+:s)
for (size_t k = 0; k < n; ++k)
{
s += x[k] * y[k];
}
return s;
}
//******************************************************************************
//void DebugVector(vector<double> const &v)
//{
//cout << "\nVector (nnode = " << v.size() << ")\n";
//for (size_t j = 0; j < v.size(); ++j)
//{
//cout.setf(ios::right, ios::adjustfield);
//cout << v[j] << " ";
//}
//cout << endl;
//return;
//}
//******************************************************************************
bool CompareVectors(std::vector<double> const& x, int const n, double const y[], double const eps)
{
bool bn = (static_cast<int>(x.size())==n);
if (!bn)
{
cout << "######### Error: " << "number of elements" << endl;
}
//bool bv = equal(x.cbegin(),x.cend(),y);
bool bv = equal(x.cbegin(),x.cend(),y,
[eps](double a, double b) -> bool
{ return std::abs(a-b)<eps*(1.0+0.5*(std::abs(a)+ std::abs(b))); }
);
if (!bv)
{
assert(static_cast<int>(x.size())==n);
cout << "######### Error: " << "values" << endl;
}
return bn && bv;
}
//******************************************************************************
double par_scalar(vector<double> const &x, vector<double> const &y, MPI_Comm const& icomm)
{
const double s = dscapr(x,y);
double sg;
MPI_Allreduce(&s,&sg,1,MPI_DOUBLE,MPI_SUM,icomm);
return(sg);
}
//******************************************************************************
void ExchangeAll(vector<double> const &xin, vector<double> &yout, MPI_Comm const &icomm)
{
int myrank, numprocs,ierr(-1);
MPI_Comm_rank(icomm, &myrank); // my MPI-rank
MPI_Comm_size(icomm, &numprocs);
int const N=xin.size();
int const sendcount = N/numprocs; // equal sized chunks
assert(sendcount*numprocs==N); // does N really split into equal sized chunks?
assert(xin.size()==yout.size());
auto sendbuf = xin.data();
auto recvbuf = yout.data();
ierr = MPI_Alltoall(sendbuf, sendcount, MPI_DOUBLE,
recvbuf, sendcount, MPI_DOUBLE, icomm);
assert(0==ierr);
return;
}
//******************************************************************************
void ExchangeAllInPlace(vector<double> &xin, MPI_Comm const &icomm)
{
int myrank, numprocs,ierr(-1);
MPI_Comm_rank(icomm, &myrank); // my MPI-rank
MPI_Comm_size(icomm, &numprocs);
int const N=xin.size();
int const sendcount = N/numprocs; // equal sized chunks
assert(sendcount*numprocs==N); // does N really split into equal sized chunks?
auto sendbuf = xin.data();
ierr = MPI_Alltoall(MPI_IN_PLACE, sendcount, MPI_DOUBLE,
sendbuf, sendcount, MPI_DOUBLE, icomm);
assert(0==ierr);
return;
}

View file

@ -0,0 +1,166 @@
#ifndef VDOP_FILE
#define VDOP_FILE
#include <iostream>
#include <mpi.h> // MPI
#include <string>
#include <vector>
/** @brief Element-wise vector division x_k = y_k/z_k.
*
* @param[out] x target vector
* @param[in] y source vector
* @param[in] z source vector
*
*/
void vddiv(std::vector<double> &x, std::vector<double> const &y,
std::vector<double> const &z);
/** @brief Element-wise daxpy operation x(k) = y(k) + alpha*z(k).
*
* @param[out] x target vector
* @param[in] y source vector
* @param[in] alpha scalar
* @param[in] z source vector
*
*/
void vdaxpy(std::vector<double> &x, std::vector<double> const &y,
double alpha, std::vector<double> const &z );
/** @brief Calculates the Euclidean inner product of two vectors.
*
* @param[in] x vector
* @param[in] y vector
* @return Euclidean inner product @f$\langle x,y \rangle@f$
*
*/
double dscapr(std::vector<double> const &x, std::vector<double> const &y);
inline
double L2_scapr(std::vector<double> const &x, std::vector<double> const &y)
{
return dscapr(x, y) / x.size();
}
/** Parallel inner product
@param[in] x vector
@param[in] y vector
@param[in] icomm MPI communicator
@return resulting Euclidean inner product <x,y>
*/
double par_scalar(std::vector<double> const &x, std::vector<double> const &y,
MPI_Comm const& icomm=MPI_COMM_WORLD);
/* ReadIn : Input and broadcast of an integer */
inline
int ReadIn(std::string const &ss = std::string(), MPI_Comm const &icomm = MPI_COMM_WORLD)
{
MPI_Barrier(icomm);
int myrank; /* my rank number */
MPI_Comm_rank(icomm, &myrank);
int id;
if (myrank == 0) {
std::cout << "\n\n " << ss << " : Which process do you want to debug ? \n";
std::cin >> id;
}
MPI_Bcast(&id, 1, MPI_INT, 0, icomm);
return id;
}
/**
* Print entries of a vector to standard output.
*
* @param[in] v vector values
* @param[in] ss string containing the vector name
* @param[in] icomm communicator group for MPI
*
*/
//void DebugVector(std::vector<double> const &v);
template <class T>
void DebugVector(std::vector<T> const &v, std::string const &ss = std::string(), MPI_Comm const &icomm = MPI_COMM_WORLD)
{
MPI_Barrier(icomm);
int numprocs; /* # processes */
MPI_Comm_size(icomm, &numprocs);
int myrank; /* my rank number */
MPI_Comm_rank(icomm, &myrank);
int readid = ReadIn(ss, icomm); /* Read readid */
while ( (0 <= readid) && (readid < numprocs) ) {
if (myrank == readid) {
std::cout << "\n\n process " << readid;
std::cout << "\n .... " << ss << " (nnode = " << v.size() << ")\n";
for (size_t j = 0; j < v.size(); ++j) {
std::cout.setf(std::ios::right, std::ios::adjustfield);
std::cout << v[j] << " ";
}
std::cout << std::endl;
fflush(stdout);
}
readid = ReadIn(ss, icomm); /* Read readid */
}
MPI_Barrier(icomm);
return;
}
/** @brief Compares an STL vector with POD vector.
*
* The accuracy criteria @f$ |x_k-y_k| < \varepsilon \left({1+0.5(|x_k|+|y_k|)}\right) @f$
* follows the book by
* <a href="https://www.springer.com/la/book/9783319446592">Stoyan/Baran</a>, p.8.
*
* @param[in] x STL vector
* @param[in] n length of POD vector
* @param[in] y POD vector
* @param[in] eps relative accuracy criteria (default := 0.0).
* @return true iff pairwise vector elements are relatively close to each other.
*
*/
bool CompareVectors(std::vector<double> const &x, int n, double const y[], double const eps = 0.0);
/** Output operator for vector
* @param[in,out] s output stream, e.g. @p cout
* @param[in] v vector
*
* @return output stream
*/
template <class T>
std::ostream &operator<<(std::ostream &s, std::vector<T> const &v)
{
for (auto vp : v) {
s << vp << " ";
}
return s;
}
/** Exchanges equal-sized partitions of vector @p xin with all MPI processes.
* The received data are returned in vector @p yout .
*
* @param[in] xin input vector
* @param[out] yout output vector
* @param[in] icomm MPI communicator
*
*/
void ExchangeAll(std::vector<double> const &xin, std::vector<double> &yout, MPI_Comm const &icomm = MPI_COMM_WORLD);
/** Exchanges equal-sized partitions of vector @p xin with all MPI processes.
* The received data are returned in vector @p xin .
*
* @param[in,out] xin input/output vector
* @param[in] icomm MPI communicator
*
*/
void ExchangeAllInPlace(std::vector<double> &xin, MPI_Comm const &icomm = MPI_COMM_WORLD);
#endif
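A minimal usage sketch of the parallel routines declared above (only an illustration, assuming the program is linked against vdop.cpp and the vector length is divisible by the number of processes):

#include "vdop.h"
#include <iostream>
#include <vector>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    int myrank, numprocs;
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);

    std::vector<double> x(4 * numprocs, 1.0), y(4 * numprocs, 2.0); // length divisible by #procs
    double const s = par_scalar(x, y);     // global inner product: 8 * numprocs * numprocs

    std::vector<double> z(x.size());
    ExchangeAll(x, z);                     // every rank exchanges equal-sized chunks of x

    if (0 == myrank) {
        std::cout << "par_scalar = " << s << std::endl;
    }
    MPI_Finalize();
    return 0;
}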

View file

@ -0,0 +1,20 @@
%% Visualize results
%
% flatpak run org.octave.Octave <filename>
% or
% octave --no-window-system --no-gui -qf <filename>
%
% or
% matlab -nosplash < <filename>
clear all
clc
%%
fname = 'uv.txt';
[xc,ia,v] = ascii_read_meshvector(fname);
h = trisurf(ia, xc(:,1), xc(:,2), v);
waitfor(h) % wait for closing the figure