ex7
commit 2467b9ae03 (parent 89326dd329)
44 changed files with 22631 additions and 0 deletions
ex7/code/task2/GCC_default.mk (new file, 154 lines)
@ -0,0 +1,154 @@
# Basic Definitions for using the GNU compiler suite sequentially
# requires setting of COMPILER=GCC_

# start make as follows to avoid warnings caused by OpenMPI code:
#   make 2>&1 | grep -v openmpi


MPI_ROOT=/usr/bin/

CC  = ${MPI_ROOT}mpicc
CXX = ${MPI_ROOT}mpicxx
F77 = ${MPI_ROOT}mpif77
LINKER = ${CXX}

# If 'mpirun ...' reports an error "... not enough slots ..", use the option '--oversubscribe'
MPIRUN = ${MPI_ROOT}mpirun --oversubscribe -display-map
#MPIRUN = ${MPI_ROOT}mpiexec

# 2023, Oct 23: "WARNING: There is at least one non-excluded OpenFabrics device found,"
# solution according to https://github.com/open-mpi/ompi/issues/11063
MPIRUN += -mca btl ^openib

# KFU:sauron
CXXFLAGS += -I/software/boost/1_72_0/include

WARNINGS = -Wall -pedantic -Woverloaded-virtual -Wfloat-equal -Wshadow \
           -Wredundant-decls -Wunreachable-code -Winline -fmax-errors=1

# WARNINGS += -Weffc++ -Wextra
#  -Wno-pragmas
CXXFLAGS += -std=c++17 -ffast-math -O3 -march=native ${WARNINGS}
# -ftree-vectorizer-verbose=5 -DNDEBUG
# -ftree-vectorizer-verbose=2
# CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details
# CFLAGS = -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2

# info on vectorization
#VECTORIZE = -ftree-vectorize -fdump-tree-vect-blocks=foo.dump
#-fdump-tree-pre=stderr
VECTORIZE = -ftree-vectorize -fopt-info -ftree-vectorizer-verbose=5
#CXXFLAGS += ${VECTORIZE}

# -funroll-all-loops -msse3
#GCC: -march=knl -march=broadwell -march=haswell

# for debugging purposes (safer code)
# -fsanitize=leak       # only one of the three can be used
# -fsanitize=address
# -fsanitize=thread
SANITARY = -fsanitize=address -fsanitize=undefined -fsanitize=null -fsanitize=return \
           -fsanitize=bounds -fsanitize=alignment -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow \
           -fsanitize=bool -fsanitize=enum -fsanitize=vptr
#CXXFLAGS  += ${SANITARY}
#LINKFLAGS += ${SANITARY}

# OpenMP
CXXFLAGS  += -fopenmp
LINKFLAGS += -fopenmp

default: ${PROGRAM}

${PROGRAM}: ${OBJECTS}
	$(LINKER) $^ ${LINKFLAGS} -o $@
	@echo
	@echo "Start with :  $(MPIRUN) -np num_proc $(MPIFLAGS) $(PROGRAM)"
	@echo

clean:
	@rm -f ${PROGRAM} ${OBJECTS} gmon.out

clean_all:: clean
	@rm -f *_ *~ *.bak *.log *.out *.tar *.orig
	@rm -rf html latex

run: ${PROGRAM}
	${MPIRUN} -np 4 ./$^

# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
	@echo "Tar the directory: " ${MY_DIR}
	@cd .. ;\
	tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
	cd ${MY_DIR}
#	tar cf `basename ${PWD}`.tar *

zip: clean
	@echo "Zip the directory: " ${MY_DIR}
	@cd .. ;\
	zip -r ${MY_DIR}.zip ${MY_DIR} *default.mk ;\
	cd ${MY_DIR}

doc:
	doxygen Doxyfile

#########################################################################

.cpp.o:
	$(CXX) -c $(CXXFLAGS) -o $@ $<
#	2>&1 | grep -v openmpi

# special: get rid of compiler warnings generated by OpenMPI files
#.cpp.o:
#	@$(CXX) -c $(CXXFLAGS) $< 2>/tmp/t.txt || grep -sv openmpi /tmp/t.txt
#	| grep -sv openmpi

.c.o:
	$(CC) -c $(CFLAGS) -o $@ $<

.f.o:
	$(F77) -c $(FFLAGS) -o $@ $<

##################################################################################################
# some tools
# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags)
cache: ${PROGRAM}
	valgrind --tool=callgrind --simulate-cache=yes ./$^
#	kcachegrind callgrind.out.<pid> &
	kcachegrind `ls -1tr callgrind.out.* | tail -1`

# Check for wrong memory accesses, memory leaks, ...
# use smaller data sets
# no "-pg" in compile/link options
mem: ${PROGRAM}
	valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes mpirun -np 4 ./$^
# Graphical interface
#	valkyrie

# Simple run time profiling of your code
# CXXFLAGS  += -g -pg
# LINKFLAGS += -pg
prof: ${PROGRAM}
	perf record ./$^
	perf report
#	gprof -b ./$^ > gp.out
#	kprof -f gp.out -p gprof &

# Trace your heap:
#> heaptrack ./main.GCC_
#> heaptrack_gui heaptrack.main.GCC_.<pid>.gz
heap: ${PROGRAM}
	heaptrack ./$^ 11
	heaptrack_gui `ls -1tr heaptrack.$^.* | tail -1` &

codecheck: $(SOURCES)
	cppcheck --enable=all --inconclusive --std=c++17 --suppress=missingIncludeSystem $^


########################################################################
# get the detailed status of all optimization flags
info:
	echo "detailed status of all optimization flags"
	$(CXX) --version
	$(CXX) -Q $(CXXFLAGS) --help=optimizers
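The SANITARY block above is easiest to understand with a failing example. The following is a minimal sketch (a hypothetical file asan_demo.cpp, not part of this commit): built with CXXFLAGS and LINKFLAGS extended by ${SANITARY}, the out-of-bounds read below is reported by -fsanitize=address as a heap-buffer-overflow, with the offending source line.

// asan_demo.cpp -- hypothetical sketch, not part of this commit.
// Build with the SANITARY flags enabled, e.g.:  mpicxx -fsanitize=address asan_demo.cpp
#include <vector>

int main()
{
    std::vector<int> v(4, 1);
    int *p = v.data();
    return p[4];   // reads one element past the end: ASan aborts with "heap-buffer-overflow"
}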
ex7/code/task2/ICC_NATIVE_default.mk (new file, 107 lines)
@ -0,0 +1,107 @@
# Basic Definitions for using INTEL-MPI with its compilers
# requires setting of COMPILER=ICC_NATIVE_

# MPI_ROOT should be defined by the shell
# path to icpc is contained in $PATH
MPI_BIN = $(shell dirname `which icpc` | sed 's/bin\/intel64/mpi\/intel64\/bin/g')/
MPI_LIB = $(shell echo ${MPI_BIN} | sed 's/bin/lib/g')

# The Intel-MPI wrappers use gcc by default !!
CC  = ${MPI_BIN}mpicc  -cc=icc
CXX = ${MPI_BIN}mpicxx -cxx=icpc
F77 = ${MPI_BIN}mpif77 -f77=ifort
LINKER = ${CXX}

MPIRUN = ${MPI_BIN}mpirun

WARNINGS = -Wall -Wextra -pedantic -Woverloaded-virtual -Wfloat-equal -Wshadow
#  -Weffc++ -Wunreachable-code -Winline
CXXFLAGS += -O3 -fargument-noalias -DNDEBUG -std=c++17 ${WARNINGS} ${MPI_COMPILE_FLAGS}
CFLAGS   += -O3 -fargument-noalias -DNDEBUG -Wall -Wextra -pedantic -Wfloat-equal \
            -Wshadow ${MPI_COMPILE_FLAGS}
# -vec-report=3 -mkl
# -guide -parallel
# -guide-opts=string -guide-par[=n] -guide-vec[=n]
# -auto-p32 -simd

# use Intel's MKL
LINKFLAGS += -mkl ${MPI_LINK_FLAGS}

default: ${PROGRAM}

${PROGRAM}: ${OBJECTS}
	$(LINKER) $^ ${LINKFLAGS} -o $@
	@echo
	@echo "Start with :  $(MPIRUN) -np num_proc $(MPIFLAGS) $(PROGRAM)"
	@echo

clean:
	rm -f ${PROGRAM} ${OBJECTS}

clean_all:: clean
	@rm -f *_ *~ *.bak *.log *.out *.tar

run: ${PROGRAM}
	(export LD_LIBRARY_PATH=${MPI_LIB}:${LD_LIBRARY_PATH} ; ${MPIRUN} -np 4 ./$^ ${PROG_ARGS})

# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
	@echo "Tar the directory: " ${MY_DIR}
	@cd .. ;\
	tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
	cd ${MY_DIR}
#	tar cf `basename ${PWD}`.tar *

doc:
	doxygen Doxyfile

#########################################################################

.cpp.o:
	$(CXX) -c $(CXXFLAGS) -o $@ $<

.c.o:
	$(CC) -c $(CFLAGS) -o $@ $<

.f.o:
	$(F77) -c $(FFLAGS) -o $@ $<

##################################################################################################
# # some tools
# # Cache behaviour (CXXFLAGS += -g tracks down to source lines)
# cache: ${PROGRAM}
#	valgrind --tool=callgrind --simulate-cache=yes ./$^
# #	kcachegrind callgrind.out.<pid> &
#
# # Check for wrong memory accesses, memory leaks, ...
# # use smaller data sets
# mem: ${PROGRAM}
#	valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
#
# # Simple run time profiling of your code
# # CXXFLAGS  += -g -pg
# # LINKFLAGS += -pg
# prof: ${PROGRAM}
#	./$^
#	gprof -b ./$^ > gp.out
# #	kprof -f gp.out -p gprof &
#


mem:   inspector
prof:  amplifier
cache: amplifier

gap_par_report:
	${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt

# GUI for performance report
amplifier: ${PROGRAM}
	${BINDIR}../vtune_amplifier_xe_2013/bin64/amplxe-gui &

# GUI for Memory and Thread analyzer (race conditions)
inspector: ${PROGRAM}
# http://askubuntu.com/questions/41629/after-upgrade-gdb-wont-attach-to-process
	echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
	${BINDIR}../inspector_xe_2013/bin64/inspxe-gui &
ex7/code/task2/ICC_default.mk (new file, 112 lines)
@ -0,0 +1,112 @@
# Basic Definitions for using INTEL compilers with OpenMPI headers and libraries
# requires setting of COMPILER=ICC_

# MPI_ROOT should be defined by the shell
MPI_ROOT=/usr

CC  = icc
CXX = icpc
F77 = ifort
LINKER = ${CXX}

MPIRUN = ${MPI_ROOT}/bin/mpirun

# no difference whether C or C++ is used !! (always use the options from mpicxx)
MPI_COMPILE_FLAGS = `${MPI_ROOT}/bin/mpicxx -showme:compile`
MPI_LINK_FLAGS    = `${MPI_ROOT}/bin/mpicxx -showme:link`
# MPI_LINK_FLAGS = -pthread -L/usr/lib/openmpi/lib -lmpi_cxx -lmpi -lopen-rte -lopen-pal -ldl -Wl,--export-dynamic -lnsl -lutil -lm -ldl


WARNINGS = -Wall -Wextra -pedantic -Woverloaded-virtual -Wfloat-equal -Wshadow
#  -Weffc++ -Wunreachable-code -Winline
CXXFLAGS += -O3 -std=c++17 -fargument-noalias -DNDEBUG ${WARNINGS} ${MPI_COMPILE_FLAGS}
CFLAGS   += -O3 -fargument-noalias -DNDEBUG -Wall -Wextra -pedantic -Wfloat-equal \
            -Wshadow ${MPI_COMPILE_FLAGS}
# -vec-report=3 -mkl
# -guide -parallel
# -guide-opts=string -guide-par[=n] -guide-vec[=n]
# -auto-p32 -simd

# use Intel's MKL
LINKFLAGS += -mkl
# use the MPI provided via the compiler wrapper
LINKFLAGS += ${MPI_LINK_FLAGS}

default: ${PROGRAM}

${PROGRAM}: ${OBJECTS}
	$(LINKER) $^ ${LINKFLAGS} -o $@
	@echo
	@echo "Start with :  $(MPIRUN) -np num_proc $(MPIFLAGS) $(PROGRAM)"
	@echo

clean:
	rm -f ${PROGRAM} ${OBJECTS}

clean_all:: clean
	@rm -f *_ *~ *.bak *.log *.out *.tar

run: ${PROGRAM}
	${MPIRUN} -np 4 ./$^

# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
	@echo "Tar the directory: " ${MY_DIR}
	@cd .. ;\
	tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
	cd ${MY_DIR}
#	tar cf `basename ${PWD}`.tar *

doc:
	doxygen Doxyfile

#########################################################################

.cpp.o:
	$(CXX) -c $(CXXFLAGS) -o $@ $<

.c.o:
	$(CC) -c $(CFLAGS) -o $@ $<

.f.o:
	$(F77) -c $(FFLAGS) -o $@ $<

##################################################################################################
# # some tools
# # Cache behaviour (CXXFLAGS += -g tracks down to source lines)
# cache: ${PROGRAM}
#	valgrind --tool=callgrind --simulate-cache=yes ./$^
# #	kcachegrind callgrind.out.<pid> &
#
# # Check for wrong memory accesses, memory leaks, ...
# # use smaller data sets
# mem: ${PROGRAM}
#	valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
#
# # Simple run time profiling of your code
# # CXXFLAGS  += -g -pg
# # LINKFLAGS += -pg
# prof: ${PROGRAM}
#	./$^
#	gprof -b ./$^ > gp.out
# #	kprof -f gp.out -p gprof &
#


mem:   inspector
prof:  amplifier
cache: amplifier

gap_par_report:
	${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt

# GUI for performance report
amplifier: ${PROGRAM}
	${BINDIR}../vtune_amplifier_xe_2013/bin64/amplxe-gui &

# GUI for Memory and Thread analyzer (race conditions)
inspector: ${PROGRAM}
# http://askubuntu.com/questions/41629/after-upgrade-gdb-wont-attach-to-process
	echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
	${BINDIR}../inspector_xe_2013/bin64/inspxe-gui &
ex7/code/task2/OPENMPI_CLANG_default.mk (new file, 128 lines)
@ -0,0 +1,128 @@
# Basic Definitions for using OpenMPI with CLANG compilers
# requires setting of COMPILER=OPENMPI_CLANG_

# Pass the CLANG compilers to the OpenMPI wrappers
# see: https://www.open-mpi.org/faq/?category=mpi-apps#override-wrappers-after-v1.0
EXPORT = export OMPI_CXX=clang++; export OMPI_CC=clang; export OMPI_mpifort=flang

CC  = mpicc
CXX = mpicxx
F77 = mpifort
LINKER = ${CXX}

MPIRUN = ${MPI_BIN}mpirun

# http://clang.llvm.org/docs/UsersManual.html#options-to-control-error-and-warning-messages
SILENCE_MPI  = -Wno-weak-vtables -Wno-old-style-cast -Wno-cast-align -Wno-deprecated
SILENCE_MPI += -Wno-sign-conversion -Wno-reserved-id-macro -Wno-c++98-compat-pedantic
SILENCE_MPI += -Wno-zero-as-null-pointer-constant -Wno-source-uses-openmp
WARNINGS = -Weverything -Wno-c++98-compat -Wno-weak-vtables -ferror-limit=3 ${SILENCE_MPI}
#-fsyntax-only -Wdocumentation -Wconversion -Wshadow -Wfloat-conversion -pedantic
CXXFLAGS += -Ofast -std=c++17 ${WARNINGS}
#CXXFLAGS += -Ofast -std=c++17
# -ftrapv
#
CFLAGS += -Ofast -Weverything -ferror-limit=3 ${MPI_COMPILE_FLAGS}

# OpenMP
#CXXFLAGS  += -fopenmp
#LINKFLAGS += -fopenmp

# tidy_check
SWITCH_OFF =,-readability-magic-numbers,-readability-redundant-control-flow,-readability-redundant-member-init
SWITCH_OFF+=,-readability-redundant-member-init,-readability-isolate-declaration
#READABILITY=,readability*${SWITCH_OFF}
#TIDYFLAGS = -checks=llvm-*,-llvm-header-guard -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp"
TIDYFLAGS = -checks=llvm-*,-llvm-header-guard${READABILITY} -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp"
#TIDYFLAGS += -checks='modernize*

MPI_COMPILE_FLAGS = `${MPI_BIN}mpicxx -showme:compile`
MPI_LINK_FLAGS    = `${MPI_BIN}mpicxx -showme:link`
#TIDYFLAGS += ${MPI_COMPILE_FLAGS}
TIDYFLAGS += -extra-arg="-I/usr/lib/x86_64-linux-gnu/openmpi/include"
#check:
#	echo ${MPI_COMPILE_FLAGS}

default: ${PROGRAM}

${PROGRAM}: ${OBJECTS}
	@( ${EXPORT}; $(LINKER) $^ ${LINKFLAGS} -o $@ )
	@echo
	@echo "Start with :  $(MPIRUN) -np num_proc $(MPIFLAGS) $(PROGRAM)"
	@echo

clean:
	rm -f ${PROGRAM} ${OBJECTS}

clean_all:: clean
	@rm -f *_ *~ *.bak *.log *.out *.tar

codecheck: tidy_check
tidy_check:
	clang-tidy ${SOURCES} ${TIDYFLAGS} -- ${SOURCES}
# see also http://clang-developers.42468.n3.nabble.com/Error-while-trying-to-load-a-compilation-database-td4049722.html

run: ${PROGRAM}
	${MPIRUN} -np 4 ./$^ ${PROG_ARGS}

# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
	@echo "Tar the directory: " ${MY_DIR}
	@cd .. ;\
	tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
	cd ${MY_DIR}
#	tar cf `basename ${PWD}`.tar *

doc:
	doxygen Doxyfile

#########################################################################

.cpp.o:
	@( ${EXPORT}; $(CXX) -c $(CXXFLAGS) -o $@ $< )

.c.o:
	@( ${EXPORT}; $(CC) -c $(CFLAGS) -o $@ $< )

.f.o:
	$(F77) -c $(FFLAGS) -o $@ $<

##################################################################################################
# # some tools
# # Cache behaviour (CXXFLAGS += -g tracks down to source lines)
# cache: ${PROGRAM}
#	valgrind --tool=callgrind --simulate-cache=yes ./$^
# #	kcachegrind callgrind.out.<pid> &
#
# # Check for wrong memory accesses, memory leaks, ...
# # use smaller data sets
# mem: ${PROGRAM}
#	valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
#
# # Simple run time profiling of your code
# # CXXFLAGS  += -g -pg
# # LINKFLAGS += -pg
# prof: ${PROGRAM}
#	./$^
#	gprof -b ./$^ > gp.out
# #	kprof -f gp.out -p gprof &
#


mem:   inspector
prof:  amplifier
cache: amplifier

gap_par_report:
	${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt

# GUI for performance report
amplifier: ${PROGRAM}
	${BINDIR}../vtune_amplifier_xe_2013/bin64/amplxe-gui &

# GUI for Memory and Thread analyzer (race conditions)
inspector: ${PROGRAM}
# http://askubuntu.com/questions/41629/after-upgrade-gdb-wont-attach-to-process
	echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
	${BINDIR}../inspector_xe_2013/bin64/inspxe-gui &
ex7/code/task2/OPENMPI_ICC_default.mk (new file, 107 lines)
@ -0,0 +1,107 @@
# Basic Definitions for using OpenMPI with Intel compilers
# requires setting of COMPILER=OPENMPI_ICC_

# Pass the Intel compilers to the OpenMPI wrappers
# see: https://www.open-mpi.org/faq/?category=mpi-apps#override-wrappers-after-v1.0
EXPORT = export OMPI_CXX=icpc; export OMPI_CC=icc; export OMPI_mpifort=ifort

CC  = mpicc
CXX = mpicxx
F77 = mpifort
LINKER = ${CXX}

MPIRUN = ${MPI_BIN}mpirun

WARNINGS = -Wall -Wextra -pedantic -Woverloaded-virtual -Wfloat-equal -Wshadow
#  -Weffc++ -Wunreachable-code -Winline
CXXFLAGS += -fast -fargument-noalias -DNDEBUG -std=c++17 ${WARNINGS}
CFLAGS   += -O3 -fargument-noalias -DNDEBUG -Wall -Wextra -pedantic -Wfloat-equal -Wshadow
# -vec-report=3 -mkl
# -guide -parallel
# -guide-opts=string -guide-par[=n] -guide-vec[=n]
# -auto-p32 -simd

# use Intel's MKL
LINKFLAGS += -O3 -mkl ${MPI_LINK_FLAGS}
# ipo: warning #11021: unresolved __GI_memset
# see: https://software.intel.com/en-us/articles/ipo-warning-11021-unresolved-symbols-referenced-a-dynamic-library
LINKFLAGS +=

default: ${PROGRAM}

${PROGRAM}: ${OBJECTS}
	@( ${EXPORT}; $(LINKER) $^ ${LINKFLAGS} -o $@ )
	@echo
	@echo "Start with :  $(MPIRUN) -np num_proc $(MPIFLAGS) $(PROGRAM)"
	@echo

clean:
	rm -f ${PROGRAM} ${OBJECTS}

clean_all:: clean
	@rm -f *_ *~ *.bak *.log *.out *.tar

run: ${PROGRAM}
	${MPIRUN} -np 4 ./$^ ${PROG_ARGS}

# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
	@echo "Tar the directory: " ${MY_DIR}
	@cd .. ;\
	tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
	cd ${MY_DIR}
#	tar cf `basename ${PWD}`.tar *

doc:
	doxygen Doxyfile

#########################################################################

.cpp.o:
	@( ${EXPORT}; $(CXX) -c $(CXXFLAGS) -o $@ $< )

.c.o:
	@( ${EXPORT}; $(CC) -c $(CFLAGS) -o $@ $< )

.f.o:
	$(F77) -c $(FFLAGS) -o $@ $<

##################################################################################################
# # some tools
# # Cache behaviour (CXXFLAGS += -g tracks down to source lines)
# cache: ${PROGRAM}
#	valgrind --tool=callgrind --simulate-cache=yes ./$^
# #	kcachegrind callgrind.out.<pid> &
#
# # Check for wrong memory accesses, memory leaks, ...
# # use smaller data sets
# mem: ${PROGRAM}
#	valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
#
# # Simple run time profiling of your code
# # CXXFLAGS  += -g -pg
# # LINKFLAGS += -pg
# prof: ${PROGRAM}
#	./$^
#	gprof -b ./$^ > gp.out
# #	kprof -f gp.out -p gprof &
#


mem:   inspector
prof:  amplifier
cache: amplifier

gap_par_report:
	${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt

# GUI for performance report
amplifier: ${PROGRAM}
	${BINDIR}../vtune_amplifier_xe_2013/bin64/amplxe-gui &

# GUI for Memory and Thread analyzer (race conditions)
inspector: ${PROGRAM}
# http://askubuntu.com/questions/41629/after-upgrade-gdb-wont-attach-to-process
	echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
	${BINDIR}../inspector_xe_2013/bin64/inspxe-gui &
ex7/code/task2/PGI_NATIVE_default.mk (new file, 125 lines)
@ -0,0 +1,125 @@
# Use the MPI wrappers from the PGI compiler suite.
# requires setting of COMPILER=PGI_MPI_
#
# requires
#   sudo apt install librdmacm1



# Details for run time information
# export PGI_ACC_TIME=1
# unset  PGI_ACC_TIME
# export PGI_ACC_NOTIFY=1
# export PGI_ACC_NOTIFY=3
# unset  PGI_ACC_NOTIFY


PGI_PATH = /opt/pgi/linux86-64/2019/bin
#ifeq "$(HOSTNAME)" "mephisto.uni-graz.at"
# # mephisto
# PGI_PATH = /share/apps/pgi/linux86-64/2016/bin
#endif


#MPI_ROOT=${PGI_PATH}mpi/mpich/bin/
MPI_ROOT = ${PGI_PATH}/../mpi/openmpi-3.1.3/bin/
MPIRUN   = ${MPI_ROOT}mpirun

CC  = ${MPI_ROOT}mpicc
CXX = ${MPI_ROOT}mpicxx
#F77 = ${MPI_ROOT}mpif77
ifndef LINKER
  LINKER = ${CC}
endif
LINKER = ${CXX}

WARNINGS = -Minform=warn

PGI_PROFILING += -Minfo=loop,vect,opt,intensity,mp,accel
#PGI_PROFILING += -Mprof=lines -Minfo=ccff

CXXFLAGS += -e3 -std=c++17 -fast ${PGI_PROFILING} ${WARNINGS} -Mnodepchk
CFLAGS   += -fast ${PGI_PROFILING} ${WARNINGS} -Mnodepchk
#
# for OpenACC
# Target architecture (nvidia, host)
TA_ARCH = host
#TA_ARCH = nvidia,host
#TA_ARCH = -ta=nvidia:cc2+,cuda5.5,fastmath
#TA_ARCH = -acc -DNDEBUG -ta=nvidia:cc2+,cuda5.5,fastmath,keepgpu
#TA_ARCH = -acc -DNDEBUG -ta=nvidia:cc2+,fastmath,keepgpu

#,keepgpu
# CFLAGS = -O3 -ta=$(TA_ARCH)
#CFLAGS   += -B -gopt $(TA_ARCH)
#CXXFLAGS += -B -gopt $(TA_ARCH)
# -Minfo=all

# libcudart.a is needed for direct CUDA calls
#LINKFLAGS = -gopt $(TA_ARCH) -L${BINDIR}../lib $(PGI_PROFILING)
# -lcudart

default: ${PROGRAM}

${PROGRAM}: ${OBJECTS}
	$(LINKER) $^ ${LINKFLAGS} -o $@

clean:
	rm -f ${PROGRAM} ${OBJECTS} *.gpu *gprof.out

clean_all:: clean
	@rm -f *_ *~ *.bak *.log *.out *.tar

#run: clean ${PROGRAM}
run: ${PROGRAM}
	${MPIRUN} -np 4 ${OPTIRUN} ./${PROGRAM}

# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
	@echo "Tar the directory: " ${MY_DIR}
	@cd .. ;\
	tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
	cd ${MY_DIR}
#	tar cf `basename ${PWD}`.tar *

doc:
	doxygen Doxyfile

#########################################################################

.cpp.o:
	$(CXX) -c $(CXXFLAGS) -o $@ $<

.c.o:
	$(CC) -c $(CFLAGS) -o $@ $<

.f.o:
	$(F77) -c $(FFLAGS) -o $@ $<

##################################################################################################
# # some tools
# # Simple run time profiling of your code
# # CXXFLAGS  += -g -pg
# # LINKFLAGS += -pg


# Profiling options PGI, see: pgcollect -help
CPU_PROF = -allcache
GPU_PROF = -cuda=gmem,branch,cc13 -cudainit
#GPU_PROF = -cuda=branch:cc20
#
PROF_FILE = pgprof.out

prof: ${PROGRAM}
#	./$^
#	$(CUDA_HOME)/bin/nvvp &
#	export LD_LIBRARY_PATH=/state/partition1/apps/pgi/linux86-64/12.9/lib:$LD_LIBRARY_PATH
	${OPTIRUN} ${BINDIR}pgcollect $(GPU_PROF) ./$^
	${OPTIRUN} ${BINDIR}pgprof -exe ./$^ $(PROF_FILE) &


# Memory checker (slooooow!!!):
# see doc at /usr/local/cuda/doc/cuda-memcheck.pdf
# mem: ${PROGRAM}
#	$(CUDA_HOME)memcheck ./$^
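The TA_ARCH variants above switch OpenACC code generation between the host and NVIDIA targets. As a minimal sketch (a hypothetical file acc_demo.cpp, not part of this commit; the exact pgc++ flags follow the comments above), a single parallel-loop pragma is enough to see their effect: with -Minfo=accel the compiler reports how the loop was mapped.

// acc_demo.cpp -- hypothetical sketch, not part of this commit.
// Compile e.g. with:  pgc++ -acc -ta=host -Minfo=accel acc_demo.cpp
#include <cstdio>

int main()
{
    const int n = 1000;
    static double x[1000];
    #pragma acc parallel loop   // offloaded or run on the host, depending on -ta=...
    for (int i = 0; i < n; ++i)
        x[i] = 2.0 * i;
    std::printf("x[%d] = %g\n", n - 1, x[n - 1]);
    return 0;
}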
ex7/code/task2/first.template/Doxyfile (new file, 2877 lines)
File diff suppressed because it is too large.
ex7/code/task2/first.template/Makefile (new file, 23 lines)
@ -0,0 +1,23 @@
#
# Compile with
#   make 2>&1 | grep -v openmpi
# to avoid warnings caused by OpenMPI

# use the GNU compiler tools
COMPILER=GCC_
# alternatively, from the shell:
#   export COMPILER=GCC_
# or, alternatively, from the shell:
#   make COMPILER=GCC_

MAIN    = main
SOURCES = ${MAIN}.cpp greetings.cpp
OBJECTS = $(SOURCES:.cpp=.o)

PROGRAM = ${MAIN}.${COMPILER}

# uncomment the next two lines for debugging and detailed performance analysis
CXXFLAGS  += -g
LINKFLAGS +=

include ../${COMPILER}default.mk
ex7/code/task2/first.template/greetings.cpp (new file, 85 lines)
@ -0,0 +1,85 @@
#include "greetings.h"
#include <cassert>
#include <cstring>
#include <iostream>
#include <mpi.h>       // MPI
#include <string>
using namespace std;

// see http://www.open-mpi.org/doc/current
// for details on MPI functions

void greetings(MPI_Comm const &icomm)
{
    int myrank, numprocs;
    MPI_Comm_rank(icomm, &myrank);   // my MPI rank
    MPI_Comm_size(icomm, &numprocs); // number of MPI processes
    char *name  = new char [MPI_MAX_PROCESSOR_NAME],
         *chbuf = new char [MPI_MAX_PROCESSOR_NAME];

    int reslen, ierr;
    MPI_Get_processor_name(name, &reslen);

    if (0 == myrank) {
        cout << "  " << myrank << " runs on " << name << endl;
        for (int i = 1; i < numprocs; ++i) {
            MPI_Status stat;
            stat.MPI_ERROR = 0;  // M U S T  be initialized!!

            ierr = MPI_Recv(chbuf, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, MPI_ANY_SOURCE, MPI_ANY_TAG, icomm, &stat);
            assert(0 == ierr);

            cout << "  " << stat.MPI_SOURCE << " runs on " << chbuf;
            int count;
            MPI_Get_count(&stat, MPI_CHAR, &count); // size of received data
            cout << "  (length: " << count << " )" << endl;
            // stat.Get_error()                     // error code
        }
    }
    else {
        int dest = 0;
        ierr = MPI_Send(name, strlen(name) + 1, MPI_CHAR, dest, myrank, icomm);
        assert(0 == ierr);
    }
    delete [] chbuf;
    delete [] name;
    return;
}


void greetings_cpp(MPI_Comm const &icomm)
{
    int myrank, numprocs;
    MPI_Comm_rank(icomm, &myrank);   // my MPI rank
    MPI_Comm_size(icomm, &numprocs); // number of MPI processes
    string name(MPI_MAX_PROCESSOR_NAME, '#'),    // C++
           recvbuf(MPI_MAX_PROCESSOR_NAME, '#'); // C++: receive buffer, don't change its size

    int reslen, ierr;
    MPI_Get_processor_name(name.data(), &reslen);
    name.resize(reslen); // C++

    if (0 == myrank) {
        cout << "  " << myrank << " runs on " << name << endl;
        for (int i = 1; i < numprocs; ++i) {
            MPI_Status stat;
            stat.MPI_ERROR = 0;  // M U S T  be initialized!!

            ierr = MPI_Recv(recvbuf.data(), MPI_MAX_PROCESSOR_NAME, MPI_CHAR, i, MPI_ANY_TAG, icomm, &stat);
            assert(0 == ierr);

            int count;
            MPI_Get_count(&stat, MPI_CHAR, &count); // size of received data
            string const chbuf(recvbuf, 0, count);  // C++
            cout << "  " << stat.MPI_SOURCE << " runs on " << chbuf;
            cout << "  (length: " << count << " )" << endl;
            // stat.Get_error()                     // error code
        }
    }
    else {
        int dest = 0;
        ierr = MPI_Send(name.data(), name.size(), MPI_CHAR, dest, myrank, icomm);
        assert(0 == ierr);
    }
    return;
}
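greetings() above receives into a fixed buffer of MPI_MAX_PROCESSOR_NAME chars and queries the actual length afterwards with MPI_Get_count. An alternative sketch (not part of this commit; the function name recv_any_string is hypothetical) probes the pending message first and sizes the buffer exactly; it relies only on standard MPI calls and C++17's non-const std::string::data().

// Sketch (not in this commit): receive a message of unknown length as a std::string.
#include <mpi.h>
#include <string>

std::string recv_any_string(MPI_Comm icomm)
{
    MPI_Status stat;
    MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, icomm, &stat); // wait until a message is pending
    int count;
    MPI_Get_count(&stat, MPI_CHAR, &count);               // its exact length in chars
    std::string buf(count, '\0');
    MPI_Recv(buf.data(), count, MPI_CHAR, stat.MPI_SOURCE, stat.MPI_TAG,
             icomm, MPI_STATUS_IGNORE);
    return buf;
}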
ex7/code/task2/first.template/greetings.h (new file, 16 lines)
@ -0,0 +1,16 @@
// general header for all functions in this directory

#ifndef GREETINGS_FILE
#define GREETINGS_FILE

#include <mpi.h>

/** Each process determines its host name and sends this information
    to root process 0, which prints it for each process.
    @param[in] icomm  the MPI process group that is used.
*/

void greetings(MPI_Comm const &icomm);
void greetings_cpp(MPI_Comm const &icomm);

#endif
ex7/code/task2/first.template/main.cpp (new file, 33 lines)
@ -0,0 +1,33 @@
// MPI code in C++.
// See [Gropp/Lusk/Skjellum, "Using MPI", p. 33/41 etc.]
// and /opt/mpich/include/mpi2c++/comm.h for details

#include "greetings.h"
#include <iostream>
#include <mpi.h>       // MPI
using namespace std;

int main(int argc, char *argv[])
{
    MPI_Comm icomm = MPI_COMM_WORLD;
    MPI_Init(&argc, &argv);
    int myrank, numprocs;
    // numprocs = 1;  // delete this line when uncommenting the next line
    MPI_Comm_rank(icomm, &myrank);   // my MPI rank
    MPI_Comm_size(icomm, &numprocs);

    if (0 == myrank) {
        cout << "\n There are " << numprocs << " processes running.\n \n";
    }

    // greetings(icomm);
    greetings_cpp(icomm);

    if (0 == myrank) cout << endl;

    MPI_Finalize();

    return 0;
}
ex7/code/task3/GCC_default.mk (new file, 154 lines)
@ -0,0 +1,154 @@
(content identical to ex7/code/task2/GCC_default.mk above)
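The GCC makefile above defines a VECTORIZE macro (-ftree-vectorize -fopt-info). A minimal sketch of a loop that GCC typically auto-vectorizes (a hypothetical file vec_demo.cpp, not part of this commit): compiled with those flags at -O3, -fopt-info prints an "optimized: loop vectorized" style note for it.

// vec_demo.cpp -- hypothetical sketch, not part of this commit.
// Compile e.g. with:  g++ -O3 -march=native -ftree-vectorize -fopt-info -c vec_demo.cpp
#include <cstddef>

// Dependence-free update: a textbook candidate for auto-vectorization.
void axpy(double a, const double *x, double *y, std::size_t n)
{
    for (std::size_t i = 0; i < n; ++i)
        y[i] += a * x[i];
}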
ex7/code/task3/Makefile (new file, 23 lines)
@ -0,0 +1,23 @@
#
# Compile with
#   make 2>&1 | grep -v openmpi
# to avoid warnings caused by OpenMPI

# use the GNU compiler tools
COMPILER=GCC_
# alternatively, from the shell:
#   export COMPILER=GCC_
# or, alternatively, from the shell:
#   make COMPILER=GCC_

MAIN    = main
SOURCES = ${MAIN}.cpp
OBJECTS = $(SOURCES:.cpp=.o)

PROGRAM = ${MAIN}.${COMPILER}

# uncomment the next two lines for debugging and detailed performance analysis
CXXFLAGS  += -g
LINKFLAGS +=

include ./${COMPILER}default.mk
ex7/code/task3/main.cpp (new file, 149 lines)
@ -0,0 +1,149 @@
#include <algorithm>
#include <cstdio>
#include <iostream>
#include <mpi.h>
#include <tuple>
#include <vector>
using namespace std;


void DebugVector(const vector<double> &xin, MPI_Comm icomm) {
    int rank, size;
    MPI_Comm_rank(icomm, &rank);
    MPI_Comm_size(icomm, &size);

    int next_process = 0;
    while (next_process != -1) {
        // Print the local vector of the selected process
        if (rank == next_process) {
            cout << "x_" << rank << " = ";
            for (const auto &value : xin) {
                cout << value << " ";
            }
            cout << endl;
        }
        MPI_Barrier(icomm);

        if (rank == 0) {
            cout << "Enter rank (0-" << size - 1 << ") or -1 to exit: ";
            cin >> next_process;
        }
        MPI_Bcast(&next_process, 1, MPI_INT, 0, icomm);
        MPI_Barrier(icomm);
    }
}


double par_scalar(const vector<double> &x, const vector<double> &y, MPI_Comm icomm) {
    double local_dot = 0.0;
    for (size_t i = 0; i < x.size(); ++i) {
        local_dot += x[i] * y[i];
    }

    double global_dot = 0.0;
    MPI_Allreduce(&local_dot, &global_dot, 1, MPI_DOUBLE, MPI_SUM, icomm);

    return global_dot;
}


tuple<double, double> find_global_minmax(const vector<double> &xin, MPI_Comm icomm) {
    int rank, size;
    MPI_Comm_rank(icomm, &rank);
    MPI_Comm_size(icomm, &size);
    // Find the local min/max
    double local_min = *min_element(xin.begin(), xin.end());
    double local_max = *max_element(xin.begin(), xin.end());
    // Gather the local mins/maxs into vectors on root
    vector<double> local_min_vector(size);
    vector<double> local_max_vector(size);
    MPI_Gather(&local_min, 1, MPI_DOUBLE, local_min_vector.data(), 1, MPI_DOUBLE, 0, icomm);
    MPI_Gather(&local_max, 1, MPI_DOUBLE, local_max_vector.data(), 1, MPI_DOUBLE, 0, icomm);
    // Find the global min/max on root
    double global_min(0);
    double global_max(0);
    if (rank == 0) {
        global_min = *min_element(local_min_vector.begin(), local_min_vector.end());
        global_max = *max_element(local_max_vector.begin(), local_max_vector.end());
    }
    // Broadcast the global min/max to all processes
    MPI_Bcast(&global_min, 1, MPI_DOUBLE, 0, icomm);
    MPI_Bcast(&global_max, 1, MPI_DOUBLE, 0, icomm);

    return make_tuple(global_min, global_max);
}


tuple<double, double> find_global_minmax_Allreduce(const vector<double> &xin, MPI_Comm icomm) {
    double local_min = *min_element(xin.begin(), xin.end());
    double local_max = *max_element(xin.begin(), xin.end());
    double global_min(0);
    double global_max(0);
    MPI_Allreduce(&local_min, &global_min, 1, MPI_DOUBLE, MPI_MIN, icomm);
    MPI_Allreduce(&local_max, &global_max, 1, MPI_DOUBLE, MPI_MAX, icomm);
    return make_tuple(global_min, global_max);
}


int main(int argc, char *argv[]) {
    MPI_Comm icomm = MPI_COMM_WORLD;
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_rank(icomm, &rank);
    MPI_Comm_size(icomm, &size);

    if (rank == 0) {
        cout << "\n There are " << size << " processes running.\n";
    }

    // Create the local vectors
    size_t n = 20;
    vector<double> local_vector(n);
    vector<double> local_vector_inv(n);
    for (size_t i = 0; i < n; ++i) {
        // local_vector[i]     = rank*n + i+1;
        // local_vector_inv[i] = 1.0/(local_vector[i]);

        local_vector[i]     = rank*100.0 + (i%5)*10.0 + i;  // EX8
        local_vector_inv[i] = 1.0/(local_vector[i]+1.0);
    }


    MPI_Barrier(icomm);
    if (rank == 0) { printf("\n\n-------------- Task 5 --------------\n\n"); }
    DebugVector(local_vector, icomm);


    MPI_Barrier(icomm);
    if (rank == 0) { printf("\n\n-------------- Task 6 --------------\n\n"); }
    double result = par_scalar(local_vector, local_vector_inv, icomm);
    if (rank == 0) { printf("Global scalar product: %f\n", result); }


    MPI_Barrier(icomm);
    if (rank == 0) { printf("\n\n-------------- Task 7 --------------\n\n"); }
    auto [min, max] = find_global_minmax(local_vector, icomm);
    if (rank == 0) { printf("Global min: %.0f | global max: %.0f\n\n", min, max); }

    MPI_Barrier(icomm);
    tie(min, max) = find_global_minmax_Allreduce(local_vector, icomm); // tie() assigns to the existing bindings
    if (rank == 0) { printf("Global min: %.0f | global max: %.0f\n", min, max); }


    MPI_Barrier(icomm);
    if (rank == 0) { printf("\n\n-------------- Task 8 --------------\n\n"); }

    if (rank == 0) { printf("\n---- MPI_Alltoall ----\n"); }
    vector<double> recv(n);
    // sendcount 5 assumes n == 5*size, i.e. 4 processes for n == 20
    MPI_Alltoall(local_vector.data(), 5, MPI_DOUBLE, recv.data(), 5, MPI_DOUBLE, icomm);
    DebugVector(recv, icomm);

    MPI_Barrier(icomm);
    if (rank == 0) { printf("\n---- MPI_Alltoall using MPI_IN_PLACE ----\n"); }
    MPI_Alltoall(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, local_vector.data(), 5, MPI_DOUBLE, icomm);
    DebugVector(local_vector, icomm);


    MPI_Finalize();
    return 0;
}
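Because the block exchange of MPI_Alltoall in Task 8 is easy to misread, here is a self-contained sketch (a hypothetical file alltoall_demo.cpp, not part of this commit) with one int per block: after the call, recv[j] on rank r holds block r of rank j's send buffer, i.e. the value 100*j + r.

// alltoall_demo.cpp -- hypothetical sketch, not part of this commit.
// Run with e.g.:  mpirun -np 4 ./a.out
#include <cstdio>
#include <mpi.h>
#include <vector>

int main(int argc, char *argv[])
{
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    // one value per destination rank: block j of the send buffer goes to rank j
    std::vector<int> send(size), recv(size);
    for (int j = 0; j < size; ++j) send[j] = 100 * rank + j;

    MPI_Alltoall(send.data(), 1, MPI_INT, recv.data(), 1, MPI_INT, MPI_COMM_WORLD);

    // after the exchange, recv[j] holds block 'rank' of rank j, i.e. 100*j + rank
    for (int j = 0; j < size; ++j)
        std::printf("rank %d: recv[%d] = %d\n", rank, j, recv[j]);

    MPI_Finalize();
    return 0;
}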
ex7/code/task4/GCC_default.mk (new file, 154 lines)
@ -0,0 +1,154 @@
(content identical to ex7/code/task2/GCC_default.mk above, except that the run target uses '-np 6' instead of '-np 4')
ex7/code/task4/ICC_NATIVE_default.mk (new file, 107 lines)
@ -0,0 +1,107 @@
(content identical to ex7/code/task2/ICC_NATIVE_default.mk above)
ex7/code/task4/ICC_default.mk (new file, 112 lines)
@ -0,0 +1,112 @@
(content identical to ex7/code/task2/ICC_default.mk above)
128
ex7/code/task4/OPENMPI_CLANG_default.mk
Normal file
128
ex7/code/task4/OPENMPI_CLANG_default.mk
Normal file
|
|
@ -0,0 +1,128 @@
|
|||
# Basic Definitions for using OpenMPI with CLANG compilers
# requires setting of COMPILER=OPENMPI_CLANG_

# Pass CLANG compilers to the OpenMPI wrappers
# see: https://www.open-mpi.org/faq/?category=mpi-apps#override-wrappers-after-v1.0
EXPORT = export OMPI_CXX=clang++; export OMPI_CC=clang; export OMPI_mpifort=flang
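# The same override works for a single shell invocation (sketch):
#    OMPI_CXX=clang++ OMPI_CC=clang mpicxx --version    # should report clang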

CC = mpicc
CXX = mpicxx
F77 = mpifort
LINKER = ${CXX}

MPIRUN = ${MPI_BIN}mpirun

#http://clang.llvm.org/docs/UsersManual.html#options-to-control-error-and-warning-messages
SILENCE_MPI = -Wno-weak-vtables -Wno-old-style-cast -Wno-cast-align -Wno-deprecated
SILENCE_MPI+= -Wno-sign-conversion -Wno-reserved-id-macro -Wno-c++98-compat-pedantic
SILENCE_MPI+= -Wno-zero-as-null-pointer-constant -Wno-source-uses-openmp
WARNINGS = -Weverything -Wno-c++98-compat -Wno-weak-vtables -ferror-limit=3 ${SILENCE_MPI}
#-fsyntax-only -Wdocumentation -Wconversion -Wshadow -Wfloat-conversion -pedantic
CXXFLAGS += -Ofast -std=c++17 ${WARNINGS}
#CXXFLAGS += -Ofast -std=c++17
#  -ftrapv
#
CFLAGS += -Ofast -Weverything -ferror-limit=3 ${MPI_COMPILE_FLAGS}

# OpenMP
#CXXFLAGS += -fopenmp
#LINKFLAGS += -fopenmp

# tidy_check
SWITCH_OFF=,-readability-magic-numbers,-readability-redundant-control-flow,-readability-redundant-member-init
SWITCH_OFF+=,-readability-redundant-member-init,-readability-isolate-declaration
#READABILITY=,readability*${SWITCH_OFF}
#TIDYFLAGS = -checks=llvm-*,-llvm-header-guard -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp"
TIDYFLAGS = -checks=llvm-*,-llvm-header-guard${READABILITY} -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp"
#TIDYFLAGS += -checks='modernize*

MPI_COMPILE_FLAGS = `${MPI_BIN}mpicxx -showme:compile`
MPI_LINK_FLAGS = `${MPI_BIN}mpicxx -showme:link`
#TIDYFLAGS += ${MPI_COMPILE_FLAGS}
TIDYFLAGS += -extra-arg="-I/usr/lib/x86_64-linux-gnu/openmpi/include"
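# Hedged usage sketch (SOURCES is set by the including Makefile):
#    make tidy_check SOURCES="main.cpp vdop.cpp"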
#check:
#	echo ${MPI_COMPILE_FLAGS}

default: ${PROGRAM}

${PROGRAM}: ${OBJECTS}
@( ${EXPORT}; $(LINKER) $^ ${LINKFLAGS} -o $@ )
@echo
@echo "Start with : $(MPIRUN) -np num_proc $(MPIFLAGS) $(PROGRAM)"
@echo

clean:
rm -f ${PROGRAM} ${OBJECTS}

clean_all:: clean
@rm -f *_ *~ *.bak *.log *.out *.tar

codecheck: tidy_check
tidy_check:
clang-tidy ${SOURCES} ${TIDYFLAGS} -- ${SOURCES}
# see also http://clang-developers.42468.n3.nabble.com/Error-while-trying-to-load-a-compilation-database-td4049722.html

run: ${PROGRAM}
${MPIRUN} -np 4 ./$^ ${PROG_ARGS}

# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *

doc:
doxygen Doxyfile

#########################################################################

.cpp.o:
@( ${EXPORT}; $(CXX) -c $(CXXFLAGS) -o $@ $< )

.c.o:
@( ${EXPORT}; $(CC) -c $(CFLAGS) -o $@ $< )

.f.o:
$(F77) -c $(FFLAGS) -o $@ $<

##################################################################################################
# # some tools
# # Cache behaviour (CXXFLAGS += -g tracks down to source lines)
# cache: ${PROGRAM}
# 	valgrind --tool=callgrind --simulate-cache=yes ./$^
# # kcachegrind callgrind.out.<pid> &
#
# # Check for wrong memory accesses, memory leaks, ...
# # use smaller data sets
# mem: ${PROGRAM}
# 	valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
#
# # Simple run time profiling of your code
# # CXXFLAGS += -g -pg
# # LINKFLAGS += -pg
# prof: ${PROGRAM}
# 	./$^
# 	gprof -b ./$^ > gp.out
# # kprof -f gp.out -p gprof &
#

mem: inspector
prof: amplifier
cache: amplifier

gap_par_report:
${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt

# GUI for performance report
amplifier: ${PROGRAM}
${BINDIR}../vtune_amplifier_xe_2013/bin64/amplxe-gui &

# GUI for Memory and Thread analyzer (race condition)
inspector: ${PROGRAM}
# http://askubuntu.com/questions/41629/after-upgrade-gdb-wont-attach-to-process
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
${BINDIR}../inspector_xe_2013/bin64/inspxe-gui &
107
ex7/code/task4/OPENMPI_ICC_default.mk
Normal file
@@ -0,0 +1,107 @@
# Basic Definitions for using OpenMPI with Intel compilers
# requires setting of COMPILER=OPENMPI_ICC_

# Pass Intel compilers to the OpenMPI wrappers
# see: https://www.open-mpi.org/faq/?category=mpi-apps#override-wrappers-after-v1.0
EXPORT = export OMPI_CXX=icpc; export OMPI_CC=icc; export OMPI_mpifort=ifort

CC = mpicc
CXX = mpicxx
F77 = mpifort
LINKER = ${CXX}

MPIRUN = ${MPI_BIN}mpirun

WARNINGS = -Wall -Wextra -pedantic -Woverloaded-virtual -Wfloat-equal -Wshadow
#  -Weffc++ -Wunreachable-code -Winline
CXXFLAGS += -fast -fargument-noalias -DNDEBUG -std=c++17 ${WARNINGS}
CFLAGS += -O3 -fargument-noalias -DNDEBUG -Wall -Wextra -pedantic -Wfloat-equal -Wshadow
#  -vec-report=3 -mkl
# -guide -parallel
# -guide-opts=string -guide-par[=n] -guide-vec[=n]
# -auto-p32 -simd

# use MKL by INTEL
LINKFLAGS += -O3 -mkl ${MPI_LINK_FLAGS}
# ipo: warning #11021: unresolved __GI_memset
# see: https://software.intel.com/en-us/articles/ipo-warning-11021-unresolved-symbols-referenced-a-dynamic-library
LINKFLAGS +=

default: ${PROGRAM}

${PROGRAM}: ${OBJECTS}
@( ${EXPORT}; $(LINKER) $^ ${LINKFLAGS} -o $@ )
@echo
@echo "Start with : $(MPIRUN) -np num_proc $(MPIFLAGS) $(PROGRAM)"
@echo

clean:
rm -f ${PROGRAM} ${OBJECTS}

clean_all:: clean
@rm -f *_ *~ *.bak *.log *.out *.tar

run: ${PROGRAM}
${MPIRUN} -np 4 ./$^ ${PROG_ARGS}

# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *

doc:
doxygen Doxyfile

#########################################################################

.cpp.o:
@( ${EXPORT}; $(CXX) -c $(CXXFLAGS) -o $@ $< )

.c.o:
@( ${EXPORT}; $(CC) -c $(CFLAGS) -o $@ $< )

.f.o:
$(F77) -c $(FFLAGS) -o $@ $<

##################################################################################################
# # some tools
# # Cache behaviour (CXXFLAGS += -g tracks down to source lines)
# cache: ${PROGRAM}
# 	valgrind --tool=callgrind --simulate-cache=yes ./$^
# # kcachegrind callgrind.out.<pid> &
#
# # Check for wrong memory accesses, memory leaks, ...
# # use smaller data sets
# mem: ${PROGRAM}
# 	valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
#
# # Simple run time profiling of your code
# # CXXFLAGS += -g -pg
# # LINKFLAGS += -pg
# prof: ${PROGRAM}
# 	./$^
# 	gprof -b ./$^ > gp.out
# # kprof -f gp.out -p gprof &
#

mem: inspector
prof: amplifier
cache: amplifier

gap_par_report:
${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt

# GUI for performance report
amplifier: ${PROGRAM}
${BINDIR}../vtune_amplifier_xe_2013/bin64/amplxe-gui &

# GUI for Memory and Thread analyzer (race condition)
inspector: ${PROGRAM}
# http://askubuntu.com/questions/41629/after-upgrade-gdb-wont-attach-to-process
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
${BINDIR}../inspector_xe_2013/bin64/inspxe-gui &
125
ex7/code/task4/PGI_NATIVE_default.mk
Normal file
@@ -0,0 +1,125 @@
# Use the MPI-wrappers from the PGI compiler suite.
# requires setting of COMPILER=PGI_MPI_
#
# requires
#    sudo apt install librdmacm1

# Details for run time information
#  export PGI_ACC_TIME=1
#  unset PGI_ACC_TIME
#  export PGI_ACC_NOTIFY=1
#  export PGI_ACC_NOTIFY=3
#  unset PGI_ACC_NOTIFY

PGI_PATH = /opt/pgi/linux86-64/2019/bin
#ifeq "$(HOSTNAME)" "mephisto.uni-graz.at"
#	# mephisto
#	PGI_PATH = /share/apps/pgi/linux86-64/2016/bin
#endif

#MPI_ROOT=${PGI_PATH}mpi/mpich/bin/
MPI_ROOT= ${PGI_PATH}/../mpi/openmpi-3.1.3/bin/
MPIRUN = ${MPI_ROOT}mpirun

CC = ${MPI_ROOT}mpicc
CXX = ${MPI_ROOT}mpicxx
#F77 = ${MPI_ROOT}mpif77
ifndef LINKER
LINKER = ${CC}
endif
LINKER = ${CXX}

WARNINGS = -Minform=warn

PGI_PROFILING += -Minfo=loop,vect,opt,intensity,mp,accel
#PGI_PROFILING += -Mprof=lines -Minfo=ccff

CXXFLAGS += -e3 -std=c++17 -fast ${PGI_PROFILING} ${WARNINGS} -Mnodepchk
CFLAGS += -fast ${PGI_PROFILING} ${WARNINGS} -Mnodepchk
#
# for OpenACC
# Target architecture (nvidia,host)
TA_ARCH = host
#TA_ARCH = nvidia,host
#TA_ARCH = -ta=nvidia:cc2+,cuda5.5,fastmath
#TA_ARCH = -acc -DNDEBUG -ta=nvidia:cc2+,cuda5.5,fastmath,keepgpu
#TA_ARCH = -acc -DNDEBUG -ta=nvidia:cc2+,fastmath,keepgpu

#,keepgpu
# CFLAGS = -O3 -ta=$(TA_ARCH)
#CFLAGS += -B -gopt $(TA_ARCH)
#CXXFLAGS += -B -gopt $(TA_ARCH)
# -Minfo=all

# libcudart.a is needed for direct CUDA calls
#LINKFLAGS = -gopt $(TA_ARCH) -L${BINDIR}../lib $(PGI_PROFILING)
# -lcudart

default: ${PROGRAM}

${PROGRAM}: ${OBJECTS}
$(LINKER) $^ ${LINKFLAGS} -o $@

clean:
rm -f ${PROGRAM} ${OBJECTS} *.gpu *gprof.out

clean_all:: clean
@rm -f *_ *~ *.bak *.log *.out *.tar

#run: clean ${PROGRAM}
run: ${PROGRAM}
${MPIRUN} -np 4 ${OPTIRUN} ./${PROGRAM}

# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *

doc:
doxygen Doxyfile

#########################################################################

.cpp.o:
$(CXX) -c $(CXXFLAGS) -o $@ $<

.c.o:
$(CC) -c $(CFLAGS) -o $@ $<

.f.o:
$(F77) -c $(FFLAGS) -o $@ $<

##################################################################################################
# # some tools
# # Simple run time profiling of your code
# # CXXFLAGS += -g -pg
# # LINKFLAGS += -pg

# Profiling options PGI, see: pgcollect -help
CPU_PROF = -allcache
GPU_PROF = -cuda=gmem,branch,cc13 -cudainit
#GPU_PROF = -cuda=branch:cc20
#
PROF_FILE = pgprof.out

prof: ${PROGRAM}
#	./$^
#	$(CUDA_HOME)/bin/nvvp &
#	export LD_LIBRARY_PATH=/state/partition1/apps/pgi/linux86-64/12.9/lib:$LD_LIBRARY_PATH
${OPTIRUN} ${BINDIR}pgcollect $(GPU_PROF) ./$^
${OPTIRUN} ${BINDIR}pgprof -exe ./$^ $(PROF_FILE) &

# Memory checker (slooooow!!!):
# see doc at /usr/local/cuda/doc/cuda-memcheck.pdf
# mem: ${PROGRAM}
# 	$(CUDA_HOME)memcheck ./$^
2877
ex7/code/task4/accu.template/Doxyfile
Normal file
File diff suppressed because it is too large
54
ex7/code/task4/accu.template/Makefile
Executable file
@@ -0,0 +1,54 @@
#
# use GNU-Compiler tools
COMPILER=GCC_
# COMPILER=GCC_SEQ_
# alternatively from the shell
#    export COMPILER=GCC_
# or, alternatively from the shell
#    make COMPILER=GCC_

MAIN = main
SOURCES = ${MAIN}.cpp vdop.cpp geom.cpp par_geom.cpp

OBJECTS = $(SOURCES:.cpp=.o)

PROGRAM = ${MAIN}.${COMPILER}
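# With the settings above the binary is named  main.GCC_ ; e.g.
#    make COMPILER=OPENMPI_CLANG_       # builds main.OPENMPI_CLANG_ instead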

# uncomment the next two lines for debugging and detailed performance analysis
CXXFLAGS += -g
#  -DNDEBUG
# -pg slows down the code on my laptop when using CLANG_
LINKFLAGS += -g
#-pg
#CXXFLAGS += -Q --help=optimizers
#CXXFLAGS += -fopt-info

include ../${COMPILER}default.mk

#############################################################################
# additional specific cleaning in this directory
clean_all::
@rm -f uv.txt

#############################################################################
# special testing
# NPROCS = 4
#
TFILE = t.dat
# TTMP = t.tmp
#
graph: $(PROGRAM)
#	@rm -f $(TFILE).*
# next two lines only sequentially
./$(PROGRAM)
@mv $(TFILE).000 $(TFILE)
#	$(MPIRUN) $(MPIFLAGS) -np $(NPROCS) $(PROGRAM)
#	@echo " "; echo "Manipulate data for graphics."; echo " "
#	@cat $(TFILE).* > $(TTMP)
#	@sort -b -k 2 $(TTMP) -o $(TTMP).1
#	@sort -b -k 1 $(TTMP).1 -o $(TTMP).2
#	@awk -f nl.awk $(TTMP).2 > $(TFILE)
#	@rm -f $(TTMP).* $(TTMP) $(TFILE).*
#
-gnuplot jac.dem
43
ex7/code/task4/accu.template/ascii_read_meshvector.m
Normal file
@@ -0,0 +1,43 @@
function [ xc, ia, v ] = ascii_read_meshvector( fname )
%
%  Loads the 2D triangular mesh (coordinates, vertex connectivity)
%  together with values on its vertices from an ASCII file.
%  Matlab indexing is stored (starts with 1).
%
%  The input file format is compatible
%  with Mesh_2d_3_matlab::Write_ascii_matlab(..) in jacobi_oo_stl/geom.h
%
%  IN:   fname - filename
%  OUT:  xc    - coordinates
%        ia    - mesh connectivity
%        v     - solution vector

DELIMETER = ' ';

fprintf('Read file %s\n',fname)

% Read mesh constants
nn = dlmread(fname,DELIMETER,[0 0 0 3]);   %% row_1, col_1, row_2, col_2 in C indexing!!!
nnode = nn(1);
ndim  = nn(2);
nelem = nn(3);
nvert = nn(4);

% Read coordinates
row_start = 0+1;
row_end   = 0+nnode;
xc = dlmread(fname,DELIMETER,[row_start 0 row_end ndim-1]);

% Read connectivity
row_start = row_end+1;
row_end   = row_end+nelem;
ia = dlmread(fname,DELIMETER,[row_start 0 row_end nvert-1]);

% Read solution
row_start = row_end+1;
row_end   = row_end+nnode;
v = dlmread(fname,DELIMETER,[row_start 0 row_end 0]);
end
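
% Usage sketch (hypothetical file name; any file written by
% Mesh::Write_ascii_matlab should work):
%   [xc, ia, v] = ascii_read_meshvector('uv.txt');
%   trisurf(ia, xc(:,1), xc(:,2), v)     % quick visual check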
49
ex7/code/task4/accu.template/ascii_write_mesh.m
Normal file
@@ -0,0 +1,49 @@
function ascii_write_mesh( xc, ia, e, basename)
%
%  Saves the 2D triangular mesh in the minimal way (only coordinates, vertex connectivity, minimal boundary edge info)
%  in an ASCII file.
%  Matlab indexing is stored (starts with 1).
%
%  The output file format is compatible with Mesh_2d_3_matlab::Mesh_2d_3_matlab(std::string const &fname) in jacobi_oo_stl/geom.h
%
%  IN:
%   coordinates    xc: [2][nnode]
%   connectivity   ia: [4][nelem]  where t(4,:) holds the subdomain numbers
%   edges           e: [7][nedges]  boundary edges
%                       e([1,2],:) - start/end vertex of edge
%                       e([3,4],:) - start/end values
%                       e(5,:)     - segment number
%                       e([6,7],:) - left/right subdomain
%   basename:  file name without extension
%
%  Data have been generated via <https://de.mathworks.com/help/pde/ug/initmesh.html initmesh>.
%
fname = [basename, '.txt'];

nnode   = int32(size(xc,2));
ndim    = int32(size(xc,1));
nelem   = int32(size(ia,2));
nvert_e = int32(3);

dlmwrite(fname,nnode,'delimiter','\t','precision',16)              % number of nodes
dlmwrite(fname,ndim,'-append','delimiter','\t','precision',16)     % space dimension
dlmwrite(fname,nelem,'-append','delimiter','\t','precision',16)    % number of elements
dlmwrite(fname,nvert_e,'-append','delimiter','\t','precision',16)  % number of vertices per element

% dlmwrite(fname,xc(:),'-append','delimiter','\t','precision',16)   % coordinates
dlmwrite(fname,xc([1,2],:).','-append','delimiter','\t','precision',16)   % coordinates

% no subdomain info transferred
tmp=int32(ia(1:3,:));
% dlmwrite(fname,tmp(:),'-append','delimiter','\t','precision',16)   % connectivity in Matlab indexing
dlmwrite(fname,tmp(:,:).','-append','delimiter','\t','precision',16)   % connectivity in Matlab indexing

% store only start and end point of boundary edges
nbedges = size(e,2);
dlmwrite(fname,nbedges,'-append','delimiter','\t','precision',16)   % number of boundary edges
tmp=int32(e(1:2,:));
% dlmwrite(fname,tmp(:),'-append','delimiter','\t','precision',16)   % boundary edges in Matlab indexing
dlmwrite(fname,tmp(:,:).','-append','delimiter','\t','precision',16)   % boundary edges in Matlab indexing
end
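
% Resulting ASCII layout (one dlmwrite block per line group above):
%   nnode
%   ndim
%   nelem
%   nvert_e
%   nnode rows of coordinates        xc([1,2],:).'
%   nelem rows of connectivity       ia(1:3,:).'
%   nbedges
%   nbedges rows of boundary edges   e(1:2,:).'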
51
ex7/code/task4/accu.template/ascii_write_subdomains.m
Normal file
@@ -0,0 +1,51 @@
function ascii_write_subdomains( xc, ia, e, basename)
%
%  Saves the subdomain number of each element of the 2D triangular mesh
%  in an ASCII file.
%  Matlab indexing is stored (starts with 1).
%
%  The output file format is compatible with Mesh_2d_3_matlab::Mesh_2d_3_matlab(std::string const &fname) in jacobi_oo_stl/geom.h
%
%  IN:
%   coordinates    xc: [2][nnode]
%   connectivity   ia: [4][nelem]  where t(4,:) holds the subdomain numbers
%   edges           e: [7][nedges]  boundary edges
%                       e([1,2],:) - start/end vertex of edge
%                       e([3,4],:) - start/end values
%                       e(5,:)     - segment number
%                       e([6,7],:) - left/right subdomain
%   basename:  file name without extension
%
%  Data have been generated via <https://de.mathworks.com/help/pde/ug/initmesh.html initmesh>.
%
fname = [basename, '_sd.txt'];

nnode   = int32(size(xc,2));
ndim    = int32(size(xc,1));
nelem   = int32(size(ia,2));
nvert_e = int32(3);

% dlmwrite(fname,nnode,'delimiter','\t','precision',16)             % number of nodes
% dlmwrite(fname,ndim,'-append','delimiter','\t','precision',16)    % space dimension
% dlmwrite(fname,nelem,'-append','delimiter','\t','precision',16)   % number of elements
dlmwrite(fname,nelem,'delimiter','\t','precision',16)   % number of elements
% dlmwrite(fname,nvert_e,'-append','delimiter','\t','precision',16) % number of vertices per element

% % dlmwrite(fname,xc(:),'-append','delimiter','\t','precision',16)   % coordinates
% dlmwrite(fname,xc([1,2],:).','-append','delimiter','\t','precision',16)   % coordinates

% subdomain info
tmp=int32(ia(4,:));
% % dlmwrite(fname,tmp(:),'-append','delimiter','\t','precision',16)   % connectivity in Matlab indexing
% dlmwrite(fname,tmp(:,:).','-append','delimiter','\t','precision',16)   % connectivity in Matlab indexing
dlmwrite(fname,tmp(:,:).','-append','delimiter','\t')   % subdomain numbers in Matlab indexing

% % store only start and end point of boundary edges
% nbedges = size(e,2);
% dlmwrite(fname,nbedges,'-append','delimiter','\t','precision',16)   % number of boundary edges
% tmp=int32(e(1:2,:));
% % dlmwrite(fname,tmp(:),'-append','delimiter','\t','precision',16)   % boundary edges in Matlab indexing
% dlmwrite(fname,tmp(:,:).','-append','delimiter','\t','precision',16)   % boundary edges in Matlab indexing
end
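
% Resulting ASCII layout: nelem on the first row, followed by one subdomain
% number per element (ia(4,:)), the format ParMesh::ReadElementSubdomains reads back.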
1282
ex7/code/task4/accu.template/geom.cpp
Normal file
File diff suppressed because it is too large
712
ex7/code/task4/accu.template/geom.h
Normal file
@@ -0,0 +1,712 @@
#ifndef GEOM_FILE
#define GEOM_FILE
#include <array>
#include <functional>        // function; C++11
#include <iostream>
#include <memory>            // shared_ptr
#include <string>
#include <vector>

/**
 * Base class for finite element meshes.
 */
class Mesh
{
public:
/**
 * Constructor initializing the members with default values.
 *
 * @param[in] ndim    space dimensions (dimension for coordinates)
 * @param[in] nvert_e number of vertices per element (dimension for connectivity)
 * @param[in] ndof_e  degrees of freedom per element (= @p nvert_e for linear elements)
 * @param[in] nedge_e number of edges per element (= @p nvert_e for linear elements in 2D)
 */
explicit Mesh(int ndim, int nvert_e = 0, int ndof_e = 0, int nedge_e = 0);

__attribute__((noinline))
Mesh(Mesh const &) = default;

Mesh &operator=(Mesh const &) = delete;

/**
 * Destructor.
 *
 * See clang warning on
 * <a href="https://stackoverflow.com/questions/28786473/clang-no-out-of-line-virtual-method-definitions-pure-abstract-c-class/40550578">weak-vtables</a>.
 */
virtual ~Mesh();

/**
 * Reads mesh data from an ASCII file.
 *
 * File format, see ascii_write_mesh.m
 *
 * @param[in] fname file name
 */
explicit Mesh(std::string const &fname);

/**
 * Reads mesh data from an ASCII file.
 *
 * File format, see ascii_write_mesh.m
 *
 * @param[in] fname file name
 */
void ReadVertexBasedMesh(std::string const &fname);

/**
 * Number of finite elements in (sub)domain.
 * @return number of elements.
 */
int Nelems() const
{
return _nelem;
}

/**
 * Global number of vertices for each finite element.
 * @return number of vertices per element.
 */
int NverticesElements() const
{
return _nvert_e;
}

/**
 * Global number of degrees of freedom (dof) for each finite element.
 * @return degrees of freedom per element.
 */
int NdofsElement() const
{
return _ndof_e;
}

/**
 * Number of vertices in mesh.
 * @return number of vertices.
 */
int Nnodes() const
{
return _nnode;
}

/**
 * Space dimension.
 * @return number of dimensions.
 */
int Ndims() const
{
return _ndim;
}

/**
 * (Re-)Allocates memory for the element connectivity and redefines the appropriate dimensions.
 *
 * @param[in] nelem   number of elements
 * @param[in] nvert_e number of vertices per element
 */
void Resize_Connectivity(int nelem, int nvert_e)
{
SetNelem(nelem);              // number of elements
SetNverticesElement(nvert_e); // vertices per element
_ia.resize(nelem * nvert_e);
}

/**
 * Read connectivity information (g1,g2,g3)_i.
 * @return connectivity vector [nelems*ndofs].
 */
const std::vector<int> &GetConnectivity() const
{
return _ia;
}

/**
 * Access/Change connectivity information (g1,g2,g3)_i.
 * @return connectivity vector [nelems*ndofs].
 */
std::vector<int> &GetConnectivity()
{
return _ia;
}

/**
 * (Re-)Allocates memory for the vertex coordinates and redefines the appropriate dimensions.
 *
 * @param[in] nnodes number of nodes
 * @param[in] ndim   space dimension
 */
void Resize_Coords(int nnodes, int ndim)
{
SetNnode(nnodes); // number of nodes
SetNdim(ndim);    // space dimension
_xc.resize(nnodes * ndim);
}

/**
 * Read coordinates of vertices (x,y)_i.
 * @return coordinates vector [nnodes*2].
 */
const std::vector<double> &GetCoords() const
{
return _xc;
}

/**
 * Access/Change coordinates of vertices (x,y)_i.
 * @return coordinates vector [nnodes*2].
 */
std::vector<double> &GetCoords()
{
return _xc;
}

/**
 * Calculate values in vector @p v via function @p func(x,y)
 * @param[in] v    vector
 * @param[in] func function of (x,y) returning a double value.
 */
void SetValues(std::vector<double> &v, const std::function<double(double, double)> &func) const;
void SetBoundaryValues(std::vector<double> &v, const std::function<double(double, double)> &func) const;
void SetDirchletValues(std::vector<double> &v, const std::function<double(double, double)> &func) const;

/**
 * Prints the information for a finite element mesh
 */
void Debug() const;

/**
 * Prints the edge based information for a finite element mesh
 */
void DebugEdgeBased() const;

/**
 * Determines the indices of those vertices with Dirichlet boundary conditions
 * @return index vector.
 */
virtual std::vector<int> Index_DirichletNodes() const;
virtual std::vector<int> Index_BoundaryNodes() const;

/**
 * Write vector @p v together with its mesh information to an ASCII file @p fname.
 *
 * The data are written in C-style.
 *
 * @param[in] fname file name
 * @param[in] v     vector
 */
void Write_ascii_matlab(std::string const &fname, std::vector<double> const &v) const;

/**
 * Exports the mesh information to ASCII files @p basename + {_coords|_elements}.txt.
 *
 * The data are written in C-style.
 *
 * @param[in] basename first part of file names
 */
void Export_scicomp(std::string const &basename) const;

/**
 * Visualize @p v together with its mesh information via matlab or octave.
 *
 * Comment/uncomment those code lines in method Mesh::Visualize (geom.cpp)
 * that are supported on your system.
 *
 * @param[in] v vector
 *
 * @warning matlab files ascii_read_meshvector.m visualize_results.m
 *          must be in the executing directory.
 */
void Visualize(std::vector<double> const &v) const;

/**
 * Global number of edges.
 * @return number of edges in mesh.
 */
int Nedges() const
{
return _nedge;
}

/**
 * Global number of edges for each finite element.
 * @return number of edges per element.
 */
int NedgesElements() const
{
return _nedge_e;
}

/**
 * Read edge connectivity information (e1,e2,e3)_i.
 * @return edge connectivity vector [nelems*_nedge_e].
 */
const std::vector<int> &GetEdgeConnectivity() const
{
return _ea;
}

/**
 * Access/Change edge connectivity information (e1,e2,e3)_i.
 * @return edge connectivity vector [nelems*_nedge_e].
 */
std::vector<int> &GetEdgeConnectivity()
{
return _ea;
}

/**
 * Read edge information (v1,v2)_i.
 * @return edge connectivity vector [_nedge*2].
 */
const std::vector<int> &GetEdges() const
{
return _edges;
}

/**
 * Access/Change edge information (v1,v2)_i.
 * @return edge connectivity vector [_nedge*2].
 */
std::vector<int> &GetEdges()
{
return _edges;
}

/**
 * Determines all node to node connections from the vertex based mesh.
 *
 * @return vector[k][] containing all connections of vertex k, including to itself.
 */
std::vector<std::vector<int>> Node2NodeGraph() const
{
//// Check version 2 wrt. version 1
//auto v1=Node2NodeGraph_1();
//auto v2=Node2NodeGraph_2();
//if ( equal(v1.cbegin(),v1.cend(),v2.begin()) )
//{
//std::cout << "\nidentical Versions\n";
//}
//else
//{
//std::cout << "\nE R R O R   in Versions\n";
//}

//return Node2NodeGraph_1();
return Node2NodeGraph_2(); // 2 times faster than version 1
}
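
// Hedged usage sketch:
//    Mesh mesh("square_4.txt");
//    auto n2n = mesh.Node2NodeGraph();   // n2n[k]: all neighbours of vertex k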

/**
 * Accesses the father-of-nodes relation.
 *
 * @return vector of length 0 because no relation available.
 */
virtual std::vector<int> const &GetFathersOfVertices() const
{
return _dummy;
}

/**
 * Deletes all edge connectivity information (saves memory).
 */
void Del_EdgeConnectivity();

protected:
//public:
void SetNelem(int nelem)
{
_nelem = nelem;
}

void SetNverticesElement(int nvert)
{
_nvert_e = nvert;
}

void SetNdofsElement(int ndof)
{
_ndof_e = ndof;
}

void SetNnode(int nnode)
{
_nnode = nnode;
}

void SetNdim(int ndim)
{
_ndim = ndim;
}

void SetNedge(int nedge)
{
_nedge = nedge;
}

/**
 * Reads vertex based mesh data from an ASCII file.
 *
 * File format, see ascii_write_mesh.m
 *
 * @param[in] fname file name
 */
void ReadVectexBasedMesh(std::string const &fname);

/**
 * The vertex based mesh data are used to derive the edge based data.
 *
 * @warning Exactly 3 vertices, 3 edges per element are assumed (linear triangle in 2D)
 */
void DeriveEdgeFromVertexBased()
{
//DeriveEdgeFromVertexBased_slow();
//DeriveEdgeFromVertexBased_fast();
DeriveEdgeFromVertexBased_fast_2();
}
void DeriveEdgeFromVertexBased_slow();
void DeriveEdgeFromVertexBased_fast();
void DeriveEdgeFromVertexBased_fast_2();

/**
 * The edge based mesh data are used to derive the vertex based data.
 *
 * @warning Exactly 3 vertices, 3 edges per element are assumed (linear triangle in 2D)
 */
void DeriveVertexFromEdgeBased();

/**
 * Number of boundary edges.
 * @return number of boundary edges.
 */
int Nnbedges() const
{
return static_cast<int>(_bedges.size());
}

/**
 * Checks whether the array dimensions fit to their appropriate size parameters.
 * @return true if the dimensions are consistent.
 */
virtual bool Check_array_dimensions() const;

/**
 * Permutes the vertex information in an edge based mesh.
 *
 * @param[in] old2new new indices of original vertices.
 */
void PermuteVertices_EdgeBased(std::vector<int> const &old2new);

private:
/**
 * Determines all node to node connections from the vertex based mesh.
 *
 * @return vector[k][] containing all connections of vertex k, including to itself.
 */
std::vector<std::vector<int>> Node2NodeGraph_1() const;  // is correct

/**
 * Determines all node to node connections from the vertex based mesh.
 *
 * Faster than @p Node2NodeGraph_1().
 *
 * @return vector[k][] containing all connections of vertex k, including to itself.
 */
std::vector<std::vector<int>> Node2NodeGraph_2() const;  // is correct

//private:
protected:
int _nelem;          //!< number of elements
int _nvert_e;        //!< number of vertices per element
int _ndof_e;         //!< degrees of freedom (d.o.f.) per element
int _nnode;          //!< number of nodes/vertices
int _ndim;           //!< space dimension of the problem (1, 2, or 3)
std::vector<int> _ia;    //!< element connectivity
std::vector<double> _xc; //!< coordinates

protected:
// B.C.
std::vector<int> _bedges;  //!< boundary edges [nbedges][2] storing start/end vertex
// 2020-01-08
std::vector<int> _sdedges; //!< boundary edges [nbedges][2] with left/right subdomain number

//private:
protected:
// edge based connectivity
int _nedge;          //!< number of edges in mesh
int _nedge_e;        //!< number of edges per element
std::vector<int> _edges; //!< edges of mesh (vertices ordered ascending)
std::vector<int> _ea;    //!< edge based element connectivity
// B.C.
std::vector<int> _ebedges; //!< boundary edges [nbedges]

private:
const std::vector<int> _dummy; //!< empty dummy vector

};

// *********************************************************************

class RefinedMesh: public Mesh
{
public:
/**
 * Constructs a refined mesh according to the marked elements in @p ibref.
 *
 * If the vector @p ibref has size 0 then all elements will be refined.
 *
 * @param[in] cmesh coarse mesh to be refined
 * @param[in] ibref vector containing True/False regarding refinement for each element
 */
//explicit RefinedMesh(Mesh const &cmesh, std::vector<bool> const &ibref = std::vector<bool>(0));
RefinedMesh(Mesh const &cmesh, std::vector<bool> const &ibref);
//RefinedMesh(Mesh const &cmesh, std::vector<bool> const &ibref);

/**
 * Constructs a refined mesh by regular refinement of all elements.
 *
 * @param[in] cmesh coarse mesh to be refined
 */
explicit RefinedMesh(Mesh const &cmesh)
: RefinedMesh(cmesh, std::vector<bool>(0))
{}

RefinedMesh(RefinedMesh const &) = delete;
//RefinedMesh(RefinedMesh const&&) = delete;

RefinedMesh &operator=(RefinedMesh const &) = delete;
//RefinedMesh& operator=(RefinedMesh const&&) = delete;

/**
 * Destructor.
 */
virtual ~RefinedMesh() override;

/**
 * Refines the mesh according to the marked elements.
 *
 * @param[in] ibref vector containing True/False regarding refinement for each element
 *
 * @return the refined mesh
 */
Mesh RefineElements(std::vector<bool> const &ibref);

/**
 * Refines all elements in the actual mesh.
 *
 * @param[in] nref number of regular refinements to perform
 */
void RefineAllElements(int nref = 1);

/**
 * Accesses the father-of-nodes relation.
 *
 * @return father-of-nodes relation [nnodes][2]
 */
std::vector<int> const &GetFathersOfVertices() const override
{
return _vfathers;
}

protected:
/**
 * Checks whether the array dimensions fit to their appropriate size parameters.
 * @return true if the dimensions are consistent.
 */
bool Check_array_dimensions() const override;

/**
 * Permutes the vertex information in an edge based mesh.
 *
 * @param[in] old2new new indices of original vertices.
 */
void PermuteVertices_EdgeBased(std::vector<int> const &old2new);

private:
//Mesh const & _cmesh; //!< coarse mesh
std::vector<bool> const _ibref; //!< refinement info
int _nref;                      //!< number of regular refinements performed
std::vector<int> _vfathers;     //!< stores the 2 fathers of each vertex (equal fathers denote original coarse vertex)

};

// *********************************************************************

class gMesh_Hierarchy
{
public:
/**
 * Constructs mesh hierarchy of @p nlevel levels starting with coarse mesh @p cmesh.
 * The coarse mesh @p cmesh will be @p nlevel-1 times geometrically refined.
 *
 * @param[in] cmesh  initial coarse mesh
 * @param[in] nlevel number of levels in mesh hierarchy
 */
gMesh_Hierarchy(Mesh const &cmesh, int nlevel);
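
// Hedged usage sketch:
//    gMesh_Hierarchy hierarchy(cmesh, 3);    // cmesh plus two refined levels
//    Mesh const &fine = hierarchy.finest();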

size_t size() const
{
return _gmesh.size();
}

/**
 * Access to mesh @p lev from mesh hierarchy.
 *
 * @return mesh @p lev
 * @warning An out_of_range exception might be thrown.
 */
Mesh const &operator[](int lev) const
{
return *_gmesh.at(lev);
}

/**
 * Access to finest mesh in mesh hierarchy.
 *
 * @return finest mesh
 */
Mesh const &finest() const
{
return *_gmesh.back();
}

/**
 * Access to coarsest mesh in mesh hierarchy.
 *
 * @return coarsest mesh
 */
Mesh const &coarsest() const
{
return *_gmesh.front();
}

private:
std::vector<std::shared_ptr<Mesh>> _gmesh; //!< mesh hierarchy from coarse ([0]) to fine.

};

// *********************************************************************
/**
 * 2D finite element mesh of the square consisting of linear triangular elements.
 */
class Mesh_2d_3_square: public Mesh
{
public:
/**
 * Generates the f.e. mesh for the unit square.
 *
 * @param[in] nx    number of discretization intervals in x-direction
 * @param[in] ny    number of discretization intervals in y-direction
 * @param[in] myid  my MPI-rank / subdomain
 * @param[in] procx number of ranks/subdomains in x-direction
 * @param[in] procy number of processes in y-direction
 */
Mesh_2d_3_square(int nx, int ny, int myid = 0, int procx = 1, int procy = 1);

/**
 * Destructor
 */
~Mesh_2d_3_square() override;

/**
 * Set solution vector based on a tensor product grid in the rectangle.
 * @param[in] u solution vector
 */
void SetU(std::vector<double> &u) const;

/**
 * Set right hand side (rhs) vector on a tensor product grid in the rectangle.
 * @param[in] f rhs vector
 */
void SetF(std::vector<double> &f) const;

/**
 * Determines the indices of those vertices with Dirichlet boundary conditions
 * @return index vector.
 */
std::vector<int> Index_DirichletNodes() const override;
std::vector<int> Index_BoundaryNodes() const override;

/**
 * Stores the values of vector @p u of (sub)domain into a file @p name for further processing in gnuplot.
 * The file stores row-wise the x- and y- coordinates together with the value from @p u .
 * The domain [@p xl, @p xr] x [@p yb, @p yt] is discretized into @p nx x @p ny intervals.
 *
 * @param[in] name basename of file name (file name will be extended by the rank number)
 * @param[in] u    local vector
 *
 * @warning Assumes tensor product grid in unit square, numbered row-wise
 *          (as generated in class constructor).
 *          The output is provided for tensor product grid visualization
 *          ( similar to Matlab-surf() ).
 *
 * @see Mesh_2d_3_square
 */
void SaveVectorP(std::string const &name, std::vector<double> const &u) const;

// We still need to implement in this class
//     GetBound(), AddBound()
// or better a generalized way with indices and their appropriate ranks for MPI communication

private:
/**
 * Determines the coordinates of the discretization nodes of the domain [@p xl, @p xr] x [@p yb, @p yt]
 * which is discretized into @p nx x @p ny intervals.
 * @param[in] nx  number of discretization intervals in x-direction
 * @param[in] ny  number of discretization intervals in y-direction
 * @param[in] xl  x-coordinate of left boundary
 * @param[in] xr  x-coordinate of right boundary
 * @param[in] yb  y-coordinate of lower boundary
 * @param[in] yt  y-coordinate of upper boundary
 * @param[out] xc coordinate vector of length 2n with x(2*k,2*k+1) as coordinates of node k
 */
void GetCoordsInRectangle(int nx, int ny, double xl, double xr, double yb, double yt,
double xc[]);

/**
 * Determines the element connectivity of linear triangular elements of a FEM discretization
 * of a rectangle using @p nx x @p ny equidistant intervals for discretization.
 * @param[in] nx  number of discretization intervals in x-direction
 * @param[in] ny  number of discretization intervals in y-direction
 * @param[out] ia element connectivity matrix with ia(3*s,3*s+1,3*s+2) as node numbers of element s
 */
void GetConnectivityInRectangle(int nx, int ny, int ia[]);

private:
int _myid;  //!< my MPI rank
int _procx; //!< number of MPI ranks in x-direction
int _procy; //!< number of MPI ranks in y-direction
std::array<int, 4> _neigh; //!< MPI ranks of neighbors (negative: no neighbor but b.c.)
int _color; //!< red/black coloring (checker board) of subdomains

double _xl; //!< x coordinate of lower left  corner of square
double _xr; //!< x coordinate of lower right corner of square
double _yb; //!< y coordinate of lower left  corner of square
double _yt; //!< y coordinate of upper right corner of square
int _nx;    //!< number of intervals in x-direction
int _ny;    //!< number of intervals in y-direction
};

// *********************************************************************

#endif
105
ex7/code/task4/accu.template/main.cpp
Normal file
@@ -0,0 +1,105 @@
// MPI code in C++.
// See [Gropp/Lusk/Skjellum, "Using MPI", p.33/41 etc.]
// and /opt/mpich/include/mpi2c++/comm.h for details

#include "geom.h"
#include "par_geom.h"
#include "vdop.h"

#include <cassert>
#include <cmath>
#include <iostream>
#include <mpi.h>    // MPI
#include <omp.h>    // OpenMP
using namespace std;


int main(int argc, char **argv )
{
MPI_Init(&argc, &argv);
MPI_Comm const icomm(MPI_COMM_WORLD);
omp_set_num_threads(1);  // don't use OMP parallelization for a start
//
{
int np;
MPI_Comm_size(icomm, &np);

// assert(4 == np); // example is only provided for 4 MPI processes
}
// #####################################################################
// ---- Read the f.e. mesh and the mapping of elements to MPI processes
//Mesh const mesh_c("square_4.txt"); // Files square_4.txt and square_4_sd.txt are needed
ParMesh const mesh("square",icomm);

int const numprocs = mesh.NumProcs();
int const myrank   = mesh.MyRank();
if ( 0 == myrank ) {
cout << "\n There are " << numprocs << " processes running.\n \n";
}

int const check_rank=1; // choose the MPI process whose mesh you would like to check
//if ( check_rank == myrank ) mesh.Debug();
//if ( check_rank == myrank ) mesh.DebugEdgeBased();

// ##########################################################################
// ---- allocate local vectors and check scalar product and vector accumulation

if (check_rank==myrank) {printf("\n\n-------------- Task 9 --------------\n\n");}
if (check_rank==myrank) cout << "Mesh coordinates: " << mesh.GetCoords() << endl << endl;
MPI_Barrier(icomm);
vector<double> xl(mesh.Nnodes(), 1.0);

// for visualization I had to type in the terminal:
//    export LIBGL_ALWAYS_SOFTWARE=1
if (check_rank==myrank) mesh.Visualize(xl);

double ss = mesh.dscapr(xl,xl);
cout << myrank << " : scalar : " << ss << endl;
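
// A hedged sketch of what such a distributed scalar product typically
// reduces to (assuming dscapr handles the weighting of shared interface
// entries internally):
//    double s_loc = std::inner_product(xl.begin(), xl.end(), xl.begin(), 0.0);
//    double s_glob;
//    MPI_Allreduce(&s_loc, &s_glob, 1, MPI_DOUBLE, MPI_SUM, icomm);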

mesh.VecAccu(xl);
if (check_rank==myrank) mesh.Visualize(xl);

MPI_Barrier(icomm);
if (check_rank==myrank) {printf("\n\n-------------- Task 10 --------------\n\n");}
vector<int> y(mesh.Nnodes(), 1);
mesh.VecAccuInt(y);
if (check_rank==myrank) {
printf("Accumulated integer vector y:\n");
for (int i : y) {
cout << i << " ";
}
}

MPI_Barrier(icomm);
if (check_rank==myrank) {printf("\n\n-------------- Task 11 --------------\n\n");}
int global_nodes = mesh.GlobalNodes();
if (check_rank==myrank) cout << "Global nodes: " << global_nodes << endl;

MPI_Barrier(icomm);
if (check_rank==myrank) {printf("\n\n-------------- Task 12 --------------\n\n");}

// Set xl to the vector of ones again
for (size_t k=0; k<xl.size(); ++k)
{
xl[k] = 1.0;
}
if (check_rank==myrank) mesh.Visualize(xl);
mesh.Average(xl);
if (check_rank==myrank) mesh.Visualize(xl);

// -------------- Task 13 --------------
// Should work with 2, 4 and 6 subdomains (change run target in GCC_default.mk)
// Check subdomains with different values for check_rank (0-5)

MPI_Finalize();
return 0;
}
626
ex7/code/task4/accu.template/par_geom.cpp
Normal file
@@ -0,0 +1,626 @@
// see: http://llvm.org/docs/CodingStandards.html#include-style
#include "vdop.h"
//#include "geom.h"
#include "par_geom.h"

#include <algorithm>
#include <array>
#include <cassert>
#include <cmath>
#include <ctime>     // contains clock()
#include <fstream>
#include <iostream>
#include <list>
#include <numeric>   // accumulate()
#include <string>
#include <vector>

using namespace std;


ParMesh::ParMesh(int ndim, int nvert_e, int ndof_e, int nedge_e, MPI_Comm const &icomm)
: Mesh(ndim, nvert_e, ndof_e, nedge_e),
_icomm(icomm), _numprocs(-1), _myrank(-1),
_v_l2g(0), _t_l2g(0), _v_g2l{{}}, _t_g2l{{}}, _valence(0),
_sendbuf(0), _sendcounts(0), _sdispls(0),
_loc_itf(0), _gloc_itf(0), _buf2loc(0)
{
MPI_Comm_size(icomm, &_numprocs);
MPI_Comm_rank(icomm, &_myrank);
}

ParMesh::~ParMesh()
{}


ParMesh::ParMesh(std::string const &sname, MPI_Comm const &icomm)
: ParMesh(2, 3, 3, 3, icomm)   // two dimensions, 3 vertices, 3 dofs, 3 edges per element
{
//const int numprocs = _icomm.Get_size();
const string NS    = "_" + to_string(_numprocs);
const string fname = sname + NS + ".txt";
//cout << "############ " << fname << endl;
ReadVertexBasedMesh(fname);
cout << "\n End of sequential File read \n";
// ------------------------------------------------------------------------------
// Until this point  a l l  processes possess  a l l  mesh info in  g l o b a l  numbering
//
// Now, we have to select the data belonging to my_rank
// and we have to create the mapping local to global (l2g) and vice versa (g2l)
// ------------------------------------------------------------------------------

// save the global node mesh (maybe we need it later)
DeriveEdgeFromVertexBased();   // and even more
Mesh global_mesh(*this);       // requires  a l o t  of memory
Del_EdgeConnectivity();

// read the subdomain info
const string dname = sname + NS + "_sd" + ".txt";
vector<int> t2d = ReadElementSubdomains(dname); // global mapping triangle to subdomain for all elements

//const int myrank = _icomm.Get_rank();
Transform_Local2Global_Vertex(_myrank, t2d);  // Vertex based mesh: now in  l o c a l  indexing

DeriveEdgeFromVertexBased();   // Generate also the  l o c a l  edge based information

Generate_VectorAdd();

// Now we have to organize the MPI communication of vertices on the subdomain interfaces

return;
}

vector<int> ParMesh::ReadElementSubdomains(string const &dname)
{
ifstream ifs(dname);
if (!(ifs.is_open() && ifs.good())) {
cerr << "ParMesh::ReadElementSubdomain: Error cannot open file " << dname << endl;
assert(ifs.is_open());
}

int const OFFSET{1};   // Matlab to C indexing
cout << "ASCII file " << dname << " opened" << endl;

// Read some mesh constants
int nelem;
ifs >> nelem;
cout << nelem << " " << Nelems() << endl;
assert( Nelems() == nelem);

// Allocate memory
vector<int> t2d(nelem, -1);
// Read element mapping
for (int k = 0; k < nelem; ++k) {
int tmp;
ifs >> tmp;
//t2d[k] = tmp - OFFSET;
// 2020-01-08
t2d[k] = min(tmp, NumProcs()) - OFFSET;
}

return t2d;
}

void ParMesh::Transform_Local2Global_Vertex(int const myrank, vector<int> const &t2d)
{
// number of local elements
const int l_ne = count(t2d.cbegin(), t2d.cend(), myrank);
//cout << myrank << ":: " << lne << endl;
vector<int> l_ia(l_ne * NverticesElements(), -1); // local elements still with global vertex numbers
_t_l2g.resize(l_ne, -1);

int lk = 0;
for (size_t k = 0; k < t2d.size(); ++k) {
if (myrank == t2d[k]) {
//if (0==myrank)
//{
//cout << lk << " k " << t2d[k] << endl;
//}
l_ia[3 * lk    ] = _ia[3 * k    ];
l_ia[3 * lk + 1] = _ia[3 * k + 1];
l_ia[3 * lk + 2] = _ia[3 * k + 2];  // local elements still with global vertex numbers
_t_l2g[lk] = k;                     // elements: local to global mapping
_t_g2l[k]  = lk;                    //           global to local
++lk;
}
}
// Checks:
assert( count(l_ia.cbegin(), l_ia.cend(), -1) == 0 );
assert( count(_t_l2g.cbegin(), _t_l2g.cend(), -1) == 0 );

// Vertices: local to global mapping
auto tmp = l_ia;
sort(tmp.begin(), tmp.end());
auto ip = unique(tmp.begin(), tmp.end());
tmp.erase(ip, tmp.end());
_v_l2g = tmp;   // Vertices: local to global mapping
for (size_t lkv = 0; lkv < _v_l2g.size(); ++lkv) {
_v_g2l[_v_l2g[lkv]] = lkv;   // global to local
}

// Boundary edges
vector<int> l_bedges;
vector<int> l_sdedges;
for (size_t b = 0; b < _bedges.size(); b += 2) {
int const v1 = _bedges[b    ];   // global vertex numbers
int const v2 = _bedges[b + 1];
try {
int const lv1 = _v_g2l.at(v1);   // map[] would add that element
int const lv2 = _v_g2l.at(v2);   // but at() throws an exception
l_bedges.push_back(lv1);
l_bedges.push_back(lv2);         // Boundaries: already in local indexing
// 2020-01-08
l_sdedges.push_back(_sdedges[b  ]);
l_sdedges.push_back(_sdedges[b+1]);
}
catch (std::out_of_range & err) {
//cerr << ".";
}
}

// number of local vertices
const int l_nn = _v_l2g.size();
vector<double> l_xc(Ndims()*l_nn);
for (int lkk = 0; lkk < l_nn; ++lkk) {
int k = _v_l2g.at(lkk);
l_xc[2 * lkk    ] = _xc[2 * k    ];
l_xc[2 * lkk + 1] = _xc[2 * k + 1];
}

// Now, we represent the vertex mesh in  l o c a l  numbering
// elements

for (size_t i = 0; i < l_ia.size(); ++i) {
l_ia[i] = _v_g2l.at(l_ia[i]);    // element vertices: global to local
}
SetNelem(l_ne);
_ia = l_ia;
// boundary
_bedges  = l_bedges;
_sdedges = l_sdedges;
// coordinates
SetNnode(l_nn);
_xc = l_xc;

return;
}


void ParMesh::Generate_VectorAdd()
{
// Some checks
int lnn = Nnodes();   // local number of vertices
assert(static_cast<int>(_v_l2g.size()) == lnn);
int ierr{-12345};

// ---- Determine global largest vertex index
int gidx_max{-1};   // global largest vertex index
int lmax = *max_element(_v_l2g.cbegin(), _v_l2g.cend());
MPI_Allreduce(&lmax, &gidx_max, 1, MPI_INT, MPI_MAX, _icomm);
int gidx_min{-1};   // global smallest vertex index
int lmin = *min_element(_v_l2g.cbegin(), _v_l2g.cend());
MPI_Allreduce(&lmin, &gidx_min, 1, MPI_INT, MPI_MIN, _icomm);
//cout << gidx_min << " " << gidx_max << endl;
assert(0 == gidx_min);   // global indices have to start with 0

// ---- Determine for all global vertices the number of subdomains they belong to
vector<int> global(gidx_max+1, 0);   // global scalar array for vertices
for (auto const gidx : _v_l2g) global[gidx] = 1;
// https://www.mpi-forum.org/docs/mpi-2.2/mpi22-report/node109.htm
ierr = MPI_Allreduce(MPI_IN_PLACE, global.data(), global.size(), MPI_INT, MPI_SUM, _icomm);
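// Worked micro-example (illustration only): a vertex shared by ranks 0 and 1
// contributes a 1 from each rank, so after the reduction global[g] == 2,
// which marks it below as an interface vertex.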
//if (0 == MyRank()) cout << global << endl;
//MPI_Barrier(_icomm);
//cout << _xc[2*_v_g2l.at(2)] << " , " << _xc[2*_v_g2l.at(2)+1] << endl;
//MPI_Barrier(_icomm);

// now, global[] contains the number of subdomains a global vertex belongs to
if ( count(global.cbegin(), global.cend(), 0) > 0 )
cerr << "\n !!!   Non-continuous global vertex indexing  !!!\n";

// ---- Determine local interface vertices ( <==> global[] > 1 )
//      _loc_itf, neigh_itf
//vector<int> loc_itf;   // local indices of interface vertices on this MPI process
for (size_t lk = 0; lk < _v_l2g.size(); ++lk) {
int const gk = _v_l2g[lk];   // global index of local vertex lk
if ( global[gk] > 1 ) {
_loc_itf.push_back(lk);  // local indices of interface vertices on this MPI process
}
}

//MPI_Barrier(_icomm);
//if (0 == MyRank()) cout << "\n..._loc_itf...\n" << _loc_itf << "\n......\n";
//MPI_Barrier(_icomm);
// ---- global indices of local interface vertices
//auto gloc_itf(_loc_itf);
_gloc_itf=_loc_itf;
for_each(_gloc_itf.begin(), _gloc_itf.end(), [this] (auto & v) -> void { v = _v_l2g[v];} );
//MPI_Barrier(_icomm);
//if (0 == MyRank()) cout << "\n..._gloc_itf...\n" << _gloc_itf << "\n......\n";
//DebugVector(_gloc_itf,"_gloc_itf");

// ---- Determine the global length of interfaces
vector<int> vnn(NumProcs(), -1);   // number of interface vertices per MPI rank
int l_itf(_loc_itf.size());        // # local interface vertices
ierr = MPI_Allgather(&l_itf, 1, MPI_INT, vnn.data(), 1, MPI_INT, _icomm);
assert(0 == ierr);
//cout << vnn << endl;

// ---- Now we consider only the interface vertices
int snn = accumulate(vnn.cbegin(), vnn.cend(), 0); // required length of array for global interface indices
//cout << snn << " " << gnn << endl;
vector<int> dispnn(NumProcs(), 0);  // displacement of interface vertices per MPI rank
partial_sum(vnn.cbegin(), vnn.cend() - 1, dispnn.begin() + 1);
|
||||
//cout << dispnn << endl;
|
||||
|
||||
// ---- Get the global indices for all global interfaces
|
||||
vector<int> g_itf(snn, -1); // collects all global indices of the global interfaces
|
||||
// https://www.mpich.org/static//docs/v3.0.x/www3/MPI_Gatherv.html
|
||||
ierr = MPI_Gatherv( _gloc_itf.data(), _gloc_itf.size(), MPI_INT,
|
||||
g_itf.data(), vnn.data(), dispnn.data(), MPI_INT, 0, _icomm);
|
||||
assert(0 == ierr);
|
||||
// https://www.mpich.org/static/docs/v3.1/www3/MPI_Bcast.html
|
||||
ierr = MPI_Bcast(g_itf.data(), g_itf.size(), MPI_INT, 0, _icomm);
|
||||
assert(0 == ierr); // Now, each MPI rank has the all global indices of the global interfaces
|
||||
//MPI_Barrier(_icomm);
|
||||
//if (MyRank() == 0) cout << "\n...g_itf...\n" << g_itf << "\n......\n";
|
||||
//MPI_Barrier(_icomm);
|
||||
|
||||
// ----- Determine all MPI ranks a local interface vertex belongs to
|
||||
vector<vector<int>> neigh_itf(_loc_itf.size());// subdomains a local interface vertex belongs to
|
||||
for (size_t lk = 0; lk < _loc_itf.size(); ++lk) {
|
||||
const int gvert = _gloc_itf[lk]; // global index of local interface node lk
|
||||
for (int rank = 0; rank < NumProcs(); ++rank) {
|
||||
auto const startl = g_itf.cbegin() + dispnn[rank];
|
||||
auto const endl = startl + vnn[rank];
|
||||
if ( find( startl, endl, gvert) != endl) {
|
||||
neigh_itf[lk].push_back(rank);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---- check the available info in _loc_itf[lk], _gloc_itf[lk], neigh_itf[lk]
|
||||
//MPI_Barrier(_icomm);
|
||||
////if (MyRank()==0) cout << "\n...neigh_itf ...\n" << neigh_itf << endl;
|
||||
//if (MyRank() == 0) {
|
||||
//for (size_t lk = 0; lk < _loc_itf.size(); ++lk ) {
|
||||
//cout << lk << " : local idx " << _loc_itf[lk] << " , global idx " << _gloc_itf[lk];
|
||||
//cout << " with MPI ranks " << neigh_itf[lk] << endl;
|
||||
//}
|
||||
//}
|
||||
//MPI_Barrier(_icomm);
|
||||
|
||||
// ---- store the valence (e.g., the number of subdomains it belongs to) of all local vertices
|
||||
_valence.resize(Nnodes(),1);
|
||||
for (size_t lk = 0; lk < _loc_itf.size(); ++lk)
|
||||
{
|
||||
_valence[_loc_itf[lk]] = neigh_itf[lk].size();
|
||||
}
|
||||
//DebugVector(_valence,"_valence",_icomm);
|
||||
|
||||
// ---- We ware going to use MPI_Alltoallv for data exchange on interfaces
|
||||
// https://www.mpi-forum.org/docs/mpi-3.1/mpi31-report/node109.htm#Node109
|
||||
// https://www.open-mpi.org/doc/v4.0/man3/MPI_Alltoallv.3.php
|
||||
//int MPI_Alltoallv(const void* sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void* recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, MPI_Comm comm)
|
||||
//
|
||||
// MPI_Alltoallv needs:
|
||||
// vector<double> sendbuf (MPI_IN_PLACE: used also as recvbuf)
|
||||
// vector<int> sendcounts (the same as for recv)
|
||||
// vector<int> sdispls (the same as for recv)
|
||||
//
|
||||
// We need to map the interface vertices onto the sendbuffer:
|
||||
// vector<int> loc_itf local index of interface vertex lk
|
||||
// vector<int> gloc_itf global index of interface vertex lk
|
||||
// vector<int> buf2loc local indices of sendbuffer positions (the same as for recv)
|
||||
|
||||
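    //
    // A small worked example (illustrative only, not part of the original code):
    // assume 3 MPI ranks and, on this rank, two interface vertices with
    //    neigh_itf = { {0,1}, {0,1,2} }     (vertex 0 shared with rank 1,
    //                                        vertex 1 shared with ranks 1 and 2;
    //                                        the own rank is always contained).
    // Counting the entries per destination rank then yields
    //    sendcounts = { 2, 2, 1 }           (both vertices go to ranks 0 and 1,
    //                                        only the second one to rank 2),
    //    sdispls    = { 0, 2, 4 }           (exclusive prefix sums of sendcounts),
    //    buf2loc    = { v0, v1, v0, v1, v1 }  with v0 = _loc_itf[0], v1 = _loc_itf[1].
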
    // ---- Determine sendcounts[] and sdispls[] from neigh_itf[]
    //vector<int> _sendcounts(NumProcs(), 0);
    _sendcounts.resize(NumProcs(), 0);
    for (size_t lk = 0; lk < _loc_itf.size(); ++lk) {
        auto const &kneigh = neigh_itf[lk];
        for (size_t ns = 0; ns < kneigh.size(); ++ns) {
            ++_sendcounts[kneigh[ns]];
        }
    }
    //if (MyRank() == 0) cout << "\n..._sendcounts ...\n" << _sendcounts << endl;

    //vector<int> _sdispls(NumProcs(), 0);
    _sdispls.resize(NumProcs(), 0);
    partial_sum(_sendcounts.cbegin(), _sendcounts.cend() - 1, _sdispls.begin() + 1);
    //vector<int> _sdispls(NumProcs()+1, 0);
    //partial_sum(_sendcounts.cbegin(), _sendcounts.cend(), _sdispls.begin() + 1);
    //if (MyRank() == 0) cout << "\n..._sdispls ...\n" << _sdispls << endl;

    // ---- Determine the size of the buffer 'nbuffer' and the mapping 'buf2loc'
    int const nbuffer = accumulate(_sendcounts.cbegin(), _sendcounts.cend(), 0);
    //vector<int> _buf2loc(nbuffer, -1);
    _buf2loc.resize(nbuffer, -1);
    int buf_idx = 0;                          // position in buffer
    for (int rank = 0; rank < NumProcs(); ++rank) {
        assert( buf_idx == _sdispls[rank] );
        for (size_t lk = 0; lk < _loc_itf.size(); ++lk) {
            auto const &kneigh = neigh_itf[lk];
            if (find(kneigh.cbegin(), kneigh.cend(), rank) != kneigh.cend())
            {
                _buf2loc[buf_idx] = _loc_itf[lk];
                ++buf_idx;
            }
        }
    }
    //if (MyRank() == 0) cout << "\n..._buf2loc ...\n" << _buf2loc << endl;
    //DebugVector(_buf2loc,"_buf2loc",_icomm);

    // ---- Allocate the send/recv buffer
    //vector<double> _sendbuf(nbuffer,-1.0);
    _sendbuf.resize(nbuffer, -1.0);

    assert(CheckInterfaceExchange_InPlace());
    cout << " Check of data exchange (InPlace) successful!\n";
    assert(CheckInterfaceExchange());
    cout << " Check of data exchange successful!\n";
    assert(CheckInterfaceAdd_InPlace());
    cout << " Check of data add (InPlace) successful!\n";
    assert(CheckInterfaceAdd());
    cout << " Check of data add successful!\n";

    vector<double> x(Nnodes(), -1.0);
    VecAccu(x);
    cout << " VecAccu (InPlace) successful!\n";

    return;
}

bool ParMesh::CheckInterfaceExchange_InPlace() const
{
    vector<double> x(Nnodes(), -1.0);
    copy(_v_l2g.cbegin(), _v_l2g.cend(), x.begin());    // init x with the global vertex indices

    for (size_t ls = 0; ls < _sendbuf.size(); ++ls)
    {
        _sendbuf[ls] = x[_buf2loc.at(ls)];
    }
    int ierr = MPI_Alltoallv(MPI_IN_PLACE, _sendcounts.data(), _sdispls.data(), MPI_DOUBLE,
                             _sendbuf.data(), _sendcounts.data(), _sdispls.data(), MPI_DOUBLE, _icomm);
    assert(ierr == 0);
    //DebugVector(_sendbuf,"_sendbuf",_icomm);

    vector<double> y(x);
    for (size_t lk = 0; lk < _loc_itf.size(); ++lk)  y[_loc_itf.at(lk)] = -1.0;  // only for interface nodes
    for (size_t ls = 0; ls < _sendbuf.size(); ++ls)
    {
        y[_buf2loc.at(ls)] = _sendbuf[ls];
    }

    double const eps = 1e-10;
    bool bv = equal(x.cbegin(), x.cend(), y.cbegin(),
                    [eps](double a, double b) -> bool
                    { return std::abs(a-b) < eps*(1.0+0.5*(std::abs(a)+std::abs(b))); }
                   );
    return bv;
}

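// Note (added explanation, not in the original source): with MPI_IN_PLACE as
// sendbuf, MPI_Alltoallv ignores the send arguments and takes the data from,
// and writes the result back to, the receive buffer (_sendbuf here). This is
// valid because the interface pattern is symmetric, i.e., the send and receive
// counts and displacements coincide per rank.
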
bool ParMesh::CheckInterfaceExchange() const
{
    vector<double> x(Nnodes(), -1.0);
    copy(_v_l2g.cbegin(), _v_l2g.cend(), x.begin());    // init x with the global vertex indices

    for (size_t ls = 0; ls < _sendbuf.size(); ++ls)
    {
        _sendbuf[ls] = x[_buf2loc.at(ls)];
    }
    vector<double> recvbuf(_sendbuf.size());
    int ierr = MPI_Alltoallv(_sendbuf.data(), _sendcounts.data(), _sdispls.data(), MPI_DOUBLE,
                             recvbuf.data(), _sendcounts.data(), _sdispls.data(), MPI_DOUBLE, _icomm);
    //DebugVector(_sendbuf,"_sendbuf",_icomm);
    //DebugVector(recvbuf,"recvbuf",_icomm);
    assert(ierr == 0);

    vector<double> y(x);
    for (size_t lk = 0; lk < _loc_itf.size(); ++lk)  y[_loc_itf.at(lk)] = -1.0;  // only for interface nodes
    for (size_t ls = 0; ls < recvbuf.size(); ++ls)
    {
        y[_buf2loc.at(ls)] = recvbuf[ls];
    }
    //cout << "WRONG : " << count(y.cbegin(), y.cend(), -1.0) << endl;

    double const eps = 1e-10;
    bool bv = equal(x.cbegin(), x.cend(), y.cbegin(),
                    [eps](double a, double b) -> bool
                    { return std::abs(a-b) < eps*(1.0+0.5*(std::abs(a)+std::abs(b))); }
                   );
    return bv;
}

bool ParMesh::CheckInterfaceAdd_InPlace() const
{
    vector<double> x(Nnodes(), -1.0);
    for (size_t i = 0; i < x.size(); ++i)
    {
        x[i] = _xc[2*i] + _xc[2*i+1];         // init x with coordinate values
    }

    for (size_t ls = 0; ls < _sendbuf.size(); ++ls)
    {
        _sendbuf[ls] = x[_buf2loc.at(ls)];
    }
    int ierr = MPI_Alltoallv(MPI_IN_PLACE, _sendcounts.data(), _sdispls.data(), MPI_DOUBLE,
                             _sendbuf.data(), _sendcounts.data(), _sdispls.data(), MPI_DOUBLE, _icomm);
    assert(ierr == 0);
    //DebugVector(_sendbuf,"_sendbuf",_icomm);

    vector<double> y(x);
    for (size_t lk = 0; lk < _loc_itf.size(); ++lk)  y[_loc_itf.at(lk)] = 0.0;   // only for interface nodes
    for (size_t ls = 0; ls < _sendbuf.size(); ++ls)
    {
        y[_buf2loc.at(ls)] += _sendbuf[ls];
    }
    MPI_Barrier(_icomm);
    //DebugVector(x,"x",_icomm);
    //DebugVector(y,"y",_icomm);
    for (size_t i = 0; i < y.size(); ++i)  y[i] /= _valence[i];   // divide by valence

    double const eps = 1e-10;
    bool bv = equal(x.cbegin(), x.cend(), y.cbegin(),
                    [eps](double a, double b) -> bool
                    { return std::abs(a-b) < eps*(1.0+0.5*(std::abs(a)+std::abs(b))); }
                   );
    return bv;
}

bool ParMesh::CheckInterfaceAdd() const
{
    vector<double> x(Nnodes(), -1.0);
    for (size_t i = 0; i < x.size(); ++i)
    {
        //x[i] = _xc[2*i] + _xc[2*i+1];       // init x with coordinate values
        x[i] = _v_l2g[i];
    }

    for (size_t ls = 0; ls < _sendbuf.size(); ++ls)
    {
        _sendbuf[ls] = x[_buf2loc.at(ls)];
    }
    vector<double> recvbuf(_sendbuf.size());
    int ierr = MPI_Alltoallv(_sendbuf.data(), _sendcounts.data(), _sdispls.data(), MPI_DOUBLE,
                             recvbuf.data(), _sendcounts.data(), _sdispls.data(), MPI_DOUBLE, _icomm);
    //DebugVector(_sendbuf,"_sendbuf",_icomm);
    //DebugVector(recvbuf,"recvbuf",_icomm);
    assert(ierr == 0);

    vector<double> y(x);
    for (size_t lk = 0; lk < _loc_itf.size(); ++lk)  y[_loc_itf.at(lk)] = 0.0;   // only for interface nodes
    for (size_t ls = 0; ls < recvbuf.size(); ++ls)
    {
        //if (0==MyRank()) cout << ls << ": " << _buf2loc.at(ls) << " " << y[_buf2loc.at(ls)] << "("<< x[_buf2loc.at(ls)] << ")" << " " << recvbuf[ls] << " (" << _sendbuf[ls] << ")" << endl;
        y[_buf2loc.at(ls)] += recvbuf[ls];
    }
    MPI_Barrier(_icomm);
    //DebugVector(x,"x",_icomm);
    //DebugVector(y,"y",_icomm);
    for (size_t i = 0; i < y.size(); ++i)  y[i] /= _valence[i];   // divide by valence

    double const eps = 1e-10;
    bool bv = equal(x.cbegin(), x.cend(), y.cbegin(),
                    [eps](double a, double b) -> bool
                    { return std::abs(a-b) < eps*(1.0+0.5*(std::abs(a)+std::abs(b))); }
                   );
    return bv;
}


// ----------

void ParMesh::VecAccu(std::vector<double> &w) const
{
    for (size_t ls = 0; ls < _sendbuf.size(); ++ls)
    {
        _sendbuf[ls] = w[_buf2loc.at(ls)];
    }
    int ierr = MPI_Alltoallv(MPI_IN_PLACE, _sendcounts.data(), _sdispls.data(), MPI_DOUBLE,
                             _sendbuf.data(), _sendcounts.data(), _sdispls.data(), MPI_DOUBLE, _icomm);
    assert(ierr == 0);
    //DebugVector(_sendbuf,"_sendbuf",_icomm);

    for (size_t lk = 0; lk < _loc_itf.size(); ++lk)  w[_loc_itf.at(lk)] = 0.0;   // only for interface nodes
    for (size_t ls = 0; ls < _sendbuf.size(); ++ls)
    {
        w[_buf2loc.at(ls)] += _sendbuf[ls];
    }

    return;
}

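// Illustrative note (not in the original source): VecAccu implements the
// accumulation w |-> sum over all subdomains sharing a vertex. For a vertex
// shared by ranks p and q with local values w_p and w_q, both local entries
// hold w_p + w_q afterwards; inner vertices keep their value unchanged.
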
// ##########################################################################
// ##########################################################################


// ---- EX10 ----
void ParMesh::VecAccuInt(std::vector<int> &w) const
{
    // Note: the double-valued _sendbuf is reused for the integer exchange;
    // the int entries are converted to double and back (exact for |value| < 2^53).
    for (size_t ls = 0; ls < _sendbuf.size(); ++ls)
    {
        _sendbuf[ls] = w[_buf2loc.at(ls)];
    }
    int ierr = MPI_Alltoallv(MPI_IN_PLACE, _sendcounts.data(), _sdispls.data(), MPI_DOUBLE,
                             _sendbuf.data(), _sendcounts.data(), _sdispls.data(), MPI_DOUBLE, _icomm);
    assert(ierr == 0);
    //DebugVector(_sendbuf,"_sendbuf",_icomm);

    for (size_t lk = 0; lk < _loc_itf.size(); ++lk)  w[_loc_itf.at(lk)] = 0;    // only for interface nodes
    for (size_t ls = 0; ls < _sendbuf.size(); ++ls)
    {
        w[_buf2loc.at(ls)] += static_cast<int>(_sendbuf[ls]);
    }

    return;
}

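// Usage sketch (illustrative, not in the original source): accumulating a
// vector of ones reproduces the valence of every local vertex and thus gives
// a cheap consistency check of the interface setup.
//
//    std::vector<int> ones(Nnodes(), 1);
//    VecAccuInt(ones);          // afterwards: ones[k] == _valence[k]
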
// ---- EX11 ----
int ParMesh::GlobalNodes() const
{
    // Each vertex contributes 1/valence on every subdomain it belongs to, so
    // the contributions sum up to exactly 1 per global vertex. The sum has to
    // be accumulated in floating point: with an integer local_count the
    // fractional contributions 1/valence would be truncated away.
    double local_count = 0.0;
    for (int i = 0; i < Nnodes(); ++i) {
        local_count += 1.0 / _valence[i];
    }

    double global_count = 0.0;
    MPI_Allreduce(&local_count, &global_count, 1, MPI_DOUBLE, MPI_SUM, _icomm);

    return static_cast<int>(global_count + 0.5);    // round to the nearest integer
}

// ---- EX12 ----
void ParMesh::Average(std::vector<double> &w) const
{
    for (size_t ls = 0; ls < _sendbuf.size(); ++ls)
    {
        _sendbuf[ls] = w[_buf2loc.at(ls)];
    }
    int ierr = MPI_Alltoallv(MPI_IN_PLACE, _sendcounts.data(), _sdispls.data(), MPI_DOUBLE,
                             _sendbuf.data(), _sendcounts.data(), _sdispls.data(), MPI_DOUBLE, _icomm);
    assert(ierr == 0);
    //DebugVector(_sendbuf,"_sendbuf",_icomm);

    for (size_t lk = 0; lk < _loc_itf.size(); ++lk)  w[_loc_itf.at(lk)] = 0.0;   // only for interface nodes
    for (size_t ls = 0; ls < _sendbuf.size(); ++ls)
    {
        w[_buf2loc.at(ls)] += _sendbuf[ls];
    }

    // Divide the value of each interface node by its valence
    for (size_t lk = 0; lk < _loc_itf.size(); ++lk)  w[_loc_itf.at(lk)] /= _valence[_loc_itf.at(lk)];
}

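// Note (added explanation, not in the original source): Average() is the
// valence-weighted counterpart of VecAccu(): it accumulates the interface
// values and divides by the valence, i.e., it stores the arithmetic mean of
// the subdomain contributions. Applied to interface-consistent data it acts
// as the identity.
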
150
ex7/code/task4/accu.template/par_geom.h
Normal file
150
ex7/code/task4/accu.template/par_geom.h
Normal file
|
|
@ -0,0 +1,150 @@
|
|||
#ifndef PAR_GEOM_FILE
#define PAR_GEOM_FILE
#include "geom.h"
#include "vdop.h"
#include <array>
#include <functional>          // function; C++11
#include <iostream>
#include <map>
#include <memory>              // shared_ptr
#include <mpi.h>               // MPI
#include <string>
#include <vector>

class ParMesh: public Mesh
{
public:
    /**
     * Constructor initializing the members with default values.
     *
     * @param[in] ndim    space dimensions (dimension for coordinates)
     * @param[in] nvert_e number of vertices per element (dimension for connectivity)
     * @param[in] ndof_e  degrees of freedom per element (= @p nvert_e for linear elements)
     * @param[in] nedge_e number of edges per element (= @p nvert_e for linear elements in 2D)
     * @param[in] icomm   MPI communicator
     */
    explicit ParMesh(int ndim, int nvert_e = 0, int ndof_e = 0, int nedge_e = 0, MPI_Comm const &icomm = MPI_COMM_WORLD);

    ParMesh(ParMesh const &) = default;

    ParMesh &operator=(ParMesh const &) = delete;


    /**
     * Destructor.
     *
     * See clang warning on
     * <a href="https://stackoverflow.com/questions/28786473/clang-no-out-of-line-virtual-method-definitions-pure-abstract-c-class/40550578">weak-vtables</a>.
     */
    virtual ~ParMesh();

    /**
     * Reads mesh data from an ASCII file.
     *
     * @param[in] sname suffix of file name
     * @param[in] icomm MPI communicator
     * @see ascii_write_mesh.m for the file format.
     */
    explicit ParMesh(std::string const &sname, MPI_Comm const &icomm = MPI_COMM_WORLD);

    void VecAccu(std::vector<double> &w) const;
    void VecAccuInt(std::vector<int> &w) const;
    int  GlobalNodes() const;
    void Average(std::vector<double> &w) const;

    /** Inner product
     * @param[in] x vector
     * @param[in] y vector
     * @return resulting Euclidean inner product <x,y>
     */
    double dscapr(std::vector<double> const &x, std::vector<double> const &y) const
    {
        return par_scalar(x, y, _icomm);
    }

private:
    /**
     * Reads the global triangle-to-subdomain mapping.
     *
     * @param[in] dname file name
     *
     * @see ascii_write_subdomains.m for the file format
     */
    std::vector<int> ReadElementSubdomains(std::string const &dname);


    /**
     * Transforms the mesh data of this subdomain from global to local (subdomain) numbering.
     *
     * @param[in] myrank MPI rank of this process
     * @param[in] t2d    global mapping triangle to subdomain for all elements (vertex based)
     */
    void Transform_Local2Global_Vertex(int myrank, std::vector<int> const &t2d);


    /**
     * Generates the data structures for the accumulation of interface data (vector add).
     */
    void Generate_VectorAdd();

    bool CheckInterfaceExchange_InPlace() const;
    bool CheckInterfaceExchange() const;
    bool CheckInterfaceAdd_InPlace() const;
    bool CheckInterfaceAdd() const;


public:
    /** MPI rank of the calling process in the communication group.
     *
     * @return MPI rank of the calling process
     */
    int MyRank() const
    {
        return _myrank;
    }

    /** Number of MPI processes in the communication group.
     *
     * @return Number of MPI processes
     */
    int NumProcs() const
    {
        return _numprocs;
    }

    /** Returns the MPI communicator used by this mesh.
     * @return MPI communicator
     */
    MPI_Comm GetCommunicator() const
    {
        return _icomm;
    }

private:
    // Don't use &_icomm ==> Error
    MPI_Comm const _icomm;                 //!< MPI communicator for the group of processes
    int _numprocs;                         //!< number of MPI processes
    int _myrank;                           //!< my MPI rank
    std::vector<int>   _v_l2g;             //!< vertices: local to global mapping
    std::vector<int>   _t_l2g;             //!< triangles: local to global mapping
    std::map<int, int> _v_g2l;             //!< vertices: global to local mapping
    std::map<int, int> _t_g2l;             //!< triangles: global to local mapping

    //std::vector<int> e_l2g;              //!< edges: local to global mapping

    std::vector<int> _valence;             //!< valence of local vertices, i.e., number of subdomains they belong to
    // MPI_Alltoallv needs:
    mutable std::vector<double> _sendbuf;  //!< send buffer  a n d  receiving buffer (MPI_IN_PLACE)
    std::vector<int> _sendcounts;          //!< number of data to send to each MPI rank (the same as for recv)
    std::vector<int> _sdispls;             //!< offset of data to send to each MPI rank wrt. _sendbuf (the same as for recv)
    //
    // We need to map the interface vertices onto the sendbuffer:
    std::vector<int> _loc_itf;             //!< local  index of interface vertex lk
    std::vector<int> _gloc_itf;            //!< global index of interface vertex lk
    std::vector<int> _buf2loc;             //!< local indices of sendbuffer positions (the same as for recv)

};


#endif

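A minimal driver sketch for the class above (illustrative only; the mesh-name suffix "square_4" and the public inheritance of Nnodes() from Mesh are assumptions, not fixed by this commit):

#include "par_geom.h"
#include <iostream>
#include <vector>

int main(int argc, char *argv[])
{
    MPI_Init(&argc, &argv);
    {
        ParMesh const mesh("square_4", MPI_COMM_WORLD);  // hypothetical mesh name
        std::vector<double> w(mesh.Nnodes(), 1.0);       // 1 at every local vertex
        mesh.VecAccu(w);                                 // interface entries now hold their valence
        double const s = mesh.dscapr(w, w);              // global inner product
        if (0 == mesh.MyRank())  std::cout << "<w,w> = " << s << std::endl;
    }
    MPI_Finalize();
    return 0;
}
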
71
ex7/code/task4/accu.template/square_2.m
Normal file
71
ex7/code/task4/accu.template/square_2.m
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
% Square:
%   flatpak run org.octave.Octave <filename>
% or
%   octave --no-window-system --no-gui -qf <filename>

clear all
clc
% %% L-shape
% g=[2 0 2 0   0 1 0;   % #vertices, v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
%    2 2 2 0   1 1 0;
%    2 2 1 1 0.5 1 0;
%    2 1 1 0.5 2 1 0;
%    2 1 0 2   2 1 0;
%    2 0 0 2   0 1 0]';

%% square
% g=[2 0 1 0 0 1 0;     % #vertices, v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
%    2 1 1 0 1 1 0;
%    2 1 0 1 1 1 0;
%    2 0 0 1 0 1 0]';

%% 2 squares
g=[2 0 1 0 0 1 0;       % 1  #vertices, v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
   2 1 1 0 1 1 2;
   2 1 0 1 1 1 0;
   2 0 0 1 0 1 0;
   2 1 2 0 0 2 0;       % 2  #vertices, v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
   2 2 2 0 1 2 0;
   2 2 1 1 1 2 0
  ]';

% %% 4 squares
% g=[2 0 1 0 0 1 0;     % 1  #vertices, v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
%    2 1 1 0 1 1 2;
%    2 1 0 1 1 1 3;
%    2 0 0 1 0 1 0;
%    2 1 2 0 0 2 0;     % 2  #vertices, v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
%    2 2 2 0 1 2 0;
%    2 2 1 1 1 2 4;
% %  2 1 1 1 0 2 1;
% %  2 0 1 1 1 3 1;     % 3  #vertices, v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
%    2 1 1 1 2 3 4;
%    2 1 0 2 2 3 0;
%    2 0 0 2 1 3 0;
% %  2 1 2 1 1 4 2;     % 4  #vertices, v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
%    2 2 2 1 2 4 0;
%    2 2 1 2 2 4 0
% %  2 1 1 2 1 4 3
%   ]';

[p,e,t] = initmesh(g,'hmax',0.1);
pdemesh(p,e,t)

%% GH
% output from <https://de.mathworks.com/help/pde/ug/initmesh.html initmesh>
%
% coordinates  p: [2][nnode]
% connectivity t: [4][nelem]   with t(4,:) are the subdomain numbers
% edges        e: [7][nedges]  boundary edges
%    e([1,2],:) - start/end vertex of edge
%    e([3,4],:) - start/end values
%    e(5,:)     - segment number
%    e([6,7],:) - left/right subdomain

ascii_write_mesh( p, t, e, mfilename);

ascii_write_subdomains( p, t, e, mfilename);


% tmp=t(1:3,:)

1086
ex7/code/task4/accu.template/square_2.txt
Normal file
1086
ex7/code/task4/accu.template/square_2.txt
Normal file
File diff suppressed because it is too large
Load diff
653
ex7/code/task4/accu.template/square_2_sd.txt
Normal file
653
ex7/code/task4/accu.template/square_2_sd.txt
Normal file
File diff suppressed because it is too large
Load diff
71
ex7/code/task4/accu.template/square_4.m
Normal file
71
ex7/code/task4/accu.template/square_4.m
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
% Square:
%   flatpak run org.octave.Octave <filename>
% or
%   octave --no-window-system --no-gui -qf <filename>

clear all
clc
% %% L-shape
% g=[2 0 2 0   0 1 0;   % #vertices, v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
%    2 2 2 0   1 1 0;
%    2 2 1 1 0.5 1 0;
%    2 1 1 0.5 2 1 0;
%    2 1 0 2   2 1 0;
%    2 0 0 2   0 1 0]';

%% square
% g=[2 0 1 0 0 1 0;     % #vertices, v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
%    2 1 1 0 1 1 0;
%    2 1 0 1 1 1 0;
%    2 0 0 1 0 1 0]';

% %% 2 squares
% g=[2 0 1 0 0 1 0;     % 1  #vertices, v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
%    2 1 1 0 1 1 2;
%    2 1 0 1 1 1 0;
%    2 0 0 1 0 1 0;
%    2 1 2 0 0 2 0;     % 2  #vertices, v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
%    2 2 2 0 1 2 0;
%    2 2 1 1 1 2 0
%   ]';

%% 4 squares
g=[2 0 1 0 0 1 0;       % 1  #vertices, v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
   2 1 1 0 1 1 2;
   2 1 0 1 1 1 3;
   2 0 0 1 0 1 0;
   2 1 2 0 0 2 0;       % 2  #vertices, v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
   2 2 2 0 1 2 0;
   2 2 1 1 1 2 4;
%  2 1 1 1 0 2 1;
%  2 0 1 1 1 3 1;       % 3  #vertices, v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
   2 1 1 1 2 3 4;
   2 1 0 2 2 3 0;
   2 0 0 2 1 3 0;
%  2 1 2 1 1 4 2;       % 4  #vertices, v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
   2 2 2 1 2 4 0;
   2 2 1 2 2 4 0
%  2 1 1 2 1 4 3
  ]';

[p,e,t] = initmesh(g,'hmax',0.1);
pdemesh(p,e,t)

%% GH
% output from <https://de.mathworks.com/help/pde/ug/initmesh.html initmesh>
%
% coordinates  p: [2][nnode]
% connectivity t: [4][nelem]   with t(4,:) are the subdomain numbers
% edges        e: [7][nedges]  boundary edges
%    e([1,2],:) - start/end vertex of edge
%    e([3,4],:) - start/end values
%    e(5,:)     - segment number
%    e([6,7],:) - left/right subdomain

ascii_write_mesh( p, t, e, mfilename);

ascii_write_subdomains( p, t, e, mfilename);


% tmp=t(1:3,:)

BIN
ex7/code/task4/accu.template/square_4.pdf
Normal file
BIN
ex7/code/task4/accu.template/square_4.pdf
Normal file
Binary file not shown.
2176
ex7/code/task4/accu.template/square_4.txt
Normal file
2176
ex7/code/task4/accu.template/square_4.txt
Normal file
File diff suppressed because it is too large
Load diff
1335
ex7/code/task4/accu.template/square_4_sd.txt
Normal file
1335
ex7/code/task4/accu.template/square_4_sd.txt
Normal file
File diff suppressed because it is too large
Load diff
98
ex7/code/task4/accu.template/square_6.m
Normal file
98
ex7/code/task4/accu.template/square_6.m
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
% Square:
%   flatpak run org.octave.Octave <filename>
% or
%   octave --no-window-system --no-gui -qf <filename>

clear all
clc
% %% L-shape
% g=[2 0 2 0   0 1 0;   % #vertices, v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
%    2 2 2 0   1 1 0;
%    2 2 1 1 0.5 1 0;
%    2 1 1 0.5 2 1 0;
%    2 1 0 2   2 1 0;
%    2 0 0 2   0 1 0]';

%% square
% g=[2 0 1 0 0 1 0;     % #vertices, v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
%    2 1 1 0 1 1 0;
%    2 1 0 1 1 1 0;
%    2 0 0 1 0 1 0]';

% %% 2 squares
% g=[2 0 1 0 0 1 0;     % 1  #vertices, v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
%    2 1 1 0 1 1 2;
%    2 1 0 1 1 1 0;
%    2 0 0 1 0 1 0;
%    2 1 2 0 0 2 0;     % 2  #vertices, v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
%    2 2 2 0 1 2 0;
%    2 2 1 1 1 2 0
%   ]';

% %% 4 squares
% g=[2 0 1 0 0 1 0;     % 1  #vertices, v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
%    2 1 1 0 1 1 2;
%    2 1 0 1 1 1 3;
%    2 0 0 1 0 1 0;
%    2 1 2 0 0 2 0;     % 2  #vertices, v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
%    2 2 2 0 1 2 0;
%    2 2 1 1 1 2 4;
% %  2 1 1 1 0 2 1;
% %  2 0 1 1 1 3 1;     % 3  #vertices, v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
%    2 1 1 1 2 3 4;
%    2 1 0 2 2 3 0;
%    2 0 0 2 1 3 0;
% %  2 1 2 1 1 4 2;     % 4  #vertices, v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
%    2 2 2 1 2 4 0;
%    2 2 1 2 2 4 0
% %  2 1 1 2 1 4 3
%   ]';

%% 6 squares
g=[2 0 1 0 0 1 0;       % 1  #vertices, v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
   2 1 1 0 1 1 2;
   2 1 0 1 1 1 3;
   2 0 0 1 0 1 0;
   2 1 2 0 0 2 0;       % 2  #vertices, v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
   2 2 2 0 1 2 5;
   2 2 1 1 1 2 4;
%  2 1 1 1 0 2 1;
%  2 0 1 1 1 3 1;       % 3  #vertices, v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
   2 1 1 1 2 3 4;
   2 1 0 2 2 3 0;
   2 0 0 2 1 3 0;
%  2 1 2 1 1 4 2;       % 4  #vertices, v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
   2 2 2 1 2 4 6;
   2 2 1 2 2 4 0;
%  2 1 1 2 1 4 3;
   2 2 3 0 0 5 0;       % 5  #vertices, v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
   2 3 3 0 1 5 0;
   2 3 2 1 1 5 6;
%  2 2 2 1 0 5 2;
%  2 2 3 1 1 6 5;       % 6  #vertices, v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
   2 3 3 1 2 6 0;
   2 3 2 2 2 6 0
%  2 2 2 2 1 6 4
  ]';

[p,e,t] = initmesh(g,'hmax',0.1);
pdemesh(p,e,t)

%% GH
% output from <https://de.mathworks.com/help/pde/ug/initmesh.html initmesh>
%
% coordinates  p: [2][nnode]
% connectivity t: [4][nelem]   with t(4,:) are the subdomain numbers
% edges        e: [7][nedges]  boundary edges
%    e([1,2],:) - start/end vertex of edge
%    e([3,4],:) - start/end values
%    e(5,:)     - segment number
%    e([6,7],:) - left/right subdomain

ascii_write_mesh( p, t, e, mfilename);

ascii_write_subdomains( p, t, e, mfilename);


% tmp=t(1:3,:)

3302
ex7/code/task4/accu.template/square_6.txt
Normal file
3302
ex7/code/task4/accu.template/square_6.txt
Normal file
File diff suppressed because it is too large
Load diff
2039
ex7/code/task4/accu.template/square_6_sd.txt
Normal file
2039
ex7/code/task4/accu.template/square_6_sd.txt
Normal file
File diff suppressed because it is too large
Load diff
704
ex7/code/task4/accu.template/uv.txt
Normal file
704
ex7/code/task4/accu.template/uv.txt
Normal file
File diff suppressed because it is too large
Load diff
135
ex7/code/task4/accu.template/vdop.cpp
Normal file
135
ex7/code/task4/accu.template/vdop.cpp
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
#include "vdop.h"
|
||||
#include <cassert> // assert()
|
||||
#include <cmath>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
using namespace std;
|
||||
|
||||
|
||||
void vddiv(vector<double> & x, vector<double> const& y,
|
||||
vector<double> const& z)
|
||||
{
|
||||
assert( x.size()==y.size() && y.size()==z.size() );
|
||||
size_t n = x.size();
|
||||
|
||||
#pragma omp parallel for
|
||||
for (size_t k = 0; k < n; ++k)
|
||||
{
|
||||
x[k] = y[k] / z[k];
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
//******************************************************************************
|
||||
|
||||
void vdaxpy(std::vector<double> & x, std::vector<double> const& y,
|
||||
double alpha, std::vector<double> const& z )
|
||||
{
|
||||
assert( x.size()==y.size() && y.size()==z.size() );
|
||||
size_t n = x.size();
|
||||
|
||||
#pragma omp parallel for
|
||||
for (size_t k = 0; k < n; ++k)
|
||||
{
|
||||
x[k] = y[k] + alpha * z[k];
|
||||
}
|
||||
return;
|
||||
}
|
||||
//******************************************************************************
|
||||
|
||||
double dscapr(std::vector<double> const& x, std::vector<double> const& y)
|
||||
{
|
||||
assert( x.size()==y.size());
|
||||
size_t n = x.size();
|
||||
|
||||
double s = 0.0;
|
||||
//#pragma omp parallel for reduction(+:s)
|
||||
for (size_t k = 0; k < n; ++k)
|
||||
{
|
||||
s += x[k] * y[k];
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
//******************************************************************************
|
||||
//void DebugVector(vector<double> const &v)
|
||||
//{
|
||||
//cout << "\nVector (nnode = " << v.size() << ")\n";
|
||||
//for (size_t j = 0; j < v.size(); ++j)
|
||||
//{
|
||||
//cout.setf(ios::right, ios::adjustfield);
|
||||
//cout << v[j] << " ";
|
||||
//}
|
||||
//cout << endl;
|
||||
|
||||
//return;
|
||||
//}
|
||||
//******************************************************************************
|
||||
bool CompareVectors(std::vector<double> const& x, int const n, double const y[], double const eps)
|
||||
{
|
||||
bool bn = (static_cast<int>(x.size())==n);
|
||||
if (!bn)
|
||||
{
|
||||
cout << "######### Error: " << "number of elements" << endl;
|
||||
}
|
||||
//bool bv = equal(x.cbegin(),x.cend(),y);
|
||||
bool bv = equal(x.cbegin(),x.cend(),y,
|
||||
[eps](double a, double b) -> bool
|
||||
{ return std::abs(a-b)<eps*(1.0+0.5*(std::abs(a)+ std::abs(b))); }
|
||||
);
|
||||
if (!bv)
|
||||
{
|
||||
assert(static_cast<int>(x.size())==n);
|
||||
cout << "######### Error: " << "values" << endl;
|
||||
}
|
||||
return bn && bv;
|
||||
}
|
||||
|
||||
//******************************************************************************
|
||||
double par_scalar(vector<double> const &x, vector<double> const &y, MPI_Comm const& icomm)
|
||||
{
|
||||
const double s = dscapr(x,y);
|
||||
double sg;
|
||||
MPI_Allreduce(&s,&sg,1,MPI_DOUBLE,MPI_SUM,icomm);
|
||||
|
||||
return(sg);
|
||||
}
|
||||
|
||||
//******************************************************************************
|
||||
void ExchangeAll(vector<double> const &xin, vector<double> &yout, MPI_Comm const &icomm)
|
||||
{
|
||||
int myrank, numprocs,ierr(-1);
|
||||
MPI_Comm_rank(icomm, &myrank); // my MPI-rank
|
||||
MPI_Comm_size(icomm, &numprocs);
|
||||
int const N=xin.size();
|
||||
int const sendcount = N/numprocs; // equal sized junks
|
||||
assert(sendcount*numprocs==N); // really all junk sized?
|
||||
assert(xin.size()==yout.size());
|
||||
|
||||
auto sendbuf = xin.data();
|
||||
auto recvbuf = yout.data();
|
||||
ierr = MPI_Alltoall(sendbuf, sendcount, MPI_DOUBLE,
|
||||
recvbuf, sendcount, MPI_DOUBLE, icomm);
|
||||
assert(0==ierr);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
//******************************************************************************
|
||||
void ExchangeAllInPlace(vector<double> &xin, MPI_Comm const &icomm)
|
||||
{
|
||||
int myrank, numprocs,ierr(-1);
|
||||
MPI_Comm_rank(icomm, &myrank); // my MPI-rank
|
||||
MPI_Comm_size(icomm, &numprocs);
|
||||
int const N=xin.size();
|
||||
int const sendcount = N/numprocs; // equal sized junks
|
||||
assert(sendcount*numprocs==N); // really all junk sized?
|
||||
|
||||
auto sendbuf = xin.data();
|
||||
ierr = MPI_Alltoall(MPI_IN_PLACE, sendcount, MPI_DOUBLE,
|
||||
sendbuf, sendcount, MPI_DOUBLE, icomm);
|
||||
assert(0==ierr);
|
||||
|
||||
return;
|
||||
}
|
||||
166
ex7/code/task4/accu.template/vdop.h
Normal file
166
ex7/code/task4/accu.template/vdop.h
Normal file
|
|
@ -0,0 +1,166 @@
|
|||
#ifndef VDOP_FILE
#define VDOP_FILE
#include <cstdio>              // fflush()
#include <iostream>
#include <mpi.h>               // MPI
#include <string>
#include <vector>

/** @brief Element-wise vector division x_k = y_k/z_k.
 *
 * @param[out] x  target vector
 * @param[in]  y  source vector
 * @param[in]  z  source vector
 *
 */
void vddiv(std::vector<double> &x, std::vector<double> const &y,
           std::vector<double> const &z);

/** @brief Element-wise daxpy operation x(k) = y(k) + alpha*z(k).
 *
 * @param[out] x     target vector
 * @param[in]  y     source vector
 * @param[in]  alpha scalar
 * @param[in]  z     source vector
 *
 */
void vdaxpy(std::vector<double> &x, std::vector<double> const &y,
            double alpha, std::vector<double> const &z );


/** @brief Calculates the Euclidean inner product of two vectors.
 *
 * @param[in] x vector
 * @param[in] y vector
 * @return Euclidean inner product @f$\langle x,y \rangle@f$
 *
 */
double dscapr(std::vector<double> const &x, std::vector<double> const &y);


inline
double L2_scapr(std::vector<double> const &x, std::vector<double> const &y)
{
    return dscapr(x, y) / x.size();
}


/** Parallel inner product
    @param[in] x     vector
    @param[in] y     vector
    @param[in] icomm MPI communicator
    @return resulting Euclidean inner product <x,y>
*/
double par_scalar(std::vector<double> const &x, std::vector<double> const &y,
                  MPI_Comm const &icomm = MPI_COMM_WORLD);



/* ReadIn : input and broadcast of an integer */
inline
int ReadIn(std::string const &ss = std::string(), MPI_Comm const &icomm = MPI_COMM_WORLD)
{
    MPI_Barrier(icomm);
    int myrank;                               /* my rank number */
    MPI_Comm_rank(icomm, &myrank);
    int id;

    if (myrank == 0) {
        std::cout << "\n\n  " << ss << " : Which process do you want to debug ? \n";
        std::cin >> id;
    }
    MPI_Bcast(&id, 1, MPI_INT, 0, icomm);

    return id;
}

/**
 * Print entries of a vector to standard output.
 *
 * @param[in] v     vector values
 * @param[in] ss    string containing the vector name
 * @param[in] icomm communicator group for MPI
 *
 */
//void DebugVector(std::vector<double> const &v);
template <class T>
void DebugVector(std::vector<T> const &v, std::string const &ss = std::string(), MPI_Comm const &icomm = MPI_COMM_WORLD)
{
    MPI_Barrier(icomm);
    int numprocs;                             /* # processes */
    MPI_Comm_size(icomm, &numprocs);
    int myrank;                               /* my rank number */
    MPI_Comm_rank(icomm, &myrank);

    int readid = ReadIn(ss);                  /* read the rank to inspect */

    while ( (0 <= readid) && (readid < numprocs) ) {
        if (myrank == readid) {
            std::cout << "\n\n process " << readid;
            std::cout << "\n .... " << ss << " (nnode = " << v.size() << ")\n";
            for (size_t j = 0; j < v.size(); ++j) {
                std::cout.setf(std::ios::right, std::ios::adjustfield);
                std::cout << v[j] << " ";
            }
            std::cout << std::endl;
            fflush(stdout);
        }

        readid = ReadIn(ss, icomm);           /* read the next rank to inspect */
    }
    MPI_Barrier(icomm);
    return;
}

/** @brief Compares an STL vector with a POD vector.
 *
 * The accuracy criterion @f$ |x_k-y_k| < \varepsilon \left({1+0.5(|x_k|+|y_k|)}\right) @f$
 * follows the book by
 * <a href="https://www.springer.com/la/book/9783319446592">Stoyan/Baran</a>, p.8.
 *
 * @param[in] x   STL vector
 * @param[in] n   length of POD vector
 * @param[in] y   POD vector
 * @param[in] eps relative accuracy criterion (default := 0.0).
 * @return true iff pairwise vector elements are relatively close to each other.
 *
 */
bool CompareVectors(std::vector<double> const &x, int n, double const y[], double const eps = 0.0);


/** Output operator for a vector
 * @param[in,out] s output stream, e.g. @p cout
 * @param[in]     v vector
 *
 * @return output stream
 */
template <class T>
std::ostream &operator<<(std::ostream &s, std::vector<T> const &v)
{
    for (auto vp : v) {
        s << vp << " ";
    }
    return s;
}

/** Exchanges equal-sized partitions of vector @p xin with all MPI processes.
 *  The received data are returned in vector @p yout .
 *
 * @param[in]  xin   input vector
 * @param[out] yout  output vector
 * @param[in]  icomm MPI communicator
 *
 */
void ExchangeAll(std::vector<double> const &xin, std::vector<double> &yout, MPI_Comm const &icomm = MPI_COMM_WORLD);

/** Exchanges equal-sized partitions of vector @p xin with all MPI processes.
 *  The received data are returned in vector @p xin .
 *
 * @param[in,out] xin   input/output vector
 * @param[in]     icomm MPI communicator
 *
 */
void ExchangeAllInPlace(std::vector<double> &xin, MPI_Comm const &icomm = MPI_COMM_WORLD);



#endif

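A small self-contained sketch exercising the helpers above (illustrative only; the chunk size of 4 entries per process is an arbitrary assumption):

#include "vdop.h"
#include <iostream>
#include <vector>

int main(int argc, char *argv[])
{
    MPI_Init(&argc, &argv);
    int np, me;
    MPI_Comm_size(MPI_COMM_WORLD, &np);
    MPI_Comm_rank(MPI_COMM_WORLD, &me);

    std::vector<double> x(4 * np, 1.0), y(4 * np, 2.0), z(x);
    vdaxpy(z, x, 3.0, y);               // z = x + 3*y  ==>  7 everywhere
    double const s = par_scalar(z, z);  // global <z,z> over all ranks
    if (0 == me)  std::cout << "<z,z> = " << s << std::endl;

    ExchangeAllInPlace(z);              // length 4*np is divisible by np
    MPI_Finalize();
    return 0;
}
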
20
ex7/code/task4/accu.template/visualize_results.m
Normal file
20
ex7/code/task4/accu.template/visualize_results.m
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
%% Visualize results
%
%   flatpak run org.octave.Octave <filename>
% or
%   octave --no-window-system --no-gui -qf <filename>
%
% or
%   matlab -nosplash < <filename>

clear all
clc

%%
fname = 'uv.txt';

[xc,ia,v] = ascii_read_meshvector(fname);

h = trisurf(ia, xc(:,1), xc(:,2), v);

waitfor(h)   % wait for closing the figure