Response Ex 7
This commit is contained in:
parent
d911954cb5
commit
52609c6099
7 changed files with 206 additions and 3 deletions
BIN
Sheet7/E14.pdf
Normal file
BIN
Sheet7/E14.pdf
Normal file
Binary file not shown.
165
Sheet7/E14/GH_GCC_default.mk
Normal file
165
Sheet7/E14/GH_GCC_default.mk
Normal file
|
|
@ -0,0 +1,165 @@
|
||||||
|
# Basic Definitions for using GNU-compiler suite sequentially
|
||||||
|
# requires setting of COMPILER=GCC_
|
||||||
|
|
||||||
|
# start make as follows to avoid warnings caused by OpenMPI code
|
||||||
|
# make 2>&1 | grep -v openmpi
|
||||||
|
|
||||||
|
|
||||||
|
#MPI_ROOT=/opt/homebrew/bin/
|
||||||
|
|
||||||
|
|
||||||
|
CC = ${MPI_ROOT}mpicc
|
||||||
|
CXX = ${MPI_ROOT}mpicxx
|
||||||
|
F77 = ${MPI_ROOT}mpif77
|
||||||
|
LINKER = ${CXX}
|
||||||
|
|
||||||
|
# If your 'mpirun ...' reports an error "... not enough slots .." then use the option '--oversubscribe'
|
||||||
|
MPIRUN = ${MPI_ROOT}mpirun --oversubscribe -display-map
|
||||||
|
#MPIRUN = ${MPI_ROOT}mpiexec
|
||||||
|
|
||||||
|
# 2023, Oct 23: "WARNING: There is at least non-excluded one OpenFabrics device found,"
|
||||||
|
# solution according to https://github.com/open-mpi/ompi/issues/11063
|
||||||
|
#MPIRUN += -mca btl ^openib
|
||||||
|
|
||||||
|
# KFU:sauron
|
||||||
|
#CXXFLAGS += -I/software/boost/1_72_0/include
|
||||||
|
|
||||||
|
WARNINGS = -Wall -pedantic -Woverloaded-virtual -Wfloat-equal -Wshadow \
|
||||||
|
-Wredundant-decls -Wunreachable-code -Winline -fmax-errors=1
|
||||||
|
|
||||||
|
# WARNINGS += -Weffc++ -Wextra
|
||||||
|
# -Wno-pragmas
|
||||||
|
CXXFLAGS += -std=c++17 -ffast-math -O3 -march=native ${WARNINGS}
|
||||||
|
# -ftree-vectorizer-verbose=5 -DNDEBUG
|
||||||
|
# -ftree-vectorizer-verbose=2
|
||||||
|
# CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details
|
||||||
|
# CFLAGS = -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2
|
||||||
|
|
||||||
|
# info on vectorization
|
||||||
|
#VECTORIZE = -ftree-vectorize -fdump-tree-vect-blocks=foo.dump
|
||||||
|
#-fdump-tree-pre=stderr
|
||||||
|
VECTORIZE = -ftree-vectorize -fopt-info -ftree-vectorizer-verbose=5
|
||||||
|
#CXXFLAGS += ${VECTORIZE}
|
||||||
|
|
||||||
|
# -funroll-all-loops -msse3
|
||||||
|
#GCC -march=knl -march=broadwell -march=haswell
|
||||||
|
|
||||||
|
# for debugging purposes (safe code)
|
||||||
|
# -fsanitize=leak # only one out of the three can be used
|
||||||
|
# -fsanitize=address
|
||||||
|
# -fsanitize=thread
|
||||||
|
SANITARY = -fsanitize=address -fsanitize=undefined -fsanitize=null -fsanitize=return \
|
||||||
|
-fsanitize=bounds -fsanitize=alignment -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow \
|
||||||
|
-fsanitize=bool -fsanitize=enum -fsanitize=vptr
|
||||||
|
#CXXFLAGS += ${SANITARY}
|
||||||
|
#LINKFLAGS +=${SANITARY}
|
||||||
|
|
||||||
|
# OpenMP
|
||||||
|
CXXFLAGS += -fopenmp
|
||||||
|
LINKFLAGS += -fopenmp
|
||||||
|
|
||||||
|
#LISA, to run on a Mac
|
||||||
|
# OpenMP (macOS + clang)
|
||||||
|
OMPFLAGS = -Xpreprocessor -fopenmp
|
||||||
|
OMPINC = -I/opt/homebrew/opt/libomp/include
|
||||||
|
OMPLIBS = -L/opt/homebrew/opt/libomp/lib -lomp
|
||||||
|
|
||||||
|
#CXXFLAGS += $(OMPFLAGS) $(OMPINC)
|
||||||
|
#LINKFLAGS += $(OMPLIBS)
|
||||||
|
|
||||||
|
|
||||||
|
default: ${PROGRAM}
|
||||||
|
|
||||||
|
${PROGRAM}: ${OBJECTS}
|
||||||
|
$(LINKER) $^ ${LINKFLAGS} -o $@
|
||||||
|
@echo
|
||||||
|
@echo "Start with : $(MPIRUN) -np num_proc $(MPIFLAGS) $(PROGRAM)"
|
||||||
|
@echo
|
||||||
|
|
||||||
|
clean:
|
||||||
|
@rm -f ${PROGRAM} ${OBJECTS} gmon.out
|
||||||
|
|
||||||
|
clean_all:: clean
|
||||||
|
@rm -f *_ *~ *.bak *.log *.out *.tar *.orig
|
||||||
|
@rm -rf html latex
|
||||||
|
|
||||||
|
run: ${PROGRAM}
|
||||||
|
${MPIRUN} -np 4 ./$^
|
||||||
|
|
||||||
|
# tar the current directory
|
||||||
|
MY_DIR = `basename ${PWD}`
|
||||||
|
tar: clean_all
|
||||||
|
@echo "Tar the directory: " ${MY_DIR}
|
||||||
|
@cd .. ;\
|
||||||
|
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
|
||||||
|
cd ${MY_DIR}
|
||||||
|
# tar cf `basename ${PWD}`.tar *
|
||||||
|
|
||||||
|
zip: clean
|
||||||
|
@echo "Zip the directory: " ${MY_DIR}
|
||||||
|
@cd .. ;\
|
||||||
|
zip -r ${MY_DIR}.zip ${MY_DIR} *default.mk ;\
|
||||||
|
cd ${MY_DIR}
|
||||||
|
|
||||||
|
doc:
|
||||||
|
doxygen Doxyfile
|
||||||
|
|
||||||
|
#########################################################################
|
||||||
|
|
||||||
|
.cpp.o:
|
||||||
|
$(CXX) -c $(CXXFLAGS) -o $@ $<
|
||||||
|
# 2>&1 | grep -v openmpi
|
||||||
|
|
||||||
|
# special: get rid of compiler warnings generated by openmpi-files
|
||||||
|
#.cpp.o:
|
||||||
|
# @$(CXX) -c $(CXXFLAGS) $< 2>/tmp/t.txt || grep -sv openmpi /tmp/t.txt
|
||||||
|
# |grep -sv openmpi
|
||||||
|
|
||||||
|
.c.o:
|
||||||
|
$(CC) -c $(CFLAGS) -o $@ $<
|
||||||
|
|
||||||
|
.f.o:
|
||||||
|
$(F77) -c $(FFLAGS) -o $@ $<
|
||||||
|
|
||||||
|
##################################################################################################
|
||||||
|
# some tools
|
||||||
|
# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags)
|
||||||
|
cache: ${PROGRAM}
|
||||||
|
valgrind --tool=callgrind --simulate-cache=yes ./$^
|
||||||
|
# kcachegrind callgrind.out.<pid> &
|
||||||
|
kcachegrind `ls -1tr callgrind.out.* |tail -1`
|
||||||
|
|
||||||
|
# Check for wrong memory accesses, memory leaks, ...
|
||||||
|
# use smaller data sets
|
||||||
|
# no "-pg" in compile/link options
|
||||||
|
mem: ${PROGRAM}
|
||||||
|
valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes mpirun -np 4 ./$^
|
||||||
|
# Graphical interface
|
||||||
|
# valkyrie
|
||||||
|
|
||||||
|
# Simple run time profiling of your code
|
||||||
|
# CXXFLAGS += -g -pg
|
||||||
|
# LINKFLAGS += -pg
|
||||||
|
prof: ${PROGRAM}
|
||||||
|
perf record ./$^
|
||||||
|
perf report
|
||||||
|
# gprof -b ./$^ > gp.out
|
||||||
|
# kprof -f gp.out -p gprof &
|
||||||
|
|
||||||
|
#Trace your heap:
|
||||||
|
#> heaptrack ./main.GCC_
|
||||||
|
#> heaptrack_gui heaptrack.main.GCC_.<pid>.gz
|
||||||
|
heap: ${PROGRAM}
|
||||||
|
heaptrack ./$^ 11
|
||||||
|
heaptrack_gui `ls -1tr heaptrack.$^.* |tail -1` &
|
||||||
|
|
||||||
|
codecheck: $(SOURCES)
|
||||||
|
cppcheck --enable=all --inconclusive --std=c++17 --suppress=missingIncludeSystem $^
|
||||||
|
|
||||||
|
|
||||||
|
########################################################################
|
||||||
|
# get the detailed status of all optimization flags
|
||||||
|
info:
|
||||||
|
echo "detailed status of all optimization flags"
|
||||||
|
$(CXX) --version
|
||||||
|
$(CXX) -Q $(CXXFLAGS) --help=optimizers
|
||||||
|
|
@ -79,15 +79,15 @@ int main(int argc , char **argv )
|
||||||
|
|
||||||
double tstart = MPI_Wtime(); // Wall clock
|
double tstart = MPI_Wtime(); // Wall clock
|
||||||
|
|
||||||
JacobiSolve(SK, fv, uv ); // solve the system of equations
|
//GH JacobiSolve(SK, fv, uv ); // solve the system of equations
|
||||||
JacobiSolveMPI(mesh, SK, fv, uv ); // MPI: solve the system of equations
|
JacobiSolveMPI(mesh, SK, fv, uv ); // MPI: solve the system of equations
|
||||||
|
|
||||||
double t1 = MPI_Wtime() - tstart; // Wall clock
|
double t1 = MPI_Wtime() - tstart; // Wall clock
|
||||||
cout << "JacobiSolve: timing in sec. : " << t1 << endl;
|
cout << "JacobiSolve: timing in sec. : " << t1 << endl;
|
||||||
|
|
||||||
//if (2==myrank || (1==numprocs && 0==myrank) ) mesh.Mesh::Visualize(uv); // Visualize only one subdomain
|
//if (2==myrank || (1==numprocs && 0==myrank) ) mesh.Mesh::Visualize(uv); // Visualize only one subdomain
|
||||||
//mesh.Visualize(uv); // Visualize all subdomains
|
mesh.Visualize(uv); // Visualize all subdomains
|
||||||
|
|
||||||
MPI_Finalize();
|
MPI_Finalize();
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
BIN
Sheet7/Ex_1234.pdf
Normal file
BIN
Sheet7/Ex_1234.pdf
Normal file
Binary file not shown.
BIN
Sheet7/Ex_5678.pdf
Normal file
BIN
Sheet7/Ex_5678.pdf
Normal file
Binary file not shown.
BIN
Sheet7/Ex_9to13.pdf
Normal file
BIN
Sheet7/Ex_9to13.pdf
Normal file
Binary file not shown.
38
Sheet7/gh_response.txt
Normal file
38
Sheet7/gh_response.txt
Normal file
|
|
@ -0,0 +1,38 @@
|
||||||
|
2: OK; rank used
|
||||||
|
|
||||||
|
3:
|
||||||
|
E5: DebugVector(): numprocs iterations (cumbersome)
|
||||||
|
|
||||||
|
E6: OK
|
||||||
|
|
||||||
|
E7: MinMax: incl. global exchange
|
||||||
|
|
||||||
|
E8: Alltoallv: two versions realized
|
||||||
|
|
||||||
|
4:
|
||||||
|
E9: only the initialization from template has been used, not the coordinates [-1]
|
||||||
|
|
||||||
|
E10: _sendbuf is vector<double>
|
||||||
|
VecAccu(...int..) correct
|
||||||
|
|
||||||
|
|
||||||
|
E11: ??
|
||||||
|
Global_Nodes() : Please explain in the lecture
|
||||||
|
|
||||||
|
|
||||||
|
E12: Ok
|
||||||
|
|
||||||
|
E13: not available
|
||||||
|
|
||||||
|
E14:
|
||||||
|
make run COMPILER=GH_GCC_#
|
||||||
|
Iteration counts are different for all 4 processes
|
||||||
|
Code doesn't stop
|
||||||
|
|
||||||
|
See annotated E14.pdf
|
||||||
|
|
||||||
|
GH: commented main.cpp:82
|
||||||
|
GH: uncommented main.cpp:89
|
||||||
|
|
||||||
|
at least, you tried it! [2 pts]
|
||||||
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue