diff --git a/Sheet7/E14.pdf b/Sheet7/E14.pdf new file mode 100644 index 0000000..3875148 Binary files /dev/null and b/Sheet7/E14.pdf differ diff --git a/Sheet7/E14/GH_GCC_default.mk b/Sheet7/E14/GH_GCC_default.mk new file mode 100644 index 0000000..6b63c4d --- /dev/null +++ b/Sheet7/E14/GH_GCC_default.mk @@ -0,0 +1,165 @@ +# Basic Definitions for using GNU-compiler suite sequentially +# requires setting of COMPILER=GCC_ + +#start make as follows to avoid warnings caused by OpenMPI code +# make 2>&1 | grep -v openmpi + + +#MPI_ROOT=/opt/homebrew/bin/ + + +CC = ${MPI_ROOT}mpicc +CXX = ${MPI_ROOT}mpicxx +F77 = ${MPI_ROOT}mpif77 +LINKER = ${CXX} + +# If your 'mpirun ...' reports some error "... not enough slots .." then use the option '--oversubscribe' +MPIRUN = ${MPI_ROOT}mpirun --oversubscribe -display-map +#MPIRUN = ${MPI_ROOT}mpiexec + +# 2023, Oct 23: "WARNING: There is at least non-excluded one OpenFabrics device found," +# solution according to https://github.com/open-mpi/ompi/issues/11063 +#MPIRUN += -mca btl ^openib + +# KFU:sauron +#CXXFLAGS += -I/software/boost/1_72_0/include + +WARNINGS = -Wall -pedantic -Woverloaded-virtual -Wfloat-equal -Wshadow \ + -Wredundant-decls -Wunreachable-code -Winline -fmax-errors=1 + +# WARNINGS += -Weffc++ -Wextra +# -Wno-pragmas +CXXFLAGS += -std=c++17 -ffast-math -O3 -march=native ${WARNINGS} +# -ftree-vectorizer-verbose=5 -DNDEBUG +# -ftree-vectorizer-verbose=2 +# CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details +# CFLAGS = -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2 + +# info on vectorization +#VECTORIZE = -ftree-vectorize -fdump-tree-vect-blocks=foo.dump +#-fdump-tree-pre=stderr +VECTORIZE = -ftree-vectorize -fopt-info -ftree-vectorizer-verbose=5 +#CXXFLAGS += ${VECTORIZE} + +# -funroll-all-loops -msse3 +#GCC -march=knl -march=broadwell -march=haswell + +# for debugging purposes (safe code) +# -fsanitize=leak # only one out of the three can be used +# 
-fsanitize=address +# -fsanitize=thread +SANITARY = -fsanitize=address -fsanitize=undefined -fsanitize=null -fsanitize=return \ + -fsanitize=bounds -fsanitize=alignment -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow \ + -fsanitize=bool -fsanitize=enum -fsanitize=vptr +#CXXFLAGS += ${SANITARY} +#LINKFLAGS +=${SANITARY} + +# OpenMP +CXXFLAGS += -fopenmp +LINKFLAGS += -fopenmp + +#LISA, to run on a Mac +# OpenMP (macOS + clang) +OMPFLAGS = -Xpreprocessor -fopenmp +OMPINC = -I/opt/homebrew/opt/libomp/include +OMPLIBS = -L/opt/homebrew/opt/libomp/lib -lomp + +#CXXFLAGS += $(OMPFLAGS) $(OMPINC) +#LINKFLAGS += $(OMPLIBS) + + +default: ${PROGRAM} + +${PROGRAM}: ${OBJECTS} + $(LINKER) $^ ${LINKFLAGS} -o $@ + @echo + @echo "Start with : $(MPIRUN) -np num_proc $(MPIFLAGS) $(PROGRAM)" + @echo + +clean: + @rm -f ${PROGRAM} ${OBJECTS} gmon.out + +clean_all:: clean + @rm -f *_ *~ *.bak *.log *.out *.tar *.orig + @rm -rf html latex + +run: ${PROGRAM} + ${MPIRUN} -np 4 ./$^ + +# tar the current directory +MY_DIR = `basename ${PWD}` +tar: clean_all + @echo "Tar the directory: " ${MY_DIR} + @cd .. ;\ + tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\ + cd ${MY_DIR} +# tar cf `basename ${PWD}`.tar * + +zip: clean + @echo "Zip the directory: " ${MY_DIR} + @cd .. 
;\ + zip -r ${MY_DIR}.zip ${MY_DIR} *default.mk ;\ + cd ${MY_DIR} + +doc: + doxygen Doxyfile + +######################################################################### + +.cpp.o: + $(CXX) -c $(CXXFLAGS) -o $@ $< +# 2>&1 | grep -v openmpi + +# special: get rid of compiler warnings generated by openmpi-files +#.cpp.o: +# @$(CXX) -c $(CXXFLAGS) $< 2>/tmp/t.txt || grep -sv openmpi /tmp/t.txt +# |grep -sv openmpi + +.c.o: + $(CC) -c $(CFLAGS) -o $@ $< + +.f.o: + $(F77) -c $(FFLAGS) -o $@ $< + +################################################################################################## +# some tools +# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags) +cache: ${PROGRAM} + valgrind --tool=callgrind --simulate-cache=yes ./$^ +# kcachegrind callgrind.out. & + kcachegrind `ls -1tr callgrind.out.* |tail -1` + +# Check for wrong memory accesses, memory leaks, ... +# use smaller data sets +# no "-pg" in compile/link options +mem: ${PROGRAM} + valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes mpirun -np 4 ./$^ + # Graphical interface + # valkyrie + +# Simple run time profiling of your code +# CXXFLAGS += -g -pg +# LINKFLAGS += -pg +prof: ${PROGRAM} + perf record ./$^ + perf report +# gprof -b ./$^ > gp.out +# kprof -f gp.out -p gprof & + +#Trace your heap: +#> heaptrack ./main.GCC_ +#> heaptrack_gui heaptrack.main.GCC_..gz +heap: ${PROGRAM} + heaptrack ./$^ 11 + heaptrack_gui `ls -1tr heaptrack.$^.* |tail -1` & + +codecheck: $(SOURCES) + cppcheck --enable=all --inconclusive --std=c++17 --suppress=missingIncludeSystem $^ + + +######################################################################## +# get the detailed status of all optimization flags +info: + echo "detailed status of all optimization flags" + $(CXX) --version + $(CXX) -Q $(CXXFLAGS) --help=optimizers diff --git a/Sheet7/E14/jacob_template/main.cpp b/Sheet7/E14/jacob_template/main.cpp index 
b268b76..b41c891 100644 --- a/Sheet7/E14/jacob_template/main.cpp +++ b/Sheet7/E14/jacob_template/main.cpp @@ -79,15 +79,15 @@ int main(int argc , char **argv ) double tstart = MPI_Wtime(); // Wall clock - JacobiSolve(SK, fv, uv ); // solve the system of equations + //GH JacobiSolve(SK, fv, uv ); // solve the system of equations JacobiSolveMPI(mesh, SK, fv, uv ); // MPI: solve the system of equations double t1 = MPI_Wtime() - tstart; // Wall clock cout << "JacobiSolve: timing in sec. : " << t1 << endl; //if (2==myrank || (1==numprocs && 0==myrank) ) mesh.Mesh::Visualize(uv); // Visualize only one subdomain - //mesh.Visualize(uv); // Visualize all subdomains + mesh.Visualize(uv); // Visualize all subdomains MPI_Finalize(); return 0; -} \ No newline at end of file +} diff --git a/Sheet7/Ex_1234.pdf b/Sheet7/Ex_1234.pdf new file mode 100644 index 0000000..a4a98f0 Binary files /dev/null and b/Sheet7/Ex_1234.pdf differ diff --git a/Sheet7/Ex_5678.pdf b/Sheet7/Ex_5678.pdf new file mode 100644 index 0000000..3a5624d Binary files /dev/null and b/Sheet7/Ex_5678.pdf differ diff --git a/Sheet7/Ex_9to13.pdf b/Sheet7/Ex_9to13.pdf new file mode 100644 index 0000000..81a3c11 Binary files /dev/null and b/Sheet7/Ex_9to13.pdf differ diff --git a/Sheet7/gh_response.txt b/Sheet7/gh_response.txt new file mode 100644 index 0000000..9b727a9 --- /dev/null +++ b/Sheet7/gh_response.txt @@ -0,0 +1,38 @@ +2: OK; rank used + +3: +E5: DebugVector(): numprocs iterations (cumbersome) + +E6: OK + +E7: MinMax: incl. global exchange + +E8: Alltoallv: two versions realized + +4: +E9: only the initialization from template has been used, not the coordinates [-1] + +E10: _sendbuf is vector +VecAccu(...int..) correct + + +E11: ?? 
+Global_Nodes() : Please explain in the lecture + + +E12: Ok + +E13: not available + +E14: +make run COMPILER=GH_GCC_# +Iteration counts are different for all 4 processes +Code doesn't stop + +See annotated E14.pdf + +GH: commented main.cpp:82 +GH: uncommented main.cpp:89 + +at least, you tried it! [2 pts] +