From 45fdb92841f08ae797ca5acdc5837bc239921949 Mon Sep 17 00:00:00 2001 From: haasegu Date: Wed, 12 Nov 2025 13:58:25 +0100 Subject: [PATCH] response 3 --- CLANG_default.mk | 123 ++++++++++++++++++ GCC_default.mk | 196 +++++++++++++++++++++++++++++ ex3_benchmarks/Makefile | 4 +- ex3_benchmarks/benchmark_tests.cpp | 4 +- ex3_benchmarks/gh_response.txt | 13 ++ 5 files changed, 337 insertions(+), 3 deletions(-) create mode 100644 CLANG_default.mk create mode 100644 GCC_default.mk create mode 100644 ex3_benchmarks/gh_response.txt diff --git a/CLANG_default.mk b/CLANG_default.mk new file mode 100644 index 0000000..4bc290d --- /dev/null +++ b/CLANG_default.mk @@ -0,0 +1,123 @@ +# Basic Defintions for using GNU-compiler suite sequentially +# requires setting of COMPILER=CLANG_ + +#CLANGPATH=//usr/lib/llvm-10/bin/ +CC = ${CLANGPATH}clang +CXX = ${CLANGPATH}clang++ +#CXX = ${CLANGPATH}clang++ -lomptarget -fopenmp-targets=nvptx64-nvidia-cuda --cuda-path=/opt/pgi/linux86-64/2017/cuda/8.0 +#F77 = gfortran +LINKER = ${CXX} + +#http://clang.llvm.org/docs/UsersManual.html#options-to-control-error-and-warning-messages +WARNINGS += -Weverything -Wno-c++98-compat -Wno-sign-conversion -Wno-date-time -Wno-shorten-64-to-32 -Wno-padded -ferror-limit=1 +WARNINGS += -Wdocumentation -Wconversion -Wshadow -Wfloat-conversion -pedantic +#-fsyntax-only -Wdocumentation -Wconversion -Wshadow -Wfloat-conversion -pedantic + +CXXFLAGS += -O3 -std=c++17 -ferror-limit=1 ${WARNINGS} +# don't use -Ofast +# -ftrapv +LINKFLAGS += -O3 + +# different libraries in Ubuntu or manajaró +ifndef UBUNTU +UBUNTU=1 +endif + +# BLAS, LAPACK +LINKFLAGS += -llapack -lblas +# -lopenblas +ifeq ($(UBUNTU),1) +# ubuntu +else +# on archlinux +LINKFLAGS += -lcblas +endif + +# interprocedural optimization +CXXFLAGS += -flto +LINKFLAGS += -flto + +# very good check +# http://clang.llvm.org/extra/clang-tidy/ +# good check, see: http://llvm.org/docs/CodingStandards.html#include-style 
+SWITCH_OFF=,-readability-magic-numbers,-readability-redundant-control-flow,-readability-redundant-member-init +SWITCH_OFF+=,-readability-redundant-member-init,-readability-isolate-declaration +#READABILITY=,readability*${SWITCH_OFF} +#TIDYFLAGS = -checks=llvm-*,-llvm-header-guard -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp" +TIDYFLAGS = -checks=llvm-*,-llvm-header-guard${READABILITY} -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp" +#TIDYFLAGS += -checks='modernize* +# ??? +#TIDYFLAGS = -checks='cert*' -header-filter=.* +# MPI checks ?? +#TIDYFLAGS = -checks='mpi*' +# ?? +#TIDYFLAGS = -checks='performance*' -header-filter=.* +#TIDYFLAGS = -checks='portability-*' -header-filter=.* +#TIDYFLAGS = -checks='readability-*' -header-filter=.* + +default: ${PROGRAM} + +${PROGRAM}: ${OBJECTS} + $(LINKER) $^ ${LINKFLAGS} -o $@ + +clean: + @rm -f ${PROGRAM} ${OBJECTS} + +clean_all:: clean + @rm -f *_ *~ *.bak *.log *.out *.tar + +codecheck: tidy_check +tidy_check: + clang-tidy ${SOURCES} ${TIDYFLAGS} -- ${SOURCES} +# see also http://clang-developers.42468.n3.nabble.com/Error-while-trying-to-load-a-compilation-database-td4049722.html + +run: clean ${PROGRAM} +# time ./${PROGRAM} ${PARAMS} + ./${PROGRAM} ${PARAMS} + +# tar the current directory +MY_DIR = `basename ${PWD}` +tar: clean_all + @echo "Tar the directory: " ${MY_DIR} + @cd .. 
;\ + tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\ + cd ${MY_DIR} +# tar cf `basename ${PWD}`.tar * + +doc: + doxygen Doxyfile + +######################################################################### + +.cpp.o: + $(CXX) -c $(CXXFLAGS) -o $@ $< + +.c.o: + $(CC) -c $(CFLAGS) -o $@ $< + +.f.o: + $(F77) -c $(FFLAGS) -o $@ $< + +################################################################################################## +# some tools +# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags) +cache: ${PROGRAM} + valgrind --tool=callgrind --simulate-cache=yes ./$^ ${PARAMS} +# kcachegrind callgrind.out. & + kcachegrind `ls -1tr callgrind.out.* |tail -1` + +# Check for wrong memory accesses, memory leaks, ... +# use smaller data sets +mem: ${PROGRAM} + valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^ ${PARAMS} + +# Simple run time profiling of your code +# CXXFLAGS += -g -pg +# LINKFLAGS += -pg +prof: ${PROGRAM} + perf record ./$^ ${PARAMS} + perf report +# gprof -b ./$^ > gp.out +# kprof -f gp.out -p gprof & + +codecheck: tidy_check diff --git a/GCC_default.mk b/GCC_default.mk new file mode 100644 index 0000000..e9e0a0c --- /dev/null +++ b/GCC_default.mk @@ -0,0 +1,196 @@ +# Basic Defintions for using GNU-compiler suite sequentially +# requires setting of COMPILER=GCC_ + +CC = gcc +CXX = g++ +F77 = gfortran +LINKER = ${CXX} + +WARNINGS = -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow \ + -Wredundant-decls -fmax-errors=1 +# -Wunreachable-code -Winline +CXXFLAGS += -ffast-math -O3 -march=native -std=c++17 ${WARNINGS} +#CXXFLAGS += -Ofast -funroll-all-loops -std=c++17 ${WARNINGS} +#-msse3 +# -ftree-vectorizer-verbose=2 -DNDEBUG +# -ftree-vectorizer-verbose=5 +# -ftree-vectorize -fdump-tree-vect-blocks=foo.dump -fdump-tree-pre=stderr + +# CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details +# CFLAGS 
= -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2 +# #CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp +# FFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp +# LFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp +LINKFLAGS += -O3 + +#architecture +#CPU = -march=znver2 +CXXFLAGS += ${CPU} +LINKFLAGS += ${CPU} + +# different libraries in Ubuntu or manajaró +ifndef UBUNTU +UBUNTU=1 +endif + +# BLAS, LAPACK +ifeq ($(UBUNTU),1) +LINKFLAGS += -llapack -lblas +# -lopenblas +else +# on archlinux +LINKFLAGS += -llapack -lopenblas -lcblas +endif + +# interprocedural optimization +#CXXFLAGS += -flto +#LINKFLAGS += -flto + +# for debugging purpose (save code) +# -fsanitize=leak # only one out the three can be used +# -fsanitize=address +# -fsanitize=thread +SANITARY = -fsanitize=address -fsanitize=undefined -fsanitize=null -fsanitize=return \ + -fsanitize=bounds -fsanitize=alignment -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow \ + -fsanitize=bool -fsanitize=enum -fsanitize=vptr +#CXXFLAGS += ${SANITARY} +#LINKFLAGS += ${SANITARY} + +# profiling tools +#CXXFLAGS += -pg +#LINKFLAGS += -pg + + +default: ${PROGRAM} + +${PROGRAM}: ${OBJECTS} + $(LINKER) $^ ${LINKFLAGS} -o $@ + +clean: + @rm -f ${PROGRAM} ${OBJECTS} + +clean_all:: clean + -@rm -f *_ *~ *.bak *.log *.out *.tar *.orig *.optrpt + -@rm -rf html + +run: clean ${PROGRAM} +#run: ${PROGRAM} +# time ./${PROGRAM} ${PARAMS} + ./${PROGRAM} ${PARAMS} + +# tar the current directory +MY_DIR = `basename ${PWD}` +tar: clean_all + @echo "Tar the directory: " ${MY_DIR} + @cd .. ;\ + tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\ + cd ${MY_DIR} +# tar cf `basename ${PWD}`.tar * +#find . -size +10M > large_files +#--exclude-from ${MY_DIR}/large_files + +zip: clean + @echo "Zip the directory: " ${MY_DIR} + @cd .. 
;\ + zip -r ${MY_DIR}.zip ${MY_DIR} *default.mk ;\ + cd ${MY_DIR} + +doc: + doxygen Doxyfile + +######################################################################### +.SUFFIXES: .f90 + +.cpp.o: + $(CXX) -c $(CXXFLAGS) -o $@ $< +# $(CXX) -c $(CXXFLAGS) -o $@ $< 2>&1 | tee -a $<.log +# $(CXX) -c $(CXXFLAGS) -o $@ $< 2>&1 | tee -a $(<:.cpp=.log) + +.c.o: + $(CC) -c $(CFLAGS) -o $@ $< + +.f.o: + $(F77) -c $(FFLAGS) -o $@ $< + +.f90.o: + $(F77) -c $(FFLAGS) -o $@ $< + +################################################################################################## +# some tools +# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags) +cache: ${PROGRAM} + valgrind --tool=callgrind --simulate-cache=yes ./$^ ${PARAMS} +# kcachegrind callgrind.out. & + kcachegrind `ls -1tr callgrind.out.* |tail -1` + +# Check for wrong memory accesses, memory leaks, ... +# use smaller data sets +# no "-pg" in compile/link options +mem: ${PROGRAM} + valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^ ${PARAMS} +# Graphical interface +# valkyrie + +# Simple run time profiling of your code +CXXFLAGS += -g -pg +LINKFLAGS += -pg +prof: ${PROGRAM} + ./$^ ${PARAMS} + gprof -b ./$^ > gp.out +# kprof -f gp.out -p gprof & + +# sudo apt install gprofng-gui +# https://parallel.computer/presentations/PPoPP2023/2023-Ruud-Slides.pdf +# read §3 in https://sourceware.org/binutils/docs/gprofng.html +# /usr/bin/gp-collect-app -o test.1.er -p on -S on /home/ghaase/Lectures/Math2CPP/Codes/seq/jacobi_oo_stl/main.GCC_ +prof2: ${PROGRAM} + gprofng collect app -h auto ./$^ ${PARAMS} +# gprofng display text -functions `ls -1tdr test.*.er |tail -1` + gprofng display text -script gprofng_script2 `ls -1tdr test.*.er |tail -1` +# gprofng display text -script gprofng_script2 test.*.er +# gprofng display gui & + +prof3: ${PROGRAM} + perf record ./$^ ${PARAMS} + perf report +# perf in Ubuntu 20.04: 
https://www.howtoforge.com/how-to-install-perf-performance-analysis-tool-on-ubuntu-20-04/ +# * install +# * sudo vi /etc/sysctl.conf +# add kernel.perf_event_paranoid = 0 + +#Trace your heap: +#> heaptrack ./main.GCC_ +#> heaptrack_gui heaptrack.main.GCC_..gz +heap: ${PROGRAM} + heaptrack ./$^ ${PARAMS} + heaptrack_gui `ls -1tr heaptrack.$^.* |tail -1` & + +codecheck: $(SOURCES) + cppcheck --enable=all --inconclusive --std=c++17 --suppress=missingIncludeSystem $^ + + +######################################################################## +# get the detailed status of all optimization flags +info: + echo "detailed status of all optimization flags" + $(CXX) --version + $(CXX) -Q $(CXXFLAGS) --help=optimizers + lscpu + inxi -C + lstopo + +# Excellent hardware info +# hardinfo +# Life monitoring of CPU frequency etc. +# sudo i7z + +# Memory consumption +# vmstat -at -SM 3 +# xfce4-taskmanager + + +# https://www.tecmint.com/check-linux-cpu-information/ +#https://www.tecmint.com/monitor-cpu-and-gpu-temperature-in-ubuntu/ + +# Debugging: +# https://wiki.archlinux.org/index.php/Debugging diff --git a/ex3_benchmarks/Makefile b/ex3_benchmarks/Makefile index c9ac361..1daf792 100644 --- a/ex3_benchmarks/Makefile +++ b/ex3_benchmarks/Makefile @@ -13,7 +13,9 @@ COMPILER=GCC_ # COMPILER=PGI_ -SOURCES = main.cpp benchmarks.cpp benchmark_tests.cpp factorization_solve.cpp factorization_solve_tests.cpp +#SOURCES = main.cpp benchmarks.cpp benchmark_tests.cpp factorization_solve.cpp factorization_solve_tests.cpp +# GH +SOURCES = main.cpp benchmarks.cpp benchmark_tests.cpp factorization_solve.cpp factorization_solve_tests.cpp vdop.cpp getmatrix.cpp OBJECTS = $(SOURCES:.cpp=.o) PROGRAM = main.${COMPILER} diff --git a/ex3_benchmarks/benchmark_tests.cpp b/ex3_benchmarks/benchmark_tests.cpp index 846d99a..961b32a 100644 --- a/ex3_benchmarks/benchmark_tests.cpp +++ b/ex3_benchmarks/benchmark_tests.cpp @@ -211,7 +211,7 @@ vector test_C(const size_t &NLOOPS, const size_t &L, const size_t &M, 
co // Do calculation vector C(M*N); double check; - double check_sum; + double check_sum; // GH: initialize for (size_t i = 0; i < NLOOPS; ++i) { @@ -330,4 +330,4 @@ vector test_D(const size_t &NLOOPS, const size_t &N, const size_t &p) return vector{t_diff, Gflops, MemBandwidth}; -} \ No newline at end of file +} diff --git a/ex3_benchmarks/gh_response.txt b/ex3_benchmarks/gh_response.txt new file mode 100644 index 0000000..6b95df7 --- /dev/null +++ b/ex3_benchmarks/gh_response.txt @@ -0,0 +1,13 @@ +* added ../GCC_default.mk and ../CLANG_default.mk into your repository +* missing objects in Makefile, see Makefile:18 [-1] +* benchmark_tests.cpp:214 + double check_sum // GH: initialize! + + * Some comprehensive documents on results would be good. + + * benchmarks.cpp + MatMat(): swap j and k loop // should be faster + minor remarks in pdf. + + +* make codecheck COMPILER=CLANG_ > codecheck_gh.out