From 0a954e09dae7ea893c45432102778ae3276c814c Mon Sep 17 00:00:00 2001
From: "lisa.pizzo"
Date: Wed, 17 Dec 2025 08:46:20 +0100
Subject: [PATCH] .

---
 Sheet5/Ex2_Second_Attempt/ONEAPI_default.mk | 176 ++++++++++++++++++++
 Sheet5/Ex2_Second_Attempt/PGI_default.mk    |  93 +++++++++++
 2 files changed, 269 insertions(+)
 create mode 100644 Sheet5/Ex2_Second_Attempt/ONEAPI_default.mk
 create mode 100644 Sheet5/Ex2_Second_Attempt/PGI_default.mk

diff --git a/Sheet5/Ex2_Second_Attempt/ONEAPI_default.mk b/Sheet5/Ex2_Second_Attempt/ONEAPI_default.mk
new file mode 100644
index 0000000..fe7b3fe
--- /dev/null
+++ b/Sheet5/Ex2_Second_Attempt/ONEAPI_default.mk
@@ -0,0 +1,176 @@
+# Basic definitions for using the INTEL compiler suite sequentially
+# requires setting of COMPILER=ONEAPI_
+
+# https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onemkl/link-line-advisor.html
+# requires (in the calling shell, before make is started):
+# source /opt/intel/oneapi/setvars.sh
+# on AMD: export MKL_DEBUG_CPU_TYPE=5
+
+#BINDIR = /opt/intel/oneapi/compiler/latest/linux/bin/
+#MKL_ROOT = /opt/intel/oneapi/mkl/latest/
+#export KMP_AFFINITY=verbose,compact
+
+CC = ${BINDIR}icc
+CXX = ${BINDIR}dpcpp
+F77 = ${BINDIR}ifort
+LINKER = ${CXX}
+
+## Compiler flags: warnings, handed to the C++ compiler through CXXFLAGS below
+WARNINGS = -Wall -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -pedantic
+WARNINGS += -Wpessimizing-move -Wredundant-move
+#-wd2015,2012,2014 -wn3
+# -Winline -Wredundant-decls -Wunreachable-code
+# -qopt-subscript-in-range
+# -vec-threshold0
+
+CXXFLAGS += -O3 -std=c++17 ${WARNINGS}
+#CXXFLAGS += -DMKL_ILP64 -I"${MKLROOT}/include"
+#CXXFLAGS += -DMKL_ILP32 -I"${MKLROOT}/include"
+LINKFLAGS += -O3
+
+# interprocedural optimization (-ipo) and link time optimization (-flto)
+CXXFLAGS += -ipo
+LINKFLAGS += -ipo
+LINKFLAGS += -flto
+
+# flags for an annotated assembler file (ANNOTED is only defined here, it is not used in this file)
+ANNOTED = -fsource-asm -S
+
+# target architecture (applied to compiling and to linking)
+CPU = -march=core-avx2
+#CPU += -mtp=zen
+# -xCORE-AVX2
+# -axcode COMMON-AVX512 -axcode MIC-AVX512 -axcode CORE-AVX512 -axcode CORE-AVX2
+CXXFLAGS += ${CPU}
+LINKFLAGS += ${CPU}
+
+# use MKL by INTEL
+# 
https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onemkl/link-line-advisor.html
+# sequential MKL
+# use the 32-bit integer interface (LP64) instead of the 64-bit integer interface (ILP64)
+CXXFLAGS += -qmkl=sequential -UMKL_ILP64
+LINKFLAGS += -O3 -qmkl=sequential -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
+#LINKFLAGS += -O3 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
+
+# shared libs: https://aur.archlinux.org/packages/intel-oneapi-compiler-static
+# install intel-oneapi-compiler-static
+# or
+LINKFLAGS += -shared-intel
+
+
+OPENMP = -qopenmp
+CXXFLAGS += ${OPENMP}
+LINKFLAGS += ${OPENMP}
+
+
+# profiling tools
+#CXXFLAGS += -pg
+#LINKFLAGS += -pg
+# -vec-report=3
+# -qopt-report=5 -qopt-report-phase=vec -qopt-report-phase=openmp
+# -guide -parallel
+# -guide-opts=string -guide-par[=n] -guide-vec[=n]
+# -auto-p32 -simd
+
+# Reports: https://software.intel.com/en-us/articles/getting-the-most-out-of-your-intel-compiler-with-the-new-optimization-reports
+#CXXFLAGS += -qopt-report=5 -qopt-report-phase=vec,par
+#CXXFLAGS += -qopt-report=5 -qopt-report-phase=cg
+# Redirect report from *.optrpt to stderr
+# -qopt-report-file=stderr
+# Guided parallelization
+# -guide -parallel
+# -guide-opts=string -guide-par[=n] -guide-vec[=n]
+# -auto-p32 -simd
+
+## run time checks
+# https://www.intel.com/content/www/us/en/develop/documentation/fortran-compiler-oneapi-dev-guide-and-reference/top/compiler-reference/compiler-options/offload-openmp-and-parallel-processing-options/par-runtime-control-qpar-runtime-control.html
+# command targets (they build no file of that name): phony, so that a same-named file or directory (e.g. doc/) cannot mask them
+.PHONY: default clean clean_all run tar doc mem prof cache gap_par_report amplifier inspector advisor vtune icc-info
+default: ${PROGRAM}
+
+${PROGRAM}: ${OBJECTS}
+	$(LINKER) $^ ${LINKFLAGS} -o $@
+
+clean:
+	rm -f ${PROGRAM} ${OBJECTS} *.optrpt
+
+clean_all:: clean
+	@rm -f *_ *~ *.bak *.log *.out *.tar
+
+run: clean ${PROGRAM}
+	./${PROGRAM}
+
+# tar the current directory (plus ../*default.mk) into ../<dirname>.tar; tar only runs if the cd succeeded
+MY_DIR = `basename ${PWD}`
+tar: clean_all
+	@echo "Tar the directory: " ${MY_DIR}
+	@cd .. && \
+	tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
+	cd ${MY_DIR}
+# tar cf `basename ${PWD}`.tar *
+
+doc:
+	doxygen Doxyfile
+
+#########################################################################
+
+.cpp.o:
+	$(CXX) -c $(CXXFLAGS) -o $@ $<
+
+.c.o:
+	$(CC) -c $(CFLAGS) -o $@ $<
+
+.f.o:
+	$(F77) -c $(FFLAGS) -o $@ $<
+
+##################################################################################################
+# some tools
+# Cache behaviour (CXXFLAGS += -g tracks down to source lines)
+# https://software.intel.com/content/www/us/en/develop/documentation/vtune-help/top/analyze-performance/microarchitecture-analysis-group/memory-access-analysis.html
+
+mem: inspector
+prof: vtune
+cache: inspector
+
+gap_par_report:
+	${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt
+
+# GUI for performance report
+amplifier: ${PROGRAM}
+	echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
+	echo 0 | sudo tee /proc/sys/kernel/perf_event_paranoid
+	amplxe-gui &
+
+# GUI for Memory and Thread analyzer (race condition)
+inspector: ${PROGRAM}
+	echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
+# inspxe-gui &
+	vtune-gui ./${PROGRAM} &
+# every recipe line runs in its own shell: the environment script is read with '.' and chained (&&) to the advisor calls
+advisor: ${PROGRAM}
+# /opt/intel/oneapi/advisor/latest/bin64/advixe-gui &
+# advisor --collect=roofline ./${PROGRAM}
+	. /opt/intel/oneapi/advisor/2021.2.0/advixe-vars.sh && \
+	advisor --collect=survey ./${PROGRAM} && \
+	advisor --report=survey --project-dir=./ src:r=./ --format=csv --report-output=./out/survey.csv
+# the export is chained to the collect step; on a recipe line of its own it would be lost with that line's shell
+vtune: ${PROGRAM}
+	echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
+# https://software.intel.com/en-us/articles/intel-advisor-2017-update-1-what-s-new
+	export ADVIXE_EXPERIMENTAL=roofline && \
+	vtune -collect hotspots ./${PROGRAM}
+	vtune -report hotspots -r r000hs > vtune.out
+# vtune-gui ./${PROGRAM} &
+
+icc-info:
+	icpc -# main.cpp
+
+# MKL on AMD
+# https://www.computerbase.de/2019-11/mkl-workaround-erhoeht-leistung-auf-amd-ryzen/
+#
+# https://sites.google.com/a/uci.edu/mingru-yang/programming/mkl-has-bad-performance-on-an-amd-cpu
+# 
export MKL_DEBUG_CPU_TYPE=5
+# export MKL_NUM_THREADS=1
+# export MKL_DYNAMIC=false
+# on Intel compiler
+# http://publicclu2.blogspot.com/2013/05/intel-complier-suite-reference-card.html
diff --git a/Sheet5/Ex2_Second_Attempt/PGI_default.mk b/Sheet5/Ex2_Second_Attempt/PGI_default.mk
new file mode 100644
index 0000000..40760e5
--- /dev/null
+++ b/Sheet5/Ex2_Second_Attempt/PGI_default.mk
@@ -0,0 +1,93 @@
+# Basic definitions for using the PGI compiler suite sequentially
+# requires setting of COMPILER=PGI_
+# OPTIRUN = optirun
+
+
+CC = pgcc
+CXX = pgc++
+F77 = pgfortran
+LINKER = ${CXX}
+
+# on mephisto:
+#CXXFLAGS += -I/share/apps/atlas/include
+#LINKFLAGS += -L/share/apps/atlas/lib
+#LINKFLAGS += -lcblas -latlas
+
+#LINKFLAGS += -lblas
+# The header has to be wrapped in extern "C" so that g++ finds everything.
+
+WARNINGS = -Minform=warn
+# -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -W -Wfloat-equal -Wshadow -Wredundant-decls
+# -pedantic -Wunreachable-code -Wextra -Winline
+# -Wunreachable-code
+
+#PGI_PROFILING = -Minfo=ccff,loop,vect,opt,intensity,mp,accel
+PGI_PROFILING = -Minfo=ccff,accel,ipa,loop,lre,mp,opt,par,unified,vect,intensity
+# -Minfo
+# -Mprof=time
+# -Mprof=lines
+# take care with option -Msafeptr
+CXXFLAGS += -O3 -std=c++17 ${WARNINGS}
+#CXXFLAGS += -O3 -std=c++11 -DNDEBUG ${PGI_PROFILING} ${WARNINGS}
+# -fastsse -fargument-noalias ${WARNINGS} -msse3 -vec-report=3
+
+default: ${PROGRAM}
+
+${PROGRAM}: ${OBJECTS}
+	$(LINKER) $^ ${LINKFLAGS} -o $@
+
+clean:
+	@rm -f ${PROGRAM} ${OBJECTS}
+
+clean_all:: clean
+	@rm -f *_ *~ *.bak *.log *.out *.tar
+
+run: clean ${PROGRAM}
+	./${PROGRAM}
+
+# tar the current directory (plus ../*default.mk) into ../<dirname>.tar; tar only runs if the cd succeeded
+MY_DIR = `basename ${PWD}`
+tar: clean_all
+	@echo "Tar the directory: " ${MY_DIR}
+	@cd .. && \
+	tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
+	cd ${MY_DIR}
+# tar cf `basename ${PWD}`.tar *
+
+doc:
+	doxygen Doxyfile
+
+#########################################################################
+
+.cpp.o:
+	$(CXX) -c $(CXXFLAGS) -o $@ $<
+
+.c.o:
+	$(CC) -c $(CFLAGS) -o $@ $<
+
+.f.o:
+	$(F77) -c $(FFLAGS) -o $@ $<
+
+##################################################################################################
+# # some tools
+# # Simple run time profiling of your code
+# # CXXFLAGS += -g -pg
+# # LINKFLAGS += -pg
+# command targets (they build no file of that name): phony, so that a same-named file or directory (e.g. doc/) cannot mask them
+.PHONY: default clean clean_all run tar doc cache prof info
+# Profiling options PGI, see: pgcollect -help
+# CPU_PROF = -allcache
+CPU_PROF = -time
+# GPU_PROF = -cuda=gmem,branch,cc13 -cudainit
+#GPU_PROF = -cuda=branch:cc20
+#
+PROF_FILE = pgprof.out
+
+cache: prof
+
+prof: ${PROGRAM}
+	${OPTIRUN} ${BINDIR}pgcollect $(CPU_PROF) ./$^
+	${OPTIRUN} ${BINDIR}pgprof -exe ./$^ $(PROF_FILE) &
+
+info:
+	pgaccelinfo -v