sheet 4
This commit is contained in:
parent
c21fee7862
commit
65a23d88d6
67 changed files with 14385 additions and 0 deletions
176
sheet4/ONEAPI_default.mk
Normal file
176
sheet4/ONEAPI_default.mk
Normal file
|
|
@ -0,0 +1,176 @@
|
|||
# Basic Definitions for using INTEL compiler suite sequentially
|
||||
# requires setting of COMPILER=ONEAPI_
|
||||
|
||||
# https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onemkl/link-line-advisor.html
|
||||
# requires
|
||||
# source /opt/intel/oneapi/setvars.sh
|
||||
# on AMD: export MKL_DEBUG_CPU_TYPE=5
|
||||
|
||||
#BINDIR = /opt/intel/oneapi/compiler/latest/linux/bin/
|
||||
#MKL_ROOT = /opt/intel/oneapi/mkl/latest/
|
||||
#export KMP_AFFINITY=verbose,compact
|
||||
|
||||
CC = ${BINDIR}icc
|
||||
CXX = ${BINDIR}dpcpp
|
||||
F77 = ${BINDIR}ifort
|
||||
LINKER = ${CXX}
|
||||
|
||||
## Compiler flags
|
||||
WARNINGS = -Wall -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -pedantic
|
||||
WARNINGS += -Wpessimizing-move -Wredundant-move
|
||||
#-wd2015,2012,2014 -wn3
|
||||
# -Winline -Wredundant-decls -Wunreachable-code
|
||||
# -qopt-subscript-in-range
|
||||
# -vec-threshold0
|
||||
|
||||
CXXFLAGS += -O3 -std=c++17 ${WARNINGS}
|
||||
#CXXFLAGS += -DMKL_ILP64 -I"${MKLROOT}/include"
|
||||
#CXXFLAGS += -DMKL_ILP32 -I"${MKLROOT}/include"
|
||||
LINKFLAGS += -O3
|
||||
|
||||
# interprocedural optimization
|
||||
CXXFLAGS += -ipo
|
||||
LINKFLAGS += -ipo
|
||||
LINKFLAGS += -flto
|
||||
|
||||
# annotated Assembler file
|
||||
ANNOTED = -fsource-asm -S
|
||||
|
||||
#architecture
|
||||
CPU = -march=core-avx2
|
||||
#CPU += -mtp=zen
|
||||
# -xCORE-AVX2
|
||||
# -axcode COMMON-AVX512 -axcode MIC-AVX512 -axcode CORE-AVX512 -axcode CORE-AVX2
|
||||
CXXFLAGS += ${CPU}
|
||||
LINKFLAGS += ${CPU}
|
||||
|
||||
# use MKL by INTEL
|
||||
# https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onemkl/link-line-advisor.html
|
||||
# sequential MKL
|
||||
# use the 32-bit integer interface (LP64) instead of the 64-bit integer interface (ILP64)
|
||||
CXXFLAGS += -qmkl=sequential -UMKL_ILP64
|
||||
LINKFLAGS += -O3 -qmkl=sequential -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
|
||||
#LINKFLAGS += -O3 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
|
||||
|
||||
# shared libs: https://aur.archlinux.org/packages/intel-oneapi-compiler-static
|
||||
# install intel-oneapi-compiler-static
|
||||
# or
|
||||
LINKFLAGS += -shared-intel
|
||||
|
||||
|
||||
OPENMP = -qopenmp
|
||||
CXXFLAGS += ${OPENMP}
|
||||
LINKFLAGS += ${OPENMP}
|
||||
|
||||
|
||||
# profiling tools
|
||||
#CXXFLAGS += -pg
|
||||
#LINKFLAGS += -pg
|
||||
# -vec-report=3
|
||||
# -qopt-report=5 -qopt-report-phase=vec -qopt-report-phase=openmp
|
||||
# -guide -parallel
|
||||
# -guide-opts=string -guide-par[=n] -guide-vec[=n]
|
||||
# -auto-p32 -simd
|
||||
|
||||
# Reports: https://software.intel.com/en-us/articles/getting-the-most-out-of-your-intel-compiler-with-the-new-optimization-reports
|
||||
#CXXFLAGS += -qopt-report=5 -qopt-report-phase=vec,par
|
||||
#CXXFLAGS += -qopt-report=5 -qopt-report-phase=cg
|
||||
# Redirect report from *.optrpt to stderr
|
||||
# -qopt-report-file=stderr
|
||||
# Guided parallelization
|
||||
# -guide -parallel
|
||||
# -guide-opts=string -guide-par[=n] -guide-vec[=n]
|
||||
# -auto-p32 -simd
|
||||
|
||||
## run time checks
|
||||
# https://www.intel.com/content/www/us/en/develop/documentation/fortran-compiler-oneapi-dev-guide-and-reference/top/compiler-reference/compiler-options/offload-openmp-and-parallel-processing-options/par-runtime-control-qpar-runtime-control.html
|
||||
|
||||
|
||||
# Entry points for building, cleaning and running ${PROGRAM}.
# These names are commands, not files: declare them phony so that a stray file
# called e.g. `clean` or `run` cannot make the target look up to date.
.PHONY: default clean clean_all run

# Build the executable.
default: ${PROGRAM}

# Link all object files into the executable.
${PROGRAM}: ${OBJECTS}
	$(LINKER) $^ ${LINKFLAGS} -o $@

# Remove the executable, the object files and the *.optrpt report files.
clean:
	rm -f ${PROGRAM} ${OBJECTS} *.optrpt

# Additionally remove backups, logs, outputs and archives.
# Double-colon rule: further independent clean_all recipes may be defined elsewhere.
clean_all:: clean
	@rm -f *_ *~ *.bak *.log *.out *.tar

# Rebuild from scratch and execute the program.
# NOTE(review): `clean` and ${PROGRAM} are unordered prerequisites; this relies
# on serial execution and may race under `make -j` - confirm before using -j.
run: clean ${PROGRAM}
	./${PROGRAM}
|
||||
|
||||
# tar the current directory

# Name of the current directory without its path.  Taken from make's own
# CURDIR instead of the environment's PWD, so it stays correct when make is
# started with `-C <dir>` (PWD is not updated in that case).
MY_DIR := $(notdir $(CURDIR))

.PHONY: tar

# Archive this directory plus the *default.mk files of the parent directory
# into ../${MY_DIR}.tar, after a full clean-up.
# `&&` ensures tar is not run in the wrong place when `cd ..` fails; no final
# `cd` back is needed because the recipe shell ends with the line.
tar: clean_all
	@echo "Tar the directory: " ${MY_DIR}
	@cd .. && tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk
# Alternative (archive only the directory content, from inside the directory):
#	tar cf ${MY_DIR}.tar *
|
||||
|
||||
# Generate the documentation with doxygen, configured by ./Doxyfile.
# Declared phony: otherwise an existing directory named `doc` would make this
# target count as up to date and doxygen would never be run.
.PHONY: doc
doc:
	doxygen Doxyfile
|
||||
|
||||
#########################################################################
|
||||
|
||||
# Compile one source file into the object file of the same base name.
# Pattern rules replace the old-fashioned suffix rules (.cpp.o, .c.o, .f.o):
# they do not depend on the suffix list, so they keep working when the
# including makefile clears `.SUFFIXES:` or make is run with `-r`.

# C++ sources
%.o: %.cpp
	$(CXX) -c $(CXXFLAGS) -o $@ $<

# C sources
%.o: %.c
	$(CC) -c $(CFLAGS) -o $@ $<

# Fortran sources
%.o: %.f
	$(F77) -c $(FFLAGS) -o $@ $<
|
||||
|
||||
##################################################################################################
|
||||
# some tools
|
||||
# Cache behaviour (CXXFLAGS += -g tracks down to source lines)
|
||||
# https://software.intel.com/content/www/us/en/develop/documentation/vtune-help/top/analyze-performance/microarchitecture-analysis-group/memory-access-analysis.html
|
||||
|
||||
# The analysis helpers below are commands, not files.
.PHONY: mem prof cache gap_par_report amplifier inspector

# Convenience aliases for the tool targets.
mem: inspector
prof: vtune
cache: inspector

# Compile $(SOURCES) with -guide -parallel and store the compiler's stderr
# output in gap.txt.
gap_par_report:
	${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt

# GUI for performance report
# Both /proc settings are written via sudo before the GUI is started in the
# background.
amplifier: ${PROGRAM}
	echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
	echo 0 | sudo tee /proc/sys/kernel/perf_event_paranoid
	amplxe-gui &

# GUI for Memory and Thread analyzer (race condition)
# Alternative front end (disabled): inspxe-gui &
inspector: ${PROGRAM}
	echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
	vtune-gui ./${PROGRAM} &
|
||||
|
||||
# Collect an Intel Advisor survey of ${PROGRAM} and export it to
# ./out/survey.csv.
# Every recipe line runs in its own shell, so the environment script has to be
# read in the same shell invocation as the advisor commands; the lines are
# therefore chained with `&&`.  The POSIX `.` is used instead of `source`,
# which the default recipe shell (/bin/sh) does not necessarily provide.
# Alternatives (disabled):
#   /opt/intel/oneapi/advisor/latest/bin64/advixe-gui &
#   advisor --collect=roofline ./${PROGRAM}
.PHONY: advisor
advisor:
	. /opt/intel/oneapi/advisor/2021.2.0/advixe-vars.sh && \
	advisor --collect=survey ./${PROGRAM} && \
	advisor --report=survey --project-dir=./ src:r=./ --format=csv --report-output=./out/survey.csv
|
||||
|
||||
# Command-line hotspot collection for ${PROGRAM}; the text report is written
# to vtune.out.
# https://software.intel.com/en-us/articles/intel-advisor-2017-update-1-what-s-new
# Each recipe line runs in its own shell, so a stand-alone `export` line would
# be lost before the collector starts; ADVIXE_EXPERIMENTAL is therefore set
# directly on the command it is meant for.
# NOTE(review): the report step reads the fixed result directory r000hs -
# presumably the one created by the first collection; verify for repeated runs.
# Alternative (disabled): vtune-gui ./${PROGRAM} &
.PHONY: vtune
vtune:
	echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
	ADVIXE_EXPERIMENTAL=roofline vtune -collect hotspots ./${PROGRAM}
	vtune -report hotspots -r r000hs > vtune.out
|
||||
|
||||
# Run the Intel C++ driver with `-#` on main.cpp.
# NOTE(review): presumably this only prints the driver's internal commands
# without executing them - confirm against the compiler documentation.
# Phony: the target names a command, not a file.
.PHONY: icc-info
icc-info:
	icpc -# main.cpp
|
||||
|
||||
# MKL on AMD
|
||||
# https://www.computerbase.de/2019-11/mkl-workaround-erhoeht-leistung-auf-amd-ryzen/
|
||||
#
|
||||
# https://sites.google.com/a/uci.edu/mingru-yang/programming/mkl-has-bad-performance-on-an-amd-cpu
|
||||
# export MKL_DEBUG_CPU_TYPE=5
|
||||
# export MKL_NUM_THREADS=1
|
||||
# export MKL_DYNAMIC=false
|
||||
# on Intel compiler
|
||||
# http://publicclu2.blogspot.com/2013/05/intel-complier-suite-reference-card.html
|
||||
Loading…
Add table
Add a link
Reference in a new issue