176 lines
5.2 KiB
Makefile
176 lines
5.2 KiB
Makefile
# Basic Defintions for using INTEL compiler suite sequentially
|
|
# requires setting of COMPILER=ONEAPI_
|
|
|
|
# https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onemkl/link-line-advisor.html
|
|
# requires
|
|
# source /opt/intel/oneapi/setvars.sh
|
|
# on AMD: export MKL_DEBUG_CPU_TYPE=5
|
|
|
|
#BINDIR = /opt/intel/oneapi/compiler/latest/linux/bin/
|
|
#MKL_ROOT = /opt/intel/oneapi/mkl/latest/
|
|
#export KMP_AFFINITY=verbose,compact
|
|
|
|
CC = ${BINDIR}icc
|
|
CXX = ${BINDIR}dpcpp
|
|
F77 = ${BINDIR}ifort
|
|
LINKER = ${CXX}
|
|
|
|
## Compiler flags
|
|
WARNINGS = -Wall -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -pedantic
|
|
WARNINGS += -Wpessimizing-move -Wredundant-move
|
|
#-wd2015,2012,2014 -wn3
|
|
# -Winline -Wredundant-decls -Wunreachable-code
|
|
# -qopt-subscript-in-range
|
|
# -vec-threshold0
|
|
|
|
CXXFLAGS += -O3 -std=c++17 ${WARNINGS}
|
|
#CXXFLAGS += -DMKL_ILP64 -I"${MKLROOT}/include"
|
|
#CXXFLAGS += -DMKL_ILP32 -I"${MKLROOT}/include"
|
|
LINKFLAGS += -O3
|
|
|
|
# interprocedural optimization
|
|
CXXFLAGS += -ipo
|
|
LINKFLAGS += -ipo
|
|
LINKFLAGS += -flto
|
|
|
|
# annotated Assembler file
|
|
ANNOTED = -fsource-asm -S
|
|
|
|
#architecture
|
|
CPU = -march=core-avx2
|
|
#CPU += -mtp=zen
|
|
# -xCORE-AVX2
|
|
# -axcode COMMON-AVX512 -axcode MIC-AVX512 -axcode CORE-AVX512 -axcode CORE-AVX2
|
|
CXXFLAGS += ${CPU}
|
|
LINKFLAGS += ${CPU}
|
|
|
|
# use MKL by INTEL
|
|
# https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onemkl/link-line-advisor.html
|
|
# sequential MKL
|
|
# use the 32 bit interface (LP64) instead of 64 bit interface (ILP64)
|
|
CXXFLAGS += -qmkl=sequential -UMKL_ILP64
|
|
LINKFLAGS += -O3 -qmkl=sequential -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
|
|
#LINKFLAGS += -O3 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
|
|
|
|
# shared libs: https://aur.archlinux.org/packages/intel-oneapi-compiler-static
|
|
# install intel-oneapi-compiler-static
|
|
# or
|
|
LINKFLAGS += -shared-intel
|
|
|
|
|
|
OPENMP = -qopenmp
|
|
CXXFLAGS += ${OPENMP}
|
|
LINKFLAGS += ${OPENMP}
|
|
|
|
|
|
# profiling tools
|
|
#CXXFLAGS += -pg
|
|
#LINKFLAGS += -pg
|
|
# -vec-report=3
|
|
# -qopt-report=5 -qopt-report-phase=vec -qopt-report-phase=openmp
|
|
# -guide -parallel
|
|
# -guide-opts=string -guide-par[=n] -guide-vec[=n]
|
|
# -auto-p32 -simd
|
|
|
|
# Reports: https://software.intel.com/en-us/articles/getting-the-most-out-of-your-intel-compiler-with-the-new-optimization-reports
|
|
#CXXFLAGS += -qopt-report=5 -qopt-report-phase=vec,par
|
|
#CXXFLAGS += -qopt-report=5 -qopt-report-phase=cg
|
|
# Redirect report from *.optrpt to stderr
|
|
# -qopt-report-file=stderr
|
|
# Guided paralellization
|
|
# -guide -parallel
|
|
# -guide-opts=string -guide-par[=n] -guide-vec[=n]
|
|
# -auto-p32 -simd
|
|
|
|
## run time checks
|
|
# https://www.intel.com/content/www/us/en/develop/documentation/fortran-compiler-oneapi-dev-guide-and-reference/top/compiler-reference/compiler-options/offload-openmp-and-parallel-processing-options/par-runtime-control-qpar-runtime-control.html
|
|
|
|
|
|
default: ${PROGRAM}
|
|
|
|
${PROGRAM}: ${OBJECTS}
|
|
$(LINKER) $^ ${LINKFLAGS} -o $@
|
|
|
|
clean:
|
|
rm -f ${PROGRAM} ${OBJECTS} *.optrpt
|
|
|
|
clean_all:: clean
|
|
@rm -f *_ *~ *.bak *.log *.out *.tar
|
|
|
|
run: clean ${PROGRAM}
|
|
./${PROGRAM}
|
|
|
|
# tar the current directory
|
|
MY_DIR = `basename ${PWD}`
|
|
tar: clean_all
|
|
@echo "Tar the directory: " ${MY_DIR}
|
|
@cd .. ;\
|
|
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
|
|
cd ${MY_DIR}
|
|
# tar cf `basename ${PWD}`.tar *
|
|
|
|
doc:
|
|
doxygen Doxyfile
|
|
|
|
#########################################################################
|
|
|
|
.cpp.o:
|
|
$(CXX) -c $(CXXFLAGS) -o $@ $<
|
|
|
|
.c.o:
|
|
$(CC) -c $(CFLAGS) -o $@ $<
|
|
|
|
.f.o:
|
|
$(F77) -c $(FFLAGS) -o $@ $<
|
|
|
|
##################################################################################################
|
|
# some tools
|
|
# Cache behaviour (CXXFLAGS += -g tracks down to source lines)
|
|
# https://software.intel.com/content/www/us/en/develop/documentation/vtune-help/top/analyze-performance/microarchitecture-analysis-group/memory-access-analysis.html
|
|
|
|
mem: inspector
|
|
prof: vtune
|
|
cache: inspector
|
|
|
|
gap_par_report:
|
|
${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt
|
|
|
|
# GUI for performance report
|
|
amplifier: ${PROGRAM}
|
|
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
|
|
echo 0 | sudo tee /proc/sys/kernel/perf_event_paranoid
|
|
amplxe-gui &
|
|
|
|
# GUI for Memory and Thread analyzer (race condition)
|
|
inspector: ${PROGRAM}
|
|
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
|
|
# inspxe-gui &
|
|
vtune-gui ./${PROGRAM} &
|
|
|
|
advisor:
|
|
source /opt/intel/oneapi/advisor/2021.2.0/advixe-vars.sh
|
|
# /opt/intel/oneapi/advisor/latest/bin64/advixe-gui &
|
|
advisor --collect=survey ./${PROGRAM}
|
|
# advisor --collect=roofline ./${PROGRAM}
|
|
advisor --report=survey --project-dir=./ src:r=./ --format=csv --report-output=./out/survey.csv
|
|
|
|
vtune:
|
|
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
|
|
# https://software.intel.com/en-us/articles/intel-advisor-2017-update-1-what-s-new
|
|
export ADVIXE_EXPERIMENTAL=roofline
|
|
vtune -collect hotspots ./${PROGRAM}
|
|
vtune -report hotspots -r r000hs > vtune.out
|
|
# vtune-gui ./${PROGRAM} &
|
|
|
|
icc-info:
|
|
icpc -# main.cpp
|
|
|
|
# MKL on AMD
|
|
# https://www.computerbase.de/2019-11/mkl-workaround-erhoeht-leistung-auf-amd-ryzen/
|
|
#
|
|
# https://sites.google.com/a/uci.edu/mingru-yang/programming/mkl-has-bad-performance-on-an-amd-cpu
|
|
# export MKL_DEBUG_CPU_TYPE=5
|
|
# export MKL_NUM_THRAEDS=1
|
|
# export MKL_DYNAMIC=false
|
|
# on Intel compiler
|
|
# http://publicclu2.blogspot.com/2013/05/intel-complier-suite-reference-card.html
|