##################################################################################################
#    makefile for a simple project
#    available targets:
#       make          build $(PROGRAM) (default target 'all')
#       make clean    remove the program, the object files and editor backups
#       make prof     collect and display a PGI profile of the program
#       make mem      run the program under cuda-memcheck
##################################################################################################
#    Edit the following lines according to your needs
##################################################################################################
PROGRAM = main
SOURCES = main.cpp mylib.cpp
HEADERS = mylib.h
# One object file per source file (foo.cpp -> foo.o).
OBJECTS = $(SOURCES:.cpp=.o)

# Installation roots of the PGI compiler suite and of the CUDA toolkit.
ROOT_PGI  = /state/partition1/apps/pgi/linux86-64/12.9
CUDA_HOME = /usr/local/cuda

CC  = $(ROOT_PGI)/bin/pgcc
# CC  = gcc -std=c99
# CXX = $(ROOT_PGI)/bin/pgCC
CXX = $(ROOT_PGI)/bin/pgcpp
F90 = $(ROOT_PGI)/bin/pgfortran
# NOTE(review): the objects listed above come from .cpp sources but are linked with the
# C driver; switch to $(CXX) if the link step reports missing C++ runtime symbols.
LINKER = $(CC)

# GNU-style warning flags; only referenced by the commented-out g++ settings below.
WARNINGS = -Wall -Weffc++ -Woverloaded-virtual -W -Wfloat-equal -Wshadow \
           -Wredundant-decls -Winline
#          -Wunreachable-code
# CXXFLAGS = -ffast-math -O3 -funroll-all-loops -DNDEBUG ${WARNINGS} -ftree-vectorizer-verbose=2
# CXXFLAGS = -ffast-math -O3 -funroll-all-loops -DNDEBUG ${WARNINGS} -ftree-vectorizer-verbose=2 \
#            -fopenmp

# Uncomment to add PGI profiling flags to compile and link; expands to nothing otherwise.
# PGI_PROFILING = -Mprof=lines -Minfo=ccff

#
# for OpenMP
# CXXFLAGS = -O3 -mp
# LDFLAGS  = -lpthread

#
# for OpenACC
# Target architecture (nvidia,host)
# TA_ARCH = nvidia,host
TA_ARCH = nvidia:cc20
# CFLAGS = -O3 -ta=$(TA_ARCH)
# The option is spelled -Minfo with a plain ASCII hyphen; a typographic dash would be
# passed to the compiler as if it were a file name.
CFLAGS  = -g -fast -acc -ta=$(TA_ARCH) -Minfo $(PGI_PROFILING)
LDFLAGS = -g -ta=$(TA_ARCH) -L$(ROOT_PGI)/lib $(PGI_PROFILING)
# NOTE(review): CXXFLAGS is not set by any active line, so .cpp files are compiled
# without the OpenACC options above; define CXXFLAGS if that is not intended.

# CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details
# CFLAGS = -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2
# #CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
# FFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
# LFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp

##################################################################################################
#    Don't change anything below this line
##################################################################################################

# These targets are commands, not files; a file with one of these names must not mask them.
.PHONY: all clean prof mem

# Remove a target whose recipe failed so a partial file never looks up to date.
.DELETE_ON_ERROR:

all: ${PROGRAM}

# Two independent double-colon rules for the program:
#  1) when the Makefile or a header is newer than the program, wipe all build products
#     so that everything is recompiled;
#  2) link the program from the object files.
# $(MAKE) is used instead of a literal 'make' so that options such as -n and the
# job server are passed on to the sub-make.
${PROGRAM}:: Makefile $(HEADERS)
	$(MAKE) clean

${PROGRAM}:: ${OBJECTS}
	$(LINKER) $^ -o $@ ${LDFLAGS}

clean:
	rm -f ${PROGRAM} ${OBJECTS} *~ a.out

# Compile rules: one object file per C++ / C / Fortran source file.
%.o: %.cpp
	$(CXX) -c $(CXXFLAGS) $< -o $@

%.o: %.c
	$(CC) -c $(CFLAGS) $< -o $@

# Uses $(F90), the only Fortran compiler variable defined in this file.
%.o: %.f
	$(F90) -c $(FFLAGS) $< -o $@

###################################################################################################
#
#    some tools
#
# Cache behaviour (CXXFLAGS += -g  tracks down to source lines)
# cache: ${PROGRAM}
#	valgrind --tool=callgrind --simulate-cache=yes ./$^
# #	kcachegrind callgrind.out. &
#
# # Check for wrong memory accesses, memory leaks, ...
# # use smaller data sets
# mem: ${PROGRAM}
#	valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^

# Profiling options PGI, see: pgcollect -help
CPU_PROF = -dcache
GPU_PROF = -cuda=gmem
#GPU_PROF = -cuda=branch:cc20
# PROF_FILE = pgprof.out

# Collect a profile with pgcollect, then open it with pgprof in the background.
# $(PROF_FILE) expands to nothing unless it is defined above.
prof: ${PROGRAM}
#	./$<
#	$(CUDA_HOME)/bin/nvvp &
#	export LD_LIBRARY_PATH=/state/partition1/apps/pgi/linux86-64/12.9/lib:$$LD_LIBRARY_PATH
	$(ROOT_PGI)/bin/pgcollect $(GPU_PROF) ./$<
	$(ROOT_PGI)/bin/pgprof -exe ./$< $(PROF_FILE) &

# Memory checker (slooooow!!!):
# see doc at /usr/local/cuda/doc/cuda-memcheck.pdf
mem: ${PROGRAM}
	$(CUDA_HOME)/bin/cuda-memcheck ./$<