dino.celebic 2025-12-26 20:26:31 +01:00
commit 2467b9ae03
44 changed files with 22631 additions and 0 deletions


@@ -0,0 +1,154 @@
# Basic definitions for using the GNU compiler suite with OpenMPI
# requires setting of COMPILER=GCC_
# start make as follows to avoid warnings caused by OpenMPI code
# make 2>&1 | grep -v openmpi
MPI_ROOT=/usr/bin/
CC = ${MPI_ROOT}mpicc
CXX = ${MPI_ROOT}mpicxx
F77 = ${MPI_ROOT}mpif77
LINKER = ${CXX}
# If 'mpirun ...' reports an error like "... not enough slots ...", use the option '--oversubscribe'
MPIRUN = ${MPI_ROOT}mpirun --oversubscribe -display-map
#MPIRUN = ${MPI_ROOT}mpiexec
# 2023, Oct 23: "WARNING: There is at least one non-excluded OpenFabrics device found,"
# solution according to https://github.com/open-mpi/ompi/issues/11063
MPIRUN += -mca btl ^openib
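# The '^' prefix excludes the listed MCA component, i.e. the openib BTL is not loaded.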
# KFU:sauron
CXXFLAGS += -I/software/boost/1_72_0/include
WARNINGS = -Wall -pedantic -Woverloaded-virtual -Wfloat-equal -Wshadow \
-Wredundant-decls -Wunreachable-code -Winline -fmax-errors=1
# WARNINGS += -Weffc++ -Wextra
# -Wno-pragmas
CXXFLAGS += -std=c++17 -ffast-math -O3 -march=native ${WARNINGS}
# -ftree-vectorizer-verbose=5 -DNDEBUG
# -ftree-vectorizer-verbose=2
# CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details
# CFLAGS = -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2
# info on vectorization
#VECTORIZE = -ftree-vectorize -fdump-tree-vect-blocks=foo.dump
#-fdump-tree-pre=stderr
VECTORIZE = -ftree-vectorize -fopt-info -ftree-vectorizer-verbose=5
#CXXFLAGS += ${VECTORIZE}
# -funroll-all-loops -msse3
#GCC -march=knl -march=broadwell -march=haswell
# for debugging purposes (safe code)
# only one of the following three can be used at a time:
# -fsanitize=leak
# -fsanitize=address
# -fsanitize=thread
SANITARY = -fsanitize=address -fsanitize=undefined -fsanitize=null -fsanitize=return \
-fsanitize=bounds -fsanitize=alignment -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow \
-fsanitize=bool -fsanitize=enum -fsanitize=vptr
#CXXFLAGS += ${SANITARY}
#LINKFLAGS +=${SANITARY}
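# Note: the sanitizer flags have to be passed to both the compile and the link step.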
# OpenMP
CXXFLAGS += -fopenmp
LINKFLAGS += -fopenmp
default: ${PROGRAM}
${PROGRAM}: ${OBJECTS}
	$(LINKER) $^ ${LINKFLAGS} -o $@
	@echo
	@echo "Start with : $(MPIRUN) -np num_proc $(MPIFLAGS) $(PROGRAM)"
	@echo
clean:
	@rm -f ${PROGRAM} ${OBJECTS} gmon.out
clean_all:: clean
	@rm -f *_ *~ *.bak *.log *.out *.tar *.orig
	@rm -rf html latex
run: ${PROGRAM}
	${MPIRUN} -np 4 ./$^
# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
	@echo "Tar the directory: " ${MY_DIR}
	@cd .. ;\
	tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
	cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *
zip: clean
	@echo "Zip the directory: " ${MY_DIR}
	@cd .. ;\
	zip -r ${MY_DIR}.zip ${MY_DIR} *default.mk ;\
	cd ${MY_DIR}
doc:
	doxygen Doxyfile
#########################################################################
.cpp.o:
	$(CXX) -c $(CXXFLAGS) -o $@ $<
# 2>&1 | grep -v openmpi
# special: get rid of compiler warnings generated by openmpi files
#.cpp.o:
# @$(CXX) -c $(CXXFLAGS) $< 2>/tmp/t.txt || grep -sv openmpi /tmp/t.txt
# |grep -sv openmpi
.c.o:
	$(CC) -c $(CFLAGS) -o $@ $<
.f.o:
	$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# some tools
# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags)
cache: ${PROGRAM}
	valgrind --tool=callgrind --simulate-cache=yes ./$^
# kcachegrind callgrind.out.<pid> &
	kcachegrind `ls -1tr callgrind.out.* |tail -1`
# Check for wrong memory accesses, memory leaks, ...
# use smaller data sets
# no "-pg" in compile/link options
mem: ${PROGRAM}
	${MPIRUN} -np 4 valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out.%p --show-reachable=yes ./$^
# Graphical interface
# valkyrie
# Simple run time profiling of your code
# CXXFLAGS += -g -pg
# LINKFLAGS += -pg
prof: ${PROGRAM}
	perf record ./$^
	perf report
# gprof -b ./$^ > gp.out
# kprof -f gp.out -p gprof &
#Trace your heap:
#> heaptrack ./main.GCC_
#> heaptrack_gui heaptrack.main.GCC_.<pid>.gz
heap: ${PROGRAM}
	heaptrack ./$^ 11
	heaptrack_gui `ls -1tr heaptrack.$^.* |tail -1` &
codecheck: $(SOURCES)
	cppcheck --enable=all --inconclusive --std=c++17 --suppress=missingIncludeSystem $^
########################################################################
# get the detailed status of all optimization flags
info:
	echo "detailed status of all optimization flags"
	$(CXX) --version
	$(CXX) -Q $(CXXFLAGS) --help=optimizers
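# Typical usage of the targets above (from the directory of the including Makefile):
#   make            # build ${PROGRAM}
#   make run        # start with 'mpirun -np 4'
#   make mem        # valgrind/memcheck per rank
#   make cache      # callgrind + kcachegrind
#   make prof       # perf record/report
#   make heap       # heaptrack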

ex7/code/task3/Makefile Normal file

@@ -0,0 +1,23 @@
#
# Compile with
# make 2>&1 | grep -v openmpi
# to avoid warnings caused by OpenMPI
# use GNU-Compiler tools
COMPILER=GCC_
# alternatively from the shell
# export COMPILER=GCC_
# or, alternatively from the shell
# make COMPILER=GCC_
MAIN = main
SOURCES = ${MAIN}.cpp
OBJECTS = $(SOURCES:.cpp=.o)
PROGRAM = ${MAIN}.${COMPILER}
# uncomment the next two lines for debugging and detailed performance analysis
CXXFLAGS += -g
LINKFLAGS +=
include ./${COMPILER}default.mk
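# Build and start the program, e.g.:
#   make clean && make && make run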

ex7/code/task3/main.cpp Normal file

@@ -0,0 +1,149 @@
#include <algorithm>
#include <iostream>
#include <mpi.h>
#include <tuple>
#include <vector>
using namespace std;
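// DebugVector: interactive, ordered printing of the distributed vector.
// Rank 0 reads the next rank to print from stdin and broadcasts it; entering -1 ends the loop.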
void DebugVector(const vector<double> &xin, MPI_Comm icomm) {
    int rank, size;
    MPI_Comm_rank(icomm, &rank);
    MPI_Comm_size(icomm, &size);
    int next_process = 0;
    while (next_process != -1) {
        // Print the local vector for each process
        if (rank == next_process) {
            cout << "x_" << rank << " = ";
            for (const auto &value : xin) {
                cout << value << " ";
            }
            cout << endl;
        }
        MPI_Barrier(icomm);
        if (rank == 0) {
            cout << "Enter rank (0-" << size - 1 << ") or -1 to exit: ";
            cin >> next_process;
        }
        MPI_Bcast(&next_process, 1, MPI_INT, 0, icomm);
        MPI_Barrier(icomm);
    }
}
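// Parallel inner product: each rank multiplies its local slices, then
// MPI_Allreduce with MPI_SUM makes the global sum available on all ranks.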
double par_scalar(const vector<double> &x, const vector<double> &y, MPI_Comm icomm) {
    double local_dot = 0.0;
    for (size_t i = 0; i < x.size(); ++i) {
        local_dot += x[i] * y[i];
    }
    double global_dot = 0.0;
    MPI_Allreduce(&local_dot, &global_dot, 1, MPI_DOUBLE, MPI_SUM, icomm);
    return global_dot;
}
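// Global min/max via MPI_Gather: the local extrema are collected on rank 0,
// reduced there, and the result is distributed to all ranks with MPI_Bcast.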
tuple<double, double> find_global_minmax(const vector<double> &xin, MPI_Comm icomm) {
    int rank, size;
    MPI_Comm_rank(icomm, &rank);
    MPI_Comm_size(icomm, &size);
    // Find local min/max
    double local_min = *min_element(xin.begin(), xin.end());
    double local_max = *max_element(xin.begin(), xin.end());
    // Gather local mins/maxs in vector
    vector<double> local_min_vector(size);
    vector<double> local_max_vector(size);
    MPI_Gather(&local_min, 1, MPI_DOUBLE, local_min_vector.data(), 1, MPI_DOUBLE, 0, icomm);
    MPI_Gather(&local_max, 1, MPI_DOUBLE, local_max_vector.data(), 1, MPI_DOUBLE, 0, icomm);
    // Find global min/max
    double global_min(0);
    double global_max(0);
    if (rank == 0) {
        global_min = *min_element(local_min_vector.begin(), local_min_vector.end());
        global_max = *max_element(local_max_vector.begin(), local_max_vector.end());
    }
    // Broadcast global min/max
    MPI_Bcast(&global_min, 1, MPI_DOUBLE, 0, icomm);
    MPI_Bcast(&global_max, 1, MPI_DOUBLE, 0, icomm);
    return make_tuple(global_min, global_max);
}
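// Same result with a single collective call per value: MPI_Allreduce with MPI_MIN / MPI_MAX.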
tuple<double, double> find_global_minmax_Allreduce(const vector<double> &xin, MPI_Comm icomm) {
    double local_min = *min_element(xin.begin(), xin.end());
    double local_max = *max_element(xin.begin(), xin.end());
    double global_min(0);
    double global_max(0);
    MPI_Allreduce(&local_min, &global_min, 1, MPI_DOUBLE, MPI_MIN, icomm);
    MPI_Allreduce(&local_max, &global_max, 1, MPI_DOUBLE, MPI_MAX, icomm);
    return make_tuple(global_min, global_max);
}
int main(int argc, char *argv[]) {
    MPI_Comm icomm = MPI_COMM_WORLD;
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_rank(icomm, &rank);
    MPI_Comm_size(icomm, &size);
    if (rank == 0) {
        cout << "\n There are " << size << " processes running.\n";
    }
    // Create vectors
    size_t n = 20;
    vector<double> local_vector(n);
    vector<double> local_vector_inv(n);
    for (size_t i = 0; i < n; ++i) {
        // local_vector[i] = rank*n + i+1;
        // local_vector_inv[i] = 1.0/(local_vector[i]);
        local_vector[i] = rank * 100.0 + (i % 5) * 10.0 + i; // EX8
        local_vector_inv[i] = 1.0 / (local_vector[i] + 1.0);
    }
    MPI_Barrier(icomm);
    if (rank == 0) { printf("\n\n-------------- Task 5 --------------\n\n"); }
    DebugVector(local_vector, icomm);
    MPI_Barrier(icomm);
    if (rank == 0) { printf("\n\n-------------- Task 6 --------------\n\n"); }
    double result = par_scalar(local_vector, local_vector_inv, icomm);
    if (rank == 0) { printf("Global scalar product: %f\n", result); }
    MPI_Barrier(icomm);
    if (rank == 0) { printf("\n\n-------------- Task 7 --------------\n\n"); }
    auto [min, max] = find_global_minmax(local_vector, icomm);
    if (rank == 0) { printf("Global min: %.0f | global max: %.0f\n\n", min, max); }
    MPI_Barrier(icomm);
    tie(min, max) = find_global_minmax_Allreduce(local_vector, icomm);
if (rank == 0) {printf("Global min: %.0f | global max: %.0f\n", min, max);}
MPI_Barrier(icomm);
if (rank == 0) {printf("\n\n-------------- Task 8 --------------\n\n");}
if (rank == 0) {printf("\n---- MPI_Alltoall ----\n");}
vector<double> recv(n);
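    // Note: a send/recv count of 5 assumes exactly 4 processes (4*5 == n == 20),
    // matching the 'make run' target; each rank exchanges one 5-element block with every rank.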
    MPI_Alltoall(local_vector.data(), 5, MPI_DOUBLE, recv.data(), 5, MPI_DOUBLE, icomm);
    DebugVector(recv, icomm);
    MPI_Barrier(icomm);
    if (rank == 0) { printf("\n---- MPI_Alltoall using MPI_IN_PLACE ----\n"); }
    MPI_Alltoall(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, local_vector.data(), 5, MPI_DOUBLE, icomm);
    DebugVector(local_vector, icomm);
    MPI_Finalize();
    return 0;
}