dino.celebic 2025-12-26 20:26:31 +01:00
commit 2467b9ae03
44 changed files with 22631 additions and 0 deletions


@@ -0,0 +1,154 @@
# Basic definitions for using the GNU compiler suite with OpenMPI
# requires setting of COMPILER=GCC_
# start make as follows to avoid warnings caused by OpenMPI code
# make 2>&1 | grep -v openmpi
MPI_ROOT=/usr/bin/
CC = ${MPI_ROOT}mpicc
CXX = ${MPI_ROOT}mpicxx
F77 = ${MPI_ROOT}mpif77
LINKER = ${CXX}
# If 'mpirun ...' reports an error like "... not enough slots ...", use the option '--oversubscribe'
MPIRUN = ${MPI_ROOT}mpirun --oversubscribe -display-map
#MPIRUN = ${MPI_ROOT}mpiexec
# 2023, Oct 23: "WARNING: There is at least one non-excluded OpenFabrics device found,"
# solution according to https://github.com/open-mpi/ompi/issues/11063
MPIRUN += -mca btl ^openib
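# The '^' prefix excludes the listed MCA component, i.e. the openib BTL is not loaded.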
# KFU:sauron
CXXFLAGS += -I/software/boost/1_72_0/include
WARNINGS = -Wall -pedantic -Woverloaded-virtual -Wfloat-equal -Wshadow \
-Wredundant-decls -Wunreachable-code -Winline -fmax-errors=1
# WARNINGS += -Weffc++ -Wextra
# -Wno-pragmas
CXXFLAGS += -std=c++17 -ffast-math -O3 -march=native ${WARNINGS}
# -ftree-vectorizer-verbose=5 -DNDEBUG
# -ftree-vectorizer-verbose=2
# CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details
# CFLAGS = -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2
# info on vectorization
#VECTORIZE = -ftree-vectorize -fdump-tree-vect-blocks=foo.dump
#-fdump-tree-pre=stderr
VECTORIZE = -ftree-vectorize -fopt-info -ftree-vectorizer-verbose=5
#CXXFLAGS += ${VECTORIZE}
# -funroll-all-loops -msse3
#GCC -march=knl -march=broadwell -march=haswell
# for debugging purposes (safe code)
# only one of the following three can be used at a time:
# -fsanitize=leak
# -fsanitize=address
# -fsanitize=thread
SANITARY = -fsanitize=address -fsanitize=undefined -fsanitize=null -fsanitize=return \
-fsanitize=bounds -fsanitize=alignment -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow \
-fsanitize=bool -fsanitize=enum -fsanitize=vptr
#CXXFLAGS += ${SANITARY}
#LINKFLAGS +=${SANITARY}
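# Note: the sanitizer flags have to be passed to both the compile and the link step.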
# OpenMP
CXXFLAGS += -fopenmp
LINKFLAGS += -fopenmp
default: ${PROGRAM}
${PROGRAM}: ${OBJECTS}
	$(LINKER) $^ ${LINKFLAGS} -o $@
	@echo
	@echo "Start with : $(MPIRUN) -np num_proc $(MPIFLAGS) $(PROGRAM)"
	@echo
clean:
	@rm -f ${PROGRAM} ${OBJECTS} gmon.out
clean_all:: clean
	@rm -f *_ *~ *.bak *.log *.out *.tar *.orig
	@rm -rf html latex
run: ${PROGRAM}
	${MPIRUN} -np 4 ./$^
# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
	@echo "Tar the directory: " ${MY_DIR}
	@cd .. ;\
	tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
	cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *
zip: clean
	@echo "Zip the directory: " ${MY_DIR}
	@cd .. ;\
	zip -r ${MY_DIR}.zip ${MY_DIR} *default.mk ;\
	cd ${MY_DIR}
doc:
	doxygen Doxyfile
#########################################################################
.cpp.o:
	$(CXX) -c $(CXXFLAGS) -o $@ $<
# 2>&1 | grep -v openmpi
# special: get rid of compiler warnings generated by openmpi files
#.cpp.o:
# @$(CXX) -c $(CXXFLAGS) $< 2>/tmp/t.txt || grep -sv openmpi /tmp/t.txt
# |grep -sv openmpi
.c.o:
	$(CC) -c $(CFLAGS) -o $@ $<
.f.o:
	$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# some tools
# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags)
cache: ${PROGRAM}
	valgrind --tool=callgrind --simulate-cache=yes ./$^
# kcachegrind callgrind.out.<pid> &
	kcachegrind `ls -1tr callgrind.out.* |tail -1`
# Check for wrong memory accesses, memory leaks, ...
# use smaller data sets
# no "-pg" in compile/link options
mem: ${PROGRAM}
	${MPIRUN} -np 4 valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out.%p --show-reachable=yes ./$^
# Graphical interface
# valkyrie
# Simple run time profiling of your code
# CXXFLAGS += -g -pg
# LINKFLAGS += -pg
prof: ${PROGRAM}
	perf record ./$^
	perf report
# gprof -b ./$^ > gp.out
# kprof -f gp.out -p gprof &
#Trace your heap:
#> heaptrack ./main.GCC_
#> heaptrack_gui heaptrack.main.GCC_.<pid>.gz
heap: ${PROGRAM}
	heaptrack ./$^ 11
	heaptrack_gui `ls -1tr heaptrack.$^.* |tail -1` &
codecheck: $(SOURCES)
	cppcheck --enable=all --inconclusive --std=c++17 --suppress=missingIncludeSystem $^
########################################################################
# get the detailed status of all optimization flags
info:
	echo "detailed status of all optimization flags"
	$(CXX) --version
	$(CXX) -Q $(CXXFLAGS) --help=optimizers
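# Typical usage of the targets above (from the directory of the including Makefile):
#   make            # build ${PROGRAM}
#   make run        # start with 'mpirun -np 4'
#   make mem        # valgrind/memcheck per rank
#   make cache      # callgrind + kcachegrind
#   make prof       # perf record/report
#   make heap       # heaptrack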

ex7/code/task3/Makefile Normal file

@@ -0,0 +1,23 @@
#
# Compile with
# make 2>&1 | grep -v openmpi
# to avoid warnings caused by OpenMPI
# use GNU-Compiler tools
COMPILER=GCC_
# alternatively from the shell
# export COMPILER=GCC_
# or, alternatively from the shell
# make COMPILER=GCC_
MAIN = main
SOURCES = ${MAIN}.cpp
OBJECTS = $(SOURCES:.cpp=.o)
PROGRAM = ${MAIN}.${COMPILER}
# uncomment the next two lines for debugging and detailed performance analysis
CXXFLAGS += -g
LINKFLAGS +=
include ./${COMPILER}default.mk
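# Build and start the program, e.g.:
#   make clean && make && make run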

ex7/code/task3/main.cpp Normal file

@@ -0,0 +1,149 @@
#include <algorithm>
#include <iostream>
#include <mpi.h>
#include <tuple>
#include <vector>
using namespace std;
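// DebugVector: interactive, ordered printing of the distributed vector.
// Rank 0 reads the next rank to print from stdin and broadcasts it; entering -1 ends the loop.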
void DebugVector(const vector<double> &xin, MPI_Comm icomm) {
    int rank, size;
    MPI_Comm_rank(icomm, &rank);
    MPI_Comm_size(icomm, &size);
    int next_process = 0;
    while (next_process != -1) {
        // Print the local vector for each process
        if (rank == next_process) {
            cout << "x_" << rank << " = ";
            for (const auto &value : xin) {
                cout << value << " ";
            }
            cout << endl;
        }
        MPI_Barrier(icomm);
        if (rank == 0) {
            cout << "Enter rank (0-" << size - 1 << ") or -1 to exit: ";
            cin >> next_process;
        }
        MPI_Bcast(&next_process, 1, MPI_INT, 0, icomm);
        MPI_Barrier(icomm);
    }
}
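// Parallel inner product: each rank multiplies its local slices, then
// MPI_Allreduce with MPI_SUM makes the global sum available on all ranks.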
double par_scalar(const vector<double> &x, const vector<double> &y, MPI_Comm icomm) {
    double local_dot = 0.0;
    for (size_t i = 0; i < x.size(); ++i) {
        local_dot += x[i] * y[i];
    }
    double global_dot = 0.0;
    MPI_Allreduce(&local_dot, &global_dot, 1, MPI_DOUBLE, MPI_SUM, icomm);
    return global_dot;
}
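// Global min/max via MPI_Gather: the local extrema are collected on rank 0,
// reduced there, and the result is distributed to all ranks with MPI_Bcast.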
tuple<double, double> find_global_minmax(const vector<double> &xin, MPI_Comm icomm) {
    int rank, size;
    MPI_Comm_rank(icomm, &rank);
    MPI_Comm_size(icomm, &size);
    // Find local min/max
    double local_min = *min_element(xin.begin(), xin.end());
    double local_max = *max_element(xin.begin(), xin.end());
    // Gather local mins/maxs in vector
    vector<double> local_min_vector(size);
    vector<double> local_max_vector(size);
    MPI_Gather(&local_min, 1, MPI_DOUBLE, local_min_vector.data(), 1, MPI_DOUBLE, 0, icomm);
    MPI_Gather(&local_max, 1, MPI_DOUBLE, local_max_vector.data(), 1, MPI_DOUBLE, 0, icomm);
    // Find global min/max
    double global_min(0);
    double global_max(0);
    if (rank == 0) {
        global_min = *min_element(local_min_vector.begin(), local_min_vector.end());
        global_max = *max_element(local_max_vector.begin(), local_max_vector.end());
    }
    // Broadcast global min/max
    MPI_Bcast(&global_min, 1, MPI_DOUBLE, 0, icomm);
    MPI_Bcast(&global_max, 1, MPI_DOUBLE, 0, icomm);
    return make_tuple(global_min, global_max);
}
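// Same result with a single collective call per value: MPI_Allreduce with MPI_MIN / MPI_MAX.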
tuple<double, double> find_global_minmax_Allreduce(const vector<double> &xin, MPI_Comm icomm) {
    double local_min = *min_element(xin.begin(), xin.end());
    double local_max = *max_element(xin.begin(), xin.end());
    double global_min(0);
    double global_max(0);
    MPI_Allreduce(&local_min, &global_min, 1, MPI_DOUBLE, MPI_MIN, icomm);
    MPI_Allreduce(&local_max, &global_max, 1, MPI_DOUBLE, MPI_MAX, icomm);
    return make_tuple(global_min, global_max);
}
int main(int argc, char *argv[]) {
    MPI_Comm icomm = MPI_COMM_WORLD;
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_rank(icomm, &rank);
    MPI_Comm_size(icomm, &size);
    if (rank == 0) {
        cout << "\n There are " << size << " processes running.\n";
    }
    // Create vectors
    size_t n = 20;
    vector<double> local_vector(n);
    vector<double> local_vector_inv(n);
    for (size_t i = 0; i < n; ++i) {
        // local_vector[i] = rank*n + i+1;
        // local_vector_inv[i] = 1.0/(local_vector[i]);
        local_vector[i] = rank * 100.0 + (i % 5) * 10.0 + i; // EX8
        local_vector_inv[i] = 1.0 / (local_vector[i] + 1.0);
    }
    MPI_Barrier(icomm);
    if (rank == 0) { printf("\n\n-------------- Task 5 --------------\n\n"); }
    DebugVector(local_vector, icomm);
    MPI_Barrier(icomm);
    if (rank == 0) { printf("\n\n-------------- Task 6 --------------\n\n"); }
    double result = par_scalar(local_vector, local_vector_inv, icomm);
    if (rank == 0) { printf("Global scalar product: %f\n", result); }
    MPI_Barrier(icomm);
    if (rank == 0) { printf("\n\n-------------- Task 7 --------------\n\n"); }
    auto [min, max] = find_global_minmax(local_vector, icomm);
    if (rank == 0) { printf("Global min: %.0f | global max: %.0f\n\n", min, max); }
    MPI_Barrier(icomm);
    tie(min, max) = find_global_minmax_Allreduce(local_vector, icomm);
if (rank == 0) {printf("Global min: %.0f | global max: %.0f\n", min, max);}
MPI_Barrier(icomm);
if (rank == 0) {printf("\n\n-------------- Task 8 --------------\n\n");}
if (rank == 0) {printf("\n---- MPI_Alltoall ----\n");}
vector<double> recv(n);
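    // Note: a send/recv count of 5 assumes exactly 4 processes (4*5 == n == 20),
    // matching the 'make run' target; each rank exchanges one 5-element block with every rank.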
    MPI_Alltoall(local_vector.data(), 5, MPI_DOUBLE, recv.data(), 5, MPI_DOUBLE, icomm);
    DebugVector(recv, icomm);
    MPI_Barrier(icomm);
    if (rank == 0) { printf("\n---- MPI_Alltoall using MPI_IN_PLACE ----\n"); }
    MPI_Alltoall(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, local_vector.data(), 5, MPI_DOUBLE, icomm);
    DebugVector(local_vector, icomm);
    MPI_Finalize();
    return 0;
}