Upload ex6 and ex7

jakob.schratter 2026-01-04 20:15:55 +01:00
commit 6c2d96ff4d
44 changed files with 15291 additions and 0 deletions

ex7/ex7_3/GCC_default.mk Normal file

@@ -0,0 +1,154 @@
# Basic Definitions for using the GNU compiler suite sequentially
# requires setting of COMPILER=GCC_
# start make as follows to avoid warnings caused by OpenMPI code:
#   make 2>&1 | grep -v openmpi
MPI_ROOT=/usr/bin/
CC  = ${MPI_ROOT}mpicc
CXX = ${MPI_ROOT}mpicxx
F77 = ${MPI_ROOT}mpif77
LINKER = ${CXX}
# If 'mpirun ...' reports an error like "... not enough slots ...", use the option '--oversubscribe'
MPIRUN = ${MPI_ROOT}mpirun --oversubscribe -display-map
#MPIRUN = ${MPI_ROOT}mpiexec
# 2023, Oct 23: "WARNING: There is at least one non-excluded OpenFabrics device found,"
# solution according to https://github.com/open-mpi/ompi/issues/11063
MPIRUN += -mca btl ^openib
# KFU:sauron
CXXFLAGS += -I/software/boost/1_72_0/include
WARNINGS = -Wall -pedantic -Woverloaded-virtual -Wfloat-equal -Wshadow \
-Wredundant-decls -Wunreachable-code -Winline -fmax-errors=1
# WARNINGS += -Weffc++ -Wextra
# -Wno-pragmas
CXXFLAGS += -std=c++17 -ffast-math -O3 -march=native ${WARNINGS}
# -ftree-vectorizer-verbose=5 -DNDEBUG
# -ftree-vectorizer-verbose=2
# CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details
# CFLAGS = -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2
# info on vectorization
#VECTORIZE = -ftree-vectorize -fdump-tree-vect-blocks=foo.dump
#-fdump-tree-pre=stderr
VECTORIZE = -ftree-vectorize -fopt-info -ftree-vectorizer-verbose=5
#CXXFLAGS += ${VECTORIZE}
# -funroll-all-loops -msse3
#GCC -march=knl -march=broadwell -march=haswell
# for debugging purposes (save code)
#   -fsanitize=leak      # only one of the three can be used
#   -fsanitize=address
#   -fsanitize=thread
SANITARY = -fsanitize=address -fsanitize=undefined -fsanitize=null -fsanitize=return \
-fsanitize=bounds -fsanitize=alignment -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow \
-fsanitize=bool -fsanitize=enum -fsanitize=vptr
#CXXFLAGS += ${SANITARY}
#LINKFLAGS +=${SANITARY}
# OpenMP
CXXFLAGS += -fopenmp
LINKFLAGS += -fopenmp
default: ${PROGRAM}

${PROGRAM}: ${OBJECTS}
	$(LINKER) $^ ${LINKFLAGS} -o $@
	@echo
	@echo "Start with : $(MPIRUN) -np num_proc $(MPIFLAGS) $(PROGRAM)"
	@echo

clean:
	@rm -f ${PROGRAM} ${OBJECTS} gmon.out

clean_all:: clean
	@rm -f *_ *~ *.bak *.log *.out *.tar *.orig
	@rm -rf html latex

run: ${PROGRAM}
	${MPIRUN} -np 4 ./$^
# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
	@echo "Tar the directory: " ${MY_DIR}
	@cd .. ;\
	tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
	cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *

zip: clean
	@echo "Zip the directory: " ${MY_DIR}
	@cd .. ;\
	zip -r ${MY_DIR}.zip ${MY_DIR} *default.mk ;\
	cd ${MY_DIR}

doc:
	doxygen Doxyfile
#########################################################################
.cpp.o:
	$(CXX) -c $(CXXFLAGS) -o $@ $<
# 2>&1 | grep -v openmpi
# special: get rid of compiler warnings generated by the OpenMPI headers
#.cpp.o:
# @$(CXX) -c $(CXXFLAGS) $< 2>/tmp/t.txt || grep -sv openmpi /tmp/t.txt
# |grep -sv openmpi

.c.o:
	$(CC) -c $(CFLAGS) -o $@ $<

.f.o:
	$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# some tools

# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in LINKFLAGS)
cache: ${PROGRAM}
	valgrind --tool=callgrind --simulate-cache=yes ./$^
# kcachegrind callgrind.out.<pid> &
	kcachegrind `ls -1tr callgrind.out.* |tail -1`

# Check for wrong memory accesses, memory leaks, ...
# use smaller data sets
# no "-pg" in compile/link options
mem: ${PROGRAM}
	valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes mpirun -np 4 ./$^
# Graphical interface:
# valkyrie

# Simple run-time profiling of your code
# CXXFLAGS  += -g -pg
# LINKFLAGS += -pg
prof: ${PROGRAM}
	perf record ./$^
	perf report
# gprof -b ./$^ > gp.out
# kprof -f gp.out -p gprof &

# Trace your heap:
#> heaptrack ./main.GCC_
#> heaptrack_gui heaptrack.main.GCC_.<pid>.gz
heap: ${PROGRAM}
	heaptrack ./$^ 11
	heaptrack_gui `ls -1tr heaptrack.$^.* |tail -1` &

codecheck: $(SOURCES)
	cppcheck --enable=all --inconclusive --std=c++17 --suppress=missingIncludeSystem $^

########################################################################
# get the detailed status of all optimization flags
info:
	echo "detailed status of all optimization flags"
	$(CXX) --version
	$(CXX) -Q $(CXXFLAGS) --help=optimizers

ex7/ex7_3/ex7_3/Makefile Executable file

@@ -0,0 +1,54 @@
#
# use GNU-Compiler tools
COMPILER=GCC_
# COMPILER=GCC_SEQ_
# alternatively, set it from the shell:
#   export COMPILER=GCC_
# or pass it on the make command line:
#   make COMPILER=GCC_
MAIN = main
SOURCES = ${MAIN}.cpp vector_operations.cpp
OBJECTS = $(SOURCES:.cpp=.o)
PROGRAM = ${MAIN}.${COMPILER}
# uncomment the next two lines for debugging and detailed performance analysis
CXXFLAGS += -g
# -DNDEBUG
# -pg slows down the code on my laptop when using CLANG_
LINKFLAGS += -g
#-pg
#CXXFLAGS += -Q --help=optimizers
#CXXFLAGS += -fopt-info
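# Note (added): the include below pulls in the compiler settings and the generic targets
# (default, run, clean, tar, zip, doc, profiling helpers) defined in GCC_default.mk above.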
include ../${COMPILER}default.mk
#############################################################################
# additional specific cleaning in this directory
clean_all::
	@rm -f t.dat*
#############################################################################
# special testing
# NPROCS = 4
#
TFILE = t.dat
# TTMP = t.tmp
#
graph: $(PROGRAM)
# @rm -f $(TFILE).*
# next two lines only sequentially
	./$(PROGRAM)
	@mv $(TFILE).000 $(TFILE)
# $(MPIRUN) $(MPIFLAGS) -np $(NPROCS) $(PROGRAM)
# @echo " "; echo "Manipulate data for graphics."; echo " "
# @cat $(TFILE).* > $(TTMP)
# @sort -b -k 2 $(TTMP) -o $(TTMP).1
# @sort -b -k 1 $(TTMP).1 -o $(TTMP).2
# @awk -f nl.awk $(TTMP).2 > $(TFILE)
# @rm -f $(TTMP).* $(TTMP) $(TFILE).*
#
	-gnuplot jac.dem

ex7/ex7_3/ex7_3/main.cpp Normal file

@@ -0,0 +1,107 @@
#include <iostream>
#include <mpi.h>
#include <vector>
#include "vector_operations.h"
using namespace std;

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);                // Initializes the MPI execution environment
    MPI_Comm const icomm(MPI_COMM_WORLD);
    int myrank;
    MPI_Comm_rank(icomm, &myrank);

    int n = 20;
    vector<double> x(n);
    vector<double> y = x;
    for (int i = 0; i < n; ++i)
    {
        x[i] = myrank*100 + (i % 5)*10 + i;
        y[i] = 1.0/(x[i]);
    }
    if (myrank == 0)                       // so scalar product is well defined (avoid division by 0)
        y[0] = 0;

    // -------------------- E5 --------------------
    if (myrank == 0) cout << "-------------------- E5 --------------------" << endl;
    DebugVector(x, icomm);
    cout.flush();
    MPI_Barrier(icomm);

    // -------------------- E6 --------------------
    if (myrank == 0) cout << "-------------------- E6 --------------------" << endl;
    double scalar_product = par_scalar(x, y, icomm);
    if (myrank == 0)
    {
        cout << "<x,y> = " << scalar_product << endl << endl;
    }
    cout.flush();
    MPI_Barrier(icomm);

    // -------------------- E7 --------------------
    if (myrank == 0) cout << "-------------------- E7 --------------------" << endl;
    double xmin, xmax;
    par_minmax(x, xmin, xmax, icomm);
    if (myrank == 0)
    {
        cout << "Global min: " << xmin << endl;
        cout << "Global max: " << xmax << endl << endl;
    }
    cout.flush();
    MPI_Barrier(icomm);

    // -------------------- E8 --------------------
    if (myrank == 0) cout << "-------------------- E8 --------------------" << endl;
    vector<double> x_new(n);
    cout.flush();
    MPI_Barrier(icomm);

    // All to all
    if (myrank == 0) cout << "----- All to all -----" << endl;
    auto sendbuf = x.data();
    int sendcount = 5;
    auto recvbuf = x_new.data();
    int recvcount = 5;
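    // Note (added): sendcount/recvcount = 5 assumes the program is run with exactly 4 processes,
    // so that 4 blocks of 5 doubles cover the local vector of length n = 20
    // (MPI_Alltoall sends block k of sendbuf to rank k and stores the block received from rank k at offset k*recvcount).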
    MPI_Alltoall(sendbuf, sendcount, MPI_DOUBLE, recvbuf, recvcount, MPI_DOUBLE, icomm);
    DebugVector(x_new, icomm);
    cout.flush();
    MPI_Barrier(icomm);

    // All to all v
    if (myrank == 0) cout << "----- All to all v -----" << endl;
    int sendcounts[4] = {5, 5, 5, 5};
    int senddispls[4] = {0, 5, 10, 15};
    int rcvcounts[4]  = {5, 5, 5, 5};
    int rcvdispls[4]  = {0, 5, 10, 15};
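    // Note (added): counts and displacements are hard-coded for 4 processes; with these values
    // MPI_Alltoallv performs the same exchange as the MPI_Alltoall call above, but the per-rank
    // counts and offsets could be chosen differently.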
    MPI_Alltoallv(x.data(), sendcounts, senddispls, MPI_DOUBLE, x_new.data(), rcvcounts, rcvdispls, MPI_DOUBLE, icomm);
    DebugVector(x_new, icomm);
    cout.flush();
    MPI_Barrier(icomm);

    // All to all (in place), sendcount and sendtype are ignored
    if (myrank == 0) cout << "----- All to all (in place) -----" << endl;
    MPI_Alltoall(MPI_IN_PLACE, sendcount, MPI_DOUBLE, x.data(), recvcount, MPI_DOUBLE, icomm);
    DebugVector(x, icomm);

    MPI_Finalize();                        // Terminates MPI execution environment
    return 0;
}

ex7/ex7_3/ex7_3/vector_operations.cpp Normal file

@@ -0,0 +1,110 @@
#include "vector_operations.h"
#include <cassert>
#include <cfloat>

void DebugVector(const vector<double> &xin, const MPI_Comm &icomm)
{
    int myrank, numprocs;
    MPI_Comm_rank(icomm, &myrank);     // my MPI-rank
    MPI_Comm_size(icomm, &numprocs);   // #MPI processes
    int ierr;
    int n = xin.size();
    int chosen_process;
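    // Note (added): rank 0 asks numprocs times on stdin which rank should print next;
    // the chosen rank id is broadcast, and only that rank prints its local vector.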
    for (int k = 0; k < numprocs; ++k)
    {
        MPI_Barrier(icomm);
        if (myrank == 0)
        {
            cout << "Choose next process: ";
            cin >> chosen_process;
        }
        ierr = MPI_Bcast(&chosen_process, 1, MPI_INT, 0, icomm);   // broadcast value of "chosen_process" to all processes
        assert(ierr == 0);
        MPI_Barrier(icomm);
        if (chosen_process == myrank)
        {
            for (int i = 0; i < n; ++i)
            {
                cout << "x_" << i << " = " << xin[i] << "\t(Process " << myrank << ")" << endl;
            }
            cout.flush();
        }
    }
    return;
}

double par_scalar(const vector<double> &x, const vector<double> &y, const MPI_Comm &icomm)
{
    int n = x.size();
    assert(n == (int)y.size());
    double sum = 0.0;
    double local_sum = 0.0;
    for (int i = 0; i < n; ++i)
    {
        local_sum += x[i]*y[i];
    }
    int ierr = MPI_Allreduce(&local_sum, &sum, 1, MPI_DOUBLE, MPI_SUM, icomm);   // reduce local sums to global sum
    assert(ierr == 0);
    return sum;
}

void par_minmax(const vector<double> &x, double &global_min, double &global_max, const MPI_Comm &icomm)
{
    int myrank, numprocs;
    MPI_Comm_rank(icomm, &myrank);     // my MPI-rank
    MPI_Comm_size(icomm, &numprocs);   // #MPI processes
    int n = x.size();
    double local_min =  DBL_MAX;
    double local_max = -DBL_MAX;
    for (int i = 0; i < n; ++i)
    {
        if (x[i] < local_min)
            local_min = x[i];
        if (x[i] > local_max)
            local_max = x[i];
    }
    vector<double> local_mins(numprocs);
    vector<double> local_maxs(numprocs);
    MPI_Gather(&local_min, 1, MPI_DOUBLE, local_mins.data(), 1, MPI_DOUBLE, 0, icomm);
    MPI_Gather(&local_max, 1, MPI_DOUBLE, local_maxs.data(), 1, MPI_DOUBLE, 0, icomm);
    if (myrank == 0)
    {
        global_min =  DBL_MAX;
        global_max = -DBL_MAX;
        for (int i = 0; i < numprocs; ++i)
        {
            if (local_mins[i] < global_min)
                global_min = local_mins[i];
            if (local_maxs[i] > global_max)
                global_max = local_maxs[i];
        }
    }
    MPI_Bcast(&global_min, 1, MPI_DOUBLE, 0, icomm);   // make sure every process is up to date
    MPI_Bcast(&global_max, 1, MPI_DOUBLE, 0, icomm);
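    // Note (added): the gather + manual reduction + broadcast above could equivalently be
    // replaced by two collective reductions, e.g.
    //   MPI_Allreduce(&local_min, &global_min, 1, MPI_DOUBLE, MPI_MIN, icomm);
    //   MPI_Allreduce(&local_max, &global_max, 1, MPI_DOUBLE, MPI_MAX, icomm);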
    return;
}

ex7/ex7_3/ex7_3/vector_operations.h Normal file

@@ -0,0 +1,10 @@
#include <mpi.h>
#include <vector>
using namespace std;
void DebugVector(const vector<double> &xin, const MPI_Comm &icomm);
double par_scalar(const vector<double> &x, const vector<double> &y, const MPI_Comm &icomm);
void par_minmax(const vector<double> &x, double &global_min, double &global_max, const MPI_Comm &icomm);