Upload ex6 and ex7
This commit is contained in:
parent
1885d64c0a
commit
6c2d96ff4d
44 changed files with 15291 additions and 0 deletions
154
ex7/ex7_3/GCC_default.mk
Normal file
@ -0,0 +1,154 @@
# Basic Definitions for using the GNU-compiler suite with OpenMPI
# requires setting of COMPILER=GCC_

# start make as follows to avoid warnings caused by OpenMPI code
# make 2>&1 | grep -v openmpi

MPI_ROOT=/usr/bin/

CC  = ${MPI_ROOT}mpicc
CXX = ${MPI_ROOT}mpicxx
F77 = ${MPI_ROOT}mpif77
LINKER = ${CXX}

# If 'mpirun ...' reports an error like "... not enough slots ...", then use the option '--oversubscribe'
MPIRUN = ${MPI_ROOT}mpirun --oversubscribe -display-map
#MPIRUN = ${MPI_ROOT}mpiexec
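# typical launch line (a sketch; the program name main.GCC_ comes from the
# Makefiles that include this file):
#   ${MPIRUN} -np 4 ./main.GCC_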

# 2023, Oct 23: "WARNING: There is at least non-excluded one OpenFabrics device found,"
# solution according to https://github.com/open-mpi/ompi/issues/11063
MPIRUN += -mca btl ^openib

# KFU:sauron
CXXFLAGS += -I/software/boost/1_72_0/include

WARNINGS = -Wall -pedantic -Woverloaded-virtual -Wfloat-equal -Wshadow \
           -Wredundant-decls -Wunreachable-code -Winline -fmax-errors=1

# WARNINGS += -Weffc++ -Wextra
# -Wno-pragmas
CXXFLAGS += -std=c++17 -ffast-math -O3 -march=native ${WARNINGS}
# -ftree-vectorizer-verbose=5 -DNDEBUG
# -ftree-vectorizer-verbose=2
# CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details
# CFLAGS = -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2

# info on vectorization
#VECTORIZE = -ftree-vectorize -fdump-tree-vect-blocks=foo.dump
#-fdump-tree-pre=stderr
VECTORIZE = -ftree-vectorize -fopt-info -ftree-vectorizer-verbose=5
#CXXFLAGS += ${VECTORIZE}

# -funroll-all-loops -msse3
#GCC: -march=knl -march=broadwell -march=haswell

# for debugging purposes (safe code)
# -fsanitize=leak      # only one of the three sanitizers below can be used at a time
# -fsanitize=address
# -fsanitize=thread
SANITARY = -fsanitize=address -fsanitize=undefined -fsanitize=null -fsanitize=return \
           -fsanitize=bounds -fsanitize=alignment -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow \
           -fsanitize=bool -fsanitize=enum -fsanitize=vptr
#CXXFLAGS  += ${SANITARY}
#LINKFLAGS += ${SANITARY}

# OpenMP
CXXFLAGS  += -fopenmp
LINKFLAGS += -fopenmp

default: ${PROGRAM}

${PROGRAM}: ${OBJECTS}
	$(LINKER) $^ ${LINKFLAGS} -o $@
	@echo
	@echo "Start with :  $(MPIRUN) -np num_proc $(MPIFLAGS) $(PROGRAM)"
	@echo
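# for reference (an illustrative expansion, not part of the build): with
# LINKER=mpicxx the link step above amounts to roughly
#   mpicxx main.o vector_operations.o ${LINKFLAGS} -o main.GCC_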

clean:
	@rm -f ${PROGRAM} ${OBJECTS} gmon.out

clean_all:: clean
	@rm -f *_ *~ *.bak *.log *.out *.tar *.orig
	@rm -rf html latex

run: ${PROGRAM}
	${MPIRUN} -np 4 ./$^

# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
	@echo "Tar the directory: " ${MY_DIR}
	@cd .. ;\
	tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
	cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *

zip: clean
	@echo "Zip the directory: " ${MY_DIR}
	@cd .. ;\
	zip -r ${MY_DIR}.zip ${MY_DIR} *default.mk ;\
	cd ${MY_DIR}

doc:
	doxygen Doxyfile

#########################################################################

.cpp.o:
	$(CXX) -c $(CXXFLAGS) -o $@ $<
#	 2>&1 | grep -v openmpi

# special: get rid of compiler warnings generated by the openmpi files
#.cpp.o:
#	@$(CXX) -c $(CXXFLAGS) $< 2>/tmp/t.txt || grep -sv openmpi /tmp/t.txt
#	|grep -sv openmpi

.c.o:
	$(CC) -c $(CFLAGS) -o $@ $<

.f.o:
	$(F77) -c $(FFLAGS) -o $@ $<

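# reminder: .cpp.o, .c.o and .f.o above are old-style suffix rules; for example,
# vector_operations.o is built from vector_operations.cpp via
#   $(CXX) -c $(CXXFLAGS) -o vector_operations.o vector_operations.cpp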

##################################################################################################
# some tools

# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in LINKFLAGS)
cache: ${PROGRAM}
	valgrind --tool=callgrind --simulate-cache=yes ./$^
#	kcachegrind callgrind.out.<pid> &
	kcachegrind `ls -1tr callgrind.out.* |tail -1`

# Check for wrong memory accesses, memory leaks, ...
# use smaller data sets
# no "-pg" in compile/link options
mem: ${PROGRAM}
	valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes mpirun -np 4 ./$^
# Graphical interface:
#	valkyrie

# Simple run-time profiling of your code
# CXXFLAGS  += -g -pg
# LINKFLAGS += -pg
prof: ${PROGRAM}
	perf record ./$^
	perf report
#	gprof -b ./$^ > gp.out
#	kprof -f gp.out -p gprof &

# Trace your heap:
#>  heaptrack ./main.GCC_
#>  heaptrack_gui heaptrack.main.GCC_.<pid>.gz
heap: ${PROGRAM}
	heaptrack ./$^ 11
	heaptrack_gui `ls -1tr heaptrack.$^.* |tail -1` &

codecheck: $(SOURCES)
	cppcheck --enable=all --inconclusive --std=c++17 --suppress=missingIncludeSystem $^


########################################################################
# get the detailed status of all optimization flags
info:
	echo "detailed status of all optimization flags"
	$(CXX) --version
	$(CXX) -Q $(CXXFLAGS) --help=optimizers
54
ex7/ex7_3/ex7_3/Makefile
Executable file
@ -0,0 +1,54 @@
#
# use GNU-Compiler tools
COMPILER=GCC_
# COMPILER=GCC_SEQ_
# alternatively, set it from the shell:
#   export COMPILER=GCC_
# or pass it on the make command line:
#   make COMPILER=GCC_

MAIN = main
SOURCES = ${MAIN}.cpp vector_operations.cpp

OBJECTS = $(SOURCES:.cpp=.o)

PROGRAM = ${MAIN}.${COMPILER}

# uncomment the next two lines for debugging and detailed performance analysis
CXXFLAGS += -g
# -DNDEBUG
# -pg slows down the code on my laptop when using CLANG_
LINKFLAGS += -g
#-pg
#CXXFLAGS += -Q --help=optimizers
#CXXFLAGS += -fopt-info

include ../${COMPILER}default.mk
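# for reference: with COMPILER=GCC_ and MAIN=main, PROGRAM expands to main.GCC_,
# and the include line above pulls in ../GCC_default.mk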

#############################################################################
# additional specific cleaning in this directory
clean_all::
	@rm -f t.dat*


#############################################################################
# special testing
# NPROCS = 4
#
TFILE = t.dat
# TTMP = t.tmp
#
graph: $(PROGRAM)
#	@rm -f $(TFILE).*
# next two lines only sequentially
	./$(PROGRAM)
	@mv $(TFILE).000 $(TFILE)
#	$(MPIRUN) $(MPIFLAGS) -np $(NPROCS) $(PROGRAM)
#	@echo " "; echo "Manipulate data for graphics."; echo " "
#	@cat $(TFILE).* > $(TTMP)
#	@sort -b -k 2 $(TTMP) -o $(TTMP).1
#	@sort -b -k 1 $(TTMP).1 -o $(TTMP).2
#	@awk -f nl.awk $(TTMP).2 > $(TFILE)
#	@rm -f $(TTMP).* $(TTMP) $(TFILE).*
#
	-gnuplot jac.dem
107
ex7/ex7_3/ex7_3/main.cpp
Normal file
@ -0,0 +1,107 @@
#include <iostream>
#include <mpi.h>
#include <vector>

#include "vector_operations.h"

using namespace std;


int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);               // Initializes the MPI execution environment
    MPI_Comm const icomm(MPI_COMM_WORLD);
    int myrank;
    MPI_Comm_rank(icomm, &myrank);

    int n = 20;
    vector<double> x(n);
    vector<double> y = x;
    for (int i = 0; i < n; ++i)
    {
        x[i] = myrank*100 + (i % 5)*10 + i;
        y[i] = 1.0/(x[i]);
    }

    if (myrank == 0)  // so the scalar product is well defined (avoid division by 0)
        y[0] = 0;


    // -------------------- E5 --------------------
    if (myrank == 0) cout << "-------------------- E5 --------------------" << endl;
    DebugVector(x, icomm);

    cout.flush();
    MPI_Barrier(icomm);

    // -------------------- E6 --------------------
    if (myrank == 0) cout << "-------------------- E6 --------------------" << endl;
    double scalar_product = par_scalar(x, y, icomm);

    if (myrank == 0)
    {
        cout << "<x,y> = " << scalar_product << endl << endl;
    }

    cout.flush();
    MPI_Barrier(icomm);
    // -------------------- E7 --------------------
    if (myrank == 0) cout << "-------------------- E7 --------------------" << endl;
    double xmin, xmax;
    par_minmax(x, xmin, xmax, icomm);

    if (myrank == 0)
    {
        cout << "Global min: " << xmin << endl;
        cout << "Global max: " << xmax << endl << endl;
    }

    cout.flush();
    MPI_Barrier(icomm);

    // -------------------- E8 --------------------
    if (myrank == 0) cout << "-------------------- E8 --------------------" << endl;
    vector<double> x_new(n);

    cout.flush();
    MPI_Barrier(icomm);
    // All to all
    if (myrank == 0) cout << "----- All to all -----" << endl;
    auto sendbuf = x.data();
    int sendcount = 5;
    auto recvbuf = x_new.data();
    int recvcount = 5;
    MPI_Alltoall(sendbuf, sendcount, MPI_DOUBLE, recvbuf, recvcount, MPI_DOUBLE, icomm);

    DebugVector(x_new, icomm);

    cout.flush();
    MPI_Barrier(icomm);
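    // note (added explanation): with 4 processes, n = 20 and sendcount = recvcount = 5,
    // rank r sends x[5k..5k+4] to rank k and receives the r-th 5-element block from
    // every rank, so x_new collects one block per process in rank order
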
    // All to all v
    if (myrank == 0) cout << "----- All to all v -----" << endl;
    int sendcounts[4] = {5, 5, 5, 5};
    int senddispls[4] = {0, 5, 10, 15};
    int rcvcounts[4]  = {5, 5, 5, 5};
    int rcvdispls[4]  = {0, 5, 10, 15};
    MPI_Alltoallv(x.data(), sendcounts, senddispls, MPI_DOUBLE, x_new.data(), rcvcounts, rcvdispls, MPI_DOUBLE, icomm);

    DebugVector(x_new, icomm);

    cout.flush();
    MPI_Barrier(icomm);
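    // note (added explanation): the counts/displacements are hard-coded for exactly
    // 4 processes; with these uniform blocks of 5 the call is equivalent to the
    // MPI_Alltoall above, but Alltoallv would also allow varying block sizes
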
    // All to all (in place), sendcount and sendtype are ignored
    if (myrank == 0) cout << "----- All to all (in place) -----" << endl;
    MPI_Alltoall(MPI_IN_PLACE, sendcount, MPI_DOUBLE, x.data(), recvcount, MPI_DOUBLE, icomm);

    DebugVector(x, icomm);
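
    // note (added explanation): with MPI_IN_PLACE as the send buffer, the data is
    // taken from and overwritten in the receive buffer x, so the previous content
    // of x is replaced by the exchanged blocks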

    MPI_Finalize();  // Terminates MPI execution environment

    return 0;
}
110
ex7/ex7_3/ex7_3/vector_operations.cpp
Normal file
@ -0,0 +1,110 @@
#include "vector_operations.h"
#include <cassert>
#include <cfloat>
#include <iostream>    // cout/cin are used below

void DebugVector(const vector<double> &xin, const MPI_Comm &icomm)
{
    int myrank, numprocs;
    MPI_Comm_rank(icomm, &myrank);    // my MPI-rank
    MPI_Comm_size(icomm, &numprocs);  // #MPI processes
    int ierr;

    int n = xin.size();

    int chosen_process;
    for (int k = 0; k < numprocs; ++k)
    {
        MPI_Barrier(icomm);

        if (myrank == 0)
        {
            cout << "Choose next process: ";
            cin >> chosen_process;
        }
        ierr = MPI_Bcast(&chosen_process, 1, MPI_INT, 0, icomm);  // broadcast value of "chosen_process" to all processes
        assert(ierr == 0);

        MPI_Barrier(icomm);

        if (chosen_process == myrank)
        {
            for (int i = 0; i < n; ++i)
            {
                cout << "x_" << i << " = " << xin[i] << "\t(Process " << myrank << ")" << endl;
            }
            cout.flush();
        }
    }
    return;
}


double par_scalar(const vector<double> &x, const vector<double> &y, const MPI_Comm &icomm)
{
    int n = x.size();
    assert(n == (int)y.size());

    double sum = 0.0;
    double local_sum = 0.0;

    for (int i = 0; i < n; ++i)
    {
        local_sum += x[i]*y[i];
    }

    int ierr = MPI_Allreduce(&local_sum, &sum, 1, MPI_DOUBLE, MPI_SUM, icomm);  // reduce local sums to global sum
    assert(ierr == 0);

    return sum;
}

void par_minmax(const vector<double> &x, double &global_min, double &global_max, const MPI_Comm &icomm)
{
    int myrank, numprocs;
    MPI_Comm_rank(icomm, &myrank);    // my MPI-rank
    MPI_Comm_size(icomm, &numprocs);  // #MPI processes

    int n = x.size();

    double local_min = DBL_MAX;
    double local_max = -DBL_MAX;

    for (int i = 0; i < n; ++i)
    {
        if (x[i] < local_min)
            local_min = x[i];
        if (x[i] > local_max)
            local_max = x[i];
    }

    vector<double> local_mins(numprocs);
    vector<double> local_maxs(numprocs);
    MPI_Gather(&local_min, 1, MPI_DOUBLE, local_mins.data(), 1, MPI_DOUBLE, 0, icomm);
    MPI_Gather(&local_max, 1, MPI_DOUBLE, local_maxs.data(), 1, MPI_DOUBLE, 0, icomm);

    if (myrank == 0)
    {
        global_min = DBL_MAX;
        global_max = -DBL_MAX;

        for (int i = 0; i < numprocs; ++i)
        {
            if (local_mins[i] < global_min)
                global_min = local_mins[i];
            if (local_maxs[i] > global_max)
                global_max = local_maxs[i];
        }
    }

    MPI_Bcast(&global_min, 1, MPI_DOUBLE, 0, icomm);  // make sure every process is up to date
    MPI_Bcast(&global_max, 1, MPI_DOUBLE, 0, icomm);

    return;
}
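
// Added sketch (not called anywhere; the function name is made up for illustration):
// the gather-and-broadcast in par_minmax can be replaced by one MPI_Allreduce per
// extremum using the predefined MPI_MIN/MPI_MAX reduction operations.
void par_minmax_allreduce(const vector<double> &x, double &global_min, double &global_max, const MPI_Comm &icomm)
{
    double local_min = DBL_MAX;    // local extrema, computed as in par_minmax
    double local_max = -DBL_MAX;
    for (int i = 0; i < (int)x.size(); ++i)
    {
        if (x[i] < local_min)
            local_min = x[i];
        if (x[i] > local_max)
            local_max = x[i];
    }
    // one collective per extremum delivers the global result on all processes
    MPI_Allreduce(&local_min, &global_min, 1, MPI_DOUBLE, MPI_MIN, icomm);
    MPI_Allreduce(&local_max, &global_max, 1, MPI_DOUBLE, MPI_MAX, icomm);
}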
10
ex7/ex7_3/ex7_3/vector_operations.h
Normal file
@ -0,0 +1,10 @@
#pragma once   // guard against multiple inclusion

#include <mpi.h>
#include <vector>

using namespace std;

void DebugVector(const vector<double> &xin, const MPI_Comm &icomm);

double par_scalar(const vector<double> &x, const vector<double> &y, const MPI_Comm &icomm);

void par_minmax(const vector<double> &x, double &global_min, double &global_max, const MPI_Comm &icomm);