Upload ex6 and ex7

commit 6c2d96ff4d by jakob.schratter, 2026-01-04 20:15:55 +01:00
44 changed files with 15291 additions and 0 deletions

ex6/adaptivity_schemes.py (Normal file, +83 lines)

@@ -0,0 +1,83 @@
import numpy as np
def flux_jumps(mesh, u):
N = len(mesh) - 1 # number of elements
jumps = np.zeros(N + 1) # flux jumps at all N+1 nodes; only the N-1 interior nodes get nonzero values
for i in range(1, N):
upper = (u[i + 1] - u[i])/(mesh[i + 1] - mesh[i])
lower = (u[i] - u[i - 1])/(mesh[i] - mesh[i - 1])
jumps[i] = upper - lower
return jumps
def residual_errors(mesh, u):
N = len(mesh) - 1 # number of elements
errors = np.zeros(N)
jumps = flux_jumps(mesh, u)
for i in range(N):
errors[i] = np.sqrt((jumps[i]**2 + jumps[i + 1]**2)/2) # Braess (8.10)
#print("errors:\n", errors)
return errors
def adapt_h(mesh, u, alpha):
N = len(mesh) - 1 # number of elements
errors = residual_errors(mesh, u)
threshold = alpha * abs(max(errors))
# refine mesh
refined_mesh = [mesh[0]]
for i in range(N):
if abs(errors[i]) <= threshold:
refined_mesh.append(mesh[i + 1])
else:
refined_mesh.append(mesh[i] + (mesh[i + 1] - mesh[i])/2)
refined_mesh.append(mesh[i + 1])
#print("refined mesh:\n", refined_mesh)
return refined_mesh
def adapt_r(mesh, u):
N = len(mesh) - 1 # number of elements
rho = np.abs(flux_jumps(mesh, u)) # rho ... mesh density function
p = np.zeros(N) # piecewise constant function on the mesh elements
for i in range(N):
p[i] = (rho[i] + rho[i + 1])/2
P = np.zeros(N + 1) # \int_0^{x_j} p(x) dx for j = 1,...,N
for j in range(1, N + 1):
h_j = mesh[j] - mesh[j - 1]
P[j] = P[j - 1] + h_j*p[j - 1] # add integral over j-th interval
moved_mesh = np.zeros(N + 1)
moved_mesh[0] = mesh[0]
moved_mesh[-1] = mesh[-1]
for j in range(1, N): # calculate the new nodes with De Boor's algorithm
xi_j = j/N
k = np.searchsorted(P, xi_j*P[-1], side="left") # smallest index k with P[k - 1] < xi_j*P[-1] <= P[k]
assert(P[k - 1] < xi_j*P[-1])
assert(xi_j*P[-1] <= P[k])
moved_mesh[j] = mesh[k - 1] + (xi_j*P[-1] - P[k - 1])/p[k - 1]
print("origin_mesh[j] =", mesh[j])
print("moved_mesh[j] =", moved_mesh[j])
print("done\n")
#print("moved mesh:\n", moved_mesh)
return moved_mesh
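# ------------------------------------------------------------------------------
# Annotation (not part of the submitted file): a minimal usage sketch of the two
# schemes, assuming a hand-made piecewise-linear u with a kink at x = 0.5.
# adapt_h bisects every element whose indicator exceeds alpha*max(errors);
# adapt_r keeps the number of elements fixed and moves the nodes so that the
# density rho (the absolute flux jumps) is roughly equidistributed, i.e. the
# integral of rho over each new element is about the same (De Boor).
# The names demo_mesh/demo_u are ours and purely illustrative.
if __name__ == "__main__":
    demo_mesh = np.linspace(0.0, 1.0, 6)
    demo_u = np.minimum(demo_mesh, 0.5)   # kink at x = 0.5 -> flux jump there
    print("h-refined mesh:", adapt_h(demo_mesh, demo_u, alpha=0.5))
    print("r-adapted mesh:", adapt_r(demo_mesh, demo_u))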

ex6/ex_6A.py (Normal file, +139 lines)

@@ -0,0 +1,139 @@
import numpy as np
import scipy.integrate as integrate
import matplotlib.pyplot as plt
import adaptivity_schemes
np.set_printoptions(precision=2)
def Solve_6A(mesh, p):
N = len(mesh) - 1 # number of elements
f = lambda x : 2*p**3*x/((p**2*x**2 + 1)**2)
g_b = p/(p**2 + 1)
A = np.zeros((N + 1, N + 1))
f_vec = np.zeros(N + 1)
for i in range(1, N + 1):
h = mesh[i] - mesh[i - 1]
a_11 = 1./h
a_12 = -1./h
a_21 = -1./h
a_22 = 1./h
A[i - 1, i - 1] += a_11
A[i - 1, i] += a_12
A[i, i - 1] += a_21
A[i, i] += a_22
phi_lower = lambda x : (mesh[i] - x)/h
f_vec[i-1] += integrate.quad(lambda x : f(x)*phi_lower(x), mesh[i - 1], mesh[i])[0]
phi_upper = lambda x : (x - mesh[i - 1])/h
f_vec[i] += integrate.quad(lambda x : f(x)*phi_upper(x), mesh[i - 1], mesh[i])[0]
# take Neumann data into account
A[N, N] += 0
f_vec[N] += g_b
# take Dirichlet data into account
u_g = np.zeros(N + 1)
u_g[0] = -np.arctan(p)
#print("u_g =\n", u_g)
# remove first row of A
A_g = A[1:N+1, :]
#print("A_g =\n", A_g)
# remove first row of f_vec
f_g = f_vec[1:N+1]
# assemble RHS with dirichlet data
f_g -= A_g.dot(u_g)
#print("f_g =\n", f_g)
# matrix for the inner nodes (excluding nodes with dirichlet bcs)
A_0 = A[1:N+1, 1:N+1]
#print(A_0)
# solve for u_0 (free dofs)
u_0 = np.linalg.solve(A_0, f_g)
# assemble "u = u_0 + u_g"
u = np.concatenate([[u_g[0]], u_0])
return u
p = 100
########## h-adaptivity ##########
N = 5 # number of elements
mesh = np.linspace(-1, 1, N + 1)
u = Solve_6A(mesh, p)
plt.plot(mesh, u, '-o')
plt.grid()
plt.xlabel('x')
plt.ylabel('u_h(x)')
plt.title("h-adaptivity")
N_vec = ["0 refinements, " + str(N) + " elements"]
refinements = 4 # number of refinements
for i in range(refinements):
mesh = adaptivity_schemes.adapt_h(mesh, u, 0.7)
u = Solve_6A(mesh, p)
plt.plot(mesh, u, '-o')
N_vec.append(str(i + 1) + " refinements, " + str(len(mesh) - 1) + " elements")
# plot exact solution
x = np.linspace(-1, 1, 50)
plt.plot(x, np.arctan(p*x))
N_vec.append("exact")
plt.legend(N_vec)
plt.show()
# ########## r-adaptivity ##########
N = 5
mesh = np.linspace(-1, 1, N + 1)
u = Solve_6A(mesh, p)
plt.plot(mesh, u, '-o')
title = "r-adaptivity with " + str(N) + " elements"
plt.title(title)
adaptations_vec = ["0 adaptations"]
adaptations = 5 # number of iterations
for i in range(adaptations):
mesh = adaptivity_schemes.adapt_r(mesh, u)
u = Solve_6A(mesh, p)
plt.plot(mesh, u, '-o')
adaptations_vec.append(str(i + 1) + " adaptations")
# plot exact solution
x = np.linspace(-1, 1, 50)
plt.plot(x, np.arctan(p*x))
adaptations_vec.append("exact")
plt.legend(adaptations_vec)
plt.xlabel('x')
plt.ylabel('u_h(x)')
plt.grid()
plt.show()
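# ------------------------------------------------------------------------------
# Annotation (not part of the submitted file): Solve_6A assembles linear finite
# elements for  -u''(x) = f(x)  on (-1, 1) with
#   f(x)  = 2*p**3*x / (p**2*x**2 + 1)**2
#   u(-1) = -arctan(p)        (Dirichlet, imposed via u_g and eliminated)
#   u'(1) = p / (p**2 + 1)    (Neumann, added to f_vec[N])
# whose exact solution is u(x) = arctan(p*x).  For p = 100 nearly all of the
# variation lies in a layer of width ~1/p around x = 0, which is where both
# adaptivity runs are expected to cluster the nodes.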

ex6/ex_6B.py (Normal file, +120 lines)

@@ -0,0 +1,120 @@
import numpy as np
import matplotlib.pyplot as plt
import adaptivity_schemes
np.set_printoptions(precision=2)
def lam_func(x):
n = len(x)
lam_vec = np.zeros(n)
for i in range(n):
if (x[i] > 1/np.sqrt(2)):
lam_vec[i] = 10
else:
lam_vec[i] = 1
return lam_vec
def Solve_6B(mesh):
N = len(mesh) - 1 # number of elements
A = np.zeros((N + 1, N + 1))
lam_vec = lam_func(mesh)
for i in range(1, N + 1):
h = mesh[i] - mesh[i - 1]
a_11 = lam_vec[i]/h
a_12 = -lam_vec[i]/h
a_21 = -lam_vec[i]/h
a_22 = lam_vec[i]/h
A[i - 1, i - 1] += a_11
A[i - 1, i] += a_12
A[i, i - 1] += a_21
A[i, i] += a_22
#print("A =\n", A)
# take dirichlet data into account
u_g = np.zeros(N + 1)
u_g[0] = 0
u_g[N] = 1
#print("u_g =\n", u_g)
# remove first and last row of A
A_g = A[1:N, :]
#print("A_g =\n", A_g)
# assemble RHS with dirichlet data
f = -A_g.dot(u_g)
#print(f)
# matrix for the inner nodes (excluding nodes with dirichlet bcs)
A_0 = A[1:N, 1:N]
#print(A_0)
# solve for u_0 (free dofs)
u_0 = np.linalg.solve(A_0, f)
# assemble "u = u_0 + u_g"
u = np.concatenate([[0], u_0, [1]])
#print("u =\n", u)
return u
########## h-adaptivity ##########
N = 2 # number of elements
mesh = np.linspace(0, 1, N + 1)
u = Solve_6B(mesh)
plt.plot(mesh, u, '-o')
plt.grid()
plt.xlabel('x')
plt.ylabel('u_h(x)')
plt.title("h-adaptivity")
N_vec = ["0 refinements, " + str(N) + " elements"]
refinements = 5 # number of refinements
for i in range(refinements):
mesh = adaptivity_schemes.adapt_h(mesh, lam_func(mesh)*u, 0.9)
u = Solve_6B(mesh)
plt.plot(mesh, u, '-o')
N_vec.append(str(i + 1) + " refinements, " + str(len(mesh) - 1) + " elements")
plt.legend(N_vec)
plt.show()
########## r-adaptivity ##########
N = 5
mesh = np.linspace(0, 1, N + 1)
u = Solve_6B(mesh)
plt.plot(mesh, u, '-o')
title = "r-adaptivity with " + str(N) + " elements"
plt.title(title)
adaptations_vec = ["0 adaptations"]
adaptations = 4 # number of iterations
for i in range(adaptations):
mesh = adaptivity_schemes.adapt_r(mesh, lam_func(mesh)*u)
u = Solve_6B(mesh)
plt.plot(mesh, u, '-o')
adaptations_vec.append(str(i + 1) + " adaptations")
plt.legend(adaptations_vec)
plt.xlabel('x')
plt.ylabel('u_h(x)')
plt.grid()
plt.show()
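# ------------------------------------------------------------------------------
# Annotation (not part of the submitted file): here lambda jumps from 1 to 10 at
# x = 1/sqrt(2), and the exact solution of (lambda*u')' = 0, u(0) = 0, u(1) = 1
# is piecewise linear with slopes proportional to 1/lambda on either side of the
# interface (the flux lambda*u' is constant).  The adaptivity drivers are fed
# lam_func(mesh)*u, a nodal approximation of the flux rather than u itself,
# presumably so that the physical kink of u at the interface does not by itself
# dominate the jump-based error indicator.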

ex6/ex_6C.py (Normal file, +127 lines)

@@ -0,0 +1,127 @@
import numpy as np
import matplotlib.pyplot as plt
import adaptivity_schemes
np.set_printoptions(precision=2)
def Solve_6C(mesh, p):
N = len(mesh) - 1 # number of elements
A = np.zeros((N + 1, N + 1))
for i in range(1, N + 1):
h = mesh[i] - mesh[i - 1]
a_11 = 1./h - p/2.
a_12 = -1./h + p/2.
a_21 = -1./h - p/2.
a_22 = 1./h + p/2.
A[i - 1, i - 1] += a_11
A[i - 1, i] += a_12
A[i, i - 1] += a_21
A[i, i] += a_22
#print("A =\n", A)
# take dirichlet data into account
u_g = np.zeros(N + 1)
u_g[0] = 0
u_g[N] = 1
#print("u_g =\n", u_g)
# remove first and last row of A
A_g = A[1:N, :]
#print("A_g =\n", A_g)
# assemble RHS with dirichlet data
f = -A_g.dot(u_g)
#print(f)
# matrix for the inner nodes (excluding nodes with dirichlet bcs)
A_0 = A[1:N, 1:N]
#print(A_0)
# solve for u_0 (free dofs)
u_0 = np.linalg.solve(A_0, f)
# assemble "u = u_0 + u_g"
u = np.concatenate([[0], u_0, [1]])
return u
p = 70
######### h-adaptivity ##########
N = 5 # number of elements
mesh = np.linspace(0, 1, N + 1)
u = Solve_6C(mesh, p)
plt.plot(mesh, u, '-o')
plt.grid()
plt.xlabel('x')
plt.ylabel('u_h(x)')
plt.title("h-adaptivity")
N_vec = ["0 refinements, " + str(N) + " elements"]
refinements = 4 # number of refinements
for i in range(refinements):
mesh = adaptivity_schemes.adapt_h(mesh, u, 0.7)
u = Solve_6C(mesh, p)
plt.plot(mesh, u, '-o')
N_vec.append(str(i + 1) + " refinements, " + str(len(mesh) - 1) + " elements")
# plot exact solution
x = np.linspace(0, 1, 50)
plt.plot(x, (np.exp(p*x) - 1.)/(np.exp(p) - 1.))
N_vec.append("exact")
plt.legend(N_vec)
plt.show()
########## r-adaptivity ##########
N = 10
mesh = np.linspace(0, 1, N + 1)
u = Solve_6C(mesh, p)
plt.plot(mesh, u, '-o')
title = "r-adaptivity with " + str(N) + " elements"
plt.title(title)
adaptations_vec = ["0 adaptations"]
adaptations = 4 # number of iterations
for i in range(adaptations):
mesh = adaptivity_schemes.adapt_r(mesh, u)
u = Solve_6C(mesh, p)
plt.plot(mesh, u, '-o')
adaptations_vec.append(str(i + 1) + " adaptations")
# plot exact solution
x = np.linspace(0, 1, 50)
plt.plot(x, (np.exp(p*x) - 1.)/(np.exp(p) - 1.))
adaptations_vec.append("exact")
plt.legend(adaptations_vec)
plt.xlabel('x')
plt.ylabel('u_h(x)')
plt.grid()
plt.show()
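# ------------------------------------------------------------------------------
# Annotation (not part of the submitted file): Solve_6C discretizes the
# convection-diffusion problem  -u'' + p*u' = 0  on (0, 1) with u(0) = 0 and
# u(1) = 1; the +/- p/2 entries in the element matrix are the standard Galerkin
# (central) treatment of the convection term.  The exact solution
# (exp(p*x) - 1)/(exp(p) - 1) has a boundary layer of width ~1/p at x = 1, and
# the Galerkin solution is only oscillation-free once the local Peclet number
# p*h/2 drops below roughly 1, which is what the adaptive meshes achieve near
# the layer.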

ex7/ex7_2/GCC_default.mk (Normal file, +154 lines)

@@ -0,0 +1,154 @@
# Basic Definitions for using the GNU compiler suite sequentially
# requires setting of COMPILER=GCC_
# start make as follows to avoid warnings caused by OpenMPI code
# make 2>&1 | grep -v openmpi
MPI_ROOT=/usr/bin/
CC = ${MPI_ROOT}mpicc
CXX = ${MPI_ROOT}mpicxx
F77 = ${MPI_ROOT}mpif77
LINKER = ${CXX}
# If 'mpirun ...' reports an error like "... not enough slots ...", use the option '--oversubscribe'
MPIRUN = ${MPI_ROOT}mpirun --oversubscribe -display-map
#MPIRUN = ${MPI_ROOT}mpiexec
# 2023, Oct 23: "WARNING: There is at least one non-excluded OpenFabrics device found,"
# solution according to https://github.com/open-mpi/ompi/issues/11063
MPIRUN += -mca btl ^openib
# KFU:sauron
CXXFLAGS += -I/software/boost/1_72_0/include
WARNINGS = -Wall -pedantic -Woverloaded-virtual -Wfloat-equal -Wshadow \
-Wredundant-decls -Wunreachable-code -Winline -fmax-errors=1
# WARNINGS += -Weffc++ -Wextra
# -Wno-pragmas
CXXFLAGS += -std=c++17 -ffast-math -O3 -march=native ${WARNINGS}
# -ftree-vectorizer-verbose=5 -DNDEBUG
# -ftree-vectorizer-verbose=2
# CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details
# CFLAGS = -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2
# info on vectorization
#VECTORIZE = -ftree-vectorize -fdump-tree-vect-blocks=foo.dump
#-fdump-tree-pre=stderr
VECTORIZE = -ftree-vectorize -fopt-info -ftree-vectorizer-verbose=5
#CXXFLAGS += ${VECTORIZE}
# -funroll-all-loops -msse3
#GCC -march=knl -march=broadwell -march=haswell
# for debugging purpose (save code)
# -fsanitize=leak # only one of the three can be used
# -fsanitize=address
# -fsanitize=thread
SANITARY = -fsanitize=address -fsanitize=undefined -fsanitize=null -fsanitize=return \
-fsanitize=bounds -fsanitize=alignment -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow \
-fsanitize=bool -fsanitize=enum -fsanitize=vptr
#CXXFLAGS += ${SANITARY}
#LINKFLAGS +=${SANITARY}
# OpenMP
CXXFLAGS += -fopenmp
LINKFLAGS += -fopenmp
default: ${PROGRAM}
${PROGRAM}: ${OBJECTS}
$(LINKER) $^ ${LINKFLAGS} -o $@
@echo
@echo "Start with : $(MPIRUN) -np num_proc $(MPIFLAGS) $(PROGRAM)"
@echo
clean:
@rm -f ${PROGRAM} ${OBJECTS} gmon.out
clean_all:: clean
@rm -f *_ *~ *.bak *.log *.out *.tar *.orig
@rm -rf html latex
run: ${PROGRAM}
${MPIRUN} -np 8 ./$^
# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *
zip: clean
@echo "Zip the directory: " ${MY_DIR}
@cd .. ;\
zip -r ${MY_DIR}.zip ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
doc:
doxygen Doxyfile
#########################################################################
.cpp.o:
$(CXX) -c $(CXXFLAGS) -o $@ $<
# 2>&1 | grep -v openmpi
# special: get rid of compiler warnings generated by OpenMPI files
#.cpp.o:
# @$(CXX) -c $(CXXFLAGS) $< 2>/tmp/t.txt || grep -sv openmpi /tmp/t.txt
# |grep -sv openmpi
.c.o:
$(CC) -c $(CFLAGS) -o $@ $<
.f.o:
$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# some tools
# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags)
cache: ${PROGRAM}
valgrind --tool=callgrind --simulate-cache=yes ./$^
# kcachegrind callgrind.out.<pid> &
kcachegrind `ls -1tr callgrind.out.* |tail -1`
# Check for wrong memory accesses, memory leaks, ...
# use smaller data sets
# no "-pg" in compile/link options
mem: ${PROGRAM}
valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes mpirun -np 8 ./$^
# Graphical interface
# valkyrie
# Simple run time profiling of your code
# CXXFLAGS += -g -pg
# LINKFLAGS += -pg
prof: ${PROGRAM}
perf record ./$^
perf report
# gprof -b ./$^ > gp.out
# kprof -f gp.out -p gprof &
#Trace your heap:
#> heaptrack ./main.GCC_
#> heaptrack_gui heaptrack.main.GCC_.<pid>.gz
heap: ${PROGRAM}
heaptrack ./$^ 11
heaptrack_gui `ls -1tr heaptrack.$^.* |tail -1` &
codecheck: $(SOURCES)
cppcheck --enable=all --inconclusive --std=c++17 --suppress=missingIncludeSystem $^
########################################################################
# get the detailed status of all optimization flags
info:
echo "detailed status of all optimization flags"
$(CXX) --version
$(CXX) -Q $(CXXFLAGS) --help=optimizers

ex7/ex7_2/ex7_2/Makefile (Executable file, +54 lines)

@@ -0,0 +1,54 @@
#
# use GNU-Compiler tools
COMPILER=GCC_
# COMPILER=GCC_SEQ_
# alternatively from the shell
# export COMPILER=GCC_
# or, alternatively from the shell
# make COMPILER=GCC_
MAIN = main
SOURCES = ${MAIN}.cpp greetings.cpp
OBJECTS = $(SOURCES:.cpp=.o)
PROGRAM = ${MAIN}.${COMPILER}
# uncomment the next two lines for debugging and detailed performance analysis
CXXFLAGS += -g
# -DNDEBUG
# -pg slows down the code on my laptop when using CLANG_
LINKFLAGS += -g
#-pg
#CXXFLAGS += -Q --help=optimizers
#CXXFLAGS += -fopt-info
include ../${COMPILER}default.mk
#############################################################################
# additional specific cleaning in this directory
clean_all::
@rm -f t.dat*
#############################################################################
# special testing
# NPROCS = 4
#
TFILE = t.dat
# TTMP = t.tmp
#
graph: $(PROGRAM)
# @rm -f $(TFILE).*
# next two lines only sequentially
./$(PROGRAM)
@mv $(TFILE).000 $(TFILE)
# $(MPIRUN) $(MPIFLAGS) -np $(NPROCS) $(PROGRAM)
# @echo " "; echo "Manipulate data for graphics."; echo " "
# @cat $(TFILE).* > $(TTMP)
# @sort -b -k 2 $(TTMP) -o $(TTMP).1
# @sort -b -k 1 $(TTMP).1 -o $(TTMP).2
# @awk -f nl.awk $(TTMP).2 > $(TFILE)
# @rm -f $(TTMP).* $(TTMP) $(TFILE).*
#
-gnuplot jac.dem

(new file; name not shown in this view)

@@ -0,0 +1,89 @@
#include "greetings.h"
#include <cassert>
#include <cstring>
#include <iostream>
#include <mpi.h> // MPI
#include <string>
using namespace std;
// see http://www.open-mpi.org/doc/current
// for details on MPI functions
void greetings(MPI_Comm const &icomm)
{
int myrank, numprocs;
MPI_Comm_rank(icomm, &myrank); // my MPI-rank
MPI_Comm_size(icomm, &numprocs); // #MPI processes
char *name = new char [MPI_MAX_PROCESSOR_NAME],
*chbuf = new char [MPI_MAX_PROCESSOR_NAME];
int reslen, ierr;
MPI_Get_processor_name( name, &reslen);
if (0==myrank) {
cout << " " << myrank << " runs on " << name << endl;
for (int i = 1; i < numprocs; ++i) {
MPI_Status stat;
stat.MPI_ERROR = 0; // M U S T be initialized!!
//ierr = MPI_Recv(chbuf, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, MPI_ANY_SOURCE, MPI_ANY_TAG, icomm, &stat);
ierr = MPI_Recv(chbuf, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, i, i, icomm, &stat);
assert(0==ierr);
cout << " " << stat.MPI_SOURCE << " runs on " << chbuf;
int count;
MPI_Get_count(&stat, MPI_CHAR, &count); // size of received data
cout << " (length: " << count << " )" << endl;
// stat.Get_error() // Error code
}
}
else {
int dest = 0;
ierr = MPI_Send(name, strlen(name) + 1, MPI_CHAR, dest, myrank, icomm);
assert(0==ierr);
}
delete [] chbuf;
delete [] name;
return;
}
void greetings_cpp(MPI_Comm const &icomm)
{
int myrank, numprocs;
MPI_Comm_rank(icomm, &myrank); // my MPI-rank
MPI_Comm_size(icomm, &numprocs); // #MPI processes
string name(MPI_MAX_PROCESSOR_NAME,'#'), // C++
recvbuf(MPI_MAX_PROCESSOR_NAME,'#'); // C++: receive buffer, don't change size
int reslen, ierr;
MPI_Get_processor_name(name.data(), &reslen);
name.resize(reslen); // C++
if (0==myrank) {
cout << " " << myrank << " runs on " << name << endl;
for (int i = 1; i < numprocs; ++i) {
MPI_Status stat;
stat.MPI_ERROR = 0; // M U S T be initialized!!
//ierr = MPI_Recv(recvbuf.data(), MPI_MAX_PROCESSOR_NAME, MPI_CHAR, MPI_ANY_SOURCE, MPI_ANY_TAG, icomm, &stat);
ierr = MPI_Recv(recvbuf.data(), MPI_MAX_PROCESSOR_NAME, MPI_CHAR, i, i, icomm, &stat);
assert(0==ierr);
int count;
MPI_Get_count(&stat, MPI_CHAR, &count); // size of received data
string const chbuf(recvbuf,0,count); // C++
cout << " " << stat.MPI_SOURCE << " runs on " << chbuf;
cout << " (length: " << count << " )" << endl;
// stat.Get_error() // Error code
}
}
else {
int dest = 0;
ierr = MPI_Send(name.data(), name.size(), MPI_CHAR, dest, myrank, icomm);
assert(0==ierr);
}
return;
}

(new file; name not shown in this view)

@@ -0,0 +1,11 @@
// general header for all functions in directory
#ifndef GREETINGS_FILE
#define GREETINGS_FILE
#include <mpi.h>
void greetings(MPI_Comm const &icomm);
void greetings_cpp(MPI_Comm const &icomm);
#endif

ex7/ex7_2/ex7_2/main.cpp (Normal file, +40 lines)

@@ -0,0 +1,40 @@
#include <iostream>
#include <mpi.h>
#include "greetings.h"
using namespace std;
int main(int argc , char **argv )
{
// -------------------- E2 --------------------
MPI_Init(&argc, &argv); // Initializes the MPI execution environment
// -------------------- E1 --------------------
MPI_Comm const icomm(MPI_COMM_WORLD); // MPI_COMM_WORLD ... all processes
// -------------------- E3 --------------------
int rank;
MPI_Comm_rank(icomm, &rank); // Determines the rank of the calling process in the communicator.
if (rank == 0)
{
int size;
MPI_Comm_size(icomm, &size); // Returns the size of the group associated with a communicator.
cout << "Process " << rank << " says: " << size << " processes are running." << endl;
}
// To vary number of processes: changed number in GCC_default.mk file
// alternatively, call in terminal:
// /usr/bin/mpirun --oversubscribe -display-map -mca btl ^openib -np 4 ./main.GCC_
// or
// /usr/bin/mpirun --oversubscribe -display-map -mca btl ^openib -np 8 ./main.GCC_
// -------------------- E4 --------------------
greetings_cpp(MPI_COMM_WORLD); // greetings with sorted output
MPI_Finalize(); // Terminates MPI execution environment
return 0;
}

ex7/ex7_3/GCC_default.mk (Normal file, +154 lines)

@@ -0,0 +1,154 @@
# Basic Definitions for using the GNU compiler suite sequentially
# requires setting of COMPILER=GCC_
# start make as follows to avoid warnings caused by OpenMPI code
# make 2>&1 | grep -v openmpi
MPI_ROOT=/usr/bin/
CC = ${MPI_ROOT}mpicc
CXX = ${MPI_ROOT}mpicxx
F77 = ${MPI_ROOT}mpif77
LINKER = ${CXX}
# If 'mpirun ...' reports an error like "... not enough slots ...", use the option '--oversubscribe'
MPIRUN = ${MPI_ROOT}mpirun --oversubscribe -display-map
#MPIRUN = ${MPI_ROOT}mpiexec
# 2023, Oct 23: "WARNING: There is at least one non-excluded OpenFabrics device found,"
# solution according to https://github.com/open-mpi/ompi/issues/11063
MPIRUN += -mca btl ^openib
# KFU:sauron
CXXFLAGS += -I/software/boost/1_72_0/include
WARNINGS = -Wall -pedantic -Woverloaded-virtual -Wfloat-equal -Wshadow \
-Wredundant-decls -Wunreachable-code -Winline -fmax-errors=1
# WARNINGS += -Weffc++ -Wextra
# -Wno-pragmas
CXXFLAGS += -std=c++17 -ffast-math -O3 -march=native ${WARNINGS}
# -ftree-vectorizer-verbose=5 -DNDEBUG
# -ftree-vectorizer-verbose=2
# CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details
# CFLAGS = -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2
# info on vectorization
#VECTORIZE = -ftree-vectorize -fdump-tree-vect-blocks=foo.dump
#-fdump-tree-pre=stderr
VECTORIZE = -ftree-vectorize -fopt-info -ftree-vectorizer-verbose=5
#CXXFLAGS += ${VECTORIZE}
# -funroll-all-loops -msse3
#GCC -march=knl -march=broadwell -march=haswell
# for debugging purpose (save code)
# -fsanitize=leak # only one of the three can be used
# -fsanitize=address
# -fsanitize=thread
SANITARY = -fsanitize=address -fsanitize=undefined -fsanitize=null -fsanitize=return \
-fsanitize=bounds -fsanitize=alignment -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow \
-fsanitize=bool -fsanitize=enum -fsanitize=vptr
#CXXFLAGS += ${SANITARY}
#LINKFLAGS +=${SANITARY}
# OpenMP
CXXFLAGS += -fopenmp
LINKFLAGS += -fopenmp
default: ${PROGRAM}
${PROGRAM}: ${OBJECTS}
$(LINKER) $^ ${LINKFLAGS} -o $@
@echo
@echo "Start with : $(MPIRUN) -np num_proc $(MPIFLAGS) $(PROGRAM)"
@echo
clean:
@rm -f ${PROGRAM} ${OBJECTS} gmon.out
clean_all:: clean
@rm -f *_ *~ *.bak *.log *.out *.tar *.orig
@rm -rf html latex
run: ${PROGRAM}
${MPIRUN} -np 4 ./$^
# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *
zip: clean
@echo "Zip the directory: " ${MY_DIR}
@cd .. ;\
zip -r ${MY_DIR}.zip ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
doc:
doxygen Doxyfile
#########################################################################
.cpp.o:
$(CXX) -c $(CXXFLAGS) -o $@ $<
# 2>&1 | grep -v openmpi
# special: get rid of compiler warnings generated by OpenMPI files
#.cpp.o:
# @$(CXX) -c $(CXXFLAGS) $< 2>/tmp/t.txt || grep -sv openmpi /tmp/t.txt
# |grep -sv openmpi
.c.o:
$(CC) -c $(CFLAGS) -o $@ $<
.f.o:
$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# some tools
# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags)
cache: ${PROGRAM}
valgrind --tool=callgrind --simulate-cache=yes ./$^
# kcachegrind callgrind.out.<pid> &
kcachegrind `ls -1tr callgrind.out.* |tail -1`
# Check for wrong memory accesses, memory leaks, ...
# use smaller data sets
# no "-pg" in compile/link options
mem: ${PROGRAM}
valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes mpirun -np 4 ./$^
# Graphical interface
# valkyrie
# Simple run time profiling of your code
# CXXFLAGS += -g -pg
# LINKFLAGS += -pg
prof: ${PROGRAM}
perf record ./$^
perf report
# gprof -b ./$^ > gp.out
# kprof -f gp.out -p gprof &
#Trace your heap:
#> heaptrack ./main.GCC_
#> heaptrack_gui heaptrack.main.GCC_.<pid>.gz
heap: ${PROGRAM}
heaptrack ./$^ 11
heaptrack_gui `ls -1tr heaptrack.$^.* |tail -1` &
codecheck: $(SOURCES)
cppcheck --enable=all --inconclusive --std=c++17 --suppress=missingIncludeSystem $^
########################################################################
# get the detailed status of all optimization flags
info:
echo "detailed status of all optimization flags"
$(CXX) --version
$(CXX) -Q $(CXXFLAGS) --help=optimizers

ex7/ex7_3/ex7_3/Makefile (Executable file, +54 lines)

@@ -0,0 +1,54 @@
#
# use GNU-Compiler tools
COMPILER=GCC_
# COMPILER=GCC_SEQ_
# alternatively from the shell
# export COMPILER=GCC_
# or, alternatively from the shell
# make COMPILER=GCC_
MAIN = main
SOURCES = ${MAIN}.cpp vector_operations.cpp
OBJECTS = $(SOURCES:.cpp=.o)
PROGRAM = ${MAIN}.${COMPILER}
# uncomment the next two lines for debugging and detailed performance analysis
CXXFLAGS += -g
# -DNDEBUG
# -pg slows down the code on my laptop when using CLANG_
LINKFLAGS += -g
#-pg
#CXXFLAGS += -Q --help=optimizers
#CXXFLAGS += -fopt-info
include ../${COMPILER}default.mk
#############################################################################
# additional specific cleaning in this directory
clean_all::
@rm -f t.dat*
#############################################################################
# special testing
# NPROCS = 4
#
TFILE = t.dat
# TTMP = t.tmp
#
graph: $(PROGRAM)
# @rm -f $(TFILE).*
# next two lines only sequentially
./$(PROGRAM)
@mv $(TFILE).000 $(TFILE)
# $(MPIRUN) $(MPIFLAGS) -np $(NPROCS) $(PROGRAM)
# @echo " "; echo "Manipulate data for graphics."; echo " "
# @cat $(TFILE).* > $(TTMP)
# @sort -b -k 2 $(TTMP) -o $(TTMP).1
# @sort -b -k 1 $(TTMP).1 -o $(TTMP).2
# @awk -f nl.awk $(TTMP).2 > $(TFILE)
# @rm -f $(TTMP).* $(TTMP) $(TFILE).*
#
-gnuplot jac.dem

ex7/ex7_3/ex7_3/main.cpp (Normal file, +107 lines)

@@ -0,0 +1,107 @@
#include <iostream>
#include <mpi.h>
#include <vector>
#include "vector_operations.h"
using namespace std;
int main(int argc , char **argv)
{
MPI_Init(&argc, &argv); // Initializes the MPI execution environment
MPI_Comm const icomm(MPI_COMM_WORLD);
int myrank;
MPI_Comm_rank(icomm, &myrank);
int n = 20;
vector<double> x(n);
vector<double> y = x;
for (int i = 0; i < n; ++i)
{
x[i] = myrank*100 + (i % 5)*10 + i;
y[i] = 1.0/(x[i]);
}
if(myrank == 0) // x[0] == 0 on rank 0, so y[0] = 1.0/x[0] above was inf; reset it so the scalar product stays finite
y[0] = 0;
// -------------------- E5 --------------------
if (myrank == 0) cout << "-------------------- E5 --------------------" << endl;
DebugVector(x, icomm);
cout.flush();
MPI_Barrier(icomm);
// -------------------- E6 --------------------
if (myrank == 0) cout << "-------------------- E6 --------------------" << endl;
double scalar_product = par_scalar(x, y, icomm);
if (myrank == 0)
{
cout << "<x,y> = " << scalar_product << endl << endl;
}
cout.flush();
MPI_Barrier(icomm);
// -------------------- E7 --------------------
if (myrank == 0) cout << "-------------------- E7 --------------------" << endl;
double xmin, xmax;
par_minmax(x, xmin, xmax, icomm);
if (myrank == 0)
{
cout << "Global min: " << xmin << endl;
cout << "Global max: " << xmax << endl << endl;
}
cout.flush();
MPI_Barrier(icomm);
// -------------------- E8 --------------------
if (myrank == 0) cout << "-------------------- E8 --------------------" << endl;
vector<double> x_new(n);
cout.flush();
MPI_Barrier(icomm);
// All to all
if (myrank == 0) cout << "----- All to all -----" << endl;
auto sendbuf = x.data();
int sendcount = 5;
auto recvbuf = x_new.data();
int recvcount = 5;
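// Annotation: MPI_Alltoall sends 'sendcount' consecutive entries of x to each
// rank, so numprocs*sendcount must not exceed n.  With n = 20 and
// sendcount = recvcount = 5, this call (and the hard-coded count/displacement
// arrays for MPI_Alltoallv below) assumes exactly 4 MPI processes, matching
// the '-np 4' of the 'make run' target.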
MPI_Alltoall(sendbuf, sendcount, MPI_DOUBLE, recvbuf, recvcount, MPI_DOUBLE, icomm);
DebugVector(x_new, icomm);
cout.flush();
MPI_Barrier(icomm);
// All to all v
if (myrank == 0) cout << "----- All to all v -----" << endl;
int sendcounts[4] = {5, 5, 5, 5};
int senddispls[4] = {0, 5, 10, 15};
int rcvcounts[4] = {5, 5, 5, 5};
int rcvdispls[4] = {0, 5, 10, 15};
MPI_Alltoallv(x.data(), sendcounts, senddispls, MPI_DOUBLE, x_new.data(), rcvcounts, rcvdispls, MPI_DOUBLE, icomm);
DebugVector(x_new, icomm);
cout.flush();
MPI_Barrier(icomm);
// All to all (in place), sendcount and sendtype are ignored
if (myrank == 0) cout << "----- All to all (in place) -----" << endl;
MPI_Alltoall(MPI_IN_PLACE, sendcount, MPI_DOUBLE, x.data(), recvcount, MPI_DOUBLE, icomm);
DebugVector(x, icomm);
MPI_Finalize(); // Terminates MPI execution environment
return 0;
}

(new file; name not shown in this view)

@@ -0,0 +1,110 @@
#include "vector_operations.h"
#include <cassert>
#include <cfloat>
void DebugVector(const vector<double> &xin, const MPI_Comm &icomm)
{
int myrank, numprocs;
MPI_Comm_rank(icomm, &myrank); // my MPI-rank
MPI_Comm_size(icomm, &numprocs); // #MPI processes
int ierr;
int n = xin.size();
int chosen_process;
for (int k = 0; k < numprocs; ++k)
{
MPI_Barrier(icomm);
if (myrank == 0)
{
cout << "Choose next process: ";
cin >> chosen_process;
}
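// Annotation: reading from cin is only meaningful on rank 0; Open MPI's mpirun
// forwards stdin to rank 0 by default, and the chosen index reaches the other
// ranks via the MPI_Bcast below.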
ierr = MPI_Bcast(&chosen_process, 1, MPI_INT, 0, icomm); // broadcast value of "chosen_process" to all processes
assert(ierr == 0);
MPI_Barrier(icomm);
if (chosen_process == myrank)
{
for (int i = 0; i < n; ++i)
{
cout << "x_" << i << " = " << xin[i] << "\t(Process " << myrank << ")" << endl;
}
cout.flush();
}
}
return;
}
double par_scalar(const vector<double> &x, const vector<double> &y, const MPI_Comm &icomm)
{
int n = x.size();
assert(n == (int)y.size());
double sum = 0.0;
double local_sum = 0.0;
for (int i = 0; i < n; ++i)
{
local_sum += x[i]*y[i];
}
int ierr = MPI_Allreduce(&local_sum, &sum, 1, MPI_DOUBLE, MPI_SUM, icomm); // reduce local sums to global sum
assert(ierr == 0);
return sum;
}
void par_minmax(const vector<double> &x, double &global_min, double &global_max, const MPI_Comm &icomm)
{
int myrank, numprocs;
MPI_Comm_rank(icomm, &myrank); // my MPI-rank
MPI_Comm_size(icomm, &numprocs); // #MPI processes
int n = x.size();
double local_min = DBL_MAX;
double local_max = -DBL_MAX;
for (int i = 0; i < n; ++i)
{
if (x[i] < local_min)
local_min = x[i];
if (x[i] > local_max)
local_max = x[i];
}
vector<double> local_mins(numprocs);
vector<double> local_maxs(numprocs);
MPI_Gather(&local_min, 1, MPI_DOUBLE, local_mins.data(), 1, MPI_DOUBLE, 0, icomm);
MPI_Gather(&local_max, 1, MPI_DOUBLE, local_maxs.data(), 1, MPI_DOUBLE, 0, icomm);
if (myrank == 0)
{
global_min = DBL_MAX;
global_max = -DBL_MAX;
for (int i = 0; i < numprocs; ++i)
{
if (local_mins[i] < global_min)
global_min = local_mins[i];
if (local_maxs[i] > global_max)
global_max = local_maxs[i];
}
}
MPI_Bcast(&global_min, 1, MPI_DOUBLE, 0, icomm); // make sure every process is up to date
MPI_Bcast(&global_max, 1, MPI_DOUBLE, 0, icomm);
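// Annotation: the gather + manual reduction + broadcast above could be replaced
// by a single collective per value, e.g.
//   MPI_Allreduce(&local_min, &global_min, 1, MPI_DOUBLE, MPI_MIN, icomm);
//   MPI_Allreduce(&local_max, &global_max, 1, MPI_DOUBLE, MPI_MAX, icomm);
// the spelled-out variant is kept here as written.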
return;
}

(new file; name not shown in this view)

@@ -0,0 +1,10 @@
#include <mpi.h>
#include <vector>
using namespace std;
void DebugVector(const vector<double> &xin, const MPI_Comm &icomm);
double par_scalar(const vector<double> &x, const vector<double> &y, const MPI_Comm &icomm);
void par_minmax(const vector<double> &x, double &global_min, double &global_max, const MPI_Comm &icomm);

ex7/ex7_4/GCC_default.mk (Normal file, +154 lines)

@@ -0,0 +1,154 @@
# Basic Definitions for using the GNU compiler suite sequentially
# requires setting of COMPILER=GCC_
# start make as follows to avoid warnings caused by OpenMPI code
# make 2>&1 | grep -v openmpi
MPI_ROOT=/usr/bin/
CC = ${MPI_ROOT}mpicc
CXX = ${MPI_ROOT}mpicxx
F77 = ${MPI_ROOT}mpif77
LINKER = ${CXX}
# If 'mpirun ...' reports an error like "... not enough slots ...", use the option '--oversubscribe'
MPIRUN = ${MPI_ROOT}mpirun --oversubscribe -display-map
#MPIRUN = ${MPI_ROOT}mpiexec
# 2023, Oct 23: "WARNING: There is at least one non-excluded OpenFabrics device found,"
# solution according to https://github.com/open-mpi/ompi/issues/11063
MPIRUN += -mca btl ^openib
# KFU:sauron
CXXFLAGS += -I/software/boost/1_72_0/include
WARNINGS = -Wall -pedantic -Woverloaded-virtual -Wfloat-equal -Wshadow \
-Wredundant-decls -Wunreachable-code -Winline -fmax-errors=1
# WARNINGS += -Weffc++ -Wextra
# -Wno-pragmas
CXXFLAGS += -std=c++17 -ffast-math -O3 -march=native ${WARNINGS}
# -ftree-vectorizer-verbose=5 -DNDEBUG
# -ftree-vectorizer-verbose=2
# CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details
# CFLAGS = -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2
# info on vectorization
#VECTORIZE = -ftree-vectorize -fdump-tree-vect-blocks=foo.dump
#-fdump-tree-pre=stderr
VECTORIZE = -ftree-vectorize -fopt-info -ftree-vectorizer-verbose=5
#CXXFLAGS += ${VECTORIZE}
# -funroll-all-loops -msse3
#GCC -march=knl -march=broadwell -march=haswell
# for debugging purpose (save code)
# -fsanitize=leak # only one of the three can be used
# -fsanitize=address
# -fsanitize=thread
SANITARY = -fsanitize=address -fsanitize=undefined -fsanitize=null -fsanitize=return \
-fsanitize=bounds -fsanitize=alignment -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow \
-fsanitize=bool -fsanitize=enum -fsanitize=vptr
#CXXFLAGS += ${SANITARY}
#LINKFLAGS +=${SANITARY}
# OpenMP
CXXFLAGS += -fopenmp
LINKFLAGS += -fopenmp
default: ${PROGRAM}
${PROGRAM}: ${OBJECTS}
$(LINKER) $^ ${LINKFLAGS} -o $@
@echo
@echo "Start with : $(MPIRUN) -np num_proc $(MPIFLAGS) $(PROGRAM)"
@echo
clean:
@rm -f ${PROGRAM} ${OBJECTS} gmon.out
clean_all:: clean
@rm -f *_ *~ *.bak *.log *.out *.tar *.orig
@rm -rf html latex
run: ${PROGRAM}
${MPIRUN} -np 4 ./$^
# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *
zip: clean
@echo "Zip the directory: " ${MY_DIR}
@cd .. ;\
zip -r ${MY_DIR}.zip ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
doc:
doxygen Doxyfile
#########################################################################
.cpp.o:
$(CXX) -c $(CXXFLAGS) -o $@ $<
# 2>&1 | grep -v openmpi
# special: get rid of compiler warnings generated by OpenMPI files
#.cpp.o:
# @$(CXX) -c $(CXXFLAGS) $< 2>/tmp/t.txt || grep -sv openmpi /tmp/t.txt
# |grep -sv openmpi
.c.o:
$(CC) -c $(CFLAGS) -o $@ $<
.f.o:
$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# some tools
# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags)
cache: ${PROGRAM}
valgrind --tool=callgrind --simulate-cache=yes ./$^
# kcachegrind callgrind.out.<pid> &
kcachegrind `ls -1tr callgrind.out.* |tail -1`
# Check for wrong memory accesses, memory leaks, ...
# use smaller data sets
# no "-pg" in compile/link options
mem: ${PROGRAM}
valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes mpirun -np 4 ./$^
# Graphical interface
# valkyrie
# Simple run time profiling of your code
# CXXFLAGS += -g -pg
# LINKFLAGS += -pg
prof: ${PROGRAM}
perf record ./$^
perf report
# gprof -b ./$^ > gp.out
# kprof -f gp.out -p gprof &
#Trace your heap:
#> heaptrack ./main.GCC_
#> heaptrack_gui heaptrack.main.GCC_.<pid>.gz
heap: ${PROGRAM}
heaptrack ./$^ 11
heaptrack_gui `ls -1tr heaptrack.$^.* |tail -1` &
codecheck: $(SOURCES)
cppcheck --enable=all --inconclusive --std=c++17 --suppress=missingIncludeSystem $^
########################################################################
# get the detailed status of all optimization flags
info:
echo "detailed status of all optimization flags"
$(CXX) --version
$(CXX) -Q $(CXXFLAGS) --help=optimizers

(new file; name not shown in this view)

@@ -0,0 +1,107 @@
# Basic Definitions for using INTEL-MPI with its compilers
# requires setting of COMPILER=ICC_NATIVE_
# MPI_ROOT should be defined by shell
# path to icpc is contained in $PATH
MPI_BIN = $(shell dirname `which icpc` | sed 's/bin\/intel64/mpi\/intel64\/bin/g')/
MPI_LIB = $(shell echo ${MPI_BIN} | sed 's/bin/lib/g')
# Intel-MPI wrappers used gcc as default !!
CC = ${MPI_BIN}mpicc -cc=icc
CXX = ${MPI_BIN}mpicxx -cxx=icpc
F77 = ${MPI_BIN}mpif77 -f77=ifort
LINKER = ${CXX}
MPIRUN = ${MPI_BIN}mpirun
WARNINGS = -Wall -Wextra -pedantic -Woverloaded-virtual -Wfloat-equal -Wshadow
# -Weffc++ -Wunreachable-code -Winline
CXXFLAGS += -O3 -fargument-noalias -DNDEBUG -std=c++17 ${WARNINGS} ${MPI_COMPILE_FLAGS}
CFLAGS += -O3 -fargument-noalias -DNDEBUG -Wall -Wextra -pedantic -Wfloat-equal \
-Wshadow ${MPI_COMPILE_FLAGS}
# -vec-report=3 -mkl
# -guide -parallel
# -guide-opts=string -guide-par[=n] -guide-vec[=n]
# -auto-p32 -simd
# use MKL by INTEL
LINKFLAGS += -mkl ${MPI_LINK_FLAGS}
default: ${PROGRAM}
${PROGRAM}: ${OBJECTS}
$(LINKER) $^ ${LINKFLAGS} -o $@
@echo
@echo "Start with : $(MPIRUN) -np num_proc $(MPIFLAGS) $(PROGRAM)"
@echo
clean:
rm -f ${PROGRAM} ${OBJECTS}
clean_all:: clean
@rm -f *_ *~ *.bak *.log *.out *.tar
run: ${PROGRAM}
(export LD_LIBRARY_PATH=${MPI_LIB}:${LD_LIBRARY_PATH} ;${MPIRUN} -np 4 ./$^ ${PROG_ARGS})
# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *
doc:
doxygen Doxyfile
#########################################################################
.cpp.o:
$(CXX) -c $(CXXFLAGS) -o $@ $<
.c.o:
$(CC) -c $(CFLAGS) -o $@ $<
.f.o:
$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# # some tools
# # Cache behaviour (CXXFLAGS += -g tracks down to source lines)
# cache: ${PROGRAM}
# valgrind --tool=callgrind --simulate-cache=yes ./$^
# # kcachegrind callgrind.out.<pid> &
#
# # Check for wrong memory accesses, memory leaks, ...
# # use smaller data sets
# mem: ${PROGRAM}
# valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
#
# # Simple run time profiling of your code
# # CXXFLAGS += -g -pg
# # LINKFLAGS += -pg
# prof: ${PROGRAM}
# ./$^
# gprof -b ./$^ > gp.out
# # kprof -f gp.out -p gprof &
#
mem: inspector
prof: amplifier
cache: amplifier
gap_par_report:
${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt
# GUI for performance report
amplifier: ${PROGRAM}
${BINDIR}../vtune_amplifier_xe_2013/bin64/amplxe-gui &
# GUI for Memory and Thread analyzer (race condition)
inspector: ${PROGRAM}
# http://askubuntu.com/questions/41629/after-upgrade-gdb-wont-attach-to-process
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
${BINDIR}../inspector_xe_2013/bin64/inspxe-gui &

ex7/ex7_4/ICC_default.mk (Normal file, +112 lines)

@@ -0,0 +1,112 @@
# Basic Definitions for using INTEL compilers with OpenMPI headers and libraries
# requires setting of COMPILER=ICC_
# MPI_ROOT should be defined by shell
MPI_ROOT=/usr
CC = icc
CXX = icpc
F77 = ifort
LINKER = ${CXX}
MPIRUN = ${MPI_ROOT}/bin/mpirun
# no differences when C or C++ is used !! (always used options from mpicxx)
MPI_COMPILE_FLAGS = `${MPI_ROOT}/bin/mpicxx -showme:compile`
MPI_LINK_FLAGS = `${MPI_ROOT}/bin/mpicxx -showme:link`
# MPI_LINK_FLAGS = -pthread -L/usr/lib/openmpi/lib -lmpi_cxx -lmpi -lopen-rte -lopen-pal -ldl -Wl,--export-dynamic -lnsl -lutil -lm -ldl
WARNINGS = -Wall -Wextra -pedantic -Woverloaded-virtual -Wfloat-equal -Wshadow
# -Weffc++ -Wunreachable-code -Winline
CXXFLAGS += -O3 -std=c++17 -fargument-noalias -DNDEBUG ${WARNINGS} ${MPI_COMPILE_FLAGS}
CFLAGS += -O3 -fargument-noalias -DNDEBUG -Wall -Wextra -pedantic -Wfloat-equal \
-Wshadow ${MPI_COMPILE_FLAGS}
# -vec-report=3 -mkl
# -guide -parallel
# -guide-opts=string -guide-par[=n] -guide-vec[=n]
# -auto-p32 -simd
# use MKL by INTEL
LINKFLAGS += -mkl
# use MPI by Compiler
LINKFLAGS += ${MPI_LINK_FLAGS}
default: ${PROGRAM}
${PROGRAM}: ${OBJECTS}
$(LINKER) $^ ${LINKFLAGS} -o $@
@echo
@echo "Start with : $(MPIRUN) -np num_proc $(MPIFLAGS) $(PROGRAM)"
@echo
clean:
rm -f ${PROGRAM} ${OBJECTS}
clean_all:: clean
@rm -f *_ *~ *.bak *.log *.out *.tar
run: ${PROGRAM}
${MPIRUN} -np 4 ./$^
# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *
doc:
doxygen Doxyfile
#########################################################################
.cpp.o:
$(CXX) -c $(CXXFLAGS) -o $@ $<
.c.o:
$(CC) -c $(CFLAGS) -o $@ $<
.f.o:
$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# # some tools
# # Cache behaviour (CXXFLAGS += -g tracks down to source lines)
# cache: ${PROGRAM}
# valgrind --tool=callgrind --simulate-cache=yes ./$^
# # kcachegrind callgrind.out.<pid> &
#
# # Check for wrong memory accesses, memory leaks, ...
# # use smaller data sets
# mem: ${PROGRAM}
# valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
#
# # Simple run time profiling of your code
# # CXXFLAGS += -g -pg
# # LINKFLAGS += -pg
# prof: ${PROGRAM}
# ./$^
# gprof -b ./$^ > gp.out
# # kprof -f gp.out -p gprof &
#
mem: inspector
prof: amplifier
cache: amplifier
gap_par_report:
${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt
# GUI for performance report
amplifier: ${PROGRAM}
${BINDIR}../vtune_amplifier_xe_2013/bin64/amplxe-gui &
# GUI for Memory and Thread analyzer (race condition)
inspector: ${PROGRAM}
# http://askubuntu.com/questions/41629/after-upgrade-gdb-wont-attach-to-process
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
${BINDIR}../inspector_xe_2013/bin64/inspxe-gui &

(new file; name not shown in this view)

@@ -0,0 +1,128 @@
# Basic Definitions for using OpenMPI with CLANG compilers
# requires setting of COMPILER=OPENMPI_CLANG_
# Pass CLANG Compilers to the OpenMPI wrappers
# see: https://www.open-mpi.org/faq/?category=mpi-apps#override-wrappers-after-v1.0
EXPORT = export OMPI_CXX=clang++; export OMPI_CC=clang; export OMPI_mpifort=flang
CC = mpicc
CXX = mpicxx
F77 = mpifort
LINKER = ${CXX}
MPIRUN = ${MPI_BIN}mpirun
#http://clang.llvm.org/docs/UsersManual.html#options-to-control-error-and-warning-messages
SILENCE_MPI = -Wno-weak-vtables -Wno-old-style-cast -Wno-cast-align -Wno-deprecated
SILENCE_MPI+= -Wno-sign-conversion -Wno-reserved-id-macro -Wno-c++98-compat-pedantic
SILENCE_MPI+= -Wno-zero-as-null-pointer-constant -Wno-source-uses-openmp
WARNINGS = -Weverything -Wno-c++98-compat -Wno-weak-vtables -ferror-limit=3 ${SILENCE_MPI}
#-fsyntax-only -Wdocumentation -Wconversion -Wshadow -Wfloat-conversion -pedantic
CXXFLAGS += -Ofast -std=c++17 ${WARNINGS}
#CXXFLAGS += -Ofast -std=c++17
# -ftrapv
#
CFLAGS += -Ofast -Weverything -ferror-limit=3 ${MPI_COMPILE_FLAGS}
# OpenMP
#CXXFLAGS += -fopenmp
#LINKFLAGS += -fopenmp
# tidy_check
SWITCH_OFF=,-readability-magic-numbers,-readability-redundant-control-flow,-readability-redundant-member-init
SWITCH_OFF+=,-readability-redundant-member-init,-readability-isolate-declaration
#READABILITY=,readability*${SWITCH_OFF}
#TIDYFLAGS = -checks=llvm-*,-llvm-header-guard -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp"
TIDYFLAGS = -checks=llvm-*,-llvm-header-guard${READABILITY} -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp"
#TIDYFLAGS += -checks='modernize*
MPI_COMPILE_FLAGS = `${MPI_BIN}mpicxx -showme:compile`
MPI_LINK_FLAGS = `${MPI_BIN}mpicxx -showme:link`
#TIDYFLAGS += ${MPI_COMPILE_FLAGS}
TIDYFLAGS += -extra-arg="-I/usr/lib/x86_64-linux-gnu/openmpi/include"
#check:
# echo ${MPI_COMPILE_FLAGS}
default: ${PROGRAM}
${PROGRAM}: ${OBJECTS}
@( ${EXPORT}; $(LINKER) $^ ${LINKFLAGS} -o $@ )
@echo
@echo "Start with : $(MPIRUN) -np num_proc $(MPIFLAGS) $(PROGRAM)"
@echo
clean:
rm -f ${PROGRAM} ${OBJECTS}
clean_all:: clean
@rm -f *_ *~ *.bak *.log *.out *.tar
codecheck: tidy_check
tidy_check:
clang-tidy ${SOURCES} ${TIDYFLAGS} -- ${SOURCES}
# see also http://clang-developers.42468.n3.nabble.com/Error-while-trying-to-load-a-compilation-database-td4049722.html
run: ${PROGRAM}
${MPIRUN} -np 4 ./$^ ${PROG_ARGS}
# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *
doc:
doxygen Doxyfile
#########################################################################
.cpp.o:
@( ${EXPORT}; $(CXX) -c $(CXXFLAGS) -o $@ $< )
.c.o:
@( ${EXPORT}; $(CC) -c $(CFLAGS) -o $@ $< )
.f.o:
$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# # some tools
# # Cache behaviour (CXXFLAGS += -g tracks down to source lines)
# cache: ${PROGRAM}
# valgrind --tool=callgrind --simulate-cache=yes ./$^
# # kcachegrind callgrind.out.<pid> &
#
# # Check for wrong memory accesses, memory leaks, ...
# # use smaller data sets
# mem: ${PROGRAM}
# valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
#
# # Simple run time profiling of your code
# # CXXFLAGS += -g -pg
# # LINKFLAGS += -pg
# prof: ${PROGRAM}
# ./$^
# gprof -b ./$^ > gp.out
# # kprof -f gp.out -p gprof &
#
mem: inspector
prof: amplifier
cache: amplifier
gap_par_report:
${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt
# GUI for performance report
amplifier: ${PROGRAM}
${BINDIR}../vtune_amplifier_xe_2013/bin64/amplxe-gui &
# GUI for Memory and Thread analyzer (race condition)
inspector: ${PROGRAM}
# http://askubuntu.com/questions/41629/after-upgrade-gdb-wont-attach-to-process
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
${BINDIR}../inspector_xe_2013/bin64/inspxe-gui &

(new file; name not shown in this view)

@@ -0,0 +1,107 @@
# Basic Definitions for using OpenMPI with Intel compilers
# requires setting of COMPILER=OPENMPI_ICC_
# Pass Intel Compilers to the OpenMPI wrappers
# see: https://www.open-mpi.org/faq/?category=mpi-apps#override-wrappers-after-v1.0
EXPORT = export OMPI_CXX=icpc; export OMPI_CC=icc; export OMPI_mpifort=ifort
CC = mpicc
CXX = mpicxx
F77 = mpifort
LINKER = ${CXX}
MPIRUN = ${MPI_BIN}mpirun
WARNINGS = -Wall -Wextra -pedantic -Woverloaded-virtual -Wfloat-equal -Wshadow
# -Weffc++ -Wunreachable-code -Winline
CXXFLAGS += -fast -fargument-noalias -DNDEBUG -std=c++17 ${WARNINGS}
CFLAGS += -O3 -fargument-noalias -DNDEBUG -Wall -Wextra -pedantic -Wfloat-equal -Wshadow
# -vec-report=3 -mkl
# -guide -parallel
# -guide-opts=string -guide-par[=n] -guide-vec[=n]
# -auto-p32 -simd
# use MKL by INTEL
LINKFLAGS += -O3 -mkl ${MPI_LINK_FLAGS}
# ipo: warning #11021: unresolved __GI_memset
# see: https://software.intel.com/en-us/articles/ipo-warning-11021-unresolved-symbols-referenced-a-dynamic-library
LINKFLAGS +=
default: ${PROGRAM}
${PROGRAM}: ${OBJECTS}
@( ${EXPORT}; $(LINKER) $^ ${LINKFLAGS} -o $@ )
@echo
@echo "Start with : $(MPIRUN) -np num_proc $(MPIFLAGS) $(PROGRAM)"
@echo
clean:
rm -f ${PROGRAM} ${OBJECTS}
clean_all:: clean
@rm -f *_ *~ *.bak *.log *.out *.tar
run: ${PROGRAM}
${MPIRUN} -np 4 ./$^ ${PROG_ARGS}
# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *
doc:
doxygen Doxyfile
#########################################################################
.cpp.o:
@( ${EXPORT}; $(CXX) -c $(CXXFLAGS) -o $@ $< )
.c.o:
@( ${EXPORT}; $(CC) -c $(CFLAGS) -o $@ $< )
.f.o:
$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# # some tools
# # Cache behaviour (CXXFLAGS += -g tracks down to source lines)
# cache: ${PROGRAM}
# valgrind --tool=callgrind --simulate-cache=yes ./$^
# # kcachegrind callgrind.out.<pid> &
#
# # Check for wrong memory accesses, memory leaks, ...
# # use smaller data sets
# mem: ${PROGRAM}
# valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
#
# # Simple run time profiling of your code
# # CXXFLAGS += -g -pg
# # LINKFLAGS += -pg
# prof: ${PROGRAM}
# ./$^
# gprof -b ./$^ > gp.out
# # kprof -f gp.out -p gprof &
#
mem: inspector
prof: amplifier
cache: amplifier
gap_par_report:
${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt
# GUI for performance report
amplifier: ${PROGRAM}
${BINDIR}../vtune_amplifier_xe_2013/bin64/amplxe-gui &
# GUI for Memory and Thread analyzer (race condition)
inspector: ${PROGRAM}
# http://askubuntu.com/questions/41629/after-upgrade-gdb-wont-attach-to-process
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
${BINDIR}../inspector_xe_2013/bin64/inspxe-gui &

(new file; name not shown in this view)

@@ -0,0 +1,125 @@
# Use the MPI-wrappers from the PGI compiler suite.
# requires setting of COMPILER=PGI_MPI_
#
# requires
# sudo apt install librdmacm1
# Details for run time information
# export PGI_ACC_TIME=1
# unset PGI_ACC_TIME
# export PGI_ACC_NOTIFY=1
# export PGI_ACC_NOTIFY=3
# unset PGI_ACC_NOTIFY
PGI_PATH = /opt/pgi/linux86-64/2019/bin
#ifeq "$(HOSTNAME)" "mephisto.uni-graz.at"
# # mephisto
# PGI_PATH = /share/apps/pgi/linux86-64/2016/bin
#endif
#MPI_ROOT=${PGI_PATH}mpi/mpich/bin/
MPI_ROOT= ${PGI_PATH}/../mpi/openmpi-3.1.3/bin/
MPIRUN = ${MPI_ROOT}mpirun
CC = ${MPI_ROOT}mpicc
CXX = ${MPI_ROOT}mpicxx
#F77 = ${MPI_ROOT}mpif77
ifndef LINKER
LINKER = ${CC}
endif
LINKER = ${CXX}
WARNINGS = -Minform=warn
PGI_PROFILING += -Minfo=loop,vect,opt,intensity,mp,accel
#PGI_PROFILING += -Mprof=lines Minfo=ccff
CXXFLAGS += -e3 -std=c++17 -fast ${PGI_PROFILING} ${WARNINGS} -Mnodepchk
CFLAGS += -fast ${PGI_PROFILING} ${WARNINGS} -Mnodepchk
#
# for OpenACC
# Target architecture (nvidia,host)
TA_ARCH = host
#TA_ARCH = nvidia,host
#TA_ARCH = -ta=nvidia:cc2+,cuda5.5,fastmath
#TA_ARCH = -acc -DNDEBUG -ta=nvidia:cc2+,cuda5.5,fastmath,keepgpu
#TA_ARCH = -acc -DNDEBUG -ta=nvidia:cc2+,fastmath,keepgpu
#,keepgpu
# CFLAGS = -O3 -ta=$(TA_ARCH)
#CFLAGS += -B -gopt $(TA_ARCH)
#CXXFLAGS += -B -gopt $(TA_ARCH)
# -Minfo=all
# libcudart.a is needed for direct CUDA calls
#LINKFLAGS = -gopt $(TA_ARCH) -L${BINDIR}../lib $(PGI_PROFILING)
# -lcudart
default: ${PROGRAM}
${PROGRAM}: ${OBJECTS}
$(LINKER) $^ ${LINKFLAGS} -o $@
clean:
rm -f ${PROGRAM} ${OBJECTS} *.gpu *gprof.out
clean_all:: clean
@rm -f *_ *~ *.bak *.log *.out *.tar
#run: clean ${PROGRAM}
run: ${PROGRAM}
${MPIRUN} -np 4 ${OPTIRUN} ./${PROGRAM}
# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *
doc:
doxygen Doxyfile
#########################################################################
.cpp.o:
$(CXX) -c $(CXXFLAGS) -o $@ $<
.c.o:
$(CC) -c $(CFLAGS) -o $@ $<
.f.o:
$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# # some tools
# # Simple run time profiling of your code
# # CXXFLAGS += -g -pg
# # LINKFLAGS += -pg
# Profiling options PGI, see: pgcollect -help
CPU_PROF = -allcache
GPU_PROF = -cuda=gmem,branch,cc13 -cudainit
#GPU_PROF = -cuda=branch:cc20
#
PROF_FILE = pgprof.out
prof: ${PROGRAM}
# ./$^
# $(CUDA_HOME)/bin/nvvp &
# export LD_LIBRARY_PATH=/state/partition1/apps/pgi/linux86-64/12.9/lib:$LD_LIBRARY_PATH
${OPTIRUN} ${BINDIR}pgcollect $(GPU_PROF) ./$^
${OPTIRUN} ${BINDIR}pgprof -exe ./$^ $(PROF_FILE) &
# Memory checker (slooooow!!!):
# see doc at /usr/local/cuda/doc/cuda-memcheck.pdf
# mem: ${PROGRAM}
# $(CUDA_HOME)memcheck ./$^

ex7/ex7_4/ex7_4/Doxyfile (Normal file, +2877 lines)

File diff suppressed because it is too large

ex7/ex7_4/ex7_4/Makefile (Executable file, +54 lines)

@@ -0,0 +1,54 @@
#
# use GNU-Compiler tools
COMPILER=GCC_
# COMPILER=GCC_SEQ_
# alternatively from the shell
# export COMPILER=GCC_
# or, alternatively from the shell
# make COMPILER=GCC_
MAIN = main
SOURCES = ${MAIN}.cpp vdop.cpp geom.cpp par_geom.cpp
OBJECTS = $(SOURCES:.cpp=.o)
PROGRAM = ${MAIN}.${COMPILER}
# uncomment the next two lines for debugging and detailed performance analysis
CXXFLAGS += -g
# -DNDEBUG
# -pg slows down the code on my laptop when using CLANG_
LINKFLAGS += -g
#-pg
#CXXFLAGS += -Q --help=optimizers
#CXXFLAGS += -fopt-info
include ../${COMPILER}default.mk
#############################################################################
# additional specific cleaning in this directory
clean_all::
@rm -f uv.txt
#############################################################################
# special testing
# NPROCS = 4
#
TFILE = t.dat
# TTMP = t.tmp
#
graph: $(PROGRAM)
# @rm -f $(TFILE).*
# next two lines only sequentially
./$(PROGRAM)
@mv $(TFILE).000 $(TFILE)
# $(MPIRUN) $(MPIFLAGS) -np $(NPROCS) $(PROGRAM)
# @echo " "; echo "Manipulate data for graphics."; echo " "
# @cat $(TFILE).* > $(TTMP)
# @sort -b -k 2 $(TTMP) -o $(TTMP).1
# @sort -b -k 1 $(TTMP).1 -o $(TTMP).2
# @awk -f nl.awk $(TTMP).2 > $(TFILE)
# @rm -f $(TTMP).* $(TTMP) $(TFILE).*
#
-gnuplot jac.dem

(new file; name not shown in this view)

@@ -0,0 +1,43 @@
function [ xc, ia, v ] = ascii_read_meshvector( fname )
%
% Loads the 2D triangular mesh (coordinates, vertex connectivity)
% together with values on its vertices from an ASCII file.
% Matlab indexing is stored (starts with 1).
%
% The input file format is compatible
% with Mesh_2d_3_matlab:Write_ascii_matlab(..) in jacobi_oo_stl/geom.h
%
%
% IN: fname - filename
% OUT: xc - coordinates
% ia - mesh connectivity
% v - solution vector
DELIMETER = ' ';
fprintf('Read file %s\n',fname)
% Read mesh constants
nn = dlmread(fname,DELIMETER,[0 0 0 3]); %% row_1, col_1, row_2, col_2 in C indexing!!!
nnode = nn(1);
ndim = nn(2);
nelem = nn(3);
nvert = nn(4);
% Read coordinates
row_start = 0+1;
row_end = 0+nnode;
xc = dlmread(fname,DELIMETER,[row_start 0 row_end ndim-1]);
% Read connectivity
row_start = row_end+1;
row_end = row_end+nelem;
ia = dlmread(fname,DELIMETER,[row_start 0 row_end nvert-1]);
% Read solution
row_start = row_end+1;
row_end = row_end+nnode;
v = dlmread(fname,DELIMETER,[row_start 0 row_end 0]);
end

(new file; name not shown in this view)

@@ -0,0 +1,49 @@
function ascii_write_mesh( xc, ia, e, basename)
%
% Saves the 2D triangular mesh in the minimal way (only coordinates, vertex connectivity, minimal boundary edge info)
% in an ASCII file.
% Matlab indexing is stored (starts with 1).
%
% The output file format is compatible with Mesh_2d_3_matlab:Mesh_2d_3_matlab(std::string const &fname) in jacobi_oo_stl/geom.h
%
% IN:
% coordinates xc: [2][nnode]
% connectivity ia: [4][nelem] with t(4,:) are the subdomain numbers
% edges e: [7][nedges] boundary edges
% e([1,2],:) - start/end vertex of edge
% e([3,4],:) - start/end values
% e(5,:) - segment number
% e([6,7],:) - left/right subdomain
% basename: file name without extension
%
% Data have been generated via <https://de.mathworks.com/help/pde/ug/initmesh.html initmesh>.
%
fname = [basename, '.txt'];
nnode = int32(size(xc,2));
ndim = int32(size(xc,1));
nelem = int32(size(ia,2));
nvert_e = int32(3);
dlmwrite(fname,nnode,'delimiter','\t','precision',16) % number of nodes
dlmwrite(fname,ndim,'-append','delimiter','\t','precision',16) % space dimension
dlmwrite(fname,nelem,'-append','delimiter','\t','precision',16) % number of elements
dlmwrite(fname,nvert_e,'-append','delimiter','\t','precision',16) % number of vertices per element
% dlmwrite(fname,xc(:),'-append','delimiter','\t','precision',16) % coordinates
dlmwrite(fname,xc([1,2],:).','-append','delimiter','\t','precision',16) % coordinates
% no subdomain info transferred
tmp=int32(ia(1:3,:));
% dlmwrite(fname,tmp(:),'-append','delimiter','\t','precision',16) % connectivity in Matlab indexing
dlmwrite(fname,tmp(:,:).','-append','delimiter','\t','precision',16) % connectivity in Matlab indexing
% store only start and end point of boundary edges,
nbedges = size(e,2);
dlmwrite(fname,nbedges,'-append','delimiter','\t','precision',16) % number boundary edges
tmp=int32(e(1:2,:));
% dlmwrite(fname,tmp(:),'-append','delimiter','\t','precision',16) % boundary edges in Matlab indexing
dlmwrite(fname,tmp(:,:).','-append','delimiter','\t','precision',16) % boundary edges in Matlab indexing
end

(new file; name not shown in this view)

@@ -0,0 +1,51 @@
function ascii_write_subdomains( xc, ia, e, basename)
%
% Saves the 2D triangular mesh in the minimal way (only coordinates, vertex connectivity, minimal boundary edge info)
% in an ASCII file.
% Matlab indexing is stored (starts with 1).
%
% The output file format is compatible with Mesh_2d_3_matlab:Mesh_2d_3_matlab(std::string const &fname) in jacobi_oo_stl/geom.h
%
% IN:
% coordinates xc: [2][nnode]
% connectivity ia: [4][nelem] with t(4,:) are the subdomain numbers
% edges e: [7][nedges] boundary edges
% e([1,2],:) - start/end vertex of edge
% e([3,4],:) - start/end values
% e(5,:) - segment number
% e([6,7],:) - left/right subdomain
% basename: file name without extension
%
% Data have been generated via <https://de.mathworks.com/help/pde/ug/initmesh.html initmesh>.
%
fname = [basename, '_sd.txt'];
nnode = int32(size(xc,2));
ndim = int32(size(xc,1));
nelem = int32(size(ia,2));
nvert_e = int32(3);
% dlmwrite(fname,nnode,'delimiter','\t','precision',16) % number of nodes
% dlmwrite(fname,ndim,'-append','delimiter','\t','precision',16) % space dimension
% dlmwrite(fname,nelem,'-append','delimiter','\t','precision',16) % number of elements
dlmwrite(fname,nelem,'delimiter','\t','precision',16) % number of elements
% dlmwrite(fname,nvert_e,'-append','delimiter','\t','precision',16) % number of vertices per element
% % dlmwrite(fname,xc(:),'-append','delimiter','\t','precision',16) % coordinates
% dlmwrite(fname,xc([1,2],:).','-append','delimiter','\t','precision',16) % coordinates
% subdomain info
tmp=int32(ia(4,:));
% % dlmwrite(fname,tmp(:),'-append','delimiter','\t','precision',16) % connectivity in Matlab indexing
% dlmwrite(fname,tmp(:,:).','-append','delimiter','\t','precision',16) % connectivity in Matlab indexing
dlmwrite(fname,tmp(:,:).','-append','delimiter','\t') % connectivity in Matlab indexing
% % store only start and end point of boundary edges,
% nbedges = size(e,2);
% dlmwrite(fname,nbedges,'-append','delimiter','\t','precision',16) % number boundary edges
% tmp=int32(e(1:2,:));
% % dlmwrite(fname,tmp(:),'-append','delimiter','\t','precision',16) % boundary edges in Matlab indexing
% dlmwrite(fname,tmp(:,:).','-append','delimiter','\t','precision',16) % boundary edges in Matlab indexing
end

1281
ex7/ex7_4/ex7_4/geom.cpp Normal file

File diff suppressed because it is too large

712
ex7/ex7_4/ex7_4/geom.h Normal file
View file

@ -0,0 +1,712 @@
#ifndef GEOM_FILE
#define GEOM_FILE
#include <array>
#include <functional> // function; C++11
#include <iostream>
#include <memory> // shared_ptr
#include <string>
#include <vector>
/**
* Base class for finite element meshes.
*/
class Mesh
{
public:
/**
* Constructor initializing the members with default values.
*
* @param[in] ndim space dimensions (dimension for coordinates)
* @param[in] nvert_e number of vertices per element (dimension for connectivity)
* @param[in] ndof_e degrees of freedom per element (= @p nvert_e for linear elements)
* @param[in] nedge_e number of edges per element (= @p nvert_e for linear elements in 2D)
*/
explicit Mesh(int ndim, int nvert_e = 0, int ndof_e = 0, int nedge_e = 0);
__attribute__((noinline))
Mesh(Mesh const &) = default;
Mesh &operator=(Mesh const &) = delete;
/**
* Destructor.
*
* See clang warning on
* <a href="https://stackoverflow.com/questions/28786473/clang-no-out-of-line-virtual-method-definitions-pure-abstract-c-class/40550578">weak-vtables</a>.
*/
virtual ~Mesh();
/**
* Reads mesh data from an ASCII file.
*
* File format, see ascii_write_mesh.m
*
* @param[in] fname file name
*/
explicit Mesh(std::string const &fname);
/**
* Reads mesh data from an ASCII file.
*
* File format, see ascii_write_mesh.m
*
* @param[in] fname file name
*/
void ReadVertexBasedMesh(std::string const &fname);
/**
* Number of finite elements in (sub)domain.
* @return number of elements.
*/
int Nelems() const
{
return _nelem;
}
/**
* Global number of vertices for each finite element.
* @return number of vertices per element.
*/
int NverticesElements() const
{
return _nvert_e;
}
/**
* Global number of degrees of freedom (dof) for each finite element.
* @return degrees of freedom per element.
*/
int NdofsElement() const
{
return _ndof_e;
}
/**
* Number of vertices in mesh.
* @return number of vertices.
*/
int Nnodes() const
{
return _nnode;
}
/**
* Space dimension.
* @return number of dimensions.
*/
int Ndims() const
{
return _ndim;
}
/**
* (Re-)Allocates memory for the element connectivity and redefines the appropriate dimensions.
*
* @param[in] nelem number of elements
* @param[in] nvert_e number of vertices per element
*/
void Resize_Connectivity(int nelem, int nvert_e)
{
SetNelem(nelem); // number of elements
SetNverticesElement(nvert_e); // vertices per element
_ia.resize(nelem * nvert_e);
}
/**
* Read connectivity information (g1,g2,g3)_i.
* @return connectivity vector [nelems*ndofs].
*/
const std::vector<int> &GetConnectivity() const
{
return _ia;
}
/**
* Access/Change connectivity information (g1,g2,g3)_i.
* @return connectivity vector [nelems*ndofs].
*/
std::vector<int> &GetConnectivity()
{
return _ia;
}
/**
* (Re-)Allocates memory for the vertex coordinates and redefines the appropriate dimensions.
*
* @param[in] nnodes number of nodes
* @param[in] ndim space dimension
*/
void Resize_Coords(int nnodes, int ndim)
{
SetNnode(nnodes); // number of nodes
SetNdim(ndim); // space dimension
_xc.resize(nnodes * ndim);
}
/**
* Read coordinates of vertices (x,y)_i.
* @return coordinates vector [nnodes*2].
*/
const std::vector<double> &GetCoords() const
{
return _xc;
}
/**
* Access/Change coordinates of vertices (x,y)_i.
* @return coordinates vector [nnodes*2].
*/
std::vector<double> &GetCoords()
{
return _xc;
}
/**
* Calculate values in vector @p v via function @p func(x,y)
* @param[in] v vector
* @param[in] func function of (x,y) returning a double value.
*/
void SetValues(std::vector<double> &v, const std::function<double(double, double)> &func) const;
void SetBoundaryValues(std::vector<double> &v, const std::function<double(double, double)> &func) const;
void SetDirchletValues(std::vector<double> &v, const std::function<double(double, double)> &func) const;
/**
* Prints the information for a finite element mesh
*/
void Debug() const;
/**
* Prints the edge based information for a finite element mesh
*/
void DebugEdgeBased() const;
/**
* Determines the indices of those vertices with Dirichlet boundary conditions
* @return index vector.
*/
virtual std::vector<int> Index_DirichletNodes() const;
virtual std::vector<int> Index_BoundaryNodes() const;
/**
* Write vector @p v together with its mesh information to an ASCII file @p fname.
*
* The data are written in C-style.
*
* @param[in] fname file name
* @param[in] v vector
*/
void Write_ascii_matlab(std::string const &fname, std::vector<double> const &v) const;
/**
* Exports the mesh information to ASCII files @p basename + {_coords|_elements}.txt.
*
* The data are written in C-style.
*
* @param[in] basename first part of file names
*/
void Export_scicomp(std::string const &basename) const;
/**
* Visualize @p v together with its mesh information via matlab or octave.
*
* Comment/uncomment those code lines in method Mesh::Visualize (geom.cpp)
* that are supported on your system.
*
* @param[in] v vector
*
* @warning matlab files ascii_read_meshvector.m visualize_results.m
* must be in the executing directory.
*/
void Visualize(std::vector<double> const &v) const;
/**
* Global number of edges.
* @return number of edges in mesh.
*/
int Nedges() const
{
return _nedge;
}
/**
* Global number of edges for each finite element.
* @return number of edges per element.
*/
int NedgesElements() const
{
return _nedge_e;
}
/**
* Read edge connectivity information (e1,e2,e3)_i.
* @return edge connectivity vector [nelems*_nedge_e].
*/
const std::vector<int> &GetEdgeConnectivity() const
{
return _ea;
}
/**
* Access/Change edge connectivity information (e1,e2,e3)_i.
* @return edge connectivity vector [nelems*_nedge_e].
*/
std::vector<int> &GetEdgeConnectivity()
{
return _ea;
}
/**
* Read edge information (v1,v2)_i.
* @return edge connectivity vector [_nedge*2].
*/
const std::vector<int> &GetEdges() const
{
return _edges;
}
/**
* Access/Change edge information (v1,v2)_i.
* @return edge connectivity vector [_nedge*2].
*/
std::vector<int> &GetEdges()
{
return _edges;
}
/**
* Determines all node to node connections from the vertex based mesh.
*
* @return vector[k][] containing all connections of vertex k, including to itself.
*/
std::vector<std::vector<int>> Node2NodeGraph() const
{
//// Check version 2 wrt. version 1
//auto v1=Node2NodeGraph_1();
//auto v2=Node2NodeGraph_2();
//if ( equal(v1.cbegin(),v1.cend(),v2.begin()) )
//{
//std::cout << "\nidentical Versions\n";
//}
//else
//{
//std::cout << "\nE R R O R in Versions\n";
//}
//return Node2NodeGraph_1();
return Node2NodeGraph_2(); // 2 times faster than version 1
}
/**
* Accesses the father-of-nodes relation.
*
* @return vector of length 0 because no relation available.
*
*/
virtual std::vector<int> const &GetFathersOfVertices() const
{
return _dummy;
}
/**
* Deletes all edge connectivity information (saves memory).
*/
void Del_EdgeConnectivity();
protected:
//public:
void SetNelem(int nelem)
{
_nelem = nelem;
}
void SetNverticesElement(int nvert)
{
_nvert_e = nvert;
}
void SetNdofsElement(int ndof)
{
_ndof_e = ndof;
}
void SetNnode(int nnode)
{
_nnode = nnode;
}
void SetNdim(int ndim)
{
_ndim = ndim;
}
void SetNedge(int nedge)
{
_nedge = nedge;
}
/**
* Reads vertex based mesh data from an ASCII file.
*
* File format, see ascii_write_mesh.m
*
* @param[in] fname file name
*/
void ReadVectexBasedMesh(std::string const &fname);
/**
* The vertex based mesh data are used to derive the edge based data.
*
* @warning Exactly 3 vertices, 3 edges per element are assumed (linear triangle in 2D)
*/
void DeriveEdgeFromVertexBased()
{
//DeriveEdgeFromVertexBased_slow();
//DeriveEdgeFromVertexBased_fast();
DeriveEdgeFromVertexBased_fast_2();
}
void DeriveEdgeFromVertexBased_slow();
void DeriveEdgeFromVertexBased_fast();
void DeriveEdgeFromVertexBased_fast_2();
/**
* The edge based mesh data are used to derive the vertex based data.
*
* @warning Exactly 3 vertices, 3 edges per element are assumed (linear triangle in 2D)
*/
void DeriveVertexFromEdgeBased();
/**
* Size of the boundary edge vector (two vertex entries per boundary edge).
* @return number of stored boundary edge entries.
*/
int Nnbedges() const
{
return static_cast<int>(_bedges.size());
}
/**
* Checks whether the array dimensions fit to their appropriate size parameters
* @return true if the array dimensions are consistent.
*/
virtual bool Check_array_dimensions() const;
/**
* Permutes the vertex information in an edge based mesh.
*
* @param[in] old2new new indices of original vertices.
*/
void PermuteVertices_EdgeBased(std::vector<int> const &old2new);
private:
/**
* Determines all node to node connections from the vertex based mesh.
*
* @return vector[k][] containing all connections of vertex k, including to itself.
*/
std::vector<std::vector<int>> Node2NodeGraph_1() const; // is correct
/**
* Determines all node to node connections from the vertex based mesh.
*
* Faster than @p Node2NodeGraph_1().
*
* @return vector[k][] containing all connections of vertex k, including to itself.
*/
std::vector<std::vector<int>> Node2NodeGraph_2() const; // is correct
//private:
protected:
int _nelem; //!< number elements
int _nvert_e; //!< number of vertices per element
int _ndof_e; //!< degrees of freedom (d.o.f.) per element
int _nnode; //!< number nodes/vertices
int _ndim; //!< space dimension of the problem (1, 2, or 3)
std::vector<int> _ia; //!< element connectivity
std::vector<double> _xc; //!< coordinates
protected:
// B.C.
std::vector<int> _bedges; //!< boundary edges [nbedges][2] storing start/end vertex
// 2020-01-08
std::vector<int> _sdedges; //!< boundary edges [nbedges][2] with left/right subdomain number
//private:
protected:
// edge based connectivity
int _nedge; //!< number of edges in mesh
int _nedge_e; //!< number of edges per element
std::vector<int> _edges; //!< edges of mesh (vertices ordered ascending)
std::vector<int> _ea; //!< edge based element connectivity
// B.C.
std::vector<int> _ebedges; //!< boundary edges [nbedges]
private:
const std::vector<int> _dummy; //!< empty dummy vector
};
// *********************************************************************
class RefinedMesh: public Mesh
{
public:
/**
* Constructs a refined mesh according to the marked elements in @p ibref.
*
* If the vector @p ibref has size 0 then all elements will be refined.
*
* @param[in] cmesh original (coarse) mesh to be refined.
* @param[in] ibref vector containing True/False regarding refinement for each element
*
*/
//explicit RefinedMesh(Mesh const &cmesh, std::vector<bool> const &ibref = std::vector<bool>(0));
RefinedMesh(Mesh const &cmesh, std::vector<bool> const &ibref);
//RefinedMesh(Mesh const &cmesh, std::vector<bool> const &ibref);
/**
* Constructs a refined mesh by regular refinement of all elements.
*
* @param[in] cmesh original (coarse) mesh to be refined.
*
*/
explicit RefinedMesh(Mesh const &cmesh)
: RefinedMesh(cmesh, std::vector<bool>(0))
{}
RefinedMesh(RefinedMesh const &) = delete;
//RefinedMesh(RefinedMesh const&&) = delete;
RefinedMesh &operator=(RefinedMesh const &) = delete;
//RefinedMesh& operator=(RefinedMesh const&&) = delete;
/**
* Destructor.
*/
virtual ~RefinedMesh() override;
/**
* Refines the mesh according to the marked elements.
*
* @param[in] ibref vector containing True/False regarding refinement for each element
*
* @return the refined mesh
*
*/
Mesh RefineElements(std::vector<bool> const &ibref);
/**
* Refines all elements in the actual mesh.
*
* @param[in] nref number of regular refinements to perform
*
*/
void RefineAllElements(int nref = 1);
/**
* Accesses the father-of-nodes relation.
*
* @return father-of-nodes relation [nnodes][2]
*
*/
std::vector<int> const &GetFathersOfVertices() const override
{
return _vfathers;
}
protected:
/**
* Checks whether the array dimensions fit to their appropriate size parameters
* @return true if the array dimensions are consistent.
*/
bool Check_array_dimensions() const override;
/**
* Permutes the vertex information in an edge based mesh.
*
* @param[in] old2new new indices of original vertices.
*/
void PermuteVertices_EdgeBased(std::vector<int> const &old2new);
private:
//Mesh const & _cmesh; //!< coarse mesh
std::vector<bool> const _ibref; //!< refinement info
int _nref; //!< number of regular refinements performed
std::vector<int> _vfathers; //!< stores the 2 fathers of each vertex (equal fathers denote original coarse vertex)
};
// *********************************************************************
class gMesh_Hierarchy
{
public:
/**
* Constructs mesh hierarchy of @p nlevel levels starting with coarse mesh @p cmesh.
* The coarse mesh @p cmesh will be @p nlevel-1 times geometrically refined.
*
* @param[in] cmesh initial coarse mesh
* @param[in] nlevel number levels in mesh hierarchy
*
*/
gMesh_Hierarchy(Mesh const &cmesh, int nlevel);
size_t size() const
{
return _gmesh.size();
}
/**
* Access to mesh @p lev from mesh hierarchy.
*
* @return mesh @p lev
* @warning An out_of_range exception might be thrown.
*
*/
Mesh const &operator[](int lev) const
{
return *_gmesh.at(lev);
}
/**
* Access to finest mesh in mesh hierarchy.
*
* @return finest mesh
*
*/
Mesh const &finest() const
{
return *_gmesh.back();
}
/**
* Access to coarsest mesh in mesh hierarchy.
*
* @return coarsest mesh
*
*/
Mesh const &coarsest() const
{
return *_gmesh.front();
}
private:
std::vector<std::shared_ptr<Mesh>> _gmesh; //!< mesh hierarchy from coarse ([0]) to fine.
};
// *********************************************************************
/**
* 2D finite element mesh of the square consisting of linear triangular elements.
*/
class Mesh_2d_3_square: public Mesh
{
public:
/**
* Generates the f.e. mesh for the unit square.
*
* @param[in] nx number of discretization intervals in x-direction
* @param[in] ny number of discretization intervals in y-direction
* @param[in] myid my MPI-rank / subdomain
* @param[in] procx number of ranks/subdomains in x-direction
* @param[in] procy number of processes in y-direction
*/
Mesh_2d_3_square(int nx, int ny, int myid = 0, int procx = 1, int procy = 1);
/**
* Destructor
*/
~Mesh_2d_3_square() override;
/**
* Set solution vector based on a tensor product grid in the rectangle.
* @param[in] u solution vector
*/
void SetU(std::vector<double> &u) const;
/**
* Set right hand side (rhs) vector on a tensor product grid in the rectangle.
* @param[in] f rhs vector
*/
void SetF(std::vector<double> &f) const;
/**
* Determines the indices of those vertices with Dirichlet boundary conditions
* @return index vector.
*/
std::vector<int> Index_DirichletNodes() const override;
std::vector<int> Index_BoundaryNodes() const override;
/**
* Stores the values of vector @p u of (sub)domain into a file @p name for further processing in gnuplot.
* The file stores row-wise the x- and y-coordinates together with the value from @p u.
* The domain [@p xl, @p xr] x [@p yb, @p yt] is discretized into @p nx x @p ny intervals.
*
* @param[in] name basename of file name (file name will be extended by the rank number)
* @param[in] u local vector
*
* @warning Assumes tensor product grid in unit square; row-wise numbered
* (as generated in class constructor).
* The output is provided for tensor product grid visualization
* ( similar to Matlab-surf() ).
*
* @see Mesh_2d_3_square
*/
void SaveVectorP(std::string const &name, std::vector<double> const &u) const;
// still to be implemented in this class:
// GetBound(), AddBound()
// or better a generalized way with indices and their appropriate ranks for MPI communication
private:
/**
* Determines the coordinates of the discretization nodes of the domain [@p xl, @p xr] x [@p yb, @p yt]
* which is discretized into @p nx x @p ny intervals.
* @param[in] nx number of discretization intervals in x-direction
* @param[in] ny number of discretization intervals in y-direction
* @param[in] xl x-coordinate of left boundary
* @param[in] xr x-coordinate of right boundary
* @param[in] yb y-coordinate of lower boundary
* @param[in] yt y-coordinate of upper boundary
* @param[out] xc coordinate vector of length 2n with x(2*k,2*k+1) as coordinates of node k
*/
void GetCoordsInRectangle(int nx, int ny, double xl, double xr, double yb, double yt,
double xc[]);
/**
* Determines the element connectivity of linear triangular elements of a FEM discretization
* of a rectangle using @p nx x @p ny equidistant intervals for discretization.
* @param[in] nx number of discretization intervals in x-direction
* @param[in] ny number of discretization intervals in y-direction
* @param[out] ia element connectivity matrix with ia(3*s,3*s+1,3*s+2) as node numbers of element s
*/
void GetConnectivityInRectangle(int nx, int ny, int ia[]);
private:
int _myid; //!< my MPI rank
int _procx; //!< number of MPI ranks in x-direction
int _procy; //!< number of MPI ranks in y-direction
std::array<int, 4> _neigh; //!< MPI ranks of neighbors (negative: no neighbor but b.c.)
int _color; //!< red/black coloring (checker board) of subdomains
double _xl; //!< x coordinate of lower left corner of square
double _xr; //!< x coordinate of lower right corner of square
double _yb; //!< y coordinate of lower left corner of square
double _yt; //!< y coordinate of upper right corner of square
int _nx; //!< number of intervals in x-direction
int _ny; //!< number of intervals in y-direction
};
// *********************************************************************
#endif
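
A minimal usage sketch for the Mesh class declared above (not part of this commit; the mesh file name and the test function are assumptions):
// Sketch: read an ASCII mesh written by ascii_write_mesh.m, set nodal values, visualize.
#include "geom.h"
#include <vector>
int main()
{
    Mesh const mesh("square_1.txt");   // file name is an assumption
    std::vector<double> v(mesh.Nnodes());
    mesh.SetValues(v, [](double x, double y) { return x * x + y * y; });
    mesh.Visualize(v);                 // needs the Matlab helper scripts in the working directory
    return 0;
}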

106
ex7/ex7_4/ex7_4/main.cpp Normal file
View file

@ -0,0 +1,106 @@
#include "geom.h"
#include "par_geom.h"
#include "vdop.h"
#include <cassert>
#include <cmath>
#include <iostream>
#include <mpi.h> // MPI
#include <omp.h> // OpenMP
using namespace std;
int main(int argc, char **argv )
{
MPI_Init(&argc, &argv);
MPI_Comm const icomm(MPI_COMM_WORLD);
omp_set_num_threads(1); // don't use OMP parallelization for a start
//
{
int np;
MPI_Comm_size(icomm, &np);
//assert(4 == np); // example is only provided for 4 MPI processes
}
// #####################################################################
// ---- Read the f.e. mesh and the mapping of elements to MPI processes
//Mesh const mesh_c("square_4.txt"); // Files square_4.txt and square_4_sd.txt are needed
ParMesh const mesh("square", icomm);
int const numprocs = mesh.NumProcs();
int const myrank = mesh.MyRank();
if ( 0 == myrank ) {
cout << "\n There are " << numprocs << " processes running.\n \n";
}
int const check_rank=0; // choose the MPI process you would like to check the mesh
//if ( check_rank == myrank ) mesh.Debug();
//if ( check_rank == myrank ) mesh.DebugEdgeBased();
// -------------------- E9 --------------------
vector<double> xl(mesh.Nnodes(), 1.0);
// scalar product
double ss = mesh.dscapr(xl,xl);
if (myrank == check_rank)
{
cout << "-------------------- E9 --------------------" << endl;
cout << myrank << " : scalar : " << ss << endl << endl;
}
// check accumulation (by visualization)
mesh.VecAccu(xl);
if (check_rank==myrank) mesh.Visualize(xl);
// -------------------- E10 --------------------
vector<int> xl_int(mesh.Nnodes(), 1);
// check accumulation (by console output)
mesh.VecAccu(xl_int);
vector<double> coords = mesh.GetCoords();
if (check_rank == myrank)
{
cout << "-------------------- E10 --------------------" << endl;
for (size_t i = 0; i < coords.size(); i += 2)
{
cout << "(" << coords[i] << ", " << coords[i + 1] << "):\t" << xl_int[i/2] << endl;
}
}
// -------------------- E11 --------------------
int global_nodes = mesh.GlobalNodes();
if (check_rank == myrank)
{
cout << "-------------------- E11 --------------------" << endl;
cout << "Global nodes: " << global_nodes << endl;
}
// -------------------- E12 --------------------
vector<double> xl_new(mesh.Nnodes(), 1.0);
// check averaging at the interfaces (by visualization)
mesh.Average(xl_new);
if (check_rank==myrank) mesh.Visualize(xl_new);
// -------------------- E13 --------------------
// In terminal: /usr/local/MATLAB/R2025b/bin/matlab < square_2.m
// I then had to rename square_2.txt and square_2_sd.txt manually
// Run in terminal:
// /usr/bin/mpirun --oversubscribe -display-map -mca btl ^openib -np 2 ./main.GCC_
// /usr/bin/mpirun --oversubscribe -display-map -mca btl ^openib -np 1 ./main.GCC_
MPI_Finalize();
return 0;
}

View file

@ -0,0 +1,598 @@
// see: http://llvm.org/docs/CodingStandards.html#include-style
#include "vdop.h"
//#include "geom.h"
#include "par_geom.h"
#include <algorithm>
#include <array>
#include <cassert>
#include <cmath>
#include <ctime> // contains clock()
#include <fstream>
#include <iostream>
#include <list>
#include <numeric> // accumulate()
#include <string>
#include <vector>
using namespace std;
ParMesh::ParMesh(int ndim, int nvert_e, int ndof_e, int nedge_e, MPI_Comm const &icomm)
: Mesh(ndim, nvert_e, ndof_e, nedge_e),
_icomm(icomm), _numprocs(-1), _myrank(-1),
_v_l2g(0), _t_l2g(0), _v_g2l{{}}, _t_g2l{{}}, _valence(0),
_sendbuf(0), _sendcounts(0), _sdispls(0),
_loc_itf(0), _gloc_itf(0), _buf2loc(0)
{
MPI_Comm_size(icomm, &_numprocs);
MPI_Comm_rank(icomm, &_myrank);
}
ParMesh::~ParMesh()
{}
ParMesh::ParMesh(std::string const &sname, MPI_Comm const &icomm)
: ParMesh(2, 3, 3, 3, icomm) // two dimensions, 3 vertices, 3 dofs, 3 edges per element
{
//const int numprocs = _icomm.Get_size();
const string NS = "_" + to_string(_numprocs);
const string fname = sname + NS + ".txt";
//cout << "############ " << fname << endl;
ReadVertexBasedMesh(fname);
cout << "\n End of sequential File read \n";
// ------------------------------------------------------------------------------
// Until this point a l l processes possess a l l mesh info in g l o b a l numbering
//
// Now, we have to select the data belonging to my_rank
// and we have to create the mapping local to global (l2g) and vice versa (g2l)
// ------------------------------------------------------------------------------
// save the global node mesh (maybe we need it later)
DeriveEdgeFromVertexBased(); // and even more
Mesh global_mesh(*this); // requires a l o t of memory
Del_EdgeConnectivity();
// read the subdomain info
const string dname = sname + NS + "_sd" + ".txt";
vector<int> t2d = ReadElementSubdomains(dname); // global mapping triangle to subdomain for all elements
//const int myrank = _icomm.Get_rank();
Transform_Local2Global_Vertex(_myrank, t2d); // Vertex based mesh: now in l o c a l indexing
DeriveEdgeFromVertexBased(); // Generate also the l o c a l edge based information
Generate_VectorAdd();
// Now we have to organize the MPI communication of vertices on the subdomain interfaces
return;
}
vector<int> ParMesh::ReadElementSubdomains(string const &dname)
{
ifstream ifs(dname);
if (!(ifs.is_open() && ifs.good())) {
cerr << "ParMesh::ReadElementSubdomain: Error cannot open file " << dname << endl;
assert(ifs.is_open());
}
int const OFFSET{1}; // Matlab to C indexing
cout << "ASCI file " << dname << " opened" << endl;
// Read some mesh constants
int nelem;
ifs >> nelem;
cout << nelem << " " << Nelems() << endl;
assert( Nelems() == nelem);
// Allocate memory
vector<int> t2d(nelem, -1);
// Read element mapping
for (int k = 0; k < nelem; ++k) {
int tmp;
ifs >> tmp;
//t2d[k] = tmp - OFFSET;
// 2020-01-08
t2d[k] = min(tmp, NumProcs()) - OFFSET;
}
return t2d;
}
void ParMesh::Transform_Local2Global_Vertex(int const myrank, vector<int> const &t2d)
{
// number of local elements
const int l_ne = count(t2d.cbegin(), t2d.cend(), myrank);
//cout << myrank << ":: " << lne << endl;
vector<int> l_ia(l_ne * NverticesElements(), -1); // local elements still with global vertex numbers
_t_l2g.resize(l_ne, -1);
int lk = 0;
for (size_t k = 0; k < t2d.size(); ++k) {
if (myrank == t2d[k]) {
//if (0==myrank)
//{
//cout << lk << " k " << t2d[k] << endl;
//}
l_ia[3 * lk ] = _ia[3 * k ];
l_ia[3 * lk + 1] = _ia[3 * k + 1];
l_ia[3 * lk + 2] = _ia[3 * k + 2]; // local elements still with global vertex numbers
_t_l2g[lk] = k; // elements: local to global mapping
_t_g2l[k] = lk; // global to local
++lk;
}
}
// Checks:
assert( count(l_ia.cbegin(), l_ia.cend(), -1) == 0 );
assert( count(_t_l2g.cbegin(), _t_l2g.cend(), -1) == 0 );
// Vertices: local to global mapping
auto tmp = l_ia;
sort(tmp.begin(), tmp.end());
auto ip = unique(tmp.begin(), tmp.end());
tmp.erase(ip, tmp.end());
_v_l2g = tmp; // Vertices: local to global mapping
for (size_t lkv = 0; lkv < _v_l2g.size(); ++lkv) {
_v_g2l[_v_l2g[lkv]] = lkv; // global to local
}
// Boundary edges
vector<int> l_bedges;
vector<int> l_sdedges;
for (size_t b = 0; b < _bedges.size(); b += 2) {
int const v1 = _bedges[b ]; // global vertex numbers
int const v2 = _bedges[b + 1];
try {
int const lv1 = _v_g2l.at(v1); // map[] would add that element
int const lv2 = _v_g2l.at(v2); // but at() throws an exception
l_bedges.push_back(lv1);
l_bedges.push_back(lv2); // Boundaries: already in local indexing
// 2020-01-08
l_sdedges.push_back(_sdedges[b ]);
l_sdedges.push_back(_sdedges[b+1]);
}
catch (std::out_of_range & err) {
//cerr << ".";
}
}
// number of local vertices
const int l_nn = _v_l2g.size();
vector<double> l_xc(Ndims()*l_nn);
for (int lkk = 0; lkk < l_nn; ++lkk) {
int k = _v_l2g.at(lkk);
l_xc[2 * lkk ] = _xc[2 * k ];
l_xc[2 * lkk + 1] = _xc[2 * k + 1];
}
// Now, we represent the vertex mesh in l o c a l numbering
// elements
for (size_t i = 0; i < l_ia.size(); ++i) {
l_ia[i] = _v_g2l.at(l_ia[i]); // element vertices: global to local
}
SetNelem(l_ne);
_ia = l_ia;
// boundary
_bedges = l_bedges;
_sdedges = l_sdedges;
// coordinates
SetNnode(l_nn);
_xc = l_xc;
return;
}
void ParMesh::Generate_VectorAdd()
{
// Some checks
int lnn = Nnodes(); // local number of vertices
assert(static_cast<int>(_v_l2g.size()) == lnn);
int ierr{-12345};
// ---- Determine global largest vertex index
int gidx_max{-1}; // global largest vertex index
int lmax = *max_element(_v_l2g.cbegin(), _v_l2g.cend());
MPI_Allreduce(&lmax, &gidx_max, 1, MPI_INT, MPI_MAX, _icomm);
int gidx_min{-1}; // global smallest vertex index
int lmin = *min_element(_v_l2g.cbegin(), _v_l2g.cend());
MPI_Allreduce(&lmin, &gidx_min, 1, MPI_INT, MPI_MIN, _icomm);
//cout << gidx_min << " " << gidx_max << endl;
assert(0 == gidx_min); // global indices have to start with 0
// ---- Determine for all global vertices the number of subdomains it belongs to
vector<int> global(gidx_max+1, 0); // global scalar array for vertices
for (auto const gidx : _v_l2g) global[gidx] = 1;
// https://www.mpi-forum.org/docs/mpi-2.2/mpi22-report/node109.htm
ierr = MPI_Allreduce(MPI_IN_PLACE, global.data(), global.size(), MPI_INT, MPI_SUM, _icomm);
//if (0 == MyRank()) cout << global << endl;
//MPI_Barrier(_icomm);
//cout << _xc[2*_v_g2l.at(2)] << " , " << _xc[2*_v_g2l.at(2)+1] << endl;
//MPI_Barrier(_icomm);
// now, global[] contains the number of subdomains a global vertex belongs to
if ( count(global.cbegin(), global.cend(), 0) > 0 )
cerr << "\n !!! Non-continuous global vertex indexing !!!\n";
// ---- Determine local interface vertices ( <==> global[] > 1 )
// _loc_itf, neigh_itf
//vector<int> loc_itf; // local indices of interface vertices on this MPI process
for (size_t lk = 0; lk < _v_l2g.size(); ++lk) {
int const gk = _v_l2g[lk]; // global index of local vertex lk
if ( global[gk] > 1 ) {
_loc_itf.push_back(lk); // local indices of interface vertices on this MPI process
}
}
//MPI_Barrier(_icomm);
//if (0 == MyRank()) cout << "\n..._loc_itf...\n" << _loc_itf << "\n......\n";
//MPI_Barrier(_icomm);
// ---- global indices of local interface vertices
//auto gloc_itf(_loc_itf);
_gloc_itf=_loc_itf;
for_each(_gloc_itf.begin(), _gloc_itf.end(), [this] (auto & v) -> void { v = _v_l2g[v];} );
//MPI_Barrier(_icomm);
//if (0 == MyRank()) cout << "\n..._gloc_itf...\n" << _gloc_itf << "\n......\n";
//DebugVector(_gloc_itf,"_gloc_itf");
// ---- Determine the global length of interfaces
vector<int> vnn(NumProcs(), -1); // number of interface vertices per MPI rank
int l_itf(_loc_itf.size()); // # local interface vertices
ierr = MPI_Allgather(&l_itf, 1, MPI_INT, vnn.data(), 1, MPI_INT, _icomm);
assert(0 == ierr);
//cout << vnn << endl;
// ---- Now we consider only the interface vertices
int snn = accumulate(vnn.cbegin(), vnn.cend(), 0); // required length of array for global interface indices
//cout << snn << " " << gnn << endl;
vector<int> dispnn(NumProcs(), 0) ; // displacement of interface vertices per MPI rank
partial_sum(vnn.cbegin(), vnn.cend() - 1, dispnn.begin() + 1);
//cout << dispnn << endl;
// ---- Get the global indices for all global interfaces
vector<int> g_itf(snn, -1); // collects all global indices of the global interfaces
// https://www.mpich.org/static//docs/v3.0.x/www3/MPI_Gatherv.html
ierr = MPI_Gatherv( _gloc_itf.data(), _gloc_itf.size(), MPI_INT,
g_itf.data(), vnn.data(), dispnn.data(), MPI_INT, 0, _icomm);
assert(0 == ierr);
// https://www.mpich.org/static/docs/v3.1/www3/MPI_Bcast.html
ierr = MPI_Bcast(g_itf.data(), g_itf.size(), MPI_INT, 0, _icomm);
assert(0 == ierr); // Now, each MPI rank has all global indices of the global interfaces
//MPI_Barrier(_icomm);
//if (MyRank() == 0) cout << "\n...g_itf...\n" << g_itf << "\n......\n";
//MPI_Barrier(_icomm);
// ----- Determine all MPI ranks a local interface vertex belongs to
vector<vector<int>> neigh_itf(_loc_itf.size());// subdomains a local interface vertex belongs to
for (size_t lk = 0; lk < _loc_itf.size(); ++lk) {
const int gvert = _gloc_itf[lk]; // global index of local interface node lk
for (int rank = 0; rank < NumProcs(); ++rank) {
auto const startl = g_itf.cbegin() + dispnn[rank];
auto const endl = startl + vnn[rank];
if ( find( startl, endl, gvert) != endl) {
neigh_itf[lk].push_back(rank);
}
}
}
// ---- check the available info in _loc_itf[lk], _gloc_itf[lk], neigh_itf[lk]
//MPI_Barrier(_icomm);
////if (MyRank()==0) cout << "\n...neigh_itf ...\n" << neigh_itf << endl;
//if (MyRank() == 0) {
//for (size_t lk = 0; lk < _loc_itf.size(); ++lk ) {
//cout << lk << " : local idx " << _loc_itf[lk] << " , global idx " << _gloc_itf[lk];
//cout << " with MPI ranks " << neigh_itf[lk] << endl;
//}
//}
//MPI_Barrier(_icomm);
// ---- store the valence (i.e., the number of subdomains a vertex belongs to) of all local vertices
_valence.resize(Nnodes(),1);
for (size_t lk = 0; lk < _loc_itf.size(); ++lk)
{
_valence[_loc_itf[lk]] = neigh_itf[lk].size();
}
//DebugVector(_valence,"_valence",_icomm);
// ---- We are going to use MPI_Alltoallv for data exchange on interfaces
// https://www.mpi-forum.org/docs/mpi-3.1/mpi31-report/node109.htm#Node109
// https://www.open-mpi.org/doc/v4.0/man3/MPI_Alltoallv.3.php
//int MPI_Alltoallv(const void* sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void* recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, MPI_Comm comm)
//
// MPI_Alltoallv needs:
// vector<double> sendbuf (MPI_IN_PLACE: used also as recvbuf)
// vector<int> sendcounts (the same as for recv)
// vector<int> sdispls (the same as for recv)
//
// We need to map the interface vertices onto the sendbuffer:
// vector<int> loc_itf local index of interface vertex lk
// vector<int> gloc_itf global index of interface vertex lk
// vector<int> buf2loc local indices of sendbuffer positions (the same as for recv)
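// Illustrative example (values are an assumption, not taken from an actual mesh):
// suppose rank 0 has the local interface vertices _loc_itf = {4, 7, 9}, where vertex 4 is shared
// with rank 1, vertex 7 with ranks 1 and 2, and vertex 9 with rank 2. Since every vertex is also
// found in rank 0's own segment of g_itf, neigh_itf = {{0,1}, {0,1,2}, {0,2}} and therefore
//   _sendcounts = {3, 2, 2}              (entries destined for ranks 0, 1, 2)
//   _sdispls    = {0, 3, 5}              (exclusive prefix sums of _sendcounts)
//   _buf2loc    = {4,7,9, 4,7, 7,9}      (local vertex behind every send-buffer position)
// i.e. a vertex occurs once per subdomain it belongs to; _valence = 2, 3, 2 for these vertices.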
// ---- Determine sendcounts[] and sdispls[] from neigh_itf[]
//vector<int> _sendcounts(NumProcs(), 0);
_sendcounts.resize(NumProcs(), 0);
for (size_t lk = 0; lk < _loc_itf.size(); ++lk ) {
auto const &kneigh = neigh_itf[lk];
for (size_t ns = 0; ns < kneigh.size(); ++ns) {
++_sendcounts[kneigh[ns]];
}
}
//if (MyRank() == 0) cout << "\n..._sendcounts ...\n" << _sendcounts << endl;
//vector<int> _sdispls(NumProcs(), 0);
_sdispls.resize(NumProcs(), 0);
partial_sum(_sendcounts.cbegin(), _sendcounts.cend() - 1, _sdispls.begin() + 1);
//vector<int> _sdispls(NumProcs()+1, 0);
//partial_sum(_sendcounts.cbegin(), _sendcounts.cend(), _sdispls.begin() + 1);
//if (MyRank() == 0) cout << "\n..._sdispls ...\n" << _sdispls << endl;
// ---- Determine size of buffer 'nbuffer' and mapping 'buf2loc'
int const nbuffer = accumulate(_sendcounts.cbegin(), _sendcounts.cend(), 0);
//vector<int> _buf2loc(nbuffer, -1);
_buf2loc.resize(nbuffer, -1);
int buf_idx = 0; // position in buffer
for (int rank = 0; rank < NumProcs(); ++rank) {
assert( buf_idx == _sdispls[rank]);
for (size_t lk = 0; lk < _loc_itf.size(); ++lk ) {
auto const &kneigh = neigh_itf[lk];
if (find(kneigh.cbegin(),kneigh.cend(),rank)!=kneigh.cend())
{
_buf2loc[buf_idx] = _loc_itf[lk];
++buf_idx;
}
}
}
//if (MyRank() == 0) cout << "\n...buf2loc ...\n" << buf2loc << endl;
//DebugVector(buf2loc,"buf2loc",_icomm);
// ---- Allocate send/recv buffer
//vector<double> _sendbuf(nbuffer,-1.0);
_sendbuf.resize(nbuffer,-1.0);
assert(CheckInterfaceExchange_InPlace());
cout << " Check of data exchange (InPlace) successful!\n";
assert(CheckInterfaceExchange());
cout << " Check of data exchange successful!\n";
assert(CheckInterfaceAdd_InPlace());
cout << " Check of data add successful!\n";
assert(CheckInterfaceAdd());
cout << " Check of data add (InPlace) successful!\n";
vector<double> x(Nnodes(),-1.0);
VecAccu(x);
cout << " VecAccu (InPlace) successful!\n";
return;
}
bool ParMesh::CheckInterfaceExchange_InPlace() const
{
vector<double> x(Nnodes(),-1.0);
copy(_v_l2g.cbegin(),_v_l2g.cend(),x.begin()); // init x with global vertex indices
for(size_t ls = 0; ls<_sendbuf.size(); ++ls)
{
_sendbuf[ls] = x[_buf2loc.at(ls)];
}
int ierr = MPI_Alltoallv(MPI_IN_PLACE, _sendcounts.data(), _sdispls.data(), MPI_DOUBLE,
_sendbuf.data(), _sendcounts.data(), _sdispls.data(), MPI_DOUBLE, _icomm);
assert(ierr==0);
//DebugVector(_sendbuf,"_sendbuf",_icomm);
vector<double> y(x);
for(size_t lk = 0; lk<_loc_itf.size(); ++lk) y[_loc_itf.at(lk)] = -1.0; // only for interface nodes
for(size_t ls = 0; ls<_sendbuf.size(); ++ls)
{
y[_buf2loc.at(ls)] = _sendbuf[ls];
}
double const eps=1e-10;
bool bv = equal(x.cbegin(),x.cend(),y.cbegin(),
[eps](double a, double b) -> bool
{ return std::abs(a-b)<eps*(1.0+0.5*(std::abs(a)+ std::abs(b))); }
);
return bv;
}
bool ParMesh::CheckInterfaceExchange() const
{
vector<double> x(Nnodes(),-1.0);
copy(_v_l2g.cbegin(),_v_l2g.cend(),x.begin()); // init x with global vertex indices
for(size_t ls = 0; ls<_sendbuf.size(); ++ls)
{
_sendbuf[ls] = x[_buf2loc.at(ls)];
}
vector<double> recvbuf(_sendbuf.size());
int ierr = MPI_Alltoallv(_sendbuf.data(), _sendcounts.data(), _sdispls.data(), MPI_DOUBLE,
recvbuf.data(), _sendcounts.data(), _sdispls.data(), MPI_DOUBLE, _icomm);
//DebugVector(_sendbuf,"_sendbuf",_icomm);
//DebugVector(recvbuf,"recvbuf",_icomm);
assert(ierr==0);
vector<double> y(x);
for(size_t lk = 0; lk<_loc_itf.size(); ++lk) y[_loc_itf.at(lk)] = -1.0; // only for interface nodes
for(size_t ls = 0; ls<recvbuf.size(); ++ls)
{
y[_buf2loc.at(ls)] = recvbuf[ls];
}
//cout << "WRONG : " << count(y.cbegin(),y.cend(), -1.0) << endl;
double const eps=1e-10;
bool bv = equal(x.cbegin(),x.cend(),y.cbegin(),
[eps](double a, double b) -> bool
{ return std::abs(a-b)<eps*(1.0+0.5*(std::abs(a)+ std::abs(b))); }
);
return bv;
}
bool ParMesh::CheckInterfaceAdd_InPlace() const
{
vector<double> x(Nnodes(),-1.0);
for (size_t i=0; i<x.size(); ++i)
{
x[i] = _xc[2*i]+_xc[2*i+1]; // init x with coordinate values
}
for(size_t ls = 0; ls<_sendbuf.size(); ++ls)
{
_sendbuf[ls] = x[_buf2loc.at(ls)];
}
int ierr = MPI_Alltoallv(MPI_IN_PLACE, _sendcounts.data(), _sdispls.data(), MPI_DOUBLE,
_sendbuf.data(), _sendcounts.data(), _sdispls.data(), MPI_DOUBLE, _icomm);
assert(ierr==0);
//DebugVector(_sendbuf,"_sendbuf",_icomm);
vector<double> y(x);
for(size_t lk = 0; lk<_loc_itf.size(); ++lk) y[_loc_itf.at(lk)] = 0.0; // only for interface nodes
for(size_t ls = 0; ls<_sendbuf.size(); ++ls)
{
y[_buf2loc.at(ls)] += _sendbuf[ls];
}
MPI_Barrier(_icomm);
//DebugVector(x,"x",_icomm);
//DebugVector(y,"y",_icomm);
for (size_t i= 0; i<y.size(); ++i) y[i]/=_valence[i]; // divide by valence
double const eps=1e-10;
bool bv = equal(x.cbegin(),x.cend(),y.cbegin(),
[eps](double a, double b) -> bool
{ return std::abs(a-b)<eps*(1.0+0.5*(std::abs(a)+ std::abs(b))); }
);
return bv;
}
bool ParMesh::CheckInterfaceAdd() const
{
vector<double> x(Nnodes(),-1.0);
for (size_t i=0; i<x.size(); ++i)
{
//x[i] = _xc[2*i]+_xc[2*i+1]; // init x with coordinate values
x[i] = _v_l2g[i];
}
for(size_t ls = 0; ls<_sendbuf.size(); ++ls)
{
_sendbuf[ls] = x[_buf2loc.at(ls)];
}
vector<double> recvbuf(_sendbuf.size());
int ierr = MPI_Alltoallv(_sendbuf.data(), _sendcounts.data(), _sdispls.data(), MPI_DOUBLE,
recvbuf.data(), _sendcounts.data(), _sdispls.data(), MPI_DOUBLE, _icomm);
//DebugVector(_sendbuf,"_sendbuf",_icomm);
//DebugVector(recvbuf,"recvbuf",_icomm);
assert(ierr==0);
vector<double> y(x);
for(size_t lk = 0; lk<_loc_itf.size(); ++lk) y[_loc_itf.at(lk)] = 0.0; // only for interface nodes
for(size_t ls = 0; ls<recvbuf.size(); ++ls)
{
//if (0==MyRank()) cout << ls << ": " << _buf2loc.at(ls) << " " << y[_buf2loc.at(ls)] << "("<< x[_buf2loc.at(ls)] << ")" << " " << recvbuf[ls] << " (" << _sendbuf[ls] << ")" << endl;
y[_buf2loc.at(ls)] += recvbuf[ls];
}
MPI_Barrier(_icomm);
//DebugVector(x,"x",_icomm);
//DebugVector(y,"y",_icomm);
for (size_t i= 0; i<y.size(); ++i) y[i]/=_valence[i]; // divide by valence
double const eps=1e-10;
bool bv = equal(x.cbegin(),x.cend(),y.cbegin(),
[eps](double a, double b) -> bool
{ return std::abs(a-b)<eps*(1.0+0.5*(std::abs(a)+ std::abs(b))); }
);
return bv;
}
// ----------
void ParMesh::VecAccu(std::vector<double> &w) const
{
for(size_t ls = 0; ls<_sendbuf.size(); ++ls)
{
_sendbuf[ls] = w[_buf2loc.at(ls)];
}
int ierr = MPI_Alltoallv(MPI_IN_PLACE, _sendcounts.data(), _sdispls.data(), MPI_DOUBLE,
_sendbuf.data(), _sendcounts.data(), _sdispls.data(), MPI_DOUBLE, _icomm);
assert(ierr==0);
//DebugVector(_sendbuf,"_sendbuf",_icomm);
for(size_t lk = 0; lk<_loc_itf.size(); ++lk) w[_loc_itf.at(lk)] = 0.0; // only for interface nodes
for(size_t ls = 0; ls<_sendbuf.size(); ++ls)
{
w[_buf2loc.at(ls)] += _sendbuf[ls];
}
return;
}
void ParMesh::VecAccu(std::vector<int> &w) const
{
for(size_t ls = 0; ls<_sendbuf.size(); ++ls)
{
_sendbuf[ls] = w[_buf2loc.at(ls)];
}
int ierr = MPI_Alltoallv(MPI_IN_PLACE, _sendcounts.data(), _sdispls.data(), MPI_INT,
_sendbuf.data(), _sendcounts.data(), _sdispls.data(), MPI_INT, _icomm);
assert(ierr==0);
//DebugVector(_sendbuf,"_sendbuf",_icomm);
for(size_t lk = 0; lk<_loc_itf.size(); ++lk) w[_loc_itf.at(lk)] = 0.0; // only for interface nodes
for(size_t ls = 0; ls<_sendbuf.size(); ++ls)
{
w[_buf2loc.at(ls)] += _sendbuf[ls];
}
return;
}
int ParMesh::GlobalNodes() const
{
int local_nodes = _nnode;
int global_nodes = 0;
MPI_Allreduce(&local_nodes, &global_nodes, 1, MPI_INT, MPI_SUM, _icomm);
return global_nodes;
}
void ParMesh::Average(std::vector<double> &w) const
{
for(size_t ls = 0; ls<_sendbuf.size(); ++ls)
{
_sendbuf[ls] = w[_buf2loc.at(ls)];
}
int ierr = MPI_Alltoallv(MPI_IN_PLACE, _sendcounts.data(), _sdispls.data(), MPI_DOUBLE,
_sendbuf.data(), _sendcounts.data(), _sdispls.data(), MPI_DOUBLE, _icomm);
assert(ierr==0);
//DebugVector(_sendbuf,"_sendbuf",_icomm);
for(size_t lk = 0; lk<_loc_itf.size(); ++lk)
{
w[_loc_itf.at(lk)] = 0.0; // only for interface nodes
}
for(size_t ls = 0; ls<_sendbuf.size(); ++ls)
{
w[_buf2loc.at(ls)] += _sendbuf[ls];
}
// for nodes at interface: additionally divide by number of subdomains they belong to (valence)
for(size_t lk = 0; lk<_loc_itf.size(); ++lk)
{
w[_loc_itf.at(lk)] /= _valence[_loc_itf.at(lk)];
}
return;
}
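
A short sketch (not part of this commit; it assumes the "square" mesh files and the MPI setup used in main.cpp) of how VecAccu and Average relate to the stored valence:
// Sketch: accumulating an all-ones vector yields the valence at interface vertices,
// while averaging an all-ones vector leaves it unchanged.
#include "par_geom.h"
#include <mpi.h>
#include <vector>
int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    {
        ParMesh const mesh("square", MPI_COMM_WORLD);   // reads square_<np>.txt and square_<np>_sd.txt
        std::vector<double> ones(mesh.Nnodes(), 1.0);
        mesh.VecAccu(ones);   // interface vertices now hold the number of subdomains sharing them
        std::vector<double> w(mesh.Nnodes(), 1.0);
        mesh.Average(w);      // accumulate, then divide by the valence: w stays 1.0 everywhere
    }
    MPI_Finalize();
    return 0;
}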

153
ex7/ex7_4/ex7_4/par_geom.h Normal file
View file

@ -0,0 +1,153 @@
#ifndef PAR_GEOM_FILE
#define PAR_GEOM_FILE
#include "geom.h"
#include "vdop.h"
#include <array>
#include <functional> // function; C++11
#include <iostream>
#include <map>
#include <memory> // shared_ptr
#include <mpi.h> // MPI
#include <string>
#include <vector>
class ParMesh: public Mesh
{
public:
/**
* Constructor initializing the members with default values.
*
* @param[in] ndim space dimensions (dimension for coordinates)
* @param[in] nvert_e number of vertices per element (dimension for connectivity)
* @param[in] ndof_e degrees of freedom per element (= @p nvert_e for linear elements)
* @param[in] nedge_e number of edges per element (= @p nvert_e for linear elements in 2D)
* @param[in] icomm MPI communicator
*/
explicit ParMesh(int ndim, int nvert_e = 0, int ndof_e = 0, int nedge_e = 0, MPI_Comm const &icomm = MPI_COMM_WORLD);
ParMesh(ParMesh const &) = default;
ParMesh &operator=(ParMesh const &) = delete;
/**
* Destructor.
*
* See clang warning on
* <a href="https://stackoverflow.com/questions/28786473/clang-no-out-of-line-virtual-method-definitions-pure-abstract-c-class/40550578">weak-vtables</a>.
*/
virtual ~ParMesh();
/**
* Reads the global mesh data from an ASCII file and restricts it to this MPI process.
*
* @param[in] sname base name of the mesh file (extended by the number of MPI processes and ".txt")
* @param[in] icomm MPI communicator
* @see ascii_write_mesh.m for the file format.
*/
explicit ParMesh(std::string const &sname, MPI_Comm const &icomm = MPI_COMM_WORLD);
void VecAccu(std::vector<double> &w) const;
void VecAccu(std::vector<int> &w) const;
/** Inner product
* @param[in] x vector
* @param[in] y vector
* @return resulting Euclidean inner product <x,y>
*/
double dscapr(std::vector<double> const &x, std::vector<double> const &y) const
{
return par_scalar(x, y, _icomm);
}
int GlobalNodes() const;
void Average(std::vector<double> &w) const;
private:
/**
* Reads the global triangle to subdomain mapping.
*
* @param[in] dname file name
*
* @see ascii_write_subdomains.m for the file format
*/
std::vector<int> ReadElementSubdomains(std::string const &dname);
/**
* Restricts the vertex based mesh to the elements of subdomain @p myrank and switches to local indexing (builds the local/global mappings).
*
* @param[in] myrank MPI rank of this process
* @param[in] t2d global mapping triangle to subdomain for all elements (vertex based)
*/
void Transform_Local2Global_Vertex(int myrank, std::vector<int> const &t2d);
/**
* Sets up the MPI communication pattern (send counts, displacements, buffer-to-local mapping) for exchanging and accumulating vector data on the subdomain interfaces.
*/
void Generate_VectorAdd();
bool CheckInterfaceExchange_InPlace() const;
bool CheckInterfaceExchange() const;
bool CheckInterfaceAdd_InPlace() const;
bool CheckInterfaceAdd() const;
public:
/** MPI rank of the calling process in communication group.
*
* @return MPI rank of the calling process
*/
int MyRank() const
{
return _myrank;
}
/** Number of MPI processes in communication group.
*
* @return Number of MPI processes
*/
int NumProcs() const
{
return _numprocs;
}
/** Returns the MPI communicator used by this mesh.
* @return MPI communicator
*/
MPI_Comm GetCommunicator() const
{
return _icomm;
}
private:
// Don't use &_icomm ==> Error
MPI_Comm const _icomm; //!< MPI communicator for the group of processes
int _numprocs; //!< number of MPI processes
int _myrank; //!< my MPI rank
std::vector<int> _v_l2g; //!< vertices: local to global mapping
std::vector<int> _t_l2g; //!< triangles: local to global mapping
std::map<int, int> _v_g2l; //!< vertices: global to local mapping
std::map<int, int> _t_g2l; //!< triangles: global to local mapping
//std::vector<int> e_l2g; //!< edges: local to global mapping
std::vector<int> _valence; //!< valence of local vertices, i.e. number of subdomains they belong to
// MPI_Alltoallv needs:
mutable std::vector<double> _sendbuf; //!< send buffer a n d receiving buffer (MPI_IN_PLACE)
std::vector<int> _sendcounts; //!< number of data to send to each MPI rank (the same as for recv)
std::vector<int> _sdispls; //!< offset of data to send to each MPI rank wrt. _sendbuf (the same as for recv)
//
// We need to map the interface vertices onto the sendbuffer:
std::vector<int> _loc_itf; //!< local index of interface vertex lk
std::vector<int> _gloc_itf; //!< global index of interface vertex lk
std::vector<int> _buf2loc; //!< local indices of sendbuffer positions (the same as for recv)
};
#endif

View file

@ -0,0 +1,71 @@
% Square:
% flatpak run org.octave.Octave <filename>
% or
% octave --no-window-system --no-gui -qf <filename>
clear all
clc
% %% L-shape
% g=[2 0 2 0 0 1 0; % #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 2 2 0 1 1 0;
% 2 2 1 1 0.5 1 0;
% 2 1 1 0.5 2 1 0;
% 2 1 0 2 2 1 0;
% 2 0 0 2 0 1 0]';
%% square
g=[2 0 1 0 0 1 0; % #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
2 1 1 0 1 1 0;
2 1 0 1 1 1 0;
2 0 0 1 0 1 0]';
% %% 2 squares
% g=[2 0 1 0 0 1 0; % 1 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 1 1 0 1 1 2;
% 2 1 0 1 1 1 0;
% 2 0 0 1 0 1 0;
% 2 1 2 0 0 2 0; % 2 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 2 2 0 1 2 0;
% 2 2 1 1 1 2 0
% ]';
%% 4 squares
%g=[2 0 1 0 0 1 0; % 1 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 1 1 0 1 1 2;
% 2 1 0 1 1 1 3;
% 2 0 0 1 0 1 0;
% 2 1 2 0 0 2 0; % 2 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 2 2 0 1 2 0;
% 2 2 1 1 1 2 4;
% 2 1 1 1 0 2 1;
% 2 0 1 1 1 3 1; % 3 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 1 1 1 2 3 4;
% 2 1 0 2 2 3 0;
% 2 0 0 2 1 3 0;
% 2 1 2 1 1 4 2; % 4 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 2 2 1 2 4 0;
% 2 2 1 2 2 4 0
% 2 1 1 2 1 4 3
% ]';
[p,e,t] = initmesh(g,'hmax',0.1);
pdemesh(p,e,t)
%% GH
% output from <https://de.mathworks.com/help/pde/ug/initmesh.html initmesh>
%
% coordinates p: [2][nnode]
% connectivity t: [4][nelem] with t(4,:) are the subdomain numbers
% edges e: [7][nedges] boundary edges
% e([1,2],:) - start/end vertex of edge
% e([3,4],:) - start/end values
% e(5,:) - segment number
% e([6,7],:) - left/right subdomain
ascii_write_mesh( p, t, e, mfilename);
ascii_write_subdomains( p, t, e, mfilename);
% tmp=t(1:3,:)

View file

@ -0,0 +1,558 @@
185
2
328
3
0 0
1 0
1 1
0 1
0.1 0
0.2 0
0.3 0
0.4 0
0.5 0
0.6 0
0.7 0
0.8 0
0.9 0
1 0.1
1 0.2
1 0.3
1 0.4
1 0.5
1 0.6
1 0.7
1 0.8
1 0.9
0.9 1
0.8 1
0.7 1
0.6 1
0.5 1
0.4 1
0.3 1
0.2 1
0.09999999999999998 1
0 0.9
0 0.8
0 0.7
0 0.6
0 0.5
0 0.4
0 0.3
0 0.2
0 0.09999999999999998
0.479527812121941 0.5130906855242849
0.04811823180383969 0.0438546166507735
0.9563795599175402 0.04804788885032887
0.04386052966676572 0.9518771386464404
0.9519837650414075 0.9564710564643847
0.7547325605803197 0.3556873639095056
0.360761237685547 0.2648713558910208
0.2679955285799354 0.6367316063076108
0.6565311368147725 0.7282810558667158
0.08977947929390173 0.1606758305515394
0.16074961553105 0.9101421685880838
0.9114032959455121 0.8441430342815788
0.8412127393896983 0.09032345243962579
0.4520016091121353 0.7693672638162853
0.5579173186257795 0.2370236678206477
0.2298339342907983 0.4531168320939027
0.6870727978845249 0.5344182069836515
0.8145967747561291 0.6882672637836746
0.2590391808211707 0.1580373572085882
0.8521442539624974 0.2588869931794255
0.1588406321984697 0.740526673693239
0.7456209517439575 0.8415774916282844
0.7341263127877148 0.09280026881354901
0.2671693065747615 0.909496199134008
0.09033464668980155 0.2671883646221719
0.8457896962288601 0.4896000777650233
0.3404825814904102 0.5045570302191981
0.4828554674400058 0.3357874026189672
0.5328318433844601 0.6502074760223741
0.6097225090876653 0.8567940574787075
0.3906433840357024 0.1354694263186264
0.1378922468965601 0.6086349988482294
0.7110817229397154 0.2318431574832541
0.3117181579370819 0.7783142400205344
0.220949681304264 0.310667563028674
0.5425918070322489 0.07560610459253665
0.4430039145575841 0.9116370271655028
0.08740928339643227 0.4436913622237557
0.7188986206847875 0.6636965188667157
0.9110251036728165 0.6437697100153658
0.9102437695032523 0.7452606401269005
0.1758720830768971 0.1044756994214669
0.8258438045125337 0.8981667834525472
0.1046890874548641 0.8239996484245049
0.8991462273231223 0.1744109197267267
0.6051175206745411 0.4230204952626953
0.3969635264466733 0.6241613575135659
0.3715057937073831 0.3880149382253817
0.8242474441869813 0.5849955263397347
0.8249925781817751 0.3740463471567774
0.524857955574242 0.7417226727211995
0.4596212700192013 0.2593236862528195
0.2616272257729989 0.5362197465141072
0.606859389427135 0.5708877499579769
0.6628384081676978 0.1354434799258081
0.1417735759502206 0.3537206398808048
0.3535213476926991 0.857695106111649
0.8243721350238892 0.7953207341692213
0.1809418706291789 0.2179884723304038
0.2184182171811329 0.8187415498053924
0.7884319570292208 0.183777415436584
0.9200690792392709 0.4495615809861258
0.4451391682507762 0.07311678395656888
0.5521636044351103 0.9216944906274662
0.07645110424394973 0.5525170940538346
0.6459882520842987 0.9291570899039412
0.06913750986171868 0.645365744095622
0.3540126618242007 0.06796940478874185
0.3350672820198897 0.1870674574637768
0.2681458646222014 0.07565948812083768
0.9304278242313601 0.2557924725551748
0.1888675945362029 0.6639844453684917
0.07615275558574391 0.7316010144871834
0.7330914849187046 0.9235761148662579
0.6708523871490314 0.8055961987547425
0.9254996827062275 0.09229062813530801
0.07558485132732408 0.9074193007828303
0.9079441512159013 0.9258426671715853
0.09255203863622692 0.07554591808558819
0.5680278383791504 0.3330895372138523
0.6999347455251212 0.4427774934447353
0.9169029401490904 0.54449677714876
0.7384739303161019 0.7539389212238149
0.3890119777777613 0.7104484710322342
0.2878025148437066 0.3851241789215418
0.629872124120922 0.6544815099998712
0.9007264265272032 0.3468323759374454
0.411279904374581 0.4567615871148493
0.4654915406621035 0.5928705227932682
0.5147995837137257 0.8309665940332217
0.1657576147654025 0.5169080635939939
0.4842020928392167 0.1607795698062726
0.7650269169529201 0.5114318451419555
0.7727886666860844 0.2740976924940586
0.2419510570158952 0.7238088796353837
0.2752622996523991 0.2404916166572419
0.4695150046459641 0.7063259741370521
0.3347920785441169 0.583686902316204
0.4092342435672485 0.3276796179871309
0.5063047985714836 0.4204246666548196
0.2916489655260076 0.4637829403109103
0.5599640954591117 0.4974158353977953
0.4058103707982076 0.2011311049665246
0.2039249392178127 0.5928750846678578
0.5939173661963685 0.7794857474330111
0.7083501471507929 0.1677776788423718
0.2871826927671263 0.8416108907006823
0.6452139711904199 0.198163050302754
0.1580509567525808 0.2869843728474166
0.06368210143114308 0.3527506715437289
0.6379941184879288 0.07475319919242245
0.352513946674906 0.9360345024951773
0.4295894166119973 0.8326060111402684
0.1662397140312891 0.4305970833461407
0.7747686535289771 0.6310503316801724
0.3210854659795955 0.1245584034703338
0.1257665255187338 0.6783828888007957
0.6810504641157901 0.8716254757826338
0.8566662522803288 0.4178384718512468
0.1420330245501709 0.04491064793897617
0.9561739529171728 0.141583384682825
0.8584457296716212 0.9560409249356761
0.04495620004107777 0.8579381142105297
0.5171767005108064 0.2908288148650228
0.7913019422018426 0.4313726220323573
0.6425026537330679 0.274463851858847
0.6633492402087484 0.3537206596142247
0.6768185989632656 0.6021176410080418
0.3093806310187861 0.7008983644164182
0.2975174797189623 0.3067216701578878
0.6319539373284636 0.4933605852039861
0.4046353295336855 0.5461031514210908
0.4362702351071284 0.3855427634700271
0.7411951440577802 0.5877461636960623
0.2088055498119787 0.3863707017530032
0.3866467015671689 0.79036800231969
0.8211383868684881 0.3213102960013891
0.3391487763640905 0.6517998595813517
0.4504917745206402 0.6572589474543384
0.3403171186033137 0.4396480629283334
0.3450512325288959 0.3342143314291603
0.5884423212101556 0.1464688161475419
0.5874820780580523 0.7111470153304259
0.5289939782208927 0.5649267052820812
0.7089627135729326 0.297207704024266
13 2 43
82 6 110
7 8 108
42 5 119
119 5 160
6 7 110
92 47 143
9 10 76
76 10 151
22 3 45
31 4 44
85 15 111
111 16 127
17 18 102
102 18 122
12 13 53
53 13 116
43 14 116
15 16 111
40 1 42
134 46 185
21 22 52
83 24 114
25 26 106
52 22 118
45 23 118
24 25 114
126 49 183
27 28 77
77 28 152
30 31 51
51 31 117
44 32 117
84 33 113
33 34 113
34 35 107
1 5 42
50 40 119
93 48 144
36 37 78
78 37 150
39 40 50
2 14 43
116 14 161
4 32 44
117 32 163
3 23 45
118 23 162
19 20 80
128 41 172
166 73 185
16 17 127
129 41 184
8 9 103
138 67 172
35 36 105
40 42 119
170 47 181
31 44 117
169 48 178
22 45 118
26 27 104
13 43 116
115 49 123
135 48 169
29 30 64
136 47 170
11 12 63
79 49 126
38 39 65
92 55 164
94 57 168
80 20 81
168 57 174
109 47 136
39 50 65
90 46 177
12 53 63
112 48 135
30 51 64
20 21 81
79 58 123
63 53 101
139 68 173
64 51 100
91 54 137
65 50 99
93 56 141
21 52 81
121 46 165
124 54 176
125 56 175
148 55 182
128 88 173
121 57 171
93 67 138
28 29 152
115 62 158
10 11 151
109 59 156
37 38 150
112 61 157
85 60 101
120 55 166
84 61 100
97 74 176
82 59 99
96 75 175
120 68 164
11 63 151
129 69 179
29 64 152
128 67 180
38 65 150
80 58 89
91 69 183
81 52 98
89 58 155
98 62 123
58 80 81
96 65 149
108 71 156
83 62 98
106 70 158
97 64 147
107 72 157
95 63 146
134 60 177
159 66 165
120 86 140
94 69 184
124 87 179
92 68 139
125 88 180
58 79 155
18 19 122
101 60 134
89 66 122
115 70 145
130 91 145
99 59 136
109 71 143
100 61 135
112 72 144
155 79 174
140 86 142
101 73 146
146 73 148
99 75 149
131 78 154
100 74 147
130 77 153
58 81 98
52 83 98
132 92 143
50 82 99
131 93 144
51 84 100
148 73 166
53 85 101
19 80 122
102 66 159
103 76 132
9 76 103
104 77 130
27 77 104
105 78 131
36 78 105
104 70 106
26 104 106
105 72 107
35 105 107
103 71 108
8 103 108
125 75 170
110 108 156
59 82 110
7 108 110
60 85 111
46 90 165
124 74 169
113 107 157
61 84 113
34 107 113
62 83 114
25 106 114
49 79 123
114 106 158
85 53 116
14 15 161
84 51 117
32 33 163
83 52 118
23 24 162
82 50 119
5 6 160
132 76 182
121 86 167
142 86 171
133 121 165
80 89 122
66 102 122
58 98 123
62 115 123
77 97 153
138 87 178
78 96 154
139 88 181
145 91 183
69 94 126
17 102 127
60 111 127
137 124 179
141 125 180
142 94 184
129 87 172
54 91 130
70 104 130
56 93 131
72 105 131
55 92 132
71 103 132
66 89 133
57 121 133
127 90 177
73 101 134
74 100 135
61 112 135
75 99 136
59 109 136
69 91 137
54 124 137
67 128 172
48 93 138
140 128 173
47 92 139
68 120 140
41 128 140
67 93 141
56 125 141
57 94 171
41 140 142
47 109 143
71 132 143
48 112 144
72 131 144
49 115 145
70 130 145
151 95 182
63 101 146
74 97 147
64 100 147
86 120 167
95 146 148
75 96 149
65 99 149
96 78 150
65 96 150
55 132 182
63 95 151
97 77 152
64 97 152
153 97 176
54 130 153
154 96 175
56 131 154
133 89 174
57 133 174
71 109 156
59 110 156
72 112 157
61 113 157
62 114 158
70 115 158
127 102 159
90 127 159
6 82 160
82 119 160
15 85 161
85 116 161
24 83 162
83 118 162
33 84 163
84 117 163
68 92 164
55 120 164
66 133 165
90 159 165
73 134 185
55 148 166
46 121 167
120 166 167
126 94 168
79 126 168
87 124 178
74 135 169
88 125 181
75 136 170
86 121 171
94 142 171
41 129 172
87 138 172
88 139 173
68 140 173
89 155 174
79 168 174
75 125 175
56 154 175
74 124 176
54 153 176
60 127 177
46 134 177
48 138 178
124 169 178
87 129 179
69 137 179
88 128 180
67 141 180
47 139 181
125 170 181
95 148 182
76 151 182
69 126 183
49 145 183
69 129 184
41 142 184
167 166 185
46 167 185
40
1 5
5 6
6 7
7 8
8 9
9 10
10 11
11 12
12 13
13 2
2 14
14 15
15 16
16 17
17 18
18 19
19 20
20 21
21 22
22 3
3 23
23 24
24 25
25 26
26 27
27 28
28 29
29 30
30 31
31 4
4 32
32 33
33 34
34 35
35 36
36 37
37 38
38 39
39 40
40 1

View file

@ -0,0 +1,329 @@
328
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1

View file

@ -0,0 +1,71 @@
% Square:
% flatpak run org.octave.Octave <filename>
% or
% octave --no-window-system --no-gui -qf <filename>
clear all
clc
% %% L-shape
% g=[2 0 2 0 0 1 0; % #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 2 2 0 1 1 0;
% 2 2 1 1 0.5 1 0;
% 2 1 1 0.5 2 1 0;
% 2 1 0 2 2 1 0;
% 2 0 0 2 0 1 0]';
%% square
% g=[2 0 1 0 0 1 0; % #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 1 1 0 1 1 0;
% 2 1 0 1 1 1 0;
% 2 0 0 1 0 1 0]';
%% 2 squares
g=[2 0 1 0 0 1 0; % 1 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
2 1 1 0 1 1 2;
2 1 0 1 1 1 0;
2 0 0 1 0 1 0;
2 1 2 0 0 2 0; % 2 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
2 2 2 0 1 2 0;
2 2 1 1 1 2 0
]';
%% 4 squares
%g=[2 0 1 0 0 1 0; % 1 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 1 1 0 1 1 2;
% 2 1 0 1 1 1 3;
% 2 0 0 1 0 1 0;
% 2 1 2 0 0 2 0; % 2 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 2 2 0 1 2 0;
% 2 2 1 1 1 2 4;
% 2 1 1 1 0 2 1;
% 2 0 1 1 1 3 1; % 3 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 1 1 1 2 3 4;
% 2 1 0 2 2 3 0;
% 2 0 0 2 1 3 0;
% 2 1 2 1 1 4 2; % 4 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 2 2 1 2 4 0;
% 2 2 1 2 2 4 0
% 2 1 1 2 1 4 3
% ]';
[p,e,t] = initmesh(g,'hmax',0.1);
pdemesh(p,e,t)
%% GH
% output from <https://de.mathworks.com/help/pde/ug/initmesh.html initmesh>
%
% coordinates p: [2][nnode]
% connectivity t: [4][nelem] with t(4,:) are the subdomain numbers
% edges e: [7][nedges] boundary edges
% e([1,2],:) - start/end vertex of edge
% e([3,4],:) - start/end values
% e(5,:) - segment number
% e([6,7],:) - left/right subdomain
ascii_write_mesh( p, t, e, mfilename);
ascii_write_subdomains( p, t, e, mfilename);
% tmp=t(1:3,:)
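The GH comment block above pins down the layout of the initmesh output that ascii_write_mesh and ascii_write_subdomains dump to disk: p holds the node coordinates (2 x nnode), t the connectivity (4 x nelem, with the fourth row carrying the subdomain number), and e the boundary edges. As a rough, hedged illustration of consuming that layout on the C++ side, the sketch below loops over the elements and accumulates the triangle area per subdomain; the flattened-array convention, the 1-based node indices and the function name areaPerSubdomain are assumptions made for illustration, not part of this commit.

#include <cmath>
#include <cstddef>
#include <map>
#include <vector>

// Assumed flattened layout: p[2*i], p[2*i+1] are the coordinates of node i,
// t[4*k+0..2] are the (1-based) vertex ids of element k, t[4*k+3] its subdomain.
std::map<int, double> areaPerSubdomain(std::vector<double> const &p,
                                       std::vector<int> const &t)
{
    std::map<int, double> area;
    std::size_t const nelem = t.size() / 4;
    for (std::size_t k = 0; k < nelem; ++k)
    {
        int const v1 = t[4 * k + 0] - 1;            // convert to 0-based indices
        int const v2 = t[4 * k + 1] - 1;
        int const v3 = t[4 * k + 2] - 1;
        double const x1 = p[2 * v1], y1 = p[2 * v1 + 1];
        double const x2 = p[2 * v2], y2 = p[2 * v2 + 1];
        double const x3 = p[2 * v3], y3 = p[2 * v3 + 1];
        // triangle area from the cross product of two edge vectors
        double const a = 0.5 * std::fabs((x2 - x1) * (y3 - y1) - (x3 - x1) * (y2 - y1));
        area[t[4 * k + 3]] += a;
    }
    return area;
}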

1086
ex7/ex7_4/ex7_4/square_2.txt Normal file

File diff suppressed because it is too large


@ -0,0 +1,653 @@
652
1
2
1
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
1
2
2
1
1
2
2
2
2
2
2
2
2
1
1
1
2
1
1
1
1
2
2
2
2
1
1
1
1
2
2
2
2
1
1
1
1
1
1
1
1
1
1
1
1
2
2
2
2
2
2
2
2
2
2
1
1
2
2
1
1
1
1
1
1
2
2
1
1
2
2
2
2
2
2
2
2
1
1
1
1
1
1
1
1
2
2
2
2
1
1
1
1
1
1
2
2
2
2
1
1
2
2
2
2
2
2
2
2
1
1
1
1
1
1
1
1
2
2
1
1
1
1
2
2
2
2
1
1
1
1
1
1
2
2
2
2
2
2
1
1
1
1
1
1
2
2
1
1
2
2
1
1
2
2
2
2
2
2
2
2
2
2
1
1
1
1
1
1
1
1
1
1
2
2
2
2
2
2
1
1
2
2
1
1
1
1
2
2
2
2
2
2
2
2
2
2
1
1
2
2
2
2
1
1
1
1
1
1
1
1
2
2
2
2
1
1
2
2
2
2
1
1
1
1
1
1
1
1
2
2
2
2
2
2
2
2
2
2
1
1
1
1
1
1
2
2
2
2
2
2
1
1
1
1
1
1
1
1
2
2
2
2
1
1
1
1
1
1
2
2
2
2
2
2
2
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
2
2
2
2
1
1
1
1
1
1
1
1
2
2
2
2
2
2
1
1
2
2
1
1
2
2
1
1
2
2
2
2
2
2
1
1
2
2
1
1
1
1
1
1
1
1
2
2
2
2
1
1
1
1
1
1
1
1
1
1
2
2
2
2
2
2
2
2
2
2
1
1
2
2
2
2
2
2
2
2
1
1
1
1
1
1
2
2
2
2
2
2
2
2
2
2
1
1
1
1
1
1
1
1
1
1
2
2
2
2
2
2
1
1
2
2
2
2
2
2
2
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
2
2
1
1
1
1
1
1
2
2
2
2
2
2
1
1
1
1
2
2
1
1
2
2
1
1
1
1
1
1
2
2
2
2
2
2
1
1
2
2
2
2
2
2
2
2
1
1
1
1
1
1
1
1
2
2
2
2
2
2
2
2
1
1
1
1
1
1
2
2
2
2
2
2
1
1
2
2
2
2
1
1
1
1
1
1
1
1
2
2
2
2
2
2
1
1
1
1
2
2
1
1
2
2
2
2
2
2
2
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
1
1
1
1
1
1
2
2
2
2
1
1
2
2
2
2
2
1
2
1


@ -0,0 +1,71 @@
% Square:
% flatpak run org.octave.Octave <filename>
% or
% octave --no-window-system --no-gui -qf <filename>
clear all
clc
% %% L-shape
% g=[2 0 2 0 0 1 0; % #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 2 2 0 1 1 0;
% 2 2 1 1 0.5 1 0;
% 2 1 1 0.5 2 1 0;
% 2 1 0 2 2 1 0;
% 2 0 0 2 0 1 0]';
%% square
% g=[2 0 1 0 0 1 0; % #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 1 1 0 1 1 0;
% 2 1 0 1 1 1 0;
% 2 0 0 1 0 1 0]';
% %% 2 squares
% g=[2 0 1 0 0 1 0; % 1 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 1 1 0 1 1 2;
% 2 1 0 1 1 1 0;
% 2 0 0 1 0 1 0;
% 2 1 2 0 0 2 0; % 2 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
% 2 2 2 0 1 2 0;
% 2 2 1 1 1 2 0
% ]';
%% 4 squares
g=[2 0 1 0 0 1 0; % 1 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
2 1 1 0 1 1 2;
2 1 0 1 1 1 3;
2 0 0 1 0 1 0;
2 1 2 0 0 2 0; % 2 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
2 2 2 0 1 2 0;
2 2 1 1 1 2 4;
% 2 1 1 1 0 2 1;
% 2 0 1 1 1 3 1; % 3 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
2 1 1 1 2 3 4;
2 1 0 2 2 3 0;
2 0 0 2 1 3 0;
% 2 1 2 1 1 4 2; % 4 #vertices,v_1x, v_2x, v_1y, v_2y, subdomain_left, subdomain_right
2 2 2 1 2 4 0;
2 2 1 2 2 4 0
% 2 1 1 2 1 4 3
]';
[p,e,t] = initmesh(g,'hmax',0.1);
pdemesh(p,e,t)
%% GH
% output from <https://de.mathworks.com/help/pde/ug/initmesh.html initmesh>
%
% coordinates p: [2][nnode]
% connectivity t: [4][nelem] with t(4,:) are the subdomain numbers
% edges e: [7][nedges] boundary edges
% e([1,2],:) - start/end vertex of edge
% e([3,4],:) - start/end values
% e(5,:) - segment number
% e([6,7],:) - left/right subdomain
ascii_write_mesh( p, t, e, mfilename);
ascii_write_subdomains( p, t, e, mfilename);
% tmp=t(1:3,:)

Binary file not shown.

2176
ex7/ex7_4/ex7_4/square_4.txt Normal file

File diff suppressed because it is too large

File diff suppressed because it is too large

711
ex7/ex7_4/ex7_4/uv.txt Normal file

@ -0,0 +1,711 @@
189 2 332 3
0 0
1 0
1 1
0 1
0.1 0
0.2 0
0.3 0
0.4 0
0.5 0
0.6 0
0.7 0
0.8 0
0.9 0
1 0.1
1 0.2
1 0.3
1 0.4
1 0.5
1 0.6
1 0.7
1 0.8
1 0.9
0.9 1
0.8 1
0.7 1
0.6 1
0.5 1
0.4 1
0.3 1
0.2 1
0.1 1
0 0.9
0 0.8
0 0.7
0 0.6
0 0.5
0 0.4
0 0.3
0 0.2
0 0.1
0.479684 0.513096
0.0481001 0.0438139
0.965456 0.0484346
0.0451033 0.961897
0.951568 0.955138
0.754828 0.354966
0.360392 0.264508
0.267427 0.637367
0.656472 0.728531
0.0895934 0.160441
0.160541 0.910786
0.911386 0.844141
0.841625 0.0900995
0.451559 0.770058
0.557953 0.236427
0.229318 0.453048
0.687177 0.534082
0.814618 0.688238
0.258637 0.157666
0.852304 0.258371
0.158345 0.741149
0.745635 0.841799
0.734225 0.0924118
0.266818 0.909939
0.0900392 0.266867
0.845905 0.489216
0.340275 0.504784
0.482979 0.335446
0.532712 0.650471
0.609668 0.857218
0.390369 0.135169
0.137448 0.608978
0.711137 0.231036
0.311061 0.779084
0.220383 0.310202
0.542551 0.0753476
0.442762 0.912034
0.0870955 0.443563
0.718892 0.663674
0.911061 0.643693
0.910252 0.745257
0.175659 0.104224
0.825832 0.898216
0.104444 0.824712
0.899601 0.174158
0.605353 0.422553
0.396664 0.624666
0.371373 0.387899
0.824315 0.584766
0.825103 0.373429
0.524605 0.742235
0.459558 0.258909
0.261176 0.536511
0.606947 0.570757
0.662867 0.13484
0.141276 0.353324
0.353007 0.858336
0.824377 0.795396
0.180506 0.217545
0.21789 0.819436
0.7886 0.183202
0.920144 0.449301
0.445028 0.0729222
0.552079 0.922008
0.0761811 0.552606
0.645993 0.929394
0.0688885 0.645589
0.353835 0.0678044
0.33465 0.18669
0.267936 0.0754627
0.930563 0.255545
0.188302 0.664576
0.0758913 0.731951
0.73311 0.923739
0.67084 0.805934
0.927247 0.0922677
0.0756899 0.909417
0.90786 0.925626
0.0924729 0.0754185
0.568222 0.332532
0.700102 0.44222
0.916963 0.544333
0.738472 0.754101
0.388486 0.711163
0.287351 0.384911
0.629828 0.654561
0.900814 0.346453
0.41132 0.456786
0.465434 0.593118
0.514559 0.831524
0.165281 0.517019
0.484085 0.160379
0.765138 0.511037
0.772882 0.273336
0.241305 0.724548
0.274741 0.24004
0.469165 0.706886
0.334436 0.584173
0.409135 0.327415
0.506546 0.420168
0.291267 0.463861
0.560193 0.497192
0.405521 0.200759
0.20339 0.593316
0.593794 0.779935
0.708431 0.167068
0.286585 0.842293
0.645259 0.197405
0.157539 0.286478
0.0634282 0.35253
0.638015 0.0744131
0.352258 0.936361
0.429133 0.833293
0.1657 0.43037
0.7748 0.630926
0.320743 0.124262
0.125329 0.678845
0.681069 0.871955
0.856774 0.417367
0.141959 0.0447961
0.956726 0.141515
0.858417 0.95601
0.0449026 0.858609
0.51729 0.290331
0.791421 0.430793
0.642597 0.273685
0.663496 0.353015
0.67685 0.601987
0.308736 0.701675
0.297005 0.306339
0.632153 0.492996
0.404572 0.546354
0.436385 0.385368
0.741249 0.587512
0.208229 0.386035
0.386067 0.791133
0.821222 0.320652
0.338641 0.652462
0.450215 0.657713
0.340125 0.439662
0.344719 0.333947
0.588452 0.145922
0.58736 0.711465
0.529103 0.564926
0.709038 0.296367
1 0.95
0.05 1
1 0.05
0.95 1
13 2 43
85 15 111
82 6 110
7 8 108
43 2 188
42 5 119
119 5 160
6 7 110
92 47 143
9 10 76
76 10 151
111 16 127
186 3 189
17 18 102
102 18 122
12 13 53
53 13 116
43 14 116
15 16 111
40 1 42
134 46 185
21 22 52
83 24 114
25 26 106
52 22 118
45 23 118
24 25 114
126 49 183
27 28 77
77 28 152
30 31 51
51 31 117
44 32 117
84 33 113
33 34 113
34 35 107
1 5 42
50 40 119
118 23 162
4 32 44
93 48 144
36 37 78
78 37 150
39 40 50
116 14 161
117 32 163
44 31 187
45 22 186
19 20 80
128 41 172
166 73 185
16 17 127
129 41 184
8 9 103
138 67 172
35 36 105
40 42 119
170 47 181
31 44 117
169 48 178
22 45 118
26 27 104
13 43 116
115 49 123
135 48 169
29 30 64
136 47 170
11 12 63
79 49 126
38 39 65
92 55 164
94 57 168
80 20 81
168 57 174
109 47 136
39 50 65
90 46 177
12 53 63
112 48 135
30 51 64
20 21 81
79 58 123
63 53 101
139 68 173
64 51 100
91 54 137
65 50 99
93 56 141
21 52 81
121 46 165
124 54 176
125 56 175
148 55 182
128 88 173
121 57 171
93 67 138
28 29 152
115 62 158
10 11 151
109 59 156
37 38 150
112 61 157
85 60 101
120 55 166
84 61 100
97 74 176
82 59 99
96 75 175
120 68 164
11 63 151
129 69 179
29 64 152
128 67 180
38 65 150
80 58 89
91 69 183
81 52 98
89 58 155
98 62 123
58 80 81
96 65 149
108 71 156
83 62 98
106 70 158
97 64 147
107 72 157
95 63 146
134 60 177
159 66 165
120 86 140
94 69 184
124 87 179
92 68 139
125 88 180
58 79 155
18 19 122
101 60 134
89 66 122
115 70 145
130 91 145
99 59 136
109 71 143
100 61 135
112 72 144
155 79 174
140 86 142
101 73 146
146 73 148
99 75 149
131 78 154
100 74 147
130 77 153
58 81 98
52 83 98
132 92 143
50 82 99
131 93 144
51 84 100
148 73 166
53 85 101
19 80 122
102 66 159
103 76 132
9 76 103
104 77 130
27 77 104
105 78 131
36 78 105
104 70 106
26 104 106
105 72 107
35 105 107
103 71 108
8 103 108
125 75 170
110 108 156
59 82 110
7 108 110
60 85 111
46 90 165
124 74 169
113 107 157
61 84 113
34 107 113
62 83 114
25 106 114
49 79 123
114 106 158
85 53 116
14 15 161
84 51 117
32 33 163
83 52 118
23 24 162
82 50 119
5 6 160
132 76 182
121 86 167
142 86 171
133 121 165
80 89 122
66 102 122
58 98 123
62 115 123
77 97 153
138 87 178
78 96 154
139 88 181
145 91 183
69 94 126
17 102 127
60 111 127
137 124 179
141 125 180
142 94 184
129 87 172
54 91 130
70 104 130
56 93 131
72 105 131
55 92 132
71 103 132
66 89 133
57 121 133
127 90 177
73 101 134
74 100 135
61 112 135
75 99 136
59 109 136
69 91 137
54 124 137
67 128 172
48 93 138
140 128 173
47 92 139
68 120 140
41 128 140
67 93 141
56 125 141
57 94 171
41 140 142
47 109 143
71 132 143
48 112 144
72 131 144
49 115 145
70 130 145
151 95 182
63 101 146
74 97 147
64 100 147
86 120 167
95 146 148
75 96 149
65 99 149
96 78 150
65 96 150
55 132 182
63 95 151
97 77 152
64 97 152
153 97 176
54 130 153
154 96 175
56 131 154
133 89 174
57 133 174
71 109 156
59 110 156
72 112 157
61 113 157
62 114 158
70 115 158
127 102 159
90 127 159
6 82 160
82 119 160
15 85 161
85 116 161
24 83 162
83 118 162
33 84 163
84 117 163
68 92 164
55 120 164
66 133 165
90 159 165
73 134 185
55 148 166
46 121 167
120 166 167
126 94 168
79 126 168
87 124 178
74 135 169
88 125 181
75 136 170
86 121 171
94 142 171
41 129 172
87 138 172
88 139 173
68 140 173
89 155 174
79 168 174
75 125 175
56 154 175
74 124 176
54 153 176
60 127 177
46 134 177
48 138 178
124 169 178
87 129 179
69 137 179
88 128 180
67 141 180
47 139 181
125 170 181
95 148 182
76 151 182
69 126 183
49 145 183
69 129 184
41 142 184
167 166 185
46 167 185
4 44 187
14 43 188
23 45 189
45 186 189
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1

135
ex7/ex7_4/ex7_4/vdop.cpp Normal file

@ -0,0 +1,135 @@
#include "vdop.h"
#include <cassert> // assert()
#include <cmath>
#include <iostream>
#include <vector>
using namespace std;
void vddiv(vector<double> & x, vector<double> const& y,
vector<double> const& z)
{
assert( x.size()==y.size() && y.size()==z.size() );
size_t n = x.size();
#pragma omp parallel for
for (size_t k = 0; k < n; ++k)
{
x[k] = y[k] / z[k];
}
return;
}
//******************************************************************************
void vdaxpy(std::vector<double> & x, std::vector<double> const& y,
double alpha, std::vector<double> const& z )
{
assert( x.size()==y.size() && y.size()==z.size() );
size_t n = x.size();
#pragma omp parallel for
for (size_t k = 0; k < n; ++k)
{
x[k] = y[k] + alpha * z[k];
}
return;
}
//******************************************************************************
double dscapr(std::vector<double> const& x, std::vector<double> const& y)
{
assert( x.size()==y.size());
size_t n = x.size();
double s = 0.0;
//#pragma omp parallel for reduction(+:s)
for (size_t k = 0; k < n; ++k)
{
s += x[k] * y[k];
}
return s;
}
//******************************************************************************
//void DebugVector(vector<double> const &v)
//{
//cout << "\nVector (nnode = " << v.size() << ")\n";
//for (size_t j = 0; j < v.size(); ++j)
//{
//cout.setf(ios::right, ios::adjustfield);
//cout << v[j] << " ";
//}
//cout << endl;
//return;
//}
//******************************************************************************
bool CompareVectors(std::vector<double> const& x, int const n, double const y[], double const eps)
{
bool bn = (static_cast<int>(x.size())==n);
if (!bn)
{
cout << "######### Error: " << "number of elements" << endl;
}
//bool bv = equal(x.cbegin(),x.cend(),y);
bool bv = equal(x.cbegin(),x.cend(),y,
[eps](double a, double b) -> bool
{ return std::abs(a-b)<eps*(1.0+0.5*(std::abs(a)+ std::abs(b))); }
);
if (!bv)
{
assert(static_cast<int>(x.size())==n);
cout << "######### Error: " << "values" << endl;
}
return bn && bv;
}
//******************************************************************************
double par_scalar(vector<double> const &x, vector<double> const &y, MPI_Comm const& icomm)
{
const double s = dscapr(x,y);
double sg;
MPI_Allreduce(&s,&sg,1,MPI_DOUBLE,MPI_SUM,icomm);
return(sg);
}
//******************************************************************************
void ExchangeAll(vector<double> const &xin, vector<double> &yout, MPI_Comm const &icomm)
{
int myrank, numprocs,ierr(-1);
MPI_Comm_rank(icomm, &myrank); // my MPI-rank
MPI_Comm_size(icomm, &numprocs);
int const N=xin.size();
int const sendcount = N/numprocs; // equal-sized chunks
assert(sendcount*numprocs==N); // does N really split into equal chunks?
assert(xin.size()==yout.size());
auto sendbuf = xin.data();
auto recvbuf = yout.data();
ierr = MPI_Alltoall(sendbuf, sendcount, MPI_DOUBLE,
recvbuf, sendcount, MPI_DOUBLE, icomm);
assert(0==ierr);
return;
}
//******************************************************************************
void ExchangeAllInPlace(vector<double> &xin, MPI_Comm const &icomm)
{
int myrank, numprocs,ierr(-1);
MPI_Comm_rank(icomm, &myrank); // my MPI-rank
MPI_Comm_size(icomm, &numprocs);
int const N=xin.size();
int const sendcount = N/numprocs; // equal-sized chunks
assert(sendcount*numprocs==N); // does N really split into equal chunks?
auto sendbuf = xin.data();
ierr = MPI_Alltoall(MPI_IN_PLACE, sendcount, MPI_DOUBLE,
sendbuf, sendcount, MPI_DOUBLE, icomm);
assert(0==ierr);
return;
}
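par_scalar above is only the local dscapr followed by an MPI_Allreduce, and the two ExchangeAll variants wrap MPI_Alltoall with equal-sized chunks, so the local vector length must be divisible by the number of processes. Below is a minimal, hedged driver showing how these routines might be exercised when linked against vdop.cpp; the program itself is not part of the commit and the vector contents are arbitrary.

#include <cstddef>
#include <iostream>
#include <mpi.h>
#include <vector>
#include "vdop.h"

int main(int argc, char *argv[])
{
    MPI_Init(&argc, &argv);
    int myrank, numprocs;
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);

    // local length chosen as a multiple of numprocs to satisfy the assertion in ExchangeAllInPlace
    std::size_t const nloc = 4 * static_cast<std::size_t>(numprocs);
    std::vector<double> x(nloc, 1.0), y(nloc, static_cast<double>(myrank + 1));

    double const s = par_scalar(x, y);   // global <x,y>, accumulated via MPI_Allreduce
    ExchangeAllInPlace(y);               // redistribute equal chunks of y among all ranks

    if (myrank == 0) std::cout << "global inner product: " << s << std::endl;

    MPI_Finalize();
    return 0;
}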

166
ex7/ex7_4/ex7_4/vdop.h Normal file

@ -0,0 +1,166 @@
#ifndef VDOP_FILE
#define VDOP_FILE
#include <iostream>
#include <mpi.h> // MPI
#include <string>
#include <vector>
/** @brief Element-wise vector division x_k = y_k/z_k.
*
* @param[out] x target vector
* @param[in] y source vector
* @param[in] z source vector
*
*/
void vddiv(std::vector<double> &x, std::vector<double> const &y,
std::vector<double> const &z);
/** @brief Element-wise daxpy operation x(k) = y(k) + alpha*z(k).
*
* @param[out] x target vector
* @param[in] y source vector
* @param[in] alpha scalar
* @param[in] z source vector
*
*/
void vdaxpy(std::vector<double> &x, std::vector<double> const &y,
double alpha, std::vector<double> const &z );
/** @brief Calculates the Euclidean inner product of two vectors.
*
* @param[in] x vector
* @param[in] y vector
* @return Euclidean inner product @f$\langle x,y \rangle@f$
*
*/
double dscapr(std::vector<double> const &x, std::vector<double> const &y);
inline
double L2_scapr(std::vector<double> const &x, std::vector<double> const &y)
{
return dscapr(x, y) / x.size();
}
/** Parallel inner product
@param[in] x vector
@param[in] y vector
@param[in] icomm MPI communicator
@return resulting Euclidean inner product <x,y>
*/
double par_scalar(std::vector<double> const &x, std::vector<double> const &y,
MPI_Comm const& icomm=MPI_COMM_WORLD);
/* ReadIn : Input and broadcast of an integer */
inline
int ReadIn(std::string const &ss = std::string(), MPI_Comm const &icomm = MPI_COMM_WORLD)
{
MPI_Barrier(icomm);
int myrank; /* my rank number */
MPI_Comm_rank(icomm, &myrank);
int id;
if (myrank == 0) {
std::cout << "\n\n " << ss << " : Which process do you want to debug ? \n";
std::cin >> id;
}
MPI_Bcast(&id, 1, MPI_INT, 0, icomm);
return id;
}
/**
* Print entries of a vector to standard output.
*
* @param[in] v vector values
* @param[in] ss string containing the vector name
* @param[in] icomm communicator group for MPI
*
*/
//void DebugVector(std::vector<double> const &v);
template <class T>
void DebugVector(std::vector<T> const &v, std::string const &ss = std::string(), MPI_Comm const &icomm = MPI_COMM_WORLD)
{
MPI_Barrier(icomm);
int numprocs; /* # processes */
MPI_Comm_size(icomm, &numprocs);
int myrank; /* my rank number */
MPI_Comm_rank(icomm, &myrank);
int readid = ReadIn(ss); /* Read readid */
while ( (0 <= readid) && (readid < numprocs) ) {
if (myrank == readid) {
std::cout << "\n\n process " << readid;
std::cout << "\n .... " << ss << " (nnode = " << v.size() << ")\n";
for (size_t j = 0; j < v.size(); ++j) {
std::cout.setf(std::ios::right, std::ios::adjustfield);
std::cout << v[j] << " ";
}
std::cout << std::endl;
fflush(stdout);
}
readid = ReadIn(ss, icomm); /* Read readid */
}
MPI_Barrier(icomm);
return;
}
/** @brief Compares an STL vector with a POD vector.
*
* The accuracy criteria @f$ |x_k-y_k| < \varepsilon \left({1+0.5(|x_k|+|y_k|)}\right) @f$
* follows the book by
* <a href="https://www.springer.com/la/book/9783319446592">Stoyan/Baran</a>, p.8.
*
* @param[in] x STL vector
* @param[in] n length of POD vector
* @param[in] y POD vector
* @param[in] eps relative accuracy criteria (default := 0.0).
* @return true iff pairwise vector elements are relatively close to each other.
*
*/
bool CompareVectors(std::vector<double> const &x, int n, double const y[], double const eps = 0.0);
/** Output operator for vector
* @param[in,out] s output stream, e.g. @p cout
* @param[in] v vector
*
* @return output stream
*/
template <class T>
std::ostream &operator<<(std::ostream &s, std::vector<T> const &v)
{
for (auto vp : v) {
s << vp << " ";
}
return s;
}
/** Exchanges equal-sized partitions of vector @p xin with all MPI processes.
* The received data are returned in vector @p yout.
*
* @param[in] xin input vector
* @param[out] yout output vector
* @param[in] icomm MPI communicator
*
*/
void ExchangeAll(std::vector<double> const &xin, std::vector<double> &yout, MPI_Comm const &icomm = MPI_COMM_WORLD);
/** Exchanges equal-sized partitions of vector @p xin with all MPI processes.
* The received data are returned in vector @p xin.
*
* @param[in,out] xin input/output vector
* @param[in] icomm MPI communicator
*
*/
void ExchangeAllInPlace(std::vector<double> &xin, MPI_Comm const &icomm = MPI_COMM_WORLD);
#endif
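The purely local routines declared above need no communicator at all. Here is a hedged single-process sketch of vdaxpy, dscapr and the CompareVectors tolerance |x_k - y_k| < eps*(1 + 0.5*(|x_k| + |y_k|)) in action; the expected values are made up for illustration and the snippet is not part of the commit.

#include <vector>
#include "vdop.h"

int main()
{
    // no MPI_Init needed: only the purely local routines are called here
    std::vector<double> y{1.0, 2.0, 3.0}, z{4.0, 5.0, 6.0}, x(3);

    vdaxpy(x, y, 2.0, z);                 // x_k = y_k + 2*z_k  ->  {9, 12, 15}
    double const s = dscapr(x, x);        // Euclidean inner product <x,x>

    double const expected[3] = {9.0, 12.0, 15.0};
    bool const ok = CompareVectors(x, 3, expected, 1.0e-12);   // relative accuracy check

    return (ok && s > 0.0) ? 0 : 1;
}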


@ -0,0 +1,20 @@
%% Visualize results
%
% flatpak run org.octave.Octave <filename>
% or
% octave --no-window-system --no-gui -qf <filename>
%
% or
% matlab -nosplash < <filename>
clear all
clc
%%
fname = 'uv.txt';
[xc,ia,v] = ascii_read_meshvector(fname);
h = trisurf(ia, xc(:,1), xc(:,2), v);
waitfor(h) % wait until the figure is closed
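ascii_read_meshvector itself is not included in this commit, but the uv.txt dump above makes the layout fairly clear: a header line with the node count, spatial dimension, element count and vertices per element, then the node coordinates, the 1-based triangle connectivity, and finally one value per node. Below is a hedged C++ sketch of a reader for exactly that layout; the interpretation is inferred from the dump, not from the Octave function.

#include <cstddef>
#include <fstream>
#include <iostream>
#include <vector>

int main()
{
    std::ifstream in("uv.txt");
    if (!in) { std::cerr << "cannot open uv.txt\n"; return 1; }

    int nnode, dim, nelem, nvert;
    in >> nnode >> dim >> nelem >> nvert;          // e.g. "189 2 332 3"

    std::vector<double> xc(static_cast<std::size_t>(nnode) * dim);   // node coordinates
    for (double &c : xc) in >> c;

    std::vector<int> ia(static_cast<std::size_t>(nelem) * nvert);    // 1-based connectivity
    for (int &i : ia) in >> i;

    std::vector<double> v(static_cast<std::size_t>(nnode));          // nodal values
    for (double &val : v) in >> val;

    std::cout << nnode << " nodes, " << nelem << " elements read\n";
    return in ? 0 : 2;
}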