sheet 4
This commit is contained in:
parent
c21fee7862
commit
65a23d88d6
67 changed files with 14385 additions and 0 deletions
6
sheet4/A/.vscode/settings.json
vendored
Normal file
6
sheet4/A/.vscode/settings.json
vendored
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
{
|
||||
"files.associations": {
|
||||
"ostream": "cpp",
|
||||
"iostream": "cpp"
|
||||
}
|
||||
}
|
||||
0
sheet4/A/.vscode/settings.json:Zone - Kopie.Identifier
vendored
Normal file
0
sheet4/A/.vscode/settings.json:Zone - Kopie.Identifier
vendored
Normal file
0
sheet4/A/.vscode/settings.json:Zone - Kopie.Identifier:Zone.Identifier
vendored
Normal file
0
sheet4/A/.vscode/settings.json:Zone - Kopie.Identifier:Zone.Identifier
vendored
Normal file
0
sheet4/A/.vscode/settings.json:Zone - Kopie.Identifier:Zone.Identifier:Zone.Identifier
vendored
Normal file
0
sheet4/A/.vscode/settings.json:Zone - Kopie.Identifier:Zone.Identifier:Zone.Identifier
vendored
Normal file
0
sheet4/A/.vscode/settings.json:Zone.Identifier
vendored
Normal file
0
sheet4/A/.vscode/settings.json:Zone.Identifier
vendored
Normal file
0
sheet4/A/.vscode/settings.json:Zone.Identifier:Zone.Identifier
vendored
Normal file
0
sheet4/A/.vscode/settings.json:Zone.Identifier:Zone.Identifier
vendored
Normal file
BIN
sheet4/A/A.pdf
Normal file
BIN
sheet4/A/A.pdf
Normal file
Binary file not shown.
2563
sheet4/A/Doxyfile
Normal file
2563
sheet4/A/Doxyfile
Normal file
File diff suppressed because it is too large
Load diff
32
sheet4/A/Makefile
Normal file
32
sheet4/A/Makefile
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
#
|
||||
# use GNU-Compiler tools
|
||||
COMPILER=GCC_
|
||||
# alternatively from the shell
|
||||
# export COMPILER=GCC_
|
||||
# or, alternatively from the shell
|
||||
# make COMPILER=GCC_
|
||||
|
||||
# use Intel compilers
|
||||
#COMPILER=ICC_
|
||||
|
||||
# use PGI compilers
|
||||
# COMPILER=PGI_
|
||||
|
||||
|
||||
SOURCES = main.cpp
|
||||
OBJECTS = $(SOURCES:.cpp=.o)
|
||||
|
||||
PROGRAM = main.${COMPILER}
|
||||
|
||||
# uncomment the next two lines for debugging and detailed performance analysis
|
||||
CXXFLAGS += -g
|
||||
LINKFLAGS += -g
|
||||
# do not use -pg with PGI compilers
|
||||
|
||||
ifndef COMPILER
|
||||
COMPILER=GCC_
|
||||
endif
|
||||
|
||||
include ../${COMPILER}default.mk
|
||||
$(PROGRAM): $(OBJECTS)
|
||||
$(CXX) $(CXXFLAGS) $(OBJECTS) -llapacke -lopenblas -o $(PROGRAM)
|
||||
53
sheet4/A/main.cpp
Normal file
53
sheet4/A/main.cpp
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
#include <lapacke.h>
|
||||
using namespace std;
|
||||
|
||||
int main()
|
||||
{
|
||||
int n = 10;
|
||||
unsigned int nodes = n+1;
|
||||
int rhs = 1;
|
||||
double a = 1.0;
|
||||
double alpha = 2.0;
|
||||
double gB = 1.0;
|
||||
|
||||
double d = n + a/3.0/n;
|
||||
double s = - n + a / 6.0 / n;
|
||||
|
||||
cout << d << endl;
|
||||
cout << s << endl;
|
||||
|
||||
//Lapacke overwrites upper/lower diagonal so store twice
|
||||
vector<double> upperdiagonal(n,s);
|
||||
vector<double> lowerdiagonal(n,s);
|
||||
vector<double> diagonal(nodes,2*d);
|
||||
vector<double> F(nodes,1.0/n);
|
||||
|
||||
diagonal[0]=1.0;
|
||||
diagonal[n]=d+alpha;
|
||||
|
||||
upperdiagonal[0] = 0.0;
|
||||
//
|
||||
|
||||
F[0] = 0;
|
||||
F[n]= 1.0/n/2.0 +alpha*gB;
|
||||
|
||||
|
||||
//Tridiagonal
|
||||
LAPACKE_dgtsv(LAPACK_COL_MAJOR,nodes,rhs,lowerdiagonal.data(),diagonal.data(),upperdiagonal.data(),F.data(),nodes);
|
||||
|
||||
|
||||
cout << "Solution u_h at nodes x_1..x_" << n << ":\n";
|
||||
for (unsigned int i = 0; i < nodes; ++i) {
|
||||
|
||||
double xi = double(i) /double(n);
|
||||
cout << "u_h(" << xi << ") = " << F[i] << "\n";
|
||||
// F will be overwritten with sol
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
|
||||
}
|
||||
BIN
sheet4/A/plot.png
Normal file
BIN
sheet4/A/plot.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 31 KiB |
1826
sheet4/A/small_Doxyfile
Normal file
1826
sheet4/A/small_Doxyfile
Normal file
File diff suppressed because it is too large
Load diff
16
sheet4/B/.vscode/c_cpp_properties.json
vendored
Normal file
16
sheet4/B/.vscode/c_cpp_properties.json
vendored
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
{
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Linux",
|
||||
"includePath": [
|
||||
"${workspaceFolder}/**"
|
||||
],
|
||||
"defines": [],
|
||||
"compilerPath": "/usr/bin/gcc",
|
||||
"cStandard": "c17",
|
||||
"cppStandard": "gnu++17",
|
||||
"intelliSenseMode": "linux-gcc-x64"
|
||||
}
|
||||
],
|
||||
"version": 4
|
||||
}
|
||||
6
sheet4/B/.vscode/settings.json
vendored
Normal file
6
sheet4/B/.vscode/settings.json
vendored
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
{
|
||||
"files.associations": {
|
||||
"ostream": "cpp",
|
||||
"iostream": "cpp"
|
||||
}
|
||||
}
|
||||
0
sheet4/B/.vscode/settings.json:Zone - Kopie - Kopie.Identifier
vendored
Normal file
0
sheet4/B/.vscode/settings.json:Zone - Kopie - Kopie.Identifier
vendored
Normal file
0
sheet4/B/.vscode/settings.json:Zone - Kopie - Kopie.Identifier:Zone.Identifier
vendored
Normal file
0
sheet4/B/.vscode/settings.json:Zone - Kopie - Kopie.Identifier:Zone.Identifier
vendored
Normal file
0
sheet4/B/.vscode/settings.json:Zone - Kopie.Identifier
vendored
Normal file
0
sheet4/B/.vscode/settings.json:Zone - Kopie.Identifier
vendored
Normal file
0
sheet4/B/.vscode/settings.json:Zone - Kopie.Identifier:Zone - Kopie.Identifier
vendored
Normal file
0
sheet4/B/.vscode/settings.json:Zone - Kopie.Identifier:Zone - Kopie.Identifier
vendored
Normal file
0
sheet4/B/.vscode/settings.json:Zone - Kopie.Identifier:Zone.Identifier
vendored
Normal file
0
sheet4/B/.vscode/settings.json:Zone - Kopie.Identifier:Zone.Identifier
vendored
Normal file
0
sheet4/B/.vscode/settings.json:Zone - Kopie.Identifier:Zone.Identifier:Zone.Identifier
vendored
Normal file
0
sheet4/B/.vscode/settings.json:Zone - Kopie.Identifier:Zone.Identifier:Zone.Identifier
vendored
Normal file
0
sheet4/B/.vscode/settings.json:Zone.Identifier
vendored
Normal file
0
sheet4/B/.vscode/settings.json:Zone.Identifier
vendored
Normal file
0
sheet4/B/.vscode/settings.json:Zone.Identifier:Zone.Identifier
vendored
Normal file
0
sheet4/B/.vscode/settings.json:Zone.Identifier:Zone.Identifier
vendored
Normal file
0
sheet4/B/.vscode/settings.json:Zone.Identifier:Zone.Identifier:Zone.Identifier
vendored
Normal file
0
sheet4/B/.vscode/settings.json:Zone.Identifier:Zone.Identifier:Zone.Identifier
vendored
Normal file
BIN
sheet4/B/B.pdf
Normal file
BIN
sheet4/B/B.pdf
Normal file
Binary file not shown.
2563
sheet4/B/Doxyfile
Normal file
2563
sheet4/B/Doxyfile
Normal file
File diff suppressed because it is too large
Load diff
32
sheet4/B/Makefile
Normal file
32
sheet4/B/Makefile
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
#
|
||||
# use GNU-Compiler tools
|
||||
COMPILER=GCC_
|
||||
# alternatively from the shell
|
||||
# export COMPILER=GCC_
|
||||
# or, alternatively from the shell
|
||||
# make COMPILER=GCC_
|
||||
|
||||
# use Intel compilers
|
||||
#COMPILER=ICC_
|
||||
|
||||
# use PGI compilers
|
||||
# COMPILER=PGI_
|
||||
|
||||
|
||||
SOURCES = main.cpp
|
||||
OBJECTS = $(SOURCES:.cpp=.o)
|
||||
|
||||
PROGRAM = main.${COMPILER}
|
||||
|
||||
# uncomment the next two lines for debugging and detailed performance analysis
|
||||
CXXFLAGS += -g
|
||||
LINKFLAGS += -g
|
||||
# do not use -pg with PGI compilers
|
||||
|
||||
ifndef COMPILER
|
||||
COMPILER=GCC_
|
||||
endif
|
||||
|
||||
include ../${COMPILER}default.mk
|
||||
$(PROGRAM): $(OBJECTS)
|
||||
$(CXX) $(CXXFLAGS) $(OBJECTS) -llapacke -lopenblas -o $(PROGRAM)
|
||||
86
sheet4/B/main.cpp
Normal file
86
sheet4/B/main.cpp
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
#include <cmath>
|
||||
#include <lapacke.h>
|
||||
using namespace std;
|
||||
|
||||
/// Number of equal sub-intervals of [0,1]; mesh node i sits at x_i = i/n.
/// Declared const because nothing in this file modifies it.
const int n = 10;

/// Position at which the coefficient lambda jumps: 1/sqrt(2).
const double threshold = 1.0/std::sqrt(2.0);

/// Piecewise-constant coefficient.
/// @param x evaluation point
/// @return 1 for x < threshold, 10 otherwise (including x == threshold)
double lambda(double x)
{
    return (x < threshold) ? 1.0 : 10.0;
}
|
||||
|
||||
double fillOffDiagonal(unsigned int i)
|
||||
{
|
||||
|
||||
double x_l = i/double(n);
|
||||
double x_u = (i+1)/double(n);
|
||||
|
||||
if(x_l < threshold && x_u > threshold)
|
||||
{
|
||||
return -n*n*(threshold-x_l+10*(x_u-threshold));
|
||||
}
|
||||
return -n*lambda(x_l);
|
||||
}
|
||||
|
||||
double fillDiagonal(int i)
|
||||
{
|
||||
double x_l = (i-1)/double(n);
|
||||
double x_u = (i+1)/double(n);
|
||||
if(x_l < threshold && x_u > threshold)
|
||||
{
|
||||
return n*n*(threshold-x_l+10*(x_u-threshold));
|
||||
}
|
||||
return 2*n*lambda(x_l);
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
//Ignored the first Row of K cause we now that u(0)=0
|
||||
|
||||
int rhs = 1;
|
||||
unsigned int nodes = n+1;
|
||||
|
||||
|
||||
//Lapacke overwrites upper/lower diagonal so store twice
|
||||
vector<double> upperdiagonal(n,0.0);
|
||||
vector<double> lowerdiagonal(n,0.0);
|
||||
vector<double> diagonal(nodes,1.0);
|
||||
vector<double> F(nodes,0.0);
|
||||
|
||||
for(int i=0; i < n; i++)
|
||||
{
|
||||
upperdiagonal[i] = fillOffDiagonal(i);
|
||||
lowerdiagonal[i] = fillOffDiagonal(i);
|
||||
diagonal[i+1] = fillDiagonal(i+1);
|
||||
}
|
||||
|
||||
upperdiagonal[0]=0.0;
|
||||
lowerdiagonal[n-1]=0.0;
|
||||
diagonal[n] = 1.0;
|
||||
F[n] = 1.0;
|
||||
|
||||
|
||||
//Tridiagonal
|
||||
LAPACKE_dgtsv(LAPACK_COL_MAJOR,nodes,rhs,lowerdiagonal.data(),diagonal.data(),upperdiagonal.data(),F.data(),nodes);
|
||||
|
||||
|
||||
cout << "Solution u_h at nodes x_0..x_" << n << ":\n";
|
||||
|
||||
for (unsigned int i = 0; i < nodes; ++i) {
|
||||
|
||||
double xi = double(i) /double(n);
|
||||
cout << "u_h(" << xi << ") = " << F[i] << "\n";
|
||||
// F will be overwritten with sol
|
||||
}
|
||||
return 0;
|
||||
|
||||
|
||||
}
|
||||
BIN
sheet4/B/plot.png
Normal file
BIN
sheet4/B/plot.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 32 KiB |
1826
sheet4/B/small_Doxyfile
Normal file
1826
sheet4/B/small_Doxyfile
Normal file
File diff suppressed because it is too large
Load diff
16
sheet4/C/.vscode/c_cpp_properties.json
vendored
Normal file
16
sheet4/C/.vscode/c_cpp_properties.json
vendored
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
{
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Linux",
|
||||
"includePath": [
|
||||
"${workspaceFolder}/**"
|
||||
],
|
||||
"defines": [],
|
||||
"compilerPath": "/usr/bin/gcc",
|
||||
"cStandard": "c17",
|
||||
"cppStandard": "gnu++17",
|
||||
"intelliSenseMode": "linux-gcc-x64"
|
||||
}
|
||||
],
|
||||
"version": 4
|
||||
}
|
||||
0
sheet4/C/.vscode/c_cpp_properties.json:Zone.Identifier
vendored
Normal file
0
sheet4/C/.vscode/c_cpp_properties.json:Zone.Identifier
vendored
Normal file
6
sheet4/C/.vscode/settings.json
vendored
Normal file
6
sheet4/C/.vscode/settings.json
vendored
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
{
|
||||
"files.associations": {
|
||||
"ostream": "cpp",
|
||||
"iostream": "cpp"
|
||||
}
|
||||
}
|
||||
0
sheet4/C/.vscode/settings.json:Zone - Kopie - Kopie (2).Identifier
vendored
Normal file
0
sheet4/C/.vscode/settings.json:Zone - Kopie - Kopie (2).Identifier
vendored
Normal file
0
sheet4/C/.vscode/settings.json:Zone - Kopie - Kopie (2).Identifier:Zone.Identifier
vendored
Normal file
0
sheet4/C/.vscode/settings.json:Zone - Kopie - Kopie (2).Identifier:Zone.Identifier
vendored
Normal file
0
sheet4/C/.vscode/settings.json:Zone - Kopie - Kopie.Identifier
vendored
Normal file
0
sheet4/C/.vscode/settings.json:Zone - Kopie - Kopie.Identifier
vendored
Normal file
0
sheet4/C/.vscode/settings.json:Zone - Kopie - Kopie.Identifier:Zone.Identifier
vendored
Normal file
0
sheet4/C/.vscode/settings.json:Zone - Kopie - Kopie.Identifier:Zone.Identifier
vendored
Normal file
0
sheet4/C/.vscode/settings.json:Zone - Kopie.Identifier
vendored
Normal file
0
sheet4/C/.vscode/settings.json:Zone - Kopie.Identifier
vendored
Normal file
0
sheet4/C/.vscode/settings.json:Zone - Kopie.Identifier:Zone - Kopie - Kopie.Identifier
vendored
Normal file
0
sheet4/C/.vscode/settings.json:Zone - Kopie.Identifier:Zone - Kopie - Kopie.Identifier
vendored
Normal file
0
sheet4/C/.vscode/settings.json:Zone - Kopie.Identifier:Zone - Kopie.Identifier
vendored
Normal file
0
sheet4/C/.vscode/settings.json:Zone - Kopie.Identifier:Zone - Kopie.Identifier
vendored
Normal file
0
sheet4/C/.vscode/settings.json:Zone - Kopie.Identifier:Zone.Identifier
vendored
Normal file
0
sheet4/C/.vscode/settings.json:Zone - Kopie.Identifier:Zone.Identifier
vendored
Normal file
0
sheet4/C/.vscode/settings.json:Zone - Kopie.Identifier:Zone.Identifier:Zone.Identifier
vendored
Normal file
0
sheet4/C/.vscode/settings.json:Zone - Kopie.Identifier:Zone.Identifier:Zone.Identifier
vendored
Normal file
0
sheet4/C/.vscode/settings.json:Zone.Identifier
vendored
Normal file
0
sheet4/C/.vscode/settings.json:Zone.Identifier
vendored
Normal file
0
sheet4/C/.vscode/settings.json:Zone.Identifier:Zone.Identifier
vendored
Normal file
0
sheet4/C/.vscode/settings.json:Zone.Identifier:Zone.Identifier
vendored
Normal file
0
sheet4/C/.vscode/settings.json:Zone.Identifier:Zone.Identifier:Zone.Identifier
vendored
Normal file
0
sheet4/C/.vscode/settings.json:Zone.Identifier:Zone.Identifier:Zone.Identifier
vendored
Normal file
BIN
sheet4/C/C.pdf
Normal file
BIN
sheet4/C/C.pdf
Normal file
Binary file not shown.
2563
sheet4/C/Doxyfile
Normal file
2563
sheet4/C/Doxyfile
Normal file
File diff suppressed because it is too large
Load diff
32
sheet4/C/Makefile
Normal file
32
sheet4/C/Makefile
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
#
|
||||
# use GNU-Compiler tools
|
||||
COMPILER=GCC_
|
||||
# alternatively from the shell
|
||||
# export COMPILER=GCC_
|
||||
# or, alternatively from the shell
|
||||
# make COMPILER=GCC_
|
||||
|
||||
# use Intel compilers
|
||||
#COMPILER=ICC_
|
||||
|
||||
# use PGI compilers
|
||||
# COMPILER=PGI_
|
||||
|
||||
|
||||
SOURCES = main.cpp
|
||||
OBJECTS = $(SOURCES:.cpp=.o)
|
||||
|
||||
PROGRAM = main.${COMPILER}
|
||||
|
||||
# uncomment the next two lines for debugging and detailed performance analysis
|
||||
CXXFLAGS += -g
|
||||
LINKFLAGS += -g
|
||||
# do not use -pg with PGI compilers
|
||||
|
||||
ifndef COMPILER
|
||||
COMPILER=GCC_
|
||||
endif
|
||||
|
||||
include ../${COMPILER}default.mk
|
||||
$(PROGRAM): $(OBJECTS)
|
||||
$(CXX) $(CXXFLAGS) $(OBJECTS) -llapacke -lopenblas -o $(PROGRAM)
|
||||
51
sheet4/C/main.cpp
Normal file
51
sheet4/C/main.cpp
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
#include <cmath>
|
||||
#include <lapacke.h>
|
||||
using namespace std;
|
||||
|
||||
|
||||
int main()
|
||||
{
|
||||
unsigned int ns[5] = {10,20,30,40,70};
|
||||
int p = 70;
|
||||
int rhs = 1;
|
||||
for(unsigned int i = 0; i< sizeof(ns)/sizeof(ns[0]); i++)
|
||||
{
|
||||
int n=ns[i];
|
||||
unsigned int nodes = n+1;
|
||||
|
||||
vector<double> upperdiagonal(n,-n+p/2.0);
|
||||
vector<double> lowerdiagonal(n,-n-p/2.0);
|
||||
vector<double> diagonal(nodes,2*n);
|
||||
vector<double> F(nodes,0.0);
|
||||
upperdiagonal[0]=0.0;
|
||||
lowerdiagonal[n-1]=0.0;
|
||||
diagonal[0] = 1.0;
|
||||
diagonal[n] = 1.0;
|
||||
F[n] = 1.0;
|
||||
|
||||
//Tridiagonal
|
||||
LAPACKE_dgtsv(LAPACK_COL_MAJOR,nodes,rhs,lowerdiagonal.data(),diagonal.data(),upperdiagonal.data(),F.data(),nodes);
|
||||
|
||||
|
||||
cout << "Solution u_h at nodes x_0..x_" << n << ":\n";
|
||||
|
||||
for (unsigned int j = 0; j < nodes; ++j) {
|
||||
|
||||
double xi = double(j) /double(n);
|
||||
cout << "u_h(" << xi << ") = " << F[j] << "\n";
|
||||
// F will be overwritten with sol
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
From Methode der finiten Elemente fuer Ingenieure:
|
||||
small n approximates pu' not good espicially if n < p/2=35 therefor this oscillation (see plot) occurs.
|
||||
*/
|
||||
return 0;
|
||||
|
||||
|
||||
}
|
||||
180
sheet4/C/output.txt
Normal file
180
sheet4/C/output.txt
Normal file
|
|
@ -0,0 +1,180 @@
|
|||
Solution u_h at nodes x_0..x_10:
|
||||
u_h(0) = -3.51571e-17
|
||||
u_h(0.1) = -0.00786414
|
||||
u_h(0.2) = 0.00629131
|
||||
u_h(0.3) = -0.0191885
|
||||
u_h(0.4) = 0.0266752
|
||||
u_h(0.5) = -0.0558794
|
||||
u_h(0.6) = 0.0927188
|
||||
u_h(0.7) = -0.174758
|
||||
u_h(0.8) = 0.3067
|
||||
u_h(0.9) = -0.559925
|
||||
u_h(1) = 1
|
||||
Solution u_h at nodes x_0..x_20:
|
||||
u_h(0) = -3.2746e-17
|
||||
u_h(0.05) = -2.41868e-11
|
||||
u_h(0.1) = 6.44981e-11
|
||||
u_h(0.15) = -2.6068e-10
|
||||
u_h(0.2) = 9.31639e-10
|
||||
u_h(0.25) = -3.4402e-09
|
||||
u_h(0.3) = 1.25899e-08
|
||||
u_h(0.35) = -4.6187e-08
|
||||
u_h(0.4) = 1.69328e-07
|
||||
u_h(0.45) = -6.20895e-07
|
||||
u_h(0.5) = 2.27659e-06
|
||||
u_h(0.55) = -8.34752e-06
|
||||
u_h(0.6) = 3.06075e-05
|
||||
u_h(0.65) = -0.000112228
|
||||
u_h(0.7) = 0.000411501
|
||||
u_h(0.75) = -0.00150884
|
||||
u_h(0.8) = 0.00553241
|
||||
u_h(0.85) = -0.0202855
|
||||
u_h(0.9) = 0.0743802
|
||||
u_h(0.95) = -0.272727
|
||||
u_h(1) = 1
|
||||
Solution u_h at nodes x_0..x_30:
|
||||
u_h(0) = -6.65912e-18
|
||||
u_h(0.0333333) = -6.65912e-18
|
||||
u_h(0.0666667) = -6.65912e-18
|
||||
u_h(0.1) = -6.65912e-18
|
||||
u_h(0.133333) = -6.65912e-18
|
||||
u_h(0.166667) = -6.65912e-18
|
||||
u_h(0.2) = -6.65912e-18
|
||||
u_h(0.233333) = -6.65912e-18
|
||||
u_h(0.266667) = -6.65912e-18
|
||||
u_h(0.3) = -6.65912e-18
|
||||
u_h(0.333333) = -6.65907e-18
|
||||
u_h(0.366667) = -6.6598e-18
|
||||
u_h(0.4) = -6.65023e-18
|
||||
u_h(0.433333) = -6.77472e-18
|
||||
u_h(0.466667) = -5.1563e-18
|
||||
u_h(0.5) = -2.61958e-17
|
||||
u_h(0.533333) = 2.47317e-16
|
||||
u_h(0.566667) = -3.30835e-15
|
||||
u_h(0.6) = 4.29153e-14
|
||||
u_h(0.633333) = -5.57992e-13
|
||||
u_h(0.666667) = 7.25381e-12
|
||||
u_h(0.7) = -9.42996e-11
|
||||
u_h(0.733333) = 1.22589e-09
|
||||
u_h(0.766667) = -1.59366e-08
|
||||
u_h(0.8) = 2.07176e-07
|
||||
u_h(0.833333) = -2.69329e-06
|
||||
u_h(0.866667) = 3.50128e-05
|
||||
u_h(0.9) = -0.000455166
|
||||
u_h(0.933333) = 0.00591716
|
||||
u_h(0.966667) = -0.0769231
|
||||
u_h(1) = 1
|
||||
Solution u_h at nodes x_0..x_40:
|
||||
u_h(0) = -4.79093e-18
|
||||
u_h(0.025) = -4.79093e-18
|
||||
u_h(0.05) = -4.79093e-18
|
||||
u_h(0.075) = -4.79093e-18
|
||||
u_h(0.1) = -4.79093e-18
|
||||
u_h(0.125) = -4.79093e-18
|
||||
u_h(0.15) = -4.79093e-18
|
||||
u_h(0.175) = -4.79093e-18
|
||||
u_h(0.2) = -4.79093e-18
|
||||
u_h(0.225) = -4.79093e-18
|
||||
u_h(0.25) = -4.79093e-18
|
||||
u_h(0.275) = -4.79093e-18
|
||||
u_h(0.3) = -4.79093e-18
|
||||
u_h(0.325) = -4.79093e-18
|
||||
u_h(0.35) = -4.79093e-18
|
||||
u_h(0.375) = -4.79093e-18
|
||||
u_h(0.4) = -4.79093e-18
|
||||
u_h(0.425) = -4.79093e-18
|
||||
u_h(0.45) = -4.79093e-18
|
||||
u_h(0.475) = -4.79093e-18
|
||||
u_h(0.5) = -4.79093e-18
|
||||
u_h(0.525) = -4.79089e-18
|
||||
u_h(0.55) = -4.79026e-18
|
||||
u_h(0.575) = -4.78078e-18
|
||||
u_h(0.6) = -4.63869e-18
|
||||
u_h(0.625) = -2.50728e-18
|
||||
u_h(0.65) = 2.94639e-17
|
||||
u_h(0.675) = 5.09032e-16
|
||||
u_h(0.7) = 7.70256e-15
|
||||
u_h(0.725) = 1.15605e-13
|
||||
u_h(0.75) = 1.73415e-12
|
||||
u_h(0.775) = 2.60123e-11
|
||||
u_h(0.8) = 3.90184e-10
|
||||
u_h(0.825) = 5.85277e-09
|
||||
u_h(0.85) = 8.77915e-08
|
||||
u_h(0.875) = 1.31687e-06
|
||||
u_h(0.9) = 1.97531e-05
|
||||
u_h(0.925) = 0.000296296
|
||||
u_h(0.95) = 0.00444444
|
||||
u_h(0.975) = 0.0666667
|
||||
u_h(1) = 1
|
||||
Solution u_h at nodes x_0..x_70:
|
||||
u_h(0) = -1.30667e-17
|
||||
u_h(0.0142857) = -1.30667e-17
|
||||
u_h(0.0285714) = -1.30667e-17
|
||||
u_h(0.0428571) = -1.30667e-17
|
||||
u_h(0.0571429) = -1.30667e-17
|
||||
u_h(0.0714286) = -1.30667e-17
|
||||
u_h(0.0857143) = -1.30667e-17
|
||||
u_h(0.1) = -1.30667e-17
|
||||
u_h(0.114286) = -1.30667e-17
|
||||
u_h(0.128571) = -1.30667e-17
|
||||
u_h(0.142857) = -1.30667e-17
|
||||
u_h(0.157143) = -1.30667e-17
|
||||
u_h(0.171429) = -1.30667e-17
|
||||
u_h(0.185714) = -1.30667e-17
|
||||
u_h(0.2) = -1.30667e-17
|
||||
u_h(0.214286) = -1.30667e-17
|
||||
u_h(0.228571) = -1.30667e-17
|
||||
u_h(0.242857) = -1.30667e-17
|
||||
u_h(0.257143) = -1.30667e-17
|
||||
u_h(0.271429) = -1.30667e-17
|
||||
u_h(0.285714) = -1.30667e-17
|
||||
u_h(0.3) = -1.30667e-17
|
||||
u_h(0.314286) = -1.30667e-17
|
||||
u_h(0.328571) = -1.30667e-17
|
||||
u_h(0.342857) = -1.30666e-17
|
||||
u_h(0.357143) = -1.30664e-17
|
||||
u_h(0.371429) = -1.30657e-17
|
||||
u_h(0.385714) = -1.30637e-17
|
||||
u_h(0.4) = -1.30576e-17
|
||||
u_h(0.414286) = -1.30393e-17
|
||||
u_h(0.428571) = -1.29845e-17
|
||||
u_h(0.442857) = -1.282e-17
|
||||
u_h(0.457143) = -1.23265e-17
|
||||
u_h(0.471429) = -1.08459e-17
|
||||
u_h(0.485714) = -6.40428e-18
|
||||
u_h(0.5) = 6.92065e-18
|
||||
u_h(0.514286) = 4.68954e-17
|
||||
u_h(0.528571) = 1.6682e-16
|
||||
u_h(0.542857) = 5.26593e-16
|
||||
u_h(0.557143) = 1.60591e-15
|
||||
u_h(0.571429) = 4.84387e-15
|
||||
u_h(0.585714) = 1.45577e-14
|
||||
u_h(0.6) = 4.36994e-14
|
||||
u_h(0.614286) = 1.31124e-13
|
||||
u_h(0.628571) = 3.93399e-13
|
||||
u_h(0.642857) = 1.18022e-12
|
||||
u_h(0.657143) = 3.54069e-12
|
||||
u_h(0.671429) = 1.06221e-11
|
||||
u_h(0.685714) = 3.18663e-11
|
||||
u_h(0.7) = 9.55991e-11
|
||||
u_h(0.714286) = 2.86797e-10
|
||||
u_h(0.728571) = 8.60392e-10
|
||||
u_h(0.742857) = 2.58117e-09
|
||||
u_h(0.757143) = 7.74352e-09
|
||||
u_h(0.771429) = 2.32306e-08
|
||||
u_h(0.785714) = 6.96917e-08
|
||||
u_h(0.8) = 2.09075e-07
|
||||
u_h(0.814286) = 6.27225e-07
|
||||
u_h(0.828571) = 1.88168e-06
|
||||
u_h(0.842857) = 5.64503e-06
|
||||
u_h(0.857143) = 1.69351e-05
|
||||
u_h(0.871429) = 5.08053e-05
|
||||
u_h(0.885714) = 0.000152416
|
||||
u_h(0.9) = 0.000457247
|
||||
u_h(0.914286) = 0.00137174
|
||||
u_h(0.928571) = 0.00411523
|
||||
u_h(0.942857) = 0.0123457
|
||||
u_h(0.957143) = 0.037037
|
||||
u_h(0.971429) = 0.111111
|
||||
u_h(0.985714) = 0.333333
|
||||
u_h(1) = 1
|
||||
BIN
sheet4/C/plot.png
Normal file
BIN
sheet4/C/plot.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 49 KiB |
1826
sheet4/C/small_Doxyfile
Normal file
1826
sheet4/C/small_Doxyfile
Normal file
File diff suppressed because it is too large
Load diff
124
sheet4/CLANG_default.mk
Normal file
124
sheet4/CLANG_default.mk
Normal file
|
|
@ -0,0 +1,124 @@
|
|||
# Basic Definitions for using the Clang/LLVM compiler suite sequentially
|
||||
# requires setting of COMPILER=CLANG_
|
||||
|
||||
#CLANGPATH=//usr/lib/llvm-10/bin/
|
||||
CC = ${CLANGPATH}clang
|
||||
CXX = ${CLANGPATH}clang++
|
||||
#CXX = ${CLANGPATH}clang++ -lomptarget -fopenmp-targets=nvptx64-nvidia-cuda --cuda-path=/opt/pgi/linux86-64/2017/cuda/8.0
|
||||
#F77 = gfortran
|
||||
LINKER = ${CXX}
|
||||
|
||||
#http://clang.llvm.org/docs/UsersManual.html#options-to-control-error-and-warning-messages
|
||||
WARNINGS += -Weverything
|
||||
WARNINGS += -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-sign-conversion -Wno-date-time -Wno-shorten-64-to-32 -Wno-padded
|
||||
WARNINGS += -Wdocumentation -Wconversion -Wshadow -Wfloat-conversion -pedantic -ferror-limit=1
|
||||
#-fsyntax-only -Wdocumentation -Wconversion -Wshadow -Wfloat-conversion -pedantic
|
||||
|
||||
CXXFLAGS += -O3 -std=c++17 -ferror-limit=1 ${WARNINGS}
|
||||
# don't use -Ofast
|
||||
# -ftrapv
|
||||
LINKFLAGS += -O3
|
||||
|
||||
# different libraries in Ubuntu or Manjaro
|
||||
ifndef UBUNTU
|
||||
UBUNTU=1
|
||||
endif
|
||||
|
||||
# BLAS, LAPACK
|
||||
LINKFLAGS += -llapack -lblas
|
||||
# -lopenblas
|
||||
ifeq ($(UBUNTU),1)
|
||||
# ubuntu
|
||||
else
|
||||
# on archlinux
|
||||
LINKFLAGS += -lcblas
|
||||
endif
|
||||
|
||||
# interprocedural optimization
|
||||
CXXFLAGS += -flto
|
||||
LINKFLAGS += -flto
|
||||
|
||||
# very good check
|
||||
# http://clang.llvm.org/extra/clang-tidy/
|
||||
# good check, see: http://llvm.org/docs/CodingStandards.html#include-style
|
||||
SWITCH_OFF=,-readability-magic-numbers,-readability-redundant-control-flow,-readability-redundant-member-init
|
||||
SWITCH_OFF+=,-readability-redundant-member-init,-readability-isolate-declaration
|
||||
#READABILITY=,readability*${SWITCH_OFF}
|
||||
#TIDYFLAGS = -checks=llvm-*,-llvm-header-guard -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp"
|
||||
TIDYFLAGS = -checks=llvm-*,-llvm-header-guard${READABILITY} -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp"
|
||||
#TIDYFLAGS += -checks='modernize*
|
||||
# ???
|
||||
#TIDYFLAGS = -checks='cert*' -header-filter=.*
|
||||
# MPI checks ??
|
||||
#TIDYFLAGS = -checks='mpi*'
|
||||
# ??
|
||||
#TIDYFLAGS = -checks='performance*' -header-filter=.*
|
||||
#TIDYFLAGS = -checks='portability-*' -header-filter=.*
|
||||
#TIDYFLAGS = -checks='readability-*' -header-filter=.*
|
||||
|
||||
default: ${PROGRAM}
|
||||
|
||||
${PROGRAM}: ${OBJECTS}
|
||||
$(LINKER) $^ ${LINKFLAGS} -o $@
|
||||
|
||||
clean:
|
||||
@rm -f ${PROGRAM} ${OBJECTS}
|
||||
|
||||
clean_all:: clean
|
||||
@rm -f *_ *~ *.bak *.log *.out *.tar
|
||||
|
||||
codecheck: tidy_check
|
||||
tidy_check:
|
||||
clang-tidy ${SOURCES} ${TIDYFLAGS} -- ${SOURCES}
|
||||
# see also http://clang-developers.42468.n3.nabble.com/Error-while-trying-to-load-a-compilation-database-td4049722.html
|
||||
|
||||
run: clean ${PROGRAM}
|
||||
# time ./${PROGRAM} ${PARAMS}
|
||||
./${PROGRAM} ${PARAMS}
|
||||
|
||||
# tar the current directory
|
||||
MY_DIR = `basename ${PWD}`
|
||||
tar: clean_all
|
||||
@echo "Tar the directory: " ${MY_DIR}
|
||||
@cd .. ;\
|
||||
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
|
||||
cd ${MY_DIR}
|
||||
# tar cf `basename ${PWD}`.tar *
|
||||
|
||||
doc:
|
||||
doxygen Doxyfile
|
||||
|
||||
#########################################################################
|
||||
|
||||
.cpp.o:
|
||||
$(CXX) -c $(CXXFLAGS) -o $@ $<
|
||||
|
||||
.c.o:
|
||||
$(CC) -c $(CFLAGS) -o $@ $<
|
||||
|
||||
.f.o:
|
||||
$(F77) -c $(FFLAGS) -o $@ $<
|
||||
|
||||
##################################################################################################
|
||||
# some tools
|
||||
# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags)
|
||||
cache: ${PROGRAM}
|
||||
valgrind --tool=callgrind --simulate-cache=yes ./$^ ${PARAMS}
|
||||
# kcachegrind callgrind.out.<pid> &
|
||||
kcachegrind `ls -1tr callgrind.out.* |tail -1`
|
||||
|
||||
# Check for wrong memory accesses, memory leaks, ...
|
||||
# use smaller data sets
|
||||
mem: ${PROGRAM}
|
||||
valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^ ${PARAMS}
|
||||
|
||||
# Simple run time profiling of your code
|
||||
# CXXFLAGS += -g -pg
|
||||
# LINKFLAGS += -pg
|
||||
prof: ${PROGRAM}
|
||||
perf record ./$^ ${PARAMS}
|
||||
perf report
|
||||
# gprof -b ./$^ > gp.out
|
||||
# kprof -f gp.out -p gprof &
|
||||
|
||||
codecheck: tidy_check
|
||||
183
sheet4/GCC_default.mk
Normal file
183
sheet4/GCC_default.mk
Normal file
|
|
@ -0,0 +1,183 @@
|
|||
# Basic Definitions for using GNU-compiler suite sequentially
|
||||
# requires setting of COMPILER=GCC_
|
||||
|
||||
CC = gcc
|
||||
CXX = g++
|
||||
F77 = gfortran
|
||||
LINKER = ${CXX}
|
||||
|
||||
#LINKFLAGS += -lblas
|
||||
# The <cblas.h> header requires extern "C".
|
||||
|
||||
WARNINGS = -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow \
|
||||
-Wredundant-decls -Winline -fmax-errors=1
|
||||
# -Wunreachable-code
|
||||
#CXXFLAGS += -ffast-math -march=native ${WARNINGS}
|
||||
CXXFLAGS += -O0 -funroll-all-loops -std=c++17 ${WARNINGS}
|
||||
#-msse3
|
||||
# -ftree-vectorizer-verbose=2 -DNDEBUG
|
||||
# -ftree-vectorizer-verbose=5
|
||||
# -ftree-vectorize -fdump-tree-vect-blocks=foo.dump -fdump-tree-pre=stderr
|
||||
|
||||
# CFLAGS = -ffast-math -O0 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details
|
||||
# CFLAGS = -ffast-math -O0 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2
|
||||
# #CFLAGS = -ffast-math -O0 -DNDEBUG -msse3 -fopenmp
|
||||
# FFLAGS = -ffast-math -O0 -DNDEBUG -msse3 -fopenmp
|
||||
# LFLAGS = -ffast-math -O0 -DNDEBUG -msse3 -fopenmp
|
||||
LINKFLAGS += -O0
|
||||
|
||||
#architecture
|
||||
#CPU = -march=znver2
|
||||
CXXFLAGS += ${CPU}
|
||||
LINKFLAGS += ${CPU}
|
||||
|
||||
# different libraries in Ubuntu or Manjaro
|
||||
ifndef UBUNTU
|
||||
UBUNTU=1
|
||||
endif
|
||||
|
||||
# BLAS, LAPACK
|
||||
ifeq ($(UBUNTU),1)
|
||||
LINKFLAGS += -llapack -lblas
|
||||
# -lopenblas
|
||||
else
|
||||
# on archlinux
|
||||
LINKFLAGS += -llapack -lopenblas -lcblas
|
||||
endif
|
||||
|
||||
# interprocedural optimization
|
||||
CXXFLAGS += -flto
|
||||
LINKFLAGS += -flto
|
||||
|
||||
# for debugging purposes (safe code)
|
||||
# -fsanitize=leak # only one out of the three can be used
|
||||
# -fsanitize=address
|
||||
# -fsanitize=thread
|
||||
SANITARY = -fsanitize=address -fsanitize=undefined -fsanitize=null -fsanitize=return \
|
||||
-fsanitize=bounds -fsanitize=alignment -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow \
|
||||
-fsanitize=bool -fsanitize=enum -fsanitize=vptr
|
||||
#CXXFLAGS += ${SANITARY}
|
||||
#LINKFLAGS += ${SANITARY}
|
||||
|
||||
# profiling tools
|
||||
#CXXFLAGS += -pg
|
||||
#LINKFLAGS += -pg
|
||||
|
||||
default: ${PROGRAM}
|
||||
|
||||
${PROGRAM}: ${OBJECTS}
|
||||
$(LINKER) $^ ${LINKFLAGS} -o $@
|
||||
|
||||
clean:
|
||||
@rm -f ${PROGRAM} ${OBJECTS}
|
||||
|
||||
clean_all:: clean
|
||||
-@rm -f *_ *~ *.bak *.log *.out *.tar *.orig *.optrpt
|
||||
-@rm -rf html
|
||||
|
||||
run: clean ${PROGRAM}
|
||||
#run: ${PROGRAM}
|
||||
# time ./${PROGRAM} ${PARAMS}
|
||||
./${PROGRAM} ${PARAMS}
|
||||
|
||||
# tar the current directory
|
||||
MY_DIR = `basename ${PWD}`
|
||||
tar: clean_all
|
||||
@echo "Tar the directory: " ${MY_DIR}
|
||||
@cd .. ;\
|
||||
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
|
||||
cd ${MY_DIR}
|
||||
# tar cf `basename ${PWD}`.tar *
|
||||
|
||||
zip: clean
|
||||
@echo "Zip the directory: " ${MY_DIR}
|
||||
@cd .. ;\
|
||||
zip -r ${MY_DIR}.zip ${MY_DIR} *default.mk ;\
|
||||
cd ${MY_DIR}
|
||||
|
||||
doc:
|
||||
doxygen Doxyfile
|
||||
|
||||
#########################################################################
|
||||
.SUFFIXES: .f90
|
||||
|
||||
.cpp.o:
|
||||
$(CXX) -c $(CXXFLAGS) -o $@ $<
|
||||
# $(CXX) -c $(CXXFLAGS) -o $@ $< 2>&1 | tee -a $<.log
|
||||
# $(CXX) -c $(CXXFLAGS) -o $@ $< 2>&1 | tee -a $(<:.cpp=.log)
|
||||
|
||||
.c.o:
|
||||
$(CC) -c $(CFLAGS) -o $@ $<
|
||||
|
||||
.f.o:
|
||||
$(F77) -c $(FFLAGS) -o $@ $<
|
||||
|
||||
.f90.o:
|
||||
$(F77) -c $(FFLAGS) -o $@ $<
|
||||
|
||||
##################################################################################################
|
||||
# some tools
|
||||
# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags)
|
||||
cache: ${PROGRAM}
|
||||
valgrind --tool=callgrind --simulate-cache=yes ./$^ ${PARAMS}
|
||||
# kcachegrind callgrind.out.<pid> &
|
||||
kcachegrind `ls -1tr callgrind.out.* |tail -1`
|
||||
|
||||
# Check for wrong memory accesses, memory leaks, ...
|
||||
# use smaller data sets
|
||||
# no "-pg" in compile/link options
|
||||
mem: ${PROGRAM}
|
||||
valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^ ${PARAMS}
|
||||
# Graphical interface
|
||||
# valkyrie
|
||||
|
||||
# Simple run time profiling of your code
|
||||
# CXXFLAGS += -g -pg
|
||||
# LINKFLAGS += -pg
|
||||
prof: ${PROGRAM}
|
||||
perf record ./$^ ${PARAMS}
|
||||
perf report
|
||||
# gprof -b ./$^ > gp.out
|
||||
# kprof -f gp.out -p gprof &
|
||||
|
||||
# perf in Ubuntu 20.04: https://www.howtoforge.com/how-to-install-perf-performance-analysis-tool-on-ubuntu-20-04/
|
||||
# * install
|
||||
# * sudo vi /etc/sysctl.conf
|
||||
# add kernel.perf_event_paranoid = 0
|
||||
|
||||
#Trace your heap:
|
||||
#> heaptrack ./main.GCC_
|
||||
#> heaptrack_gui heaptrack.main.GCC_.<pid>.gz
|
||||
heap: ${PROGRAM}
|
||||
heaptrack ./$^ ${PARAMS} 11
|
||||
heaptrack_gui `ls -1tr heaptrack.$^.* |tail -1` &
|
||||
|
||||
codecheck: $(SOURCES)
|
||||
cppcheck --enable=all --inconclusive --std=c++17 --suppress=missingIncludeSystem $^
|
||||
|
||||
|
||||
########################################################################
|
||||
# get the detailed status of all optimization flags
|
||||
info:
|
||||
echo "detailed status of all optimization flags"
|
||||
$(CXX) --version
|
||||
$(CXX) -Q $(CXXFLAGS) --help=optimizers
|
||||
lscpu
|
||||
inxi -C
|
||||
lstopo
|
||||
|
||||
# Excellent hardware info
|
||||
# hardinfo
|
||||
# Live monitoring of CPU frequency etc.
|
||||
# sudo i7z
|
||||
|
||||
# Memory consumption
|
||||
# vmstat -at -SM 3
|
||||
# xfce4-taskmanager
|
||||
|
||||
|
||||
# https://www.tecmint.com/check-linux-cpu-information/
|
||||
#https://www.tecmint.com/monitor-cpu-and-gpu-temperature-in-ubuntu/
|
||||
|
||||
# Debugging:
|
||||
# https://wiki.archlinux.org/index.php/Debugging
|
||||
125
sheet4/ICC_default.mk
Normal file
125
sheet4/ICC_default.mk
Normal file
|
|
@ -0,0 +1,125 @@
|
|||
# Basic Definitions for using INTEL compiler suite sequentially
|
||||
# requires setting of COMPILER=ICC_
|
||||
|
||||
#BINDIR = /opt/intel/bin/
|
||||
|
||||
CC = ${BINDIR}icc
|
||||
CXX = ${BINDIR}icpc
|
||||
F77 = ${BINDIR}ifort
|
||||
LINKER = ${CXX}
|
||||
|
||||
|
||||
WARNINGS = -Wall -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -wd2015,2012 -wn3
|
||||
# -Winline -Wredundant-decls -Wunreachable-code
|
||||
CXXFLAGS += -O3 -fargument-noalias -std=c++17 -DNDEBUG ${WARNINGS} -mkl
|
||||
# profiling tools
|
||||
#CXXFLAGS += -pg
|
||||
#LINKFLAGS += -pg
|
||||
# -vec-report=3
|
||||
# -qopt-report=5 -qopt-report-phase=vec
|
||||
# -guide -parallel
|
||||
# -guide-opts=string -guide-par[=n] -guide-vec[=n]
|
||||
# -auto-p32 -simd -msse3
|
||||
|
||||
LINKFLAGS += -O3
|
||||
|
||||
# LAPACK, BLAS: use MKL by INTEL
|
||||
# LINKFLAGS += -L${BINDIR}../composer_xe_2013.1.117/mkl/lib/intel64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
|
||||
CXXFLAGS += -mkl
|
||||
LINKFLAGS += -mkl
|
||||
|
||||
# interprocedural optimization
|
||||
#CXXFLAGS += -ipo
|
||||
#LINKFLAGS += -ipo
|
||||
|
||||
# annotated assembler file
|
||||
ANNOTED = -fsource-asm -S
|
||||
|
||||
default: ${PROGRAM}
|
||||
|
||||
${PROGRAM}: ${OBJECTS}
|
||||
$(LINKER) $^ ${LINKFLAGS} -o $@
|
||||
|
||||
clean:
|
||||
rm -f ${PROGRAM} ${OBJECTS}
|
||||
|
||||
clean_all:: clean
|
||||
@rm -f *_ *~ *.bak *.log *.out *.tar
|
||||
|
||||
run: clean ${PROGRAM}
|
||||
./${PROGRAM}
|
||||
|
||||
# tar the current directory
|
||||
MY_DIR = `basename ${PWD}`
|
||||
tar: clean_all
|
||||
@echo "Tar the directory: " ${MY_DIR}
|
||||
@cd .. ;\
|
||||
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
|
||||
cd ${MY_DIR}
|
||||
# tar cf `basename ${PWD}`.tar *
|
||||
|
||||
doc:
|
||||
doxygen Doxyfile
|
||||
|
||||
#########################################################################
|
||||
|
||||
.cpp.o:
|
||||
$(CXX) -c $(CXXFLAGS) -o $@ $<
|
||||
|
||||
.c.o:
|
||||
$(CC) -c $(CFLAGS) -o $@ $<
|
||||
|
||||
.f.o:
|
||||
$(F77) -c $(FFLAGS) -o $@ $<
|
||||
|
||||
##################################################################################################
|
||||
# # some tools
|
||||
# # Cache behaviour (CXXFLAGS += -g tracks down to source lines)
|
||||
# cache: ${PROGRAM}
|
||||
# valgrind --tool=callgrind --simulate-cache=yes ./$^
|
||||
# # kcachegrind callgrind.out.<pid> &
|
||||
#
|
||||
# # Check for wrong memory accesses, memory leaks, ...
|
||||
# # use smaller data sets
|
||||
# mem: ${PROGRAM}
|
||||
# valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
|
||||
#
|
||||
# # Simple run time profiling of your code
|
||||
# # CXXFLAGS += -g -pg
|
||||
# # LINKFLAGS += -pg
|
||||
# prof: ${PROGRAM}
|
||||
# ./$^
|
||||
# gprof -b ./$^ > gp.out
|
||||
# # kprof -f gp.out -p gprof &
|
||||
#
|
||||
|
||||
|
||||
mem: inspector
|
||||
prof: amplifier
|
||||
cache: amplifier
|
||||
|
||||
gap_par_report:
|
||||
${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt
|
||||
|
||||
# GUI for performance report
|
||||
amplifier: ${PROGRAM}
|
||||
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
|
||||
# alternative to the solution above:
|
||||
# edit file /etc/sysctl.d/10-ptrace.conf and set the variable kernel.yama.ptrace_scope to 0.
|
||||
vtune-gui &
|
||||
|
||||
# GUI for Memory and Thread analyzer (race condition)
|
||||
inspector: ${PROGRAM}
|
||||
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
|
||||
inspxe-gui &
|
||||
|
||||
advisor:
|
||||
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
|
||||
vtune-gui &
|
||||
|
||||
icc-info:
|
||||
icpc -# main.cpp
|
||||
|
||||
|
||||
|
||||
|
||||
176
sheet4/ONEAPI_default.mk
Normal file
176
sheet4/ONEAPI_default.mk
Normal file
|
|
@ -0,0 +1,176 @@
|
|||
# Basic Definitions for using INTEL compiler suite sequentially
|
||||
# requires setting of COMPILER=ONEAPI_
|
||||
|
||||
# https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onemkl/link-line-advisor.html
|
||||
# requires
|
||||
# source /opt/intel/oneapi/setvars.sh
|
||||
# on AMD: export MKL_DEBUG_CPU_TYPE=5
|
||||
|
||||
#BINDIR = /opt/intel/oneapi/compiler/latest/linux/bin/
|
||||
#MKL_ROOT = /opt/intel/oneapi/mkl/latest/
|
||||
#export KMP_AFFINITY=verbose,compact
|
||||
|
||||
CC = ${BINDIR}icc
|
||||
CXX = ${BINDIR}dpcpp
|
||||
F77 = ${BINDIR}ifort
|
||||
LINKER = ${CXX}
|
||||
|
||||
## Compiler flags
|
||||
WARNINGS = -Wall -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -pedantic
|
||||
WARNINGS += -Wpessimizing-move -Wredundant-move
|
||||
#-wd2015,2012,2014 -wn3
|
||||
# -Winline -Wredundant-decls -Wunreachable-code
|
||||
# -qopt-subscript-in-range
|
||||
# -vec-threshold0
|
||||
|
||||
CXXFLAGS += -O3 -std=c++17 ${WARNINGS}
|
||||
#CXXFLAGS += -DMKL_ILP64 -I"${MKLROOT}/include"
|
||||
#CXXFLAGS += -DMKL_ILP32 -I"${MKLROOT}/include"
|
||||
LINKFLAGS += -O3
|
||||
|
||||
# interprocedural optimization
|
||||
CXXFLAGS += -ipo
|
||||
LINKFLAGS += -ipo
|
||||
LINKFLAGS += -flto
|
||||
|
||||
# annotated Assembler file
|
||||
ANNOTED = -fsource-asm -S
|
||||
|
||||
#architecture
|
||||
CPU = -march=core-avx2
|
||||
#CPU += -mtp=zen
|
||||
# -xCORE-AVX2
|
||||
# -axcode COMMON-AVX512 -axcode MIC-AVX512 -axcode CORE-AVX512 -axcode CORE-AVX2
|
||||
CXXFLAGS += ${CPU}
|
||||
LINKFLAGS += ${CPU}
|
||||
|
||||
# use MKL by INTEL
|
||||
# https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onemkl/link-line-advisor.html
|
||||
# sequential MKL
|
||||
# use the 32-bit integer interface (LP64) instead of the 64-bit integer interface (ILP64)
|
||||
CXXFLAGS += -qmkl=sequential -UMKL_ILP64
|
||||
LINKFLAGS += -O3 -qmkl=sequential -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
|
||||
#LINKFLAGS += -O3 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
|
||||
|
||||
# shared libs: https://aur.archlinux.org/packages/intel-oneapi-compiler-static
|
||||
# install intel-oneapi-compiler-static
|
||||
# or
|
||||
LINKFLAGS += -shared-intel
|
||||
|
||||
|
||||
OPENMP = -qopenmp
|
||||
CXXFLAGS += ${OPENMP}
|
||||
LINKFLAGS += ${OPENMP}
|
||||
|
||||
|
||||
# profiling tools
|
||||
#CXXFLAGS += -pg
|
||||
#LINKFLAGS += -pg
|
||||
# -vec-report=3
|
||||
# -qopt-report=5 -qopt-report-phase=vec -qopt-report-phase=openmp
|
||||
# -guide -parallel
|
||||
# -guide-opts=string -guide-par[=n] -guide-vec[=n]
|
||||
# -auto-p32 -simd
|
||||
|
||||
# Reports: https://software.intel.com/en-us/articles/getting-the-most-out-of-your-intel-compiler-with-the-new-optimization-reports
|
||||
#CXXFLAGS += -qopt-report=5 -qopt-report-phase=vec,par
|
||||
#CXXFLAGS += -qopt-report=5 -qopt-report-phase=cg
|
||||
# Redirect report from *.optrpt to stderr
|
||||
# -qopt-report-file=stderr
|
||||
# Guided parallelization
|
||||
# -guide -parallel
|
||||
# -guide-opts=string -guide-par[=n] -guide-vec[=n]
|
||||
# -auto-p32 -simd
|
||||
|
||||
## run time checks
|
||||
# https://www.intel.com/content/www/us/en/develop/documentation/fortran-compiler-oneapi-dev-guide-and-reference/top/compiler-reference/compiler-options/offload-openmp-and-parallel-processing-options/par-runtime-control-qpar-runtime-control.html
|
||||
|
||||
|
||||
default: ${PROGRAM}
|
||||
|
||||
${PROGRAM}: ${OBJECTS}
|
||||
$(LINKER) $^ ${LINKFLAGS} -o $@
|
||||
|
||||
clean:
|
||||
rm -f ${PROGRAM} ${OBJECTS} *.optrpt
|
||||
|
||||
clean_all:: clean
|
||||
@rm -f *_ *~ *.bak *.log *.out *.tar
|
||||
|
||||
run: clean ${PROGRAM}
|
||||
./${PROGRAM}
|
||||
|
||||
# tar the current directory
|
||||
MY_DIR = `basename ${PWD}`
|
||||
tar: clean_all
|
||||
@echo "Tar the directory: " ${MY_DIR}
|
||||
@cd .. ;\
|
||||
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
|
||||
cd ${MY_DIR}
|
||||
# tar cf `basename ${PWD}`.tar *
|
||||
|
||||
doc:
|
||||
doxygen Doxyfile
|
||||
|
||||
#########################################################################
|
||||
|
||||
.cpp.o:
|
||||
$(CXX) -c $(CXXFLAGS) -o $@ $<
|
||||
|
||||
.c.o:
|
||||
$(CC) -c $(CFLAGS) -o $@ $<
|
||||
|
||||
.f.o:
|
||||
$(F77) -c $(FFLAGS) -o $@ $<
|
||||
|
||||
##################################################################################################
|
||||
# some tools
|
||||
# Cache behaviour (CXXFLAGS += -g tracks down to source lines)
|
||||
# https://software.intel.com/content/www/us/en/develop/documentation/vtune-help/top/analyze-performance/microarchitecture-analysis-group/memory-access-analysis.html
|
||||
|
||||
mem: inspector
|
||||
prof: vtune
|
||||
cache: inspector
|
||||
|
||||
gap_par_report:
|
||||
${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt
|
||||
|
||||
# GUI for performance report
|
||||
amplifier: ${PROGRAM}
|
||||
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
|
||||
echo 0 | sudo tee /proc/sys/kernel/perf_event_paranoid
|
||||
amplxe-gui &
|
||||
|
||||
# GUI for Memory and Thread analyzer (race condition)
|
||||
inspector: ${PROGRAM}
|
||||
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
|
||||
# inspxe-gui &
|
||||
vtune-gui ./${PROGRAM} &
|
||||
|
||||
advisor:
|
||||
source /opt/intel/oneapi/advisor/2021.2.0/advixe-vars.sh
|
||||
# /opt/intel/oneapi/advisor/latest/bin64/advixe-gui &
|
||||
advisor --collect=survey ./${PROGRAM}
|
||||
# advisor --collect=roofline ./${PROGRAM}
|
||||
advisor --report=survey --project-dir=./ src:r=./ --format=csv --report-output=./out/survey.csv
|
||||
|
||||
vtune:
|
||||
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
|
||||
# https://software.intel.com/en-us/articles/intel-advisor-2017-update-1-what-s-new
|
||||
export ADVIXE_EXPERIMENTAL=roofline
|
||||
vtune -collect hotspots ./${PROGRAM}
|
||||
vtune -report hotspots -r r000hs > vtune.out
|
||||
# vtune-gui ./${PROGRAM} &
|
||||
|
||||
icc-info:
|
||||
icpc -# main.cpp
|
||||
|
||||
# MKL on AMD
|
||||
# https://www.computerbase.de/2019-11/mkl-workaround-erhoeht-leistung-auf-amd-ryzen/
|
||||
#
|
||||
# https://sites.google.com/a/uci.edu/mingru-yang/programming/mkl-has-bad-performance-on-an-amd-cpu
|
||||
# export MKL_DEBUG_CPU_TYPE=5
|
||||
# export MKL_NUM_THREADS=1
|
||||
# export MKL_DYNAMIC=false
|
||||
# on Intel compiler
|
||||
# http://publicclu2.blogspot.com/2013/05/intel-complier-suite-reference-card.html
|
||||
94
sheet4/PGI_default.mk
Normal file
94
sheet4/PGI_default.mk
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
# Basic Definitions for using PGI-compiler suite sequentially
|
||||
# requires setting of COMPILER=PGI_
|
||||
# OPTIRUN = optirun
|
||||
|
||||
|
||||
CC = pgcc
|
||||
CXX = pgc++
|
||||
F77 = pgfortran
|
||||
LINKER = ${CXX}
|
||||
|
||||
#LINKFLAGS += -llapack -lblas
|
||||
# on mephisto:
|
||||
#CXXFLAGS += -I/share/apps/atlas/include
|
||||
#LINKFLAGS += -L/share/apps/atlas/lib
|
||||
#LINKFLAGS += -lcblas -latlas
|
||||
|
||||
#LINKFLAGS += -lblas
|
||||
# The <cblas.h> header must be wrapped in extern "C" so that g++ finds all symbols.
|
||||
|
||||
WARNINGS = -Minform=warn
|
||||
# -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -W -Wfloat-equal -Wshadow -Wredundant-decls
|
||||
# -pedantic -Wunreachable-code -Wextra -Winline
|
||||
# -Wunreachable-code
|
||||
|
||||
#PGI_PROFILING = -Minfo=ccff,loop,vect,opt,intensity,mp,accel
|
||||
PGI_PROFILING = -Minfo=ccff,accel,ipa,loop,lre,mp,opt,par,unified,vect,intensity
|
||||
# -Minfo
|
||||
# -Mprof=time
|
||||
# -Mprof=lines
|
||||
# take care with option -Msafeptr
|
||||
CXXFLAGS += -O3 -std=c++17 ${WARNINGS}
|
||||
#CXXFLAGS += -O3 -std=c++11 -DNDEBUG ${PGI_PROFILING} ${WARNINGS}
|
||||
# -fastsse -fargument-noalias ${WARNINGS} -msse3 -vec-report=3
|
||||
|
||||
default: ${PROGRAM}
|
||||
|
||||
${PROGRAM}: ${OBJECTS}
|
||||
$(LINKER) $^ ${LINKFLAGS} -o $@
|
||||
|
||||
clean:
|
||||
@rm -f ${PROGRAM} ${OBJECTS}
|
||||
|
||||
clean_all:: clean
|
||||
@rm -f *_ *~ *.bak *.log *.out *.tar
|
||||
|
||||
run: clean ${PROGRAM}
|
||||
./${PROGRAM}
|
||||
|
||||
# tar the current directory
|
||||
MY_DIR = `basename ${PWD}`
|
||||
tar: clean_all
|
||||
@echo "Tar the directory: " ${MY_DIR}
|
||||
@cd .. ;\
|
||||
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
|
||||
cd ${MY_DIR}
|
||||
# tar cf `basename ${PWD}`.tar *
|
||||
|
||||
doc:
|
||||
doxygen Doxyfile
|
||||
|
||||
#########################################################################
|
||||
|
||||
.cpp.o:
|
||||
$(CXX) -c $(CXXFLAGS) -o $@ $<
|
||||
|
||||
.c.o:
|
||||
$(CC) -c $(CFLAGS) -o $@ $<
|
||||
|
||||
.f.o:
|
||||
$(F77) -c $(FFLAGS) -o $@ $<
|
||||
|
||||
##################################################################################################
|
||||
# # some tools
|
||||
# # Simple run time profiling of your code
|
||||
# # CXXFLAGS += -g -pg
|
||||
# # LINKFLAGS += -pg
|
||||
|
||||
|
||||
# Profiling options PGI, see: pgcollect -help
|
||||
# CPU_PROF = -allcache
|
||||
CPU_PROF = -time
|
||||
# GPU_PROF = -cuda=gmem,branch,cc13 -cudainit
|
||||
#GPU_PROF = -cuda=branch:cc20
|
||||
#
|
||||
PROF_FILE = pgprof.out
|
||||
|
||||
cache: prof
|
||||
|
||||
prof: ${PROGRAM}
|
||||
${OPTIRUN} ${BINDIR}pgcollect $(CPU_PROF) ./$^
|
||||
${OPTIRUN} ${BINDIR}pgprof -exe ./$^ $(PROF_FILE) &
|
||||
|
||||
info:
|
||||
pgaccelinfo -v
|
||||
Loading…
Add table
Add a link
Reference in a new issue