This commit is contained in:
dino.celebic 2025-11-13 13:17:09 +01:00
commit e662f3c84b
9 changed files with 38 additions and 37 deletions

2
.gitignore vendored
View file

@ -1,3 +1,5 @@
*.GCC_
*.exe
main main
*.o *.o
.vscode/ .vscode/

View file

@ -8,10 +8,9 @@
#include "task_7.h" #include "task_7.h"
#include "timing.h" #include "timing.h"
#include <cblas.h> // cBLAS Library #include <cblas.h> // cBLAS Library
#include <lapacke.h>
#include <iomanip> #include <iomanip>
#include <iostream> #include <iostream>
#include <lapacke.h>
void task_1() { void task_1() {
printf("\n\n-------------- Task 1 --------------\n\n"); printf("\n\n-------------- Task 1 --------------\n\n");
@ -235,31 +234,33 @@ void task_7() {
// Benchmark // Benchmark
cout << fixed << setprecision(4); cout << fixed << setprecision(4);
size_t NLOOPS = 1000; size_t NLOOPS = 200;
size_t Nrhs = 2000;
cout << "Solution time per right hand side in milliseconds: sec*1000/Nrhs" << endl;
cout << "N = " << " | 1 | 2 | 4 | 8 | 16 | 32 " << endl; cout << "N = " << " | 1 | 2 | 4 | 8 | 16 | 32 " << endl;
cout << "---------|--------|--------|--------|--------|--------|-------" << endl; cout << "------------|--------|--------|--------|--------|--------|-------" << endl;
for (int exp = 1; exp < 10; ++exp) { for (int k = 1; k < 10; ++k) {
cout << "Nrhs = " << static_cast<size_t>(pow(2,exp)); cout << "Nrhs = " << Nrhs*k;
for (size_t N : {1, 2, 4, 8, 16, 32}) { for (size_t N : {1, 2, 4, 8, 16, 32}) {
tic(); tic();
for (size_t i = 0; i < NLOOPS; ++i) { for (size_t i = 0; i < NLOOPS; ++i) {
benchmark_lapacke(N, static_cast<size_t>(pow(2,exp))); benchmark_lapacke(N, Nrhs*k);
} }
double sec = toc(); double sec = toc()*1000 / (static_cast<double>(Nrhs)*k);
cout << " | " << sec; cout << " | " << sec;
} }
cout << endl; cout << endl;
} }
printf("\nFor fixed n, the solution time per rhs does not slow down consistently and scales very well.\nIts faster than expected."); printf("\nFor fixed n, the solution time per rhs stays roughly constant.");
} }
int main() { int main() {
task_1(); // task_1();
task_2(); // task_2();
task_3(); // task_3();
task_4(); // task_4();
task_5(); // task_5();
task_6(); // task_6();
task_7(); task_7();
printf("\n\n"); printf("\n\n");

View file

@ -1,7 +1,7 @@
#include "task_3.h" #include "task_3.h"
#include <vector>
#include <cassert> #include <cassert>
#include <cmath> #include <cmath>
#include <vector>
using namespace std; using namespace std;

View file

@ -2,8 +2,8 @@
#include "task_4+6.h" #include "task_4+6.h"
#include "timing.h" #include "timing.h"
#include <cblas.h> // cBLAS Library #include <cblas.h> // cBLAS Library
#include <vector>
#include <iostream> #include <iostream>
#include <vector>
using namespace std; using namespace std;
void print_performance(double sec, size_t memory, size_t flops, unsigned int size) { void print_performance(double sec, size_t memory, size_t flops, unsigned int size) {

View file

@ -1,9 +1,9 @@
#include "task_4+6.h" #include "task_4+6.h"
#include "task_5.h" #include "task_5.h"
#include "timing.h" #include "timing.h"
#include <vector>
#include <iostream>
#include <cmath> #include <cmath>
#include <iostream>
#include <vector>
using namespace std; using namespace std;
double norm(vector<double> const &x) { double norm(vector<double> const &x) {

View file

@ -1,8 +1,8 @@
#include "task_7.h" #include "task_7.h"
#include <cmath>
#include <iostream>
#include <lapacke.h> #include <lapacke.h>
#include <vector> #include <vector>
#include <iostream>
#include <cmath>
using namespace std; using namespace std;
tuple<vector<double>, vector<double>> init_M(size_t N, size_t Nrhs) { tuple<vector<double>, vector<double>> init_M(size_t N, size_t Nrhs) {

View file

@ -183,20 +183,20 @@ A * x =
0.000000 1.000000 0.000000 1.000000
Solution time per right hand side in milliseconds: sec*1000/Nrhs
N = | 1 | 2 | 4 | 8 | 16 | 32 N = | 1 | 2 | 4 | 8 | 16 | 32
---------|--------|--------|--------|--------|--------|------- ------------|--------|--------|--------|--------|--------|-------
Nrhs = 2 | 0.0047 | 0.0045 | 0.0046 | 0.0130 | 0.0203 | 0.0476 Nrhs = 2000 | 0.2122 | 0.0048 | 0.0079 | 0.0544 | 0.0850 | 0.1523
Nrhs = 4 | 0.0027 | 0.0031 | 0.0033 | 0.0046 | 0.0085 | 0.0250 Nrhs = 4000 | 0.0036 | 0.0053 | 0.0085 | 0.0142 | 0.0748 | 0.1596
Nrhs = 8 | 0.0035 | 0.0035 | 0.0045 | 0.0061 | 0.0119 | 0.0300 Nrhs = 6000 | 0.0033 | 0.0043 | 0.0069 | 0.0147 | 0.0327 | 0.1801
Nrhs = 16 | 0.0085 | 0.0062 | 0.0221 | 0.0113 | 0.0599 | 0.0757 Nrhs = 8000 | 0.0036 | 0.0041 | 0.0069 | 0.0197 | 0.0342 | 0.1795
Nrhs = 32 | 0.0122 | 0.0165 | 0.0112 | 0.0123 | 0.0238 | 0.0834 Nrhs = 10000 | 0.0028 | 0.0042 | 0.0067 | 0.0140 | 0.0328 | 0.1883
Nrhs = 64 | 0.0072 | 0.0078 | 0.0164 | 0.0133 | 0.0421 | 0.0666 Nrhs = 12000 | 0.0025 | 0.0044 | 0.0067 | 0.0157 | 0.0320 | 0.1981
Nrhs = 128 | 0.0073 | 0.0189 | 0.0269 | 0.0199 | 0.0337 | 0.1041 Nrhs = 14000 | 0.0023 | 0.0038 | 0.0065 | 0.0157 | 0.0333 | 0.1916
Nrhs = 256 | 0.0107 | 0.0135 | 0.0279 | 0.0351 | 0.0582 | 0.1438 Nrhs = 16000 | 0.0022 | 0.0036 | 0.0073 | 0.0140 | 0.0324 | 0.1824
Nrhs = 512 | 0.0276 | 0.0174 | 0.0237 | 0.1027 | 0.1113 | 0.2417 Nrhs = 18000 | 0.0028 | 0.0038 | 0.0066 | 0.0145 | 0.0337 | 0.1741
For fixed n, the solution time per rhs does not slow down consistently and scales very well. For fixed n, the solution time per rhs stays roughly constant.
Its faster than expected.
-------------- Task 8 -------------- -------------- Task 8 --------------

View file

@ -1 +0,0 @@

View file

@ -1 +0,0 @@