Ex8 and minor improvements

This commit is contained in:
Markus Schmidt 2025-11-12 02:04:18 +01:00
commit 77bc8c6aa3
50 changed files with 214845 additions and 43 deletions

View file

@ -82,8 +82,12 @@ vector<double> benchmark_D(const vector<double>& coeff, const vector<double>& x)
/// Computes the Euclidean (l2) norm of a vector.
///
/// @param x  Input vector; may be empty.
/// @return   Square root of the sum of the squared entries of x
///           (0.0 for an empty vector).
double norm2(const std::vector<double>& x)
{
    double s = 0.0;
    // Each entry must contribute its square exactly once; summing in a
    // second pass as well would scale the result by sqrt(2).
    for (const double xi : x) {
        s += xi * xi;
    }
    return std::sqrt(s);
}
@ -116,7 +120,7 @@ vector<double> matrixMultColumnWise(const vector<double> &A, const vector<double
double sum = 0.0;
for (unsigned int k = 0; k < L; k++)
{
sum += A[k*L+i]*B[k*N+j];
sum += A[k*M+i]*B[k*N+j];
}
C[i*N+j] = sum;
}

View file

@ -131,6 +131,7 @@ int main(int argc, char **argv)
double bytesC = (MC * LC + LC * NC + MC * NC)* sizeof(double);
cout << "\n===== Benchmark C =====\n";
cout << guardC << endl;
cout << "bytes: " << bytesC << endl;
cout << "Timing in sec. : " << tC << endl;
cout << "GFLOPS : " << flopsC / tC / 1024 / 1024 / 1024 << endl;
@ -172,6 +173,7 @@ int main(int argc, char **argv)
double bytesD = (p + 2 * ND)*sizeof(double);
cout << "\n===== Benchmark D =====\n";
cout << guardD << endl;
cout << "bytes: " << bytesD << endl;
cout << "Timing in sec. : " << tD << endl;
cout << "GFLOPS : " << flopsD / tD / 1024 / 1024 / 1024 << endl;
@ -208,6 +210,8 @@ int main(int argc, char **argv)
cout << "GFLOPS : " << 2.0 * NA / tA / 1024 / 1024 / 1024 << endl;
cout << "GiByte/s : "
<< NA * sizeof(xA[0]) / tA / 1024 / 1024 / 1024 << endl;
//a bit faster due to only accessing one vector
}
@ -240,6 +244,8 @@ int main(int argc, char **argv)
cout << "GFLOPS : " << 5.0 * NA / tA / 1024 / 1024 / 1024 << endl;
cout << "GiByte/s : "
<< 2.0 * NA * sizeof(xA[0]) / tA / 1024 / 1024 / 1024 << endl;
// In comparison to benchmark A: slightly slower runtime, but more than double the number of FLOPs, and therefore also more GFLOPS
}
@ -276,11 +282,16 @@ int main(int argc, char **argv)
double bytesC = (MC * LC + LC * NC + MC * NC)* sizeof(double);
cout << "\n===== Benchmark 5C =====\n";
cout << guardC << endl;
cout << "bytes: " << bytesC << endl;
cout << "Timing in sec. : " << tC << endl;
cout << "GFLOPS : " << flopsC / tC / 1024 / 1024 / 1024 << endl;
cout << "GiByte/s : " << bytesC / tC / 1024 / 1024 / 1024 << endl;
// Slower than row-wise access, due to non-contiguous access to the vector memory of A.
// Remedy: transpose the matrix (then the access is row-wise as well) or reorder the loops.
}
return 0;
} // memory for x and y will be deallocated by their destructors
}

51
sheet3/345/output.txt Normal file
View file

@ -0,0 +1,51 @@
g++ -c -g -O0 -funroll-all-loops -std=c++17 -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -Wredundant-decls -Winline -fmax-errors=1 -flto -o main.o main.cpp
g++ -c -g -O0 -funroll-all-loops -std=c++17 -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -Wredundant-decls -Winline -fmax-errors=1 -flto -o mylib.o mylib.cpp
g++ -c -g -O0 -funroll-all-loops -std=c++17 -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -Wredundant-decls -Winline -fmax-errors=1 -flto -o benchmark.o benchmark.cpp
g++ main.o mylib.o benchmark.o -g -O0 -llapack -lblas -flto -o main.GCC_
./main.GCC_
===== Benchmark A =====
<xA,yA> = 1.4e+06
Timing in sec. : 0.00893637
GFLOPS : 0.291808
GiByte/s : 2.33446
===== Benchmark B =====
340000
bytes: 2.31472e+07
Timing in sec. : 0.0133897
GFLOPS : 0.402029
GiByte/s : 1.61001
===== Benchmark C =====
7.37196e+07
bytes: 2.4e+07
Timing in sec. : 8.67235
GFLOPS : 0.21478
GiByte/s : 0.00257736
===== Benchmark D =====
10500
bytes: 3.20001e+07
Timing in sec. : 0.101087
GFLOPS : 0.515935
GiByte/s : 0.294821
===== Benchmark 5A =====
NORM = 150114
Timing in sec. : 0.00703533
GFLOPS : 0.370658
GiByte/s : 1.48263
===== Benchmark 5B =====
<xA,yA> = 1.4e+06
Timing in sec. : 0.0108377
GFLOPS : 0.601533
GiByte/s : 1.92491
===== Benchmark 5C =====
7.37196e+07
bytes: 2.4e+07
Timing in sec. : 15.2407
GFLOPS : 0.122215
GiByte/s : 0.00146658