Ex8 and minor improvements
This commit is contained in:
parent
2195a9db0a
commit
77bc8c6aa3
50 changed files with 214845 additions and 43 deletions
|
|
@ -82,8 +82,12 @@ vector<double> benchmark_D(const vector<double>& coeff, const vector<double>& x)
|
|||
double norm2(const vector<double>& x)
|
||||
{
|
||||
double s = 0.0;
|
||||
for (unsigned int i = 0; i < x.size(); ++i)
|
||||
s += x[i]*x[i];
|
||||
double xi;
|
||||
for (unsigned int i = 0; i < x.size(); ++i){
|
||||
xi = x[i];
|
||||
s += xi*xi;
|
||||
}
|
||||
|
||||
return sqrt(s);
|
||||
}
|
||||
|
||||
|
|
@ -116,7 +120,7 @@ vector<double> matrixMultColumnWise(const vector<double> &A, const vector<double
|
|||
double sum = 0.0;
|
||||
for (unsigned int k = 0; k < L; k++)
|
||||
{
|
||||
sum += A[k*L+i]*B[k*N+j];
|
||||
sum += A[k*M+i]*B[k*N+j];
|
||||
}
|
||||
C[i*N+j] = sum;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -131,6 +131,7 @@ int main(int argc, char **argv)
|
|||
double bytesC = (MC * LC + LC * NC + MC * NC)* sizeof(double);
|
||||
|
||||
cout << "\n===== Benchmark C =====\n";
|
||||
cout << guardC << endl;
|
||||
cout << "bytes: " << bytesC << endl;
|
||||
cout << "Timing in sec. : " << tC << endl;
|
||||
cout << "GFLOPS : " << flopsC / tC / 1024 / 1024 / 1024 << endl;
|
||||
|
|
@ -172,6 +173,7 @@ int main(int argc, char **argv)
|
|||
double bytesD = (p + 2 * ND)*sizeof(double);
|
||||
|
||||
cout << "\n===== Benchmark D =====\n";
|
||||
cout << guardD << endl;
|
||||
cout << "bytes: " << bytesD << endl;
|
||||
cout << "Timing in sec. : " << tD << endl;
|
||||
cout << "GFLOPS : " << flopsD / tD / 1024 / 1024 / 1024 << endl;
|
||||
|
|
@ -208,6 +210,8 @@ int main(int argc, char **argv)
|
|||
cout << "GFLOPS : " << 2.0 * NA / tA / 1024 / 1024 / 1024 << endl;
|
||||
cout << "GiByte/s : "
|
||||
<< NA * sizeof(xA[0]) / tA / 1024 / 1024 / 1024 << endl;
|
||||
|
||||
//a bit faster due to only accessing one vector
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -240,6 +244,8 @@ int main(int argc, char **argv)
|
|||
cout << "GFLOPS : " << 5.0 * NA / tA / 1024 / 1024 / 1024 << endl;
|
||||
cout << "GiByte/s : "
|
||||
<< 2.0 * NA * sizeof(xA[0]) / tA / 1024 / 1024 / 1024 << endl;
|
||||
|
||||
//in comparison to benchmark A: slightly longer runtime, but more than double the number of FLOPs, and therefore also more GFLOPS
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -276,11 +282,16 @@ int main(int argc, char **argv)
|
|||
double bytesC = (MC * LC + LC * NC + MC * NC)* sizeof(double);
|
||||
|
||||
cout << "\n===== Benchmark 5C =====\n";
|
||||
cout << guardC << endl;
|
||||
cout << "bytes: " << bytesC << endl;
|
||||
cout << "Timing in sec. : " << tC << endl;
|
||||
cout << "GFLOPS : " << flopsC / tC / 1024 / 1024 / 1024 << endl;
|
||||
cout << "GiByte/s : " << bytesC / tC / 1024 / 1024 / 1024 << endl;
|
||||
|
||||
//slower than row-wise access, due to non-contiguous (strided) access in the vector memory of A
|
||||
|
||||
//Possible fix: transpose the matrix so that the access is also row-wise, or reorder the loops
|
||||
}
|
||||
|
||||
return 0;
|
||||
} // memory for x and y will be deallocated by their destructors
|
||||
}
|
||||
|
|
|
|||
51
sheet3/345/output.txt
Normal file
51
sheet3/345/output.txt
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
g++ -c -g -O0 -funroll-all-loops -std=c++17 -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -Wredundant-decls -Winline -fmax-errors=1 -flto -o main.o main.cpp
|
||||
g++ -c -g -O0 -funroll-all-loops -std=c++17 -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -Wredundant-decls -Winline -fmax-errors=1 -flto -o mylib.o mylib.cpp
|
||||
g++ -c -g -O0 -funroll-all-loops -std=c++17 -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -Wredundant-decls -Winline -fmax-errors=1 -flto -o benchmark.o benchmark.cpp
|
||||
g++ main.o mylib.o benchmark.o -g -O0 -llapack -lblas -flto -o main.GCC_
|
||||
./main.GCC_
|
||||
|
||||
===== Benchmark A =====
|
||||
<xA,yA> = 1.4e+06
|
||||
Timing in sec. : 0.00893637
|
||||
GFLOPS : 0.291808
|
||||
GiByte/s : 2.33446
|
||||
|
||||
===== Benchmark B =====
|
||||
340000
|
||||
bytes: 2.31472e+07
|
||||
Timing in sec. : 0.0133897
|
||||
GFLOPS : 0.402029
|
||||
GiByte/s : 1.61001
|
||||
|
||||
===== Benchmark C =====
|
||||
7.37196e+07
|
||||
bytes: 2.4e+07
|
||||
Timing in sec. : 8.67235
|
||||
GFLOPS : 0.21478
|
||||
GiByte/s : 0.00257736
|
||||
|
||||
===== Benchmark D =====
|
||||
10500
|
||||
bytes: 3.20001e+07
|
||||
Timing in sec. : 0.101087
|
||||
GFLOPS : 0.515935
|
||||
GiByte/s : 0.294821
|
||||
|
||||
===== Benchmark 5A =====
|
||||
NORM = 150114
|
||||
Timing in sec. : 0.00703533
|
||||
GFLOPS : 0.370658
|
||||
GiByte/s : 1.48263
|
||||
|
||||
===== Benchmark 5B =====
|
||||
<xA,yA> = 1.4e+06
|
||||
Timing in sec. : 0.0108377
|
||||
GFLOPS : 0.601533
|
||||
GiByte/s : 1.92491
|
||||
|
||||
===== Benchmark 5C =====
|
||||
7.37196e+07
|
||||
bytes: 2.4e+07
|
||||
Timing in sec. : 15.2407
|
||||
GFLOPS : 0.122215
|
||||
GiByte/s : 0.00146658
|
||||
Loading…
Add table
Add a link
Reference in a new issue