#include "benchmarks.h"
#include "benchmark_tests.h"
#include "factorization_solve_tests.h"

#include <iostream>
#include <vector>

/// Driver for the benchmark exercise sheet.
///
/// Runs the benchmark tests A-D, prints one result row per test
/// (timing, GFLOPS, GiByte/s) and afterwards runs the correctness and
/// duration checks of the factorization/solve tests.
///
/// @return 0 on completion.
int main()
{
    // ---------------------------------- 1. ----------------------------------
    // results in file "test_system.txt"

    // ---------------------------------- 2. ----------------------------------
    //          Memory          FLOPS           Read-write operations
    // (A)      2N              2N              2N
    // (B)      MN + N          2NM             2NM + M
    // (C)      ML + LM         2LNM            2LNM + MN
    // (D)      (p+1) + N       3N(p+1)         N(2 + 3(p+1))

    // ---------------------------------- 3. ----------------------------------
    // implementation in file "benchmarks.cpp"

    // ---------------------------------- 4.& 6. ----------------------------------
    // One row per benchmark; the rows are printed below as
    // [0] = timing, [1] = GFLOPS, [2] = GiByte/s.
    // NOTE(review): the element type was reconstructed from its usage here
    // (three streamable values per row) -- confirm that it matches the return
    // type of test_A .. test_D declared in the project headers.
    std::vector<std::vector<double>> results;

    results.push_back(test_A(250, 50000000, scalar));
    results.push_back(test_A(250, 50000000, Kahan_skalar));
    results.push_back(test_A(250, 50000000, scalar_cBLAS));
    //                  Timing      GFLOPS      GiByte/s
    // ------------------------------------------
    // scalar           0.039       2.4         19
    // Kahan_skalar     0.037       2.5         20
    // scalar_cBLAS     0.032       2.9         23

    results.push_back(test_B(100, 20000, 10000, MatVec));
    results.push_back(test_B(100, 20000, 10000, MatVec_cBLAS));
    //                  Timing      GFLOPS      GiByte/s
    // ------------------------------------------
    // MatVec           0.1         3.6         29
    // MatVec_cBLAS     0.074       5           40

    results.push_back(test_C(25, 500, 1000, 1500, MatMat));
    results.push_back(test_C(25, 500, 1000, 1500, MatMat_cBLAS));
    //                  Timing      GFLOPS      GiByte/s
    // ------------------------------------------
    // MatMat           0.57        2.5         20
    // MatMat_cBLAS     0.019       75          6e+02   // unrealistic

    results.push_back(test_D(100, 100, 1000000));
    //                  Timing      GFLOPS      GiByte/s
    // ------------------------------------------
    //                  0.11        2.5         20

    // Print the collected results as a tab-separated table.
    std::cout << '\n' << "Timing\tGFLOPS\tGiByte/s" << '\n';
    std::cout << "------------------------------" << '\n';
    for (const auto& row : results)
    {
        std::cout << row[0] << "\t" << row[1] << "\t" << row[2] << '\n';
    }
    // Single flush at the end of the table, before the remaining tests run.
    std::cout << std::endl;

    // ---------------------------------- 5. ----------------------------------
    // 5.(a) Observation: the time to calculate the norm is approximately half
    //       the time needed for the scalar product.
    //       Reason: only the entries of x have to be accessed, so less memory
    //       has to be read.
    //
    // 5.(b) The runtime for Kahan_skalar is roughly the same as for the normal
    //       scalar product.

    // ---------------------------------- 6. ----------------------------------
    // see 4.

    // ---------------------------------- 7. ----------------------------------
    CheckCorrectness();     // Checked correctness by computing the inverse of A

    CheckDuration(5000);    // The solving time per RHS scales roughly with factor 1/n_rhs

    // ---------------------------------- 8. ----------------------------------
    // done separately

    return 0;
}