// SciFEM_Schratter/ex3_benchmarks/main.cpp

#include "benchmarks.h"
#include "benchmark_tests.h"
#include "factorization_solve_tests.h"
#include <iostream>

// Program entry point for the benchmark exercise.
//
// Runs the benchmark drivers test_A, test_B, test_C and test_D in order, stores
// the vector returned by each run, and prints the first three entries of every
// stored vector under the column header "Timing / GFLOPS / GiByte/s".
// Afterwards it calls CheckCorrectness() and CheckDuration(5000).
// The numbered comment sections hold the written answers and the measurements
// that were recorded for the individual exercise tasks.
int main()
{
    // ============================== Task 1 ==============================
    // The results are stored in the file "test_system.txt".


    // ============================== Task 2 ==============================
    //      Memory      FLOPS       Read-write operations
    // (A)  2N          2N          2N
    // (B)  MN + N      2NM         2NM + M
    // (C)  ML + LM     2LNM        2LNM + MN
    // (D)  (p+1) + N   3N(p+1)     N(2 + 3(p+1))
    //
    // NOTE(review): the memory entry for (C) reads "ML + LM"; possibly "ML + LN"
    // was intended - confirm against the exercise sheet.


    // ============================== Task 3 ==============================
    // The kernels are implemented in the file "benchmarks.cpp".


    // ============================ Tasks 4 & 6 ===========================
    // One entry per benchmark run, in the order the runs are executed.
    std::vector<std::vector<double>> measurements;

    // (A) scalar product variants
    measurements.push_back(test_A(250, 50000000, scalar));
    measurements.push_back(test_A(250, 50000000, Kahan_skalar));
    measurements.push_back(test_A(250, 50000000, scalar_cBLAS));
    // Recorded values:
    //                  Timing  GFLOPS  GiByte/s
    // ------------------------------------------
    // scalar           0.039   2.4     19
    // Kahan_skalar     0.037   2.5     20
    // scalar_cBLAS     0.032   2.9     23


    // (B) matrix-vector product variants
    measurements.push_back(test_B(100, 20000, 10000, MatVec));
    measurements.push_back(test_B(100, 20000, 10000, MatVec_cBLAS));
    // Recorded values:
    //                  Timing  GFLOPS  GiByte/s
    // ------------------------------------------
    // MatVec           0.1     3.6     29
    // MatVec_cBLAS     0.074   5       40


    // (C) matrix-matrix product variants
    measurements.push_back(test_C(25, 500, 1000, 1500, MatMat));
    measurements.push_back(test_C(25, 500, 1000, 1500, MatMat_cBLAS));
    // Recorded values:
    //                  Timing  GFLOPS  GiByte/s
    // ------------------------------------------
    // MatMat           0.57    2.5     20
    // MatMat_cBLAS     0.019   75      6e+02   // unrealistic


    // (D)
    measurements.push_back(test_D(100, 100, 1000000));
    // Recorded values:
    //                  Timing  GFLOPS  GiByte/s
    // ------------------------------------------
    //                  0.11    2.5     20


    // Summary table: one line per stored run, columns separated by tabs.
    std::cout << std::endl << "Timing\tGFLOPS\tGiByte/s" << std::endl;
    std::cout << "------------------------------" << std::endl;
    for (const auto& row : measurements)
    {
        std::cout << row[0] << "\t" << row[1] << "\t" << row[2] << std::endl;
    }
    std::cout << std::endl;


    // ============================== Task 5 ==============================
    // 5.(a) Observation: computing the norm takes approximately half the time
    //       of the scalar product.
    //       Reason: only the entries of x have to be accessed, so less memory
    //       has to be read.
    //
    // 5.(b) The runtime of Kahan_skalar is roughly the same as that of the
    //       plain scalar product.


    // ============================== Task 6 ==============================
    // See task 4.


    // ============================== Task 7 ==============================
    // Correctness was checked by computing the inverse of A.
    CheckCorrectness();

    // Observation: the solving time per RHS scales roughly with factor 1/n_rhs.
    CheckDuration(5000);


    // ============================== Task 8 ==============================
    // Done separately.


    return 0;
}