diff --git a/ex3/code/main.cpp b/ex3/code/main.cpp index 9f91412..f6d50a5 100644 --- a/ex3/code/main.cpp +++ b/ex3/code/main.cpp @@ -1,14 +1,17 @@ +// Compile with "make" / "make run" or +// g++ *.cpp -O3 -ffast-math -lopenblas -llapacke -o main + #include "task_3.h" #include "task_4+6.h" #include "task_5.h" #include "task_7.h" #include "timing.h" +#include // cBLAS Library +#include #include #include -#include // cBLAS Library -#include void task_1() { printf("\n\n-------------- Task 1 --------------\n\n"); @@ -23,12 +26,12 @@ void task_1() { // Memory per array = 610.4 MiB (= 0.6 GiB). // Total memory required = 1831.1 MiB (= 1.8 GiB). // Each kernel will be executed 20 times. - // The *best* time for each kernel (excluding the first iteration) - // will be used to compute the reported bandwidth. + // The *best* time for each kernel (excluding the first iteration) + // will be used to compute the reported bandwidth. // ------------------------------------------------------------- // Your clock granularity/precision appears to be 1 microseconds. // Each test below will take on the order of 116886 microseconds. - // (= 116886 clock ticks) + // (= 116886 clock ticks) // Increase the size of the arrays if this shows that // you are not getting at least 20 clock ticks per test. // ------------------------------------------------------------- @@ -46,25 +49,25 @@ void task_1() { // ------------------------------------------------------------- // ./flops.exe - // FLOPS C Program (Double Precision), V2.0 18 Dec 1992 + // FLOPS C Program (Double Precision), V2.0 18 Dec 1992 - // Module Error RunTime MFLOPS + // Module Error RunTime MFLOPS // (usec) - // 1 4.0146e-13 0.0024 5827.9076 - // 2 -1.4166e-13 0.0007 10037.8942 - // 3 4.7184e-14 0.0039 4371.9185 - // 4 -1.2557e-13 0.0034 4355.5711 - // 5 -1.3800e-13 0.0066 4415.6439 - // 6 3.2380e-13 0.0065 4441.6299 - // 7 -8.4583e-11 0.0053 2277.1707 - // 8 3.4867e-13 0.0069 4367.6094 + // 1 4.0146e-13 0.0024 5827.9076 + // 2 -1.4166e-13 0.0007 10037.8942 + // 3 4.7184e-14 0.0039 4371.9185 + // 4 -1.2557e-13 0.0034 4355.5711 + // 5 -1.3800e-13 0.0066 4415.6439 + // 6 3.2380e-13 0.0065 4441.6299 + // 7 -8.4583e-11 0.0053 2277.1707 + // 8 3.4867e-13 0.0069 4367.6094 - // Iterations = 512000000 - // NullTime (usec) = 0.0000 - // MFLOPS(1) = 7050.6178 - // MFLOPS(2) = 3461.6233 - // MFLOPS(3) = 4175.0442 - // MFLOPS(4) = 4389.7311 + // Iterations = 512000000 + // NullTime (usec) = 0.0000 + // MFLOPS(1) = 7050.6178 + // MFLOPS(2) = 3461.6233 + // MFLOPS(3) = 4175.0442 + // MFLOPS(4) = 4389.7311 } void task_2() { @@ -231,7 +234,7 @@ void task_7() { // ################################# // Benchmark - cout << fixed << setprecision(4); // 4 digits after decimal + cout << fixed << setprecision(4); size_t NLOOPS = 1000; cout << "N = " << " | 1 | 2 | 4 | 8 | 16 | 32 " << endl; cout << "---------|--------|--------|--------|--------|--------|-------" << endl; @@ -248,9 +251,6 @@ void task_7() { cout << endl; } printf("\nFor fixed n, the solution time per rhs does not slow down consistently and scales very well.\nIts faster than expected."); - - - } int main() { diff --git a/ex3/code/task_3.cpp b/ex3/code/task_3.cpp index 22a0482..57c5561 100644 --- a/ex3/code/task_3.cpp +++ b/ex3/code/task_3.cpp @@ -1,7 +1,6 @@ #include "task_3.h" #include #include -#include #include using namespace std; diff --git a/ex3/code/task_4+6.cpp b/ex3/code/task_4+6.cpp index bcde52c..51359f9 100644 --- a/ex3/code/task_4+6.cpp +++ b/ex3/code/task_4+6.cpp @@ -1,9 +1,9 @@ #include "task_3.h" #include "task_4+6.h" #include "timing.h" +#include // cBLAS Library #include #include -#include // cBLAS Library using namespace std; void print_performance(double sec, size_t memory, size_t flops, unsigned int size) { diff --git a/ex3/code/task_5.h b/ex3/code/task_5.h index 24609ba..b8f970b 100644 --- a/ex3/code/task_5.h +++ b/ex3/code/task_5.h @@ -1,5 +1,4 @@ #pragma once - #include using namespace std; diff --git a/ex3/code/task_7.cpp b/ex3/code/task_7.cpp index 1121ec7..86b523d 100644 --- a/ex3/code/task_7.cpp +++ b/ex3/code/task_7.cpp @@ -1,9 +1,8 @@ #include "task_7.h" - -#include -#include -#include #include +#include +#include +#include using namespace std; tuple, vector> init_M(size_t N, size_t Nrhs) {