task8

2025-11-11 15:48:42 +01:00 · 2025-11-11 15:48:42 +01:00 · f0e10857a1
commit f0e10857a1
parent 7a02dff345
5 changed files with 28 additions and 31 deletions
--- a/ex3/code/main.cpp
+++ b/ex3/code/main.cpp
@@ -1,14 +1,17 @@
 // Compile with "make" / "make run" or
 // g++ *.cpp -O3 -ffast-math -lopenblas -llapacke -o main
 #include "task_3.h"
 #include "task_4+6.h"
 #include "task_5.h"
 #include "task_7.h"
 #include "timing.h"
 #include <cblas.h>               // cBLAS Library
 #include <lapacke.h>
 #include <iomanip>
 #include <iostream>
 #include <cblas.h>               // cBLAS Library
 #include <lapacke.h>
 void task_1() {
    printf("\n\n-------------- Task 1 --------------\n\n");
@@ -23,12 +26,12 @@ void task_1() {
    // Memory per array = 610.4 MiB (= 0.6 GiB).
    // Total memory required = 1831.1 MiB (= 1.8 GiB).
    // Each kernel will be executed 20 times.
-    //  The *best* time for each kernel (excluding the first iteration)
+    //     The *best* time for each kernel (excluding the first iteration)
-    //  will be used to compute the reported bandwidth.
+    //     will be used to compute the reported bandwidth.
    // -------------------------------------------------------------
    // Your clock granularity/precision appears to be 1 microseconds.
    // Each test below will take on the order of 116886 microseconds.
-    //    (= 116886 clock ticks)
+    //     (= 116886 clock ticks)
    // Increase the size of the arrays if this shows that
    // you are not getting at least 20 clock ticks per test.
    // -------------------------------------------------------------
@@ -46,25 +49,25 @@ void task_1() {
    // -------------------------------------------------------------
    // ./flops.exe
-    //    FLOPS C Program (Double Precision), V2.0 18 Dec 1992
+    //     FLOPS C Program (Double Precision), V2.0 18 Dec 1992
-    //    Module     Error        RunTime      MFLOPS
+    //     Module     Error        RunTime      MFLOPS
    //                             (usec)
-    //      1      4.0146e-13      0.0024   5827.9076
+    //         1      4.0146e-13      0.0024   5827.9076
-    //      2     -1.4166e-13      0.0007  10037.8942
+    //         2     -1.4166e-13      0.0007  10037.8942
-    //      3      4.7184e-14      0.0039   4371.9185
+    //         3      4.7184e-14      0.0039   4371.9185
-    //      4     -1.2557e-13      0.0034   4355.5711
+    //         4     -1.2557e-13      0.0034   4355.5711
-    //      5     -1.3800e-13      0.0066   4415.6439
+    //         5     -1.3800e-13      0.0066   4415.6439
-    //      6      3.2380e-13      0.0065   4441.6299
+    //         6      3.2380e-13      0.0065   4441.6299
-    //      7     -8.4583e-11      0.0053   2277.1707
+    //         7     -8.4583e-11      0.0053   2277.1707
-    //      8      3.4867e-13      0.0069   4367.6094
+    //         8      3.4867e-13      0.0069   4367.6094
-    //    Iterations      =  512000000
+    //     Iterations      =  512000000
-    //    NullTime (usec) =     0.0000
+    //     NullTime (usec) =     0.0000
-    //    MFLOPS(1)       =  7050.6178
+    //     MFLOPS(1)       =  7050.6178
-    //    MFLOPS(2)       =  3461.6233
+    //     MFLOPS(2)       =  3461.6233
-    //    MFLOPS(3)       =  4175.0442
+    //     MFLOPS(3)       =  4175.0442
-    //    MFLOPS(4)       =  4389.7311
+    //     MFLOPS(4)       =  4389.7311
 }
 void task_2() {
@@ -231,7 +234,7 @@ void task_7() {
    // #################################
    // Benchmark
-    cout << fixed << setprecision(4);  // 4 digits after decimal
+    cout << fixed << setprecision(4);
    size_t NLOOPS = 1000;
    cout << "N    = " << "  | 1      | 2      | 4      | 8      | 16     | 32 " << endl;
    cout <<       "---------|--------|--------|--------|--------|--------|-------" << endl;
@@ -248,9 +251,6 @@ void task_7() {
        cout << endl;
    }
    printf("\nFor fixed n, the solution time per rhs does not slow down consistently and scales very well.\nIts faster than expected.");
 }
 int main() {
--- a/ex3/code/task_3.cpp
+++ b/ex3/code/task_3.cpp
@@ -1,7 +1,6 @@
 #include "task_3.h"
 #include <vector>
 #include <cassert>
 #include <iostream>
 #include <cmath>
 using namespace std;
--- a/ex3/code/task_4+6.cpp
+++ b/ex3/code/task_4+6.cpp
@@ -1,9 +1,9 @@
 #include "task_3.h"
 #include "task_4+6.h"
 #include "timing.h"
 #include <cblas.h>               // cBLAS Library
 #include <vector>
 #include <iostream>
 #include <cblas.h>               // cBLAS Library
 using namespace std;
 void print_performance(double sec, size_t memory, size_t flops, unsigned int size) {
--- a/ex3/code/task_5.h
+++ b/ex3/code/task_5.h
@@ -1,5 +1,4 @@
 #pragma once
 #include <vector>
 using namespace std;
--- a/ex3/code/task_7.cpp
+++ b/ex3/code/task_7.cpp
@@ -1,9 +1,8 @@
 #include "task_7.h"
 #include <iostream>
 #include <vector>
 #include <cmath>
 #include <lapacke.h>
 #include <vector>
 #include <iostream>
 #include <cmath>
 using namespace std;
 tuple<vector<double>, vector<double>> init_M(size_t N, size_t Nrhs) {