Ex8 and minor improvements

2025-11-12 02:04:18 +01:00 · 2025-11-12 02:04:18 +01:00 · 77bc8c6aa3
commit 77bc8c6aa3
parent 2195a9db0a
51 changed files with 214885 additions and 83 deletions
--- a/sheet3/7/Makefile
+++ b/sheet3/7/Makefile
@ -13,8 +13,7 @@ COMPILER=GCC_
 # COMPILER=PGI_


-
-SOURCES = main.cpp 
+SOURCES = main.cpp benchmark.cpp
 OBJECTS = $(SOURCES:.cpp=.o)

 PROGRAM	= main.${COMPILER}
--- a/sheet3/7/benchmark.cpp
+++ b/sheet3/7/benchmark.cpp
@ -0,0 +1,43 @@
+#include <iostream>
+#include <vector>   
+#include <cmath>
+using namespace std;
+#include <cblas.h>
+
+// Inner product <x,y> of two vectors, delegated to the BLAS level-1
+// routine cblas_ddot (unit stride for both operands).
+// The element count is taken from x only; y is read over the same count.
+double benchmark_A(const vector<double> &x, const vector<double> &y)
+{
+    const auto    count = x.size();
+    const double *px    = x.data();
+    const double *py    = y.data();
+
+    return cblas_ddot(count, px, 1, py, 1);
+}
+
+/** Matrix-vector product b = A*x via the BLAS level-2 routine cblas_dgemv.
+ *
+ * @param[in] A  dense matrix stored row-wise in a flat vector; its row count
+ *               is derived as A.size() / x.size()
+ * @param[in] x  vector whose length is used as the column count of A
+ * @return vector b with A.size() / x.size() entries;
+ *         an empty vector if @p x is empty
+ */
+vector<double> benchmark_B(const vector<double> &A, const vector<double> &x)
+{
+    const unsigned int N = x.size();
+    // Without any column the row count A.size() / N would divide by zero.
+    if (N == 0)
+    {
+        return vector<double>();
+    }
+
+    const unsigned int M = A.size() / N;
+    vector<double> b(M, 0.0);
+
+    // b := 1.0 * A * x + 0.0 * b, with A row-major and leading dimension N
+    cblas_dgemv(CblasRowMajor, CblasNoTrans, M, N, 1.0, A.data(), N, x.data(), 1, 0.0, b.data(), 1);
+
+    return b;
+}
+
+
+/** Matrix-matrix product C = A*B via the BLAS level-3 routine cblas_dgemm.
+ *
+ * All matrices are stored row-wise in flat vectors. The inner dimension is
+ * derived as L = A.size() / M and the column count of B as N = B.size() / L.
+ *
+ * @param[in] A  left factor, treated as an M x L matrix
+ * @param[in] B  right factor, treated as an L x N matrix
+ * @param[in] M  number of rows of A (and of the result)
+ * @return the M x N product stored row-wise; an empty vector if any of the
+ *         dimensions M, L or N turns out to be zero
+ */
+vector<double> benchmark_C(const vector<double> &A, const vector<double> &B, unsigned int M)
+{
+    // M == 0 would divide by zero when deriving the inner dimension.
+    if (M == 0)
+    {
+        return vector<double>();
+    }
+    const unsigned int L = A.size() / M;
+    // L == 0 (A holds fewer than M entries) would divide by zero below.
+    if (L == 0)
+    {
+        return vector<double>();
+    }
+    const unsigned int N = B.size() / L;
+    vector<double> C(M * N, 0.0);
+    // N == 0: the result is empty, and N would be passed as the leading
+    // dimension of B and C, which BLAS requires to be at least 1.
+    if (N == 0)
+    {
+        return C;
+    }
+
+    // C := 1.0 * A * B + 0.0 * C with leading dimensions L (A), N (B), N (C)
+    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, L, 1.0, A.data(), L, B.data(), N, 0.0, C.data(), N);
+
+    return C;
+}
+
+
+
--- a/sheet3/7/benchmark.h
+++ b/sheet3/7/benchmark.h
@ -0,0 +1,21 @@
+#ifndef BENCHMARK_H
+#define BENCHMARK_H
+
+
+#include <vector>
+using namespace std;
+
+/// Inner product of @p x and @p y (implemented in benchmark.cpp with cblas_ddot).
+/// The number of multiplied entries is x.size().
+double benchmark_A(const vector<double> &x,
+                   const vector<double> &y);
+
+/// Matrix-vector product A*x (implemented in benchmark.cpp with cblas_dgemv).
+/// @p A is a flat, row-wise stored matrix with x.size() columns;
+/// the result has A.size() / x.size() entries.
+vector<double> benchmark_B(const vector<double> &A,
+                                const vector<double> &x);
+
+/// Matrix-matrix product A*B (implemented in benchmark.cpp with cblas_dgemm).
+/// @p A and @p B are flat, row-wise stored matrices; @p M is the number of
+/// rows of A, the remaining dimensions are derived from the vector sizes.
+vector<double> benchmark_C(const vector<double> &A,
+                                const vector<double> &B,
+                                unsigned int M);
+
+
+
+
+#endif 
--- a/sheet3/7/benchmark.o
+++ b/sheet3/7/benchmark.o
--- a/sheet3/7/main.cpp
+++ b/sheet3/7/main.cpp
@ -8,17 +8,17 @@
 #include <sstream>
 #include <vector>
 #include <lapacke.h>
-
+#include "timing.h"
+#include "benchmark.h"
 using namespace std;
 using namespace std::chrono;  // timing

 int main()
 {   
-    unsigned int n= 10;
-    unsigned int nhrs = 1;
+    unsigned int n= 32;

    vector<double> M(n*n,4.0);
-   
+    
    for(unsigned int i=0; i<n; i++)
    {
        for(unsigned int j=0; j<n; j++)
@ -32,30 +32,77 @@ int main()

        
    }
-
+    vector<double> M2 = M;
    


    vector<int> ipiv(n); //pivots
    LAPACKE_dgetrf(LAPACK_ROW_MAJOR,n,n, M.data(),n,ipiv.data());  //M=PLU 

+    

-    unsigned int runtimes[] = {1,2,4,8,16,32};

-    for(unsigned int i=0; i < 6;i++)
+    double time;
+    unsigned int nhrsmax = 1000000;
+    for(unsigned int i=nhrsmax/10; i < nhrsmax;i+=nhrsmax/10)
    {
-        nhrs = runtimes[i];
-         vector<double> b(n*nhrs,0.0);
-        for (unsigned int j=0; j<n; j++)
-        {
-            for (unsigned int k=0; k<nhrs; k++)
-            {
-                b[j*nhrs+k] = j*nhrs+k;
-            }
-        } 
-        LAPACKE_dgetrs(LAPACK_ROW_MAJOR,'N',n,nhrs,M.data(),n,ipiv.data(),b.data(),nhrs);
-    }
+        
+        unsigned int nhrs = i;

+        //FOR CHECKING
+        vector<double> X(n*nhrs,1.0);
+
+        vector<double> b = benchmark_C(M2,X,n);
+        
+        tic();
+        LAPACKE_dgetrs(LAPACK_ROW_MAJOR,'N',n,nhrs,M.data(),n,ipiv.data(),b.data(),nhrs);
+        time = toc();
+        cout << "Time for nhrs=" << nhrs << ": " << time << endl;
+
+
+        
+        double max_err = 0.0;
+        for (unsigned int j = 0; j < n * nhrs; j++)
+        {
+            double err = b[j] - X[j];
+            err *= err;
+            if (err > max_err) max_err = err;
+        }
+        cout <<"max err^2:" << max_err <<endl;
+        cout <<endl;
+
+    }
+        /*
+    
+        Time for nhrs=100000: 0.0605495
+        max err^2:4.93038e-32
+
+        Time for nhrs=200000: 0.127608
+        max err^2:4.93038e-32
+
+        Time for nhrs=300000: 0.182197
+        max err^2:4.93038e-32
+
+        Time for nhrs=400000: 0.202608
+        max err^2:4.93038e-32
+
+        Time for nhrs=500000: 0.24484
+        max err^2:4.93038e-32
+
+        Time for nhrs=600000: 0.298055
+        max err^2:4.93038e-32
+
+        Time for nhrs=700000: 0.362414
+        max err^2:4.93038e-32
+
+        Time for nhrs=800000: 0.410004
+        max err^2:4.93038e-32
+
+        Time for nhrs=900000: 0.492339
+        max err^2:4.93038e-32
+
+        Time grows slowly (roughly linearly with nhrs)
+    */
    
    

--- a/sheet3/7/mylib.o
+++ b/sheet3/7/mylib.o
--- a/sheet3/7/timing.h
+++ b/sheet3/7/timing.h
@ -0,0 +1,51 @@
+//
+//    Gundolf Haase, Oct 18 2024
+//
+#pragma once
+#include <chrono>                  // timing
+#include <stack>
+
+//using Clock = std::chrono::system_clock;   //!< The wall clock timer chosen
+using Clock = std::chrono::high_resolution_clock;
+using TPoint= std::chrono::time_point<Clock>;
+
+// [Galowicz, C++17 STL Cookbook, p. 29]
+
+// 'inline' (C++17) keeps a single shared instance even if this header is
+// included by several translation units.
+inline std::stack<TPoint> MyStopWatch; //!< starting times of the running stopwatches, most recent on top
+
+/** Starts stopwatch timer.
+ *  Use as @code tic(); myfunction(...) ; double tsec = toc();  @endcode
+ *
+ *  The timing can be nested; the most recent time point is stored on top of the stack.
+ *
+ *  Declared inline because the definition lives in a header.
+ *
+ *  @return recent time point
+ *  @see toc
+ */
+inline auto tic()
+{
+    MyStopWatch.push(Clock::now());
+    return MyStopWatch.top();
+}
+ 
+/** Returns the elapsed time from stopwatch.
+ *
+ * The time point from top of the stack is used
+ * if time point @p t_b is not passed as input parameter.
+ * Use as @code tic(); myfunction(...) ; double tsec = toc();  @endcode
+ * or as @code auto t_b = tic(); myfunction(...) ; double tsec = toc(t_b);  @endcode
+ * The last option is to be used in the case of
+ * non-nested but overlapping time measurements.
+ *
+ * Each call removes the top entry of the stopwatch stack.
+ * Declared inline because the definition lives in a header.
+ *
+ * @pre  A matching tic() has been called, i.e. the stopwatch stack is not
+ *       empty; the default argument reads its top element at the call site.
+ *
+ * @param[in]  t_b start time of some stop watch
+ * @return elapsed time in seconds.
+ *
+*/
+inline double toc(TPoint const &t_b = MyStopWatch.top())
+{
+    // https://en.cppreference.com/w/cpp/chrono/treat_as_floating_point
+    using Unit      = std::chrono::seconds;
+    using FpSeconds = std::chrono::duration<double, Unit::period>;
+    auto const t_e = Clock::now();
+    // By default t_b refers to the top element of the stack, so the elapsed
+    // time has to be evaluated before pop() destroys that element.
+    double const elapsed = FpSeconds(t_e - t_b).count();
+    // Guard: pop() on an empty stack is undefined behavior.
+    if (!MyStopWatch.empty())
+    {
+        MyStopWatch.pop();
+    }
+    return elapsed;
+}