Ex8 and minor improvements
This commit is contained in:
parent
2195a9db0a
commit
77bc8c6aa3
50 changed files with 214845 additions and 43 deletions
|
|
@ -13,8 +13,7 @@ COMPILER=GCC_
|
|||
# COMPILER=PGI_
|
||||
|
||||
|
||||
|
||||
SOURCES = main.cpp
|
||||
SOURCES = main.cpp benchmark.cpp
|
||||
OBJECTS = $(SOURCES:.cpp=.o)
|
||||
|
||||
PROGRAM = main.${COMPILER}
|
||||
|
|
|
|||
43
sheet3/7/benchmark.cpp
Normal file
43
sheet3/7/benchmark.cpp
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <cmath>
|
||||
using namespace std;
|
||||
#include <cblas.h>
|
||||
|
||||
// Inner product
|
||||
double benchmark_A(const vector<double> &x, const vector<double> &y)
|
||||
{
|
||||
|
||||
|
||||
return cblas_ddot(x.size(),x.data(),1,y.data(),1);
|
||||
|
||||
}
|
||||
|
||||
//Matrix-vector product
|
||||
vector<double> benchmark_B(const vector<double> &A, const vector<double> &x)
|
||||
{
|
||||
unsigned int N = x.size();
|
||||
unsigned int M = A.size() / N;
|
||||
vector<double> b(M, 0.0);
|
||||
|
||||
cblas_dgemv(CblasRowMajor,CblasNoTrans,M,N,1,A.data(),N,x.data(),1,0.0,b.data(),1);
|
||||
|
||||
return b;
|
||||
}
|
||||
|
||||
|
||||
//Matrix-Matrix product
|
||||
vector<double> benchmark_C(const vector<double> &A, const vector<double> &B, unsigned int M)
|
||||
{
|
||||
unsigned int L = A.size()/M;
|
||||
unsigned int N = B.size()/L;
|
||||
vector<double> C(M*N,0.0);
|
||||
|
||||
cblas_dgemm(CblasRowMajor,CblasNoTrans,CblasNoTrans,M,N,L,1.0,A.data(),L,B.data(),N,0.0,C.data(),N);
|
||||
|
||||
return C;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
21
sheet3/7/benchmark.h
Normal file
21
sheet3/7/benchmark.h
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
#ifndef BENCHMARK_H
|
||||
#define BENCHMARK_H
|
||||
|
||||
|
||||
#include <vector>
|
||||
using namespace std;
|
||||
|
||||
/** Inner product of two vectors.
 *
 * @param[in] x first vector; its size determines the number of summed terms
 * @param[in] y second vector
 * @return inner product <x,y>
 */
double benchmark_A(const std::vector<double> &x, const std::vector<double> &y);

/** Matrix-vector product.
 *
 * @param[in] A dense matrix stored row-major in a flat vector, with x.size() columns
 * @param[in] x vector
 * @return resulting vector A*x
 */
std::vector<double> benchmark_B(const std::vector<double> &A, const std::vector<double> &x);

/** Matrix-matrix product.
 *
 * @param[in] A dense matrix with M rows, stored row-major in a flat vector
 * @param[in] B dense matrix stored row-major in a flat vector
 * @param[in] M number of rows of A
 * @return resulting matrix A*B, stored row-major in a flat vector
 */
std::vector<double> benchmark_C(const std::vector<double> &A, const std::vector<double> &B, unsigned int M);
|
||||
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
Binary file not shown.
|
|
@ -8,17 +8,17 @@
|
|||
#include <sstream>
|
||||
#include <vector>
|
||||
#include <lapacke.h>
|
||||
|
||||
#include "timing.h"
|
||||
#include "benchmark.h"
|
||||
using namespace std;
|
||||
using namespace std::chrono; // timing
|
||||
|
||||
int main()
|
||||
{
|
||||
unsigned int n= 10;
|
||||
unsigned int nhrs = 1;
|
||||
unsigned int n= 32;
|
||||
|
||||
vector<double> M(n*n,4.0);
|
||||
|
||||
|
||||
for(unsigned int i=0; i<n; i++)
|
||||
{
|
||||
for(unsigned int j=0; j<n; j++)
|
||||
|
|
@ -32,30 +32,77 @@ int main()
|
|||
|
||||
|
||||
}
|
||||
|
||||
vector<double> M2 = M;
|
||||
|
||||
|
||||
|
||||
vector<int> ipiv(n); //pivots
|
||||
LAPACKE_dgetrf(LAPACK_ROW_MAJOR,n,n, M.data(),n,ipiv.data()); //M=PLU
|
||||
|
||||
|
||||
|
||||
unsigned int runtimes[] = {1,2,4,8,16,32};
|
||||
|
||||
for(unsigned int i=0; i < 6;i++)
|
||||
double time;
|
||||
unsigned int nhrsmax = 1000000;
|
||||
for(unsigned int i=nhrsmax/10; i < nhrsmax;i+=nhrsmax/10)
|
||||
{
|
||||
nhrs = runtimes[i];
|
||||
vector<double> b(n*nhrs,0.0);
|
||||
for (unsigned int j=0; j<n; j++)
|
||||
{
|
||||
for (unsigned int k=0; k<nhrs; k++)
|
||||
{
|
||||
b[j*nhrs+k] = j*nhrs+k;
|
||||
}
|
||||
}
|
||||
LAPACKE_dgetrs(LAPACK_ROW_MAJOR,'N',n,nhrs,M.data(),n,ipiv.data(),b.data(),nhrs);
|
||||
}
|
||||
|
||||
unsigned int nhrs = i;
|
||||
|
||||
//FOR CHECKING
|
||||
vector<double> X(n*nhrs,1.0);
|
||||
|
||||
vector<double> b = benchmark_C(M2,X,n);
|
||||
|
||||
tic();
|
||||
LAPACKE_dgetrs(LAPACK_ROW_MAJOR,'N',n,nhrs,M.data(),n,ipiv.data(),b.data(),nhrs);
|
||||
time = toc();
|
||||
cout << "Time for nhrs=" << nhrs << ": " << time << endl;
|
||||
|
||||
|
||||
|
||||
double max_err = 0.0;
|
||||
for (unsigned int j = 0; j < n * nhrs; j++)
|
||||
{
|
||||
double err = b[j] - X[j];
|
||||
err *= err;
|
||||
if (err > max_err) max_err = err;
|
||||
}
|
||||
cout <<"max err^2:" << max_err <<endl;
|
||||
cout <<endl;
|
||||
|
||||
}
|
||||
/*
|
||||
|
||||
Time for nhrs=100000: 0.0605495
|
||||
max err^2:4.93038e-32
|
||||
|
||||
Time for nhrs=200000: 0.127608
|
||||
max err^2:4.93038e-32
|
||||
|
||||
Time for nhrs=300000: 0.182197
|
||||
max err^2:4.93038e-32
|
||||
|
||||
Time for nhrs=400000: 0.202608
|
||||
max err^2:4.93038e-32
|
||||
|
||||
Time for nhrs=500000: 0.24484
|
||||
max err^2:4.93038e-32
|
||||
|
||||
Time for nhrs=600000: 0.298055
|
||||
max err^2:4.93038e-32
|
||||
|
||||
Time for nhrs=700000: 0.362414
|
||||
max err^2:4.93038e-32
|
||||
|
||||
Time for nhrs=800000: 0.410004
|
||||
max err^2:4.93038e-32
|
||||
|
||||
Time for nhrs=900000: 0.492339
|
||||
max err^2:4.93038e-32
|
||||
|
||||
Time grows slowly (roughly linearly in nhrs)
|
||||
*/
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
BIN
sheet3/7/mylib.o
BIN
sheet3/7/mylib.o
Binary file not shown.
51
sheet3/7/timing.h
Normal file
51
sheet3/7/timing.h
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
//
|
||||
// Gundolf Haase, Oct 18 2024
|
||||
//
|
||||
#pragma once
|
||||
#include <chrono> // timing
#include <stack>
#include <stdexcept>
|
||||
|
||||
//using Clock = std::chrono::system_clock; //!< The wall clock timer chosen
|
||||
using Clock = std::chrono::high_resolution_clock;
using TPoint = std::chrono::time_point<Clock>;

// [Galowicz, C++17 STL Cookbook, p. 29]

// Declared `inline` so that the header may be included from several
// translation units without multiple-definition errors at link time.
inline std::stack<TPoint> MyStopWatch;   //!< starting times of the running stopwatches

/** Starts stopwatch timer.
 *  Use as @code tic(); myfunction(...) ; double tsec = toc(); @endcode
 *
 *  The timing can be nested and the recent time point is stored on top of the stack.
 *
 *  @return recent time point
 *  @see toc
 */
inline auto tic()
{
    MyStopWatch.push(Clock::now());
    return MyStopWatch.top();
}

/** Returns the elapsed time since the given start time.
 *
 *  Use as @code auto t_b = tic(); myfunction(...) ; double tsec = toc(t_b); @endcode
 *  This variant is to be used in the case of
 *  non-nested but overlapping time measurements.
 *
 *  The most recent time point is removed from the stack, if there is one.
 *
 *  @param[in] t_b start time of some stop watch
 *  @return elapsed time in seconds.
 *  @see tic
 */
inline double toc(TPoint const &t_b)
{
    // https://en.cppreference.com/w/cpp/chrono/treat_as_floating_point
    using Unit = std::chrono::seconds;
    using FpSeconds = std::chrono::duration<double, Unit::period>;

    // Evaluate the duration before pop(): t_b may refer to the top element of
    // the stack, which is destroyed by pop().
    double const elapsed = FpSeconds(Clock::now() - t_b).count();
    if (!MyStopWatch.empty())
    {
        MyStopWatch.pop();
    }
    return elapsed;
}

/** Returns the elapsed time from stopwatch.
 *
 *  The time point from top of the stack is used as start time and is removed
 *  from the stack.
 *  Use as @code tic(); myfunction(...) ; double tsec = toc(); @endcode
 *
 *  @return elapsed time in seconds.
 *  @throws std::logic_error if no stopwatch is running, i.e. there is no
 *          matching call of tic().
 *  @see tic
 */
inline double toc()
{
    if (MyStopWatch.empty())
    {
        throw std::logic_error("toc() called without a matching tic()");
    }
    return toc(MyStopWatch.top());
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue