Pushing everything again, accidentally deleted my remote repository
This commit is contained in:
commit
1bee3e8e5b
101 changed files with 9428 additions and 0 deletions
30
ex5/ex5_4/Makefile
Normal file
30
ex5/ex5_4/Makefile
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
#
# Compiler tool chain selection. GNU (GCC_) is the default.
# `?=` assigns only when COMPILER is not already defined, so both
#   export COMPILER=ICC_        (from the shell environment)
#   make COMPILER=ICC_          (on the command line)
# take effect. A plain `COMPILER=GCC_` here would silently override the
# exported environment variable.
COMPILER ?= GCC_

# use Intel compilers
#COMPILER=ICC_

# use PGI compilers
# COMPILER=PGI_


SOURCES = main.cpp benchmarks.cpp benchmark_tests.cpp
OBJECTS = $(SOURCES:.cpp=.o)

PROGRAM = main.${COMPILER}

# the next two lines enable debugging and detailed performance analysis;
# comment them out to build without debug information
CXXFLAGS += -g
LINKFLAGS += -g
# do not use -pg with PGI compilers

# fall back to the GNU tool chain if COMPILER is undefined or empty
ifndef COMPILER
COMPILER=GCC_
endif

include ../${COMPILER}default.mk
|
||||
375
ex5/ex5_4/benchmark_tests.cpp
Normal file
375
ex5/ex5_4/benchmark_tests.cpp
Normal file
|
|
@ -0,0 +1,375 @@
|
|||
#include "benchmark_tests.h"
|
||||
#include "benchmarks.h"
|
||||
#include <chrono>
|
||||
#include <iostream>
|
||||
#include <math.h>
|
||||
using namespace std::chrono;
|
||||
|
||||
vector<double> test_A(const size_t &NLOOPS, const size_t &N)
|
||||
{
|
||||
cout << "#################### (A) ####################" << endl;
|
||||
cout << "\nLOOPS = " << NLOOPS << endl;
|
||||
cout << "\nN = " << N << endl;
|
||||
|
||||
|
||||
// Memory allocation
|
||||
cout << "Memory allocation\n";
|
||||
|
||||
vector<double> x(N), y(N);
|
||||
|
||||
cout.precision(2);
|
||||
cout << 2.0*N *sizeof(x[0]) / 1024 / 1024 / 1024 << " GByte Memory allocated\n";
|
||||
cout.precision(6);
|
||||
|
||||
|
||||
// Data initialization
|
||||
// Special: x_i = i+1; y_i = 1/x_i ==> <x,y> == N
|
||||
|
||||
for (size_t i = 0; i < N; ++i)
|
||||
{
|
||||
x[i] = i % 219 + 1;
|
||||
y[i] = 1.0/x[i];
|
||||
}
|
||||
|
||||
|
||||
cout << "\nStart Benchmarking scalar\n";
|
||||
|
||||
auto t1 = system_clock::now(); // start timer
|
||||
// Do calculation
|
||||
double check(0.0),ss(0.0);
|
||||
for (size_t i = 0; i < NLOOPS; ++i)
|
||||
{
|
||||
check = scalar_parallel(x, y);
|
||||
ss += check; // prevents the optimizer from removing unused calculation results.
|
||||
}
|
||||
|
||||
auto t2 = system_clock::now(); // stop timer
|
||||
auto duration = duration_cast<microseconds>(t2 - t1); // duration in microseconds
|
||||
double t_diff = static_cast<double>(duration.count()) / 1e6; // overall duration in seconds
|
||||
t_diff = t_diff/NLOOPS; // duration per loop seconds
|
||||
|
||||
|
||||
|
||||
// Check the correct result
|
||||
cout << "\n <x,y> = " << check << endl;
|
||||
if (static_cast<unsigned int>(check) != N)
|
||||
cout << " !! W R O N G result !!\n";
|
||||
cout << endl;
|
||||
|
||||
|
||||
// Timings and Performance
|
||||
cout << endl;
|
||||
cout.precision(2);
|
||||
|
||||
|
||||
double Gflops = 2.0*N / t_diff / 1024 / 1024 / 1024;
|
||||
double MemBandwidth = 2.0*N / t_diff / 1024 / 1024 / 1024 * sizeof(x[0]);
|
||||
|
||||
cout << "Total duration : " << t_diff*NLOOPS << endl;
|
||||
cout << "Timing in sec. : " << t_diff << endl;
|
||||
cout << "GFLOPS : " << Gflops << endl;
|
||||
cout << "GiByte/s : " << MemBandwidth << endl;
|
||||
|
||||
|
||||
return vector<double>{t_diff, Gflops, MemBandwidth};
|
||||
}
|
||||
|
||||
vector<double> test_A_sum(const size_t &NLOOPS, const size_t &N)
|
||||
{
|
||||
cout << "#################### (A) sum ####################" << endl;
|
||||
cout << "\nLOOPS = " << NLOOPS << endl;
|
||||
cout << "\nN = " << N << endl;
|
||||
|
||||
|
||||
// Memory allocation
|
||||
cout << "Memory allocation\n";
|
||||
|
||||
vector<double> x(N);
|
||||
|
||||
cout.precision(2);
|
||||
cout << 1.0*N *sizeof(x[0]) / 1024 / 1024 / 1024 << " GByte Memory allocated\n";
|
||||
cout.precision(6);
|
||||
|
||||
|
||||
// Data initialization
|
||||
|
||||
for (size_t i = 0; i < N; ++i)
|
||||
{
|
||||
x[i] = 1;
|
||||
}
|
||||
|
||||
|
||||
cout << "\nStart Benchmarking sum\n";
|
||||
|
||||
auto t1 = system_clock::now(); // start timer
|
||||
// Do calculation
|
||||
double check(0.0),ss(0.0);
|
||||
for (size_t i = 0; i < NLOOPS; ++i)
|
||||
{
|
||||
check = sum(x);
|
||||
ss += check; // prevents the optimizer from removing unused calculation results.
|
||||
}
|
||||
|
||||
auto t2 = system_clock::now(); // stop timer
|
||||
auto duration = duration_cast<microseconds>(t2 - t1); // duration in microseconds
|
||||
double t_diff = static_cast<double>(duration.count()) / 1e6; // overall duration in seconds
|
||||
t_diff = t_diff/NLOOPS; // duration per loop seconds
|
||||
|
||||
|
||||
|
||||
// Check the correct result
|
||||
cout << "\n <x,y> = " << check << endl;
|
||||
if (static_cast<unsigned int>(check) != N)
|
||||
cout << " !! W R O N G result !!\n";
|
||||
cout << endl;
|
||||
|
||||
|
||||
// Timings and Performance
|
||||
cout << endl;
|
||||
cout.precision(2);
|
||||
|
||||
|
||||
double Gflops = 1.0*N / t_diff / 1024 / 1024 / 1024;
|
||||
double MemBandwidth = 1.0*N / t_diff / 1024 / 1024 / 1024 * sizeof(x[0]);
|
||||
|
||||
cout << "Total duration : " << t_diff*NLOOPS << endl;
|
||||
cout << "Timing in sec. : " << t_diff << endl;
|
||||
cout << "GFLOPS : " << Gflops << endl;
|
||||
cout << "GiByte/s : " << MemBandwidth << endl;
|
||||
|
||||
|
||||
return vector<double>{t_diff, Gflops, MemBandwidth};
|
||||
}
|
||||
|
||||
|
||||
vector<double> test_B(const size_t &NLOOPS, const size_t &N, const size_t &M)
|
||||
{
|
||||
cout << "#################### (B) ####################" << endl;
|
||||
|
||||
cout << "\nLOOPS = " << NLOOPS << endl;
|
||||
cout << "\nN = " << N << endl;
|
||||
cout << "\nM = " << M << endl;
|
||||
|
||||
// Memory allocation
|
||||
cout << "Memory allocation\n";
|
||||
|
||||
vector<double> A(M*N);
|
||||
vector<double> x(N);
|
||||
|
||||
cout.precision(2);
|
||||
cout << (1.0*M*N + N) * sizeof(x[0]) / 1024 / 1024 / 1024 << " GByte Memory allocated\n";
|
||||
cout.precision(6);
|
||||
|
||||
// Data initialization
|
||||
|
||||
for (size_t i = 0; i < M; ++i)
|
||||
for (size_t j = 0; j < N; ++j)
|
||||
A[N*i + j] = (i + j) % 219 + 1;
|
||||
|
||||
|
||||
for (size_t j = 0; j < N; ++j)
|
||||
{
|
||||
x[j] = 1.0/A[N*17 + j];
|
||||
}
|
||||
|
||||
cout << "\nStart Benchmarking MatVec\n";
|
||||
|
||||
auto t1 = system_clock::now(); // start timer
|
||||
// Do calculation
|
||||
vector<double> b(M);
|
||||
|
||||
for (size_t i = 0; i < NLOOPS; ++i)
|
||||
{
|
||||
b = MatVec_parallel(A, x);
|
||||
}
|
||||
|
||||
auto t2 = system_clock::now(); // stop timer
|
||||
auto duration = duration_cast<microseconds>(t2 - t1); // duration in microseconds
|
||||
double t_diff = static_cast<double>(duration.count()) / 1e6; // overall duration in seconds
|
||||
t_diff = t_diff/NLOOPS; // duration per loop seconds
|
||||
|
||||
|
||||
// Check the correct result
|
||||
cout << "\n <A[17,*],x> = " << b[17] << endl;
|
||||
if (static_cast<size_t>(b[17]) != N)
|
||||
{
|
||||
cout << " !! W R O N G result !!\n";
|
||||
}
|
||||
cout << endl;
|
||||
|
||||
|
||||
// Timings and Performance
|
||||
cout << endl;
|
||||
cout.precision(2);
|
||||
|
||||
double Gflops = (2.0*N*M) / t_diff / 1024 / 1024 / 1024;
|
||||
double MemBandwidth = (2.0*N*M + M)/ t_diff / 1024 / 1024 / 1024 * sizeof(x[0]);
|
||||
|
||||
cout << "Total duration : " << t_diff*NLOOPS << endl;
|
||||
cout << "Timing in sec. : " << t_diff << endl;
|
||||
cout << "GFLOPS : " << Gflops << endl;
|
||||
cout << "GiByte/s : " << MemBandwidth << endl;
|
||||
|
||||
|
||||
|
||||
return vector<double>{t_diff, Gflops, MemBandwidth};
|
||||
}
|
||||
|
||||
|
||||
vector<double> test_C(const size_t &NLOOPS, const size_t &L, const size_t &M, const size_t &N)
|
||||
{
|
||||
cout << "#################### (C) ####################" << endl;
|
||||
cout << "\nLOOPS = " << NLOOPS << endl;
|
||||
cout << "\nL = " << L << endl;
|
||||
cout << "\nM = " << M << endl;
|
||||
cout << "\nN = " << N << endl;
|
||||
|
||||
|
||||
// Memory allocation
|
||||
cout << "Memory allocation\n";
|
||||
|
||||
vector<double> A(M*L);
|
||||
vector<double> B(L*N);
|
||||
|
||||
cout.precision(2);
|
||||
cout << (1.0*M*L + L*N) *sizeof(A[0]) / 1024 / 1024 / 1024 << " GByte Memory allocated\n";
|
||||
cout.precision(6);
|
||||
|
||||
|
||||
// Data initialization
|
||||
|
||||
for (size_t i = 0; i < M; ++i)
|
||||
for (size_t k = 0; k < L; ++k)
|
||||
A[L*i + k] = (i + k) % 219 + 1;
|
||||
|
||||
for (size_t k = 0; k < L; ++k)
|
||||
for (size_t j = 0; j < N; ++j)
|
||||
B[N*k + j] = 1.0/A[L*17 + k];
|
||||
|
||||
|
||||
cout << "\nStart Benchmarking MatMat\n";
|
||||
|
||||
auto t1 = system_clock::now(); // start timer
|
||||
// Do calculation
|
||||
vector<double> C(M*N);
|
||||
double check;
|
||||
double check_sum = 0;
|
||||
|
||||
for (size_t i = 0; i < NLOOPS; ++i)
|
||||
{
|
||||
C = MatMat_parallel(A, B, L);
|
||||
|
||||
check = C[N*17];
|
||||
check_sum += check; // prevents the optimizer from removing unused calculation results.
|
||||
}
|
||||
cout << check_sum;
|
||||
|
||||
auto t2 = system_clock::now(); // stop timer
|
||||
auto duration = duration_cast<microseconds>(t2 - t1); // duration in microseconds
|
||||
double t_diff = static_cast<double>(duration.count()) / 1e6; // overall duration in seconds
|
||||
t_diff = t_diff/NLOOPS; // duration per loop seconds
|
||||
|
||||
|
||||
// Check the correct result
|
||||
cout << "\n C[17,0] = " << check << endl;
|
||||
if (static_cast<unsigned int>(check) != L)
|
||||
{
|
||||
cout << " !! W R O N G result !!, should be " << L <<"\n";
|
||||
}
|
||||
cout << endl;
|
||||
|
||||
// Timings and Performance
|
||||
cout << endl;
|
||||
cout.precision(2);
|
||||
|
||||
|
||||
double Gflops = (2.0*L*N*M) / t_diff / 1024 / 1024 / 1024;
|
||||
double MemBandwidth = (2.0*L*N*M + M*N)/ t_diff / 1024 / 1024 / 1024 * sizeof(A[0]);
|
||||
|
||||
cout << "Total duration : " << t_diff*NLOOPS << endl;
|
||||
cout << "Timing in sec. : " << t_diff << endl;
|
||||
cout << "GFLOPS : " << Gflops << endl;
|
||||
cout << "GiByte/s : " << MemBandwidth << endl;
|
||||
|
||||
|
||||
|
||||
return vector<double>{t_diff, Gflops, MemBandwidth};
|
||||
}
|
||||
|
||||
|
||||
vector<double> test_D(const size_t &NLOOPS, const size_t &N, const size_t &p)
|
||||
{
|
||||
cout << "#################### (D) ####################" << endl;
|
||||
cout << "\nLOOPS = " << NLOOPS << endl;
|
||||
cout << "\nN = " << N << endl;
|
||||
cout << "\np = " << p << endl;
|
||||
|
||||
// Memory allocation
|
||||
cout << "Memory allocation\n";
|
||||
|
||||
vector<double> a(p + 1, 0);
|
||||
vector<double> x(N);
|
||||
|
||||
cout.precision(2);
|
||||
cout << (1.0*(p + 1) + N) *sizeof(x[0]) / 1024 / 1024 / 1024 << " GByte Memory allocated\n";
|
||||
cout.precision(6);
|
||||
|
||||
// Data initialization
|
||||
|
||||
for (size_t j = 0; j < N; ++j)
|
||||
x[j] = 1.0*j;
|
||||
|
||||
for (size_t k = 0; k < p + 1; ++k)
|
||||
a[k] = pow(-1.0, k); // poly(x) = 1 - x + x^2 - x^3 + x^4 - ...
|
||||
|
||||
|
||||
|
||||
cout << "\nStart Benchmarking poly\n";
|
||||
|
||||
auto t1 = system_clock::now(); // start timer
|
||||
// Do calculation
|
||||
vector<double> y(N);
|
||||
double check;
|
||||
double check_sum;
|
||||
|
||||
for (size_t i = 0; i < NLOOPS; ++i)
|
||||
{
|
||||
y = poly_parallel(a, x);
|
||||
check = y[0];
|
||||
|
||||
check_sum += check; // prevents the optimizer from removing unused calculation results.
|
||||
}
|
||||
|
||||
auto t2 = system_clock::now(); // stop timer
|
||||
auto duration = duration_cast<microseconds>(t2 - t1); // duration in microseconds
|
||||
double t_diff = static_cast<double>(duration.count()) / 1e6; // overall duration in seconds
|
||||
t_diff = t_diff/NLOOPS; // duration per loop seconds
|
||||
|
||||
|
||||
|
||||
// Check the correct result
|
||||
cout << "\n poly(" << x[0] << ") = " << check << endl;
|
||||
if (abs(check - 1.0) > 1.0/1e6)
|
||||
{
|
||||
cout << " !! W R O N G result !!\n";
|
||||
}
|
||||
cout << endl;
|
||||
|
||||
|
||||
// Timings and Performance
|
||||
cout << endl;
|
||||
cout.precision(2);
|
||||
|
||||
|
||||
double Gflops = (N*(p + 1)*3.0) / t_diff / 1024 / 1024 / 1024;
|
||||
double MemBandwidth = (N*(2.0 + 3.0*(p + 1)))/ t_diff / 1024 / 1024 / 1024 * sizeof(x[0]);
|
||||
|
||||
cout << "Total duration : " << t_diff*NLOOPS << endl;
|
||||
cout << "Timing in sec. : " << t_diff << endl;
|
||||
cout << "GFLOPS : " << Gflops << endl;
|
||||
cout << "GiByte/s : " << MemBandwidth << endl;
|
||||
|
||||
|
||||
|
||||
return vector<double>{t_diff, Gflops, MemBandwidth};
|
||||
}
|
||||
13
ex5/ex5_4/benchmark_tests.h
Normal file
13
ex5/ex5_4/benchmark_tests.h
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
#pragma once
|
||||
#include <vector>
|
||||
using namespace std;
|
||||
|
||||
/** Benchmark (A): runs the inner product kernel NLOOPS times on vectors of length N.
 *  @return {seconds per loop, GFLOPS, GiByte/s}
 */
vector<double> test_A(const size_t &NLOOPS, const size_t &N);
|
||||
|
||||
/** Benchmark (A) sum: runs the vector entry sum kernel NLOOPS times on a vector of length N.
 *  @return {seconds per loop, GFLOPS, GiByte/s}
 */
vector<double> test_A_sum(const size_t &NLOOPS, const size_t &N);
|
||||
|
||||
/** Benchmark (B): runs the matrix-vector kernel NLOOPS times with M*N matrix entries
 *  and a vector of length N.
 *  @return {seconds per loop, GFLOPS, GiByte/s}
 */
vector<double> test_B(const size_t &NLOOPS, const size_t &N, const size_t &M);
|
||||
|
||||
/** Benchmark (C): runs the matrix-matrix kernel NLOOPS times with M*L entries in A,
 *  L*N entries in B and shared dimension L.
 *  @return {seconds per loop, GFLOPS, GiByte/s}
 */
vector<double> test_C(const size_t &NLOOPS, const size_t &L, const size_t &M, const size_t &N);
|
||||
|
||||
/** Benchmark (D): runs the polynomial evaluation kernel NLOOPS times with p + 1
 *  coefficients at N points.
 *  @return {seconds per loop, GFLOPS, GiByte/s}
 */
vector<double> test_D(const size_t &NLOOPS, const size_t &N, const size_t &p);
|
||||
141
ex5/ex5_4/benchmarks.cpp
Normal file
141
ex5/ex5_4/benchmarks.cpp
Normal file
|
|
@ -0,0 +1,141 @@
|
|||
#include "benchmarks.h"
|
||||
#include <cassert> // assert()
|
||||
#include <cmath>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <omp.h>
|
||||
|
||||
// (A) Inner product of two vectors (from skalar_stl)
// Returns the sum over i of x[i]*y[i]; both vectors must have the same length.
double scalar_parallel(vector<double> const &x, vector<double> const &y)
{
    size_t const len = x.size();
    assert(len == y.size());

    double dot = 0.0;
    // each thread accumulates a private partial result, combined by the reduction
    #pragma omp parallel for shared(x, y) reduction(+:dot)
    for (size_t idx = 0; idx < len; ++idx)
        dot += x[idx] * y[idx];

    return dot;
}
|
||||
|
||||
// (A) Vector entry sum
// Returns the sum of all entries of x (0.0 for an empty vector).
double sum(vector<double> const &x)
{
    size_t const count = x.size();
    double total = 0.0;

    // each thread accumulates a private partial result, combined by the reduction
    #pragma omp parallel for shared(x) reduction(+:total)
    for (size_t idx = 0; idx < count; ++idx)
        total += x[idx];

    return total;
}
|
||||
|
||||
|
||||
// (B) Matrix-vector product (from intro_vector_densematrix)
// A is accessed as a row-major 1D array with x.size() columns; the number of
// rows is derived from A.size(). Returns b with b[i] = sum_j A[i,j]*x[j].
vector<double> MatVec_parallel(vector<double> const &A, vector<double> const &x)
{
    size_t const ncols = x.size();
    size_t const total = A.size();
    assert(total % ncols == 0);        // make sure multiplication is possible
    size_t const nrows = total/ncols;

    vector<double> result(nrows);

    // rows are independent, so each thread handles a subset of them
    #pragma omp parallel for shared(A, x, result)
    for (size_t row = 0; row < nrows; ++row)
    {
        size_t const offset = ncols*row;   // start of this row in the 1D storage
        double acc = 0.0;
        for (size_t col = 0; col < ncols; ++col)
        {
            acc += A[offset + col] * x[col];
        }
        result[row] = acc;
    }

    return result;
}
|
||||
|
||||
|
||||
// (C) Matrix-matrix product
// A and B are accessed as row-major 1D arrays sharing the dimension L; the
// remaining dimensions are derived from the array sizes. Returns C with
// C[i,j] = sum_k A[i,k]*B[k,j].
vector<double> MatMat_parallel(vector<double> const &A, vector<double> const &B, size_t const &L)
{
    size_t const size_A = A.size();
    size_t const size_B = B.size();
    assert(size_A % L == 0 && size_B % L == 0);

    size_t const nrows = size_A/L;
    size_t const ncols = size_B/L;

    vector<double> C(nrows*ncols);     // value-initialized to zero

    // rows of C are independent, so each thread handles a subset of them
    #pragma omp parallel for shared(A, B, C)
    for (size_t row = 0; row < nrows; ++row)
    {
        size_t const c_offset = ncols*row;
        for (size_t k = 0; k < L; ++k)
        {
            double const a_rk = A[L*row + k];
            size_t const b_offset = ncols*k;
            for (size_t col = 0; col < ncols; ++col)
                C[c_offset + col] += a_rk*B[b_offset + col];
        }
    }

    return C;
}
|
||||
|
||||
|
||||
// (D) Evaluation of a polynomial function using Horner's scheme
// a holds the coefficients in ascending order (a[k] multiplies x^k), so
//   poly(x) = a[0] + a[1]*x + ... + a[p]*x^p   with p = a.size() - 1.
// Returns y with y[i] = poly(x[i]); an empty coefficient vector yields all zeros.
vector<double> poly_parallel(vector<double> const &a, vector<double> const &x)
{
    size_t const N = x.size();
    size_t const ncoeff = a.size();
    vector<double> y(N, 0.0);

    // evaluation points are independent, so each thread handles a subset of them
    #pragma omp parallel for shared(a, x, y)
    for (size_t i = 0; i < N; ++i)
    {
        double const x_i = x[i];
        double acc = 0.0;
        // Horner step, highest coefficient first: acc = acc*x + a[k].
        // (An accumulating `+=` here would compute (1 + x)*acc + a[k] instead.)
        for (size_t k = ncoeff; k-- > 0; )
        {
            acc = acc*x_i + a[k];
        }
        y[i] = acc;
    }

    return y;
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
55
ex5/ex5_4/benchmarks.h
Normal file
55
ex5/ex5_4/benchmarks.h
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
#pragma once
|
||||
#include <vector>
|
||||
using namespace std;
|
||||
|
||||
/** (A) Inner product of two vectors (from skalar_stl)
|
||||
@param[in] x vector
|
||||
@param[in] y vector
|
||||
@return resulting Euclidean inner product <x,y>
|
||||
*/
|
||||
double scalar_parallel(vector<double> const &x, vector<double> const &y);
|
||||
|
||||
|
||||
/** (A) Sum entries of vector
|
||||
@param[in] x vector
|
||||
@return sum
|
||||
*/
|
||||
double sum(vector<double> const &x);
|
||||
|
||||
|
||||
/** (B) Matrix-vector product (from intro_vector_densematrix)
|
||||
* @param[in] A dense matrix (1D access)
|
||||
* @param[in] x vector
|
||||
*
|
||||
* @return resulting vector
|
||||
*/
|
||||
vector<double> MatVec_parallel(vector<double> const &A, vector<double> const &x);
|
||||
|
||||
|
||||
/** (C) Matrix-matrix product
|
||||
* @param[in] A MxL dense matrix (1D access)
|
||||
* @param[in] B LxN dense matrix (1D access)
|
||||
* @param[in] shared_dim shared dimension L
|
||||
*
|
||||
* @return resulting MxN matrix
|
||||
*/
|
||||
vector<double> MatMat_parallel(vector<double> const &A, vector<double> const &B, size_t const &shared_dim);
|
||||
|
||||
|
||||
/** (D) Evaluation of a polynomial function using Horner's scheme
|
||||
* @param[in] a coefficient vector
|
||||
* @param[in] x vector with input values
|
||||
*
|
||||
* @return vector with output values
|
||||
*/
|
||||
vector<double> poly_parallel(vector<double> const &a, vector<double> const &x);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
84
ex5/ex5_4/main.cpp
Normal file
84
ex5/ex5_4/main.cpp
Normal file
|
|
@ -0,0 +1,84 @@
|
|||
#include "benchmark_tests.h"
|
||||
#include <iostream>
|
||||
#include <cmath>
|
||||
|
||||
int main()
|
||||
{
|
||||
vector<vector<double>> results_scalar;
|
||||
results_scalar.push_back(test_A(2000000, pow(10,3)));
|
||||
results_scalar.push_back(test_A(1000000, pow(10,4)));
|
||||
results_scalar.push_back(test_A(100000, pow(10,5)));
|
||||
results_scalar.push_back(test_A(10000, pow(10,6)));
|
||||
results_scalar.push_back(test_A(750, pow(10,7)));
|
||||
results_scalar.push_back(test_A(125, pow(10,8)));
|
||||
|
||||
|
||||
vector<vector<double>> results_sum;
|
||||
results_sum.push_back(test_A_sum(3000000, pow(10,3)));
|
||||
results_sum.push_back(test_A_sum(2000000, pow(10,4)));
|
||||
results_sum.push_back(test_A_sum(1000000, pow(10,5)));
|
||||
results_sum.push_back(test_A_sum(50000, pow(10,6)));
|
||||
results_sum.push_back(test_A_sum(2000, pow(10,7)));
|
||||
results_sum.push_back(test_A_sum(250, pow(10,8)));
|
||||
|
||||
|
||||
test_B(100, 20000, 10000);
|
||||
|
||||
test_C(25, 500, 1000, 1500);
|
||||
|
||||
test_D(100, 100, 1000000);
|
||||
|
||||
|
||||
|
||||
cout << endl << "###### Scalar ######" << endl;
|
||||
cout << "Timing\tGFLOPS\tGiByte/s" << endl;
|
||||
cout << "------------------------------" << endl;
|
||||
for (size_t i = 0; i < results_scalar.size(); ++i)
|
||||
cout << results_scalar[i][0] << "\t" << results_scalar[i][1] << "\t" << results_scalar[i][2] << endl;
|
||||
|
||||
cout << endl << "###### Sum ######" << endl;
|
||||
cout << "Timing\tGFLOPS\tGiByte/s" << endl;
|
||||
cout << "------------------------------" << endl;
|
||||
for (size_t i = 0; i < results_sum.size(); ++i)
|
||||
cout << results_sum[i][0] << "\t" << results_sum[i][1] << "\t" << results_sum[i][2] << endl;
|
||||
|
||||
|
||||
|
||||
|
||||
// ###### Scalar ######
|
||||
// Timing GFLOPS GiByte/s
|
||||
// ------------------------------
|
||||
// 3.4e-06 0.54 4.3
|
||||
// 4.6e-06 4 32
|
||||
// 1.6e-05 12 95
|
||||
// 0.0011 1.7 13
|
||||
// 0.0097 1.9 15
|
||||
// 0.075 2.5 20
|
||||
|
||||
|
||||
// ###### Sum ######
|
||||
// Timing GFLOPS GiByte/s
|
||||
// ------------------------------
|
||||
// 5.5e-06 0.17 1.3
|
||||
// 5.4e-06 1.7 14
|
||||
// 1.5e-05 6.1 49
|
||||
// 0.00013 7.2 57
|
||||
// 0.0033 2.8 23
|
||||
// 0.032 2.9 23
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// ######### NOT PARALLEL (from exercise sheet 2) #########
|
||||
// Timing GFLOPS GiByte/s
|
||||
// ----------------------------------
|
||||
// (A) 0.038 2.5 20
|
||||
// (B) 0.13 2.9 23
|
||||
// (C) 0.44 3.2 25
|
||||
// (D) 0.19 1.5 12
|
||||
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue