148 lines
No EOL
4.3 KiB
C++
148 lines
No EOL
4.3 KiB
C++
#include "task_3.h"
|
|
#include "task_4+6.h"
|
|
#include "timing.h"
|
|
#include <vector>
|
|
#include <iostream>
|
|
#include <cblas.h> // cBLAS Library
|
|
using namespace std;
|
|
|
|
void print_performance(double sec, size_t memory, size_t flops, unsigned int size) {
|
|
printf("Memory allocated : %.3f GByte\n", 1.0 * memory / 1024 / 1024 / 1024 * size);
|
|
printf("Duration per loop : %.3f sec\n", sec);
|
|
printf("GFLOPS : %.3f\n", 1.0 * flops / sec / 1024 / 1024 / 1024);
|
|
printf("GiByte/s : %.3f\n", 1.0 * memory / sec / 1024 / 1024 / 1024 * size);
|
|
}
|
|
|
|
tuple<vector<double>, vector<double>> init_A(size_t N) {
|
|
vector<double> x(N), y(N);
|
|
for (size_t i = 0; i < N; ++i) {
|
|
x[i] = i%219 + 1.0;
|
|
y[i] = 1.0 / x[i];
|
|
}
|
|
return make_tuple(x, y);
|
|
}
|
|
|
|
void benchmark_A(vector<double> const &x, vector<double> const &y, size_t NLOOPS, bool cblas) {
|
|
size_t N = x.size();
|
|
|
|
double s(0.0), sum(0.0);
|
|
if (cblas == false) {
|
|
for (size_t i = 0; i < NLOOPS; ++i) {
|
|
s = scalar(x, y);
|
|
sum += s;
|
|
}
|
|
} else if (cblas == true) {
|
|
for (size_t i = 0; i < NLOOPS; ++i) {
|
|
s = cblas_ddot(N, x.data(), 1, y.data(), 1);
|
|
sum += s;
|
|
}
|
|
}
|
|
|
|
// Check correctness
|
|
if (static_cast<size_t>(sum) != N*NLOOPS) {printf(" !! W R O N G result !!\n");}
|
|
}
|
|
|
|
tuple<vector<double>, vector<double>> init_B(size_t M, size_t N) {
|
|
vector<double> A(M*N), x(N);
|
|
for (size_t i = 0; i < M; ++i) {
|
|
for (size_t j = 0; j < N; ++j) {
|
|
A[i*N + j] = (i+j)%219 + 1.0;
|
|
}
|
|
}
|
|
for (size_t j = 0; j < N; ++j) {
|
|
x[j] = 1.0/A[17*N + j];
|
|
}
|
|
return make_tuple(A, x);
|
|
}
|
|
|
|
void benchmark_B(vector<double> const &A, vector<double> const &x, size_t NLOOPS, bool cblas) {
|
|
size_t N = x.size();
|
|
size_t M = A.size() / N;
|
|
vector<double> b(M);
|
|
double sum(0.0);
|
|
|
|
if (cblas == false) {
|
|
for (size_t i = 0; i < NLOOPS; ++i) {
|
|
b = matrix_vec(A,x);
|
|
sum += b[17];
|
|
}
|
|
} else if (cblas == true) {
|
|
for (size_t i = 0; i < NLOOPS; ++i) {
|
|
cblas_dgemv(CblasRowMajor, CblasNoTrans, M, N, 1.0, A.data(), N, x.data(), 1, 0, b.data(), 1);
|
|
sum += b[17];
|
|
}
|
|
}
|
|
|
|
// Check correctness
|
|
if (static_cast<size_t>(sum) != N*NLOOPS) {printf(" !! W R O N G result !!\n");}
|
|
}
|
|
|
|
tuple<vector<double>, vector<double>> init_C(size_t M, size_t N, size_t L) {
|
|
vector<double> A(M*L), B(L*N);
|
|
for (size_t i = 0; i < M; ++i) {
|
|
for (size_t j = 0; j < L; ++j) {
|
|
A[i*L + j] = (i+j)%219 + 1.0;
|
|
}
|
|
}
|
|
// B chosen such that C[0,17]=L
|
|
// so B[i,17] = 1/A[0,i]
|
|
for (size_t i = 0; i < L; ++i) {
|
|
for (size_t j = 0; j < N; ++j) {
|
|
if (j==17) {
|
|
B[i*N + 17] = 1.0/A[i];
|
|
} else {
|
|
B[i*N + j] = (i+j)%219 + 1.0;
|
|
}
|
|
}
|
|
}
|
|
return make_tuple(A, B);
|
|
}
|
|
|
|
void benchmark_C(vector<double> const &A, vector<double> const &B, size_t L, size_t NLOOPS, bool cblas) {
|
|
size_t M = A.size() / L;
|
|
size_t N = B.size() / L;
|
|
vector<double> C(M*N);
|
|
double sum(0.0);
|
|
|
|
if (cblas == false) {
|
|
for (size_t i = 0; i < NLOOPS; ++i) {
|
|
C = matrix_matrix(A,B,M);
|
|
sum += C[17];
|
|
}
|
|
} else if (cblas == true) {
|
|
for (size_t i = 0; i < NLOOPS; ++i) {
|
|
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, L, 1.0, A.data(), L, B.data(), N, 0.0, C.data(), N);
|
|
sum += C[17];
|
|
}
|
|
}
|
|
|
|
// Check correctness
|
|
if (static_cast<size_t>(sum) != L*NLOOPS) {printf(" !! W R O N G result !!\n");}
|
|
}
|
|
|
|
tuple<vector<double>, vector<double>> init_D(size_t N, size_t p) {
|
|
// x_i = i/N for i=0,...,N-1
|
|
// a_j = 1 for j=0,...,p-1
|
|
vector<double> x(N), a(p);
|
|
for (size_t i = 0; i < N; ++i) {
|
|
x[i] = static_cast<double>(i) / N;
|
|
}
|
|
for (size_t j = 0; j < p; ++j) {
|
|
a[j] = 1.0;
|
|
}
|
|
return make_tuple(x, a);
|
|
}
|
|
|
|
void benchmark_D(vector<double> const &x, vector<double> const &a, size_t NLOOPS) {
|
|
size_t N = x.size();
|
|
vector<double> y(N);
|
|
double sum(0.0);
|
|
|
|
for (size_t i = 0; i < NLOOPS; ++i) {
|
|
y = poly(x,a);
|
|
sum += y[0];
|
|
}
|
|
|
|
// Check correctness
|
|
if (static_cast<size_t>(sum) != NLOOPS) {printf(" !! W R O N G result sum = %f !!\n", sum);}
|
|
} |