#include "task_3.h"
#include "task_4+6.h"
#include "timing.h"

#include <cblas.h>               // cBLAS Library
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <tuple>
#include <utility>
#include <vector>
using namespace std;

namespace {

// Preferred row/column index used by the B and C benchmarks for their
// self-check; clamped to the last valid index for smaller problems.
constexpr size_t kCheckIndex = 17;

// Returns the index used for the self-check in a dimension of length `extent`.
// Requires extent > 0.
size_t check_index(size_t extent)
{
    return std::min(kCheckIndex, extent - 1);
}

// True if the accumulated floating-point `sum` equals the integer `expected`
// up to rounding noise. Truncating `sum` instead would turn a value such as
// expected - 1e-12 into expected - 1 and report a false failure; NaN compares
// false and is therefore reported as wrong.
bool matches_expected(double sum, size_t expected)
{
    return std::fabs(sum - static_cast<double>(expected)) < 0.5;
}

} // namespace

// Prints the performance figures of one benchmark run.
//
// sec    : duration of one loop iteration in seconds
// memory : amount of memory touched; it is multiplied by `size` before printing
//          (presumably an element count -- confirm with callers)
// flops  : number of floating point operations per loop iteration
// size   : scaling factor for `memory` (presumably bytes per element -- confirm)
//
// NOTE(review): all figures, including the one labelled "GFLOPS", are scaled
// by 2^30 rather than 10^9.
void print_performance(double sec, size_t memory, size_t flops, unsigned int size)
{
    printf("Memory allocated : %.3f GByte\n", 1.0 * memory / 1024 / 1024 / 1024 * size);
    printf("Duration per loop : %.3f sec\n", sec);
    printf("GFLOPS : %.3f\n", 1.0 * flops / sec / 1024 / 1024 / 1024);
    printf("GiByte/s : %.3f\n", 1.0 * memory / sec / 1024 / 1024 / 1024 * size);
}

// Creates the input vectors for benchmark A (scalar product).
//
// x[i] = i%219 + 1 and y[i] = 1/x[i], so every product x[i]*y[i] is 1 up to
// rounding and the scalar product of x and y is N.
//
// Returns the tuple (x, y), both of length N.
tuple<vector<double>, vector<double>> init_A(size_t N)
{
    vector<double> x(N), y(N);
    for (size_t i = 0; i < N; ++i) {
        x[i] = i % 219 + 1.0;
        y[i] = 1.0 / x[i];
    }
    // Move instead of copy: the vectors can be very large.
    return make_tuple(std::move(x), std::move(y));
}

// Runs the scalar product of x and y NLOOPS times and checks the result.
//
// cblas == false : uses scalar(x, y)
// cblas == true  : uses cblas_ddot
//
// Every loop is expected to yield x.size(); a warning is printed if the
// accumulated sum deviates from x.size()*NLOOPS.
void benchmark_A(vector<double> const &x, vector<double> const &y, size_t NLOOPS, bool cblas)
{
    size_t const N = x.size();
    double sum(0.0);

    if (!cblas) {
        for (size_t i = 0; i < NLOOPS; ++i) {
            sum += scalar(x, y);
        }
    }
    else {
        for (size_t i = 0; i < NLOOPS; ++i) {
            sum += cblas_ddot(N, x.data(), 1, y.data(), 1);
        }
    }

    // Check correctness
    if (!matches_expected(sum, N * NLOOPS)) {
        printf(" !! W R O N G result !!\n");
    }
}

// Creates the input data for benchmark B (matrix-vector product).
//
// A is an M x N matrix stored row-wise with A[i,j] = (i+j)%219 + 1.
// x holds the reciprocals of the check row of A (row 17, or the last row if
// M <= 17), so that (A*x)[check row] == N. For M == 0 there is no row to
// invert and x stays zero.
//
// Returns the tuple (A, x).
tuple<vector<double>, vector<double>> init_B(size_t M, size_t N)
{
    vector<double> A(M * N), x(N);
    for (size_t i = 0; i < M; ++i) {
        for (size_t j = 0; j < N; ++j) {
            A[i * N + j] = (i + j) % 219 + 1.0;
        }
    }

    if (M > 0) {
        size_t const row = check_index(M);   // avoids reading beyond A for M <= 17
        for (size_t j = 0; j < N; ++j) {
            x[j] = 1.0 / A[row * N + j];
        }
    }

    return make_tuple(std::move(A), std::move(x));
}

// Runs the matrix-vector product b = A*x NLOOPS times and checks the result.
//
// A is interpreted as row-wise stored (A.size()/x.size()) x x.size() matrix.
// cblas == false : uses matrix_vec(A, x)
// cblas == true  : uses cblas_dgemv
//
// The check-row entry of b (see init_B) is expected to be x.size() in every
// loop; a warning is printed otherwise. Empty problems are rejected up front.
void benchmark_B(vector<double> const &A, vector<double> const &x, size_t NLOOPS, bool cblas)
{
    size_t const N = x.size();
    if (N == 0 || A.size() < N) {            // would divide by zero / index an empty b
        printf(" !! empty problem - nothing to benchmark !!\n");
        return;
    }
    size_t const M   = A.size() / N;
    size_t const row = check_index(M);

    vector<double> b(M);
    double sum(0.0);

    if (!cblas) {
        for (size_t i = 0; i < NLOOPS; ++i) {
            b = matrix_vec(A, x);
            sum += b[row];
        }
    }
    else {
        for (size_t i = 0; i < NLOOPS; ++i) {
            cblas_dgemv(CblasRowMajor, CblasNoTrans, M, N, 1.0, A.data(), N, x.data(), 1, 0, b.data(), 1);
            sum += b[row];
        }
    }

    // Check correctness
    if (!matches_expected(sum, N * NLOOPS)) {
        printf(" !! W R O N G result !!\n");
    }
}

// Creates the input data for benchmark C (matrix-matrix product).
//
// A is M x L, B is L x N, both stored row-wise, filled with (i+j)%219 + 1.
// The check column c of B (column 17, or the last column if N <= 17) is
// chosen such that C[0,c] = L, i.e. B[i,c] = 1/A[0,i]. If A has no rows
// (M == 0) there is nothing to invert and B keeps the regular pattern.
//
// Returns the tuple (A, B).
tuple<vector<double>, vector<double>> init_C(size_t M, size_t N, size_t L)
{
    vector<double> A(M * L), B(L * N);
    for (size_t i = 0; i < M; ++i) {
        for (size_t j = 0; j < L; ++j) {
            A[i * L + j] = (i + j) % 219 + 1.0;
        }
    }

    bool const   has_check = (M > 0 && N > 0);
    size_t const col       = has_check ? check_index(N) : 0;
    for (size_t i = 0; i < L; ++i) {
        for (size_t j = 0; j < N; ++j) {
            if (has_check && j == col) {
                B[i * N + j] = 1.0 / A[i];   // A[i] is A[0,i]
            }
            else {
                B[i * N + j] = (i + j) % 219 + 1.0;
            }
        }
    }

    return make_tuple(std::move(A), std::move(B));
}

// Runs the matrix-matrix product C = A*B NLOOPS times and checks the result.
//
// A is interpreted as (A.size()/L) x L, B as L x (B.size()/L), row-wise.
// cblas == false : uses matrix_matrix(A, B, M)
// cblas == true  : uses cblas_dgemm
//
// C[0, check column] (see init_C) is expected to be L in every loop; a
// warning is printed otherwise. Empty problems are rejected up front.
void benchmark_C(vector<double> const &A, vector<double> const &B, size_t L, size_t NLOOPS, bool cblas)
{
    if (L == 0 || A.size() < L || B.size() < L) {   // would divide by zero / index an empty C
        printf(" !! empty problem - nothing to benchmark !!\n");
        return;
    }
    size_t const M   = A.size() / L;
    size_t const N   = B.size() / L;
    size_t const col = check_index(N);

    vector<double> C(M * N);
    double sum(0.0);

    if (!cblas) {
        for (size_t i = 0; i < NLOOPS; ++i) {
            C = matrix_matrix(A, B, M);
            sum += C[col];
        }
    }
    else {
        for (size_t i = 0; i < NLOOPS; ++i) {
            cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, L, 1.0, A.data(), L, B.data(), N, 0.0, C.data(), N);
            sum += C[col];
        }
    }

    // Check correctness
    if (!matches_expected(sum, L * NLOOPS)) {
        printf(" !! W R O N G result !!\n");
    }
}

// Creates the input data for benchmark D (polynomial evaluation).
//
// x_i = i/N for i=0,...,N-1
// a_j = 1   for j=0,...,p-1
//
// Returns the tuple (x, a).
tuple<vector<double>, vector<double>> init_D(size_t N, size_t p)
{
    vector<double> x(N), a(p, 1.0);
    for (size_t i = 0; i < N; ++i) {
        x[i] = static_cast<double>(i) / N;
    }
    return make_tuple(std::move(x), std::move(a));
}

// Evaluates poly(x, a) NLOOPS times and checks the result.
//
// The first result entry y[0] is expected to be 1 in every loop (with the
// data from init_D, x[0] = 0 and all coefficients are 1); a warning with
// the accumulated sum is printed otherwise. Empty x is rejected up front.
void benchmark_D(vector<double> const &x, vector<double> const &a, size_t NLOOPS)
{
    size_t const N = x.size();
    if (N == 0) {                            // y[0] would not exist
        printf(" !! empty problem - nothing to benchmark !!\n");
        return;
    }

    vector<double> y(N);
    double sum(0.0);
    for (size_t i = 0; i < NLOOPS; ++i) {
        y = poly(x, a);
        sum += y[0];
    }

    // Check correctness
    if (!matches_expected(sum, NLOOPS)) {
        printf(" !! W R O N G result sum = %f !!\n", sum);
    }
}