#pragma once

// NOTE(review): this header was recovered from a copy in which every directive
// and declaration had been collapsed onto a single line and all `<...>` spans
// (include targets, template arguments) had been stripped. The include list and
// the `double` element type below are reconstructions -- confirm them against
// the definitions before relying on them.
#include <cstddef>
#include <tuple>
#include <vector>

// Kept only so that existing files including this header keep compiling
// unqualified; the declarations below do not depend on it.
using namespace std;

/// Reports performance figures for one measurement.
/// @param sec    presumably elapsed time in seconds -- confirm with the definition
/// @param memory presumably amount of memory touched -- unit not visible here
/// @param flops  presumably number of floating-point operations performed
/// @param size   presumably the problem size being reported
void print_performance(double sec, std::size_t memory, std::size_t flops,
                       unsigned int size);

/// Builds the two input vectors for case A from the size N.
/// NOTE(review): tuple order presumably matches benchmark_A's (x, y) -- confirm.
std::tuple<std::vector<double>, std::vector<double>> init_A(std::size_t N);

/// Builds the two inputs for case B from the sizes M and N.
/// NOTE(review): tuple order presumably matches benchmark_B's (A, x) -- confirm.
std::tuple<std::vector<double>, std::vector<double>> init_B(std::size_t M,
                                                            std::size_t N);

/// Builds the two inputs for case C from the sizes M, N and L.
/// NOTE(review): tuple order presumably matches benchmark_C's (A, B) -- confirm.
std::tuple<std::vector<double>, std::vector<double>> init_C(std::size_t M,
                                                            std::size_t N,
                                                            std::size_t L);

/// Builds the two inputs for case D from N and p.
/// NOTE(review): tuple order presumably matches benchmark_D's (x, a) -- confirm.
std::tuple<std::vector<double>, std::vector<double>> init_D(std::size_t N,
                                                            std::size_t p);

/// Runs benchmark A on the vectors x and y.
/// @param NLOOPS presumably the number of timed repetitions -- confirm
/// @param cblas  presumably selects a CBLAS-backed implementation -- confirm
void benchmark_A(std::vector<double> const &x, std::vector<double> const &y,
                 std::size_t NLOOPS, bool cblas);

/// Runs benchmark B on A and x.
/// @param NLOOPS presumably the number of timed repetitions -- confirm
/// @param cblas  presumably selects a CBLAS-backed implementation -- confirm
void benchmark_B(std::vector<double> const &A, std::vector<double> const &x,
                 std::size_t NLOOPS, bool cblas);

/// Runs benchmark C on A and B with the extra dimension L.
/// @param L      a size parameter; its exact role is not visible in this header
/// @param NLOOPS presumably the number of timed repetitions -- confirm
/// @param cblas  presumably selects a CBLAS-backed implementation -- confirm
void benchmark_C(std::vector<double> const &A, std::vector<double> const &B,
                 std::size_t L, std::size_t NLOOPS, bool cblas);

/// Runs benchmark D on x and a. Unlike A-C, it takes no `cblas` switch.
/// @param NLOOPS presumably the number of timed repetitions -- confirm
void benchmark_D(std::vector<double> const &x, std::vector<double> const &a,
                 std::size_t NLOOPS);