scf_celebic/ex3/code/task_4+6.cpp

#include "task_3.h"
#include "task_4+6.h"
#include "timing.h"
#include <vector>
#include <iostream>
#include <cblas.h>               // cBLAS Library
using namespace std;

void print_performance(double sec, size_t memory, size_t flops, unsigned int size) {
    printf("Memory allocated  : %.3f GByte\n", 1.0 * memory / 1024 / 1024 / 1024 * size);
    printf("Duration per loop : %.3f sec\n", sec);
    printf("GFLOPS            : %.3f\n", 1.0 * flops / sec / 1024 / 1024 / 1024);
    printf("GiByte/s          : %.3f\n", 1.0 * memory / sec / 1024 / 1024 / 1024 * size);
}

tuple<vector<double>, vector<double>> init_A(size_t N) {
    vector<double> x(N), y(N);
    for (size_t i = 0; i < N; ++i) {
        x[i] = i%219 + 1.0;
        y[i] = 1.0 / x[i];
    }
    return make_tuple(x, y);
}

void benchmark_A(vector<double> const &x, vector<double> const &y, size_t NLOOPS, bool cblas) {
    size_t N = x.size();

    double s(0.0), sum(0.0);
    if (cblas == false) {
        for (size_t i = 0; i < NLOOPS; ++i) {
            s = scalar(x, y);
            sum += s;
        }
    } else if (cblas == true) {
        for (size_t i = 0; i < NLOOPS; ++i) {
            s = cblas_ddot(N, x.data(), 1, y.data(), 1);
            sum += s;
        }
    }

    // Check correctness
    if (static_cast<size_t>(sum) != N*NLOOPS) {printf("  !!   W R O N G  result   !!\n");}
}

tuple<vector<double>, vector<double>> init_B(size_t M, size_t N) {
    vector<double> A(M*N), x(N);
    for (size_t i = 0; i < M; ++i) {
        for (size_t j = 0; j < N; ++j) {
            A[i*N + j] = (i+j)%219 + 1.0;
        }
    }
    for (size_t j = 0; j < N; ++j) {
        x[j] = 1.0/A[17*N + j];
    }
    return make_tuple(A, x);
}

void benchmark_B(vector<double> const &A, vector<double> const &x, size_t NLOOPS, bool cblas) {
    size_t N = x.size();
    size_t M = A.size() / N;
    vector<double> b(M);
    double sum(0.0);

    if (cblas == false) {
        for (size_t i = 0; i < NLOOPS; ++i) {
            b = matrix_vec(A,x);
            sum += b[17];
        }
    } else if (cblas == true) {
        for (size_t i = 0; i < NLOOPS; ++i) {
            cblas_dgemv(CblasRowMajor, CblasNoTrans, M, N, 1.0, A.data(), N, x.data(), 1, 0, b.data(), 1);
            sum += b[17];
        }
    }

    // Check correctness
    if (static_cast<size_t>(sum) != N*NLOOPS) {printf("  !!   W R O N G  result   !!\n");}
}

tuple<vector<double>, vector<double>> init_C(size_t M, size_t N, size_t L) {
    vector<double> A(M*L), B(L*N);
    for (size_t i = 0; i < M; ++i) {
        for (size_t j = 0; j < L; ++j) {
            A[i*L + j] = (i+j)%219 + 1.0;
        }
    }
    // B chosen such that C[0,17]=L
    // so B[i,17] = 1/A[0,i]
    for (size_t i = 0; i < L; ++i) {
        for (size_t j = 0; j < N; ++j) {
            if (j==17) {
                B[i*N + 17] = 1.0/A[i];
            } else {
                B[i*N + j] = (i+j)%219 + 1.0;
            }
        }
    }
    return make_tuple(A, B);
}

void benchmark_C(vector<double> const &A, vector<double> const &B, size_t L, size_t NLOOPS, bool cblas) {
    size_t M = A.size() / L;
    size_t N = B.size() / L;
    vector<double> C(M*N);
    double sum(0.0);

    if (cblas == false) {
        for (size_t i = 0; i < NLOOPS; ++i) {
            C = matrix_matrix(A,B,M);
            sum += C[17];
        }
    } else if (cblas == true) {
        for (size_t i = 0; i < NLOOPS; ++i) {
            cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, L, 1.0, A.data(), L, B.data(), N, 0.0, C.data(), N);
            sum += C[17];
        }
    }

    // Check correctness
    if (static_cast<size_t>(sum) != L*NLOOPS) {printf("  !!   W R O N G  result   !!\n");}
}

tuple<vector<double>, vector<double>> init_D(size_t N, size_t p) {
    // x_i = i/N for i=0,...,N-1
    // a_j = 1   for j=0,...,p-1
    vector<double> x(N), a(p);
    for (size_t i = 0; i < N; ++i) {
        x[i] = static_cast<double>(i) / N;
    }
    for (size_t j = 0; j < p; ++j) {
        a[j] = 1.0;
    }
    return make_tuple(x, a);
}

void benchmark_D(vector<double> const &x, vector<double> const &a, size_t NLOOPS) {
    size_t N = x.size();
    vector<double> y(N);
    double sum(0.0);

    for (size_t i = 0; i < NLOOPS; ++i) {
        y = poly(x,a);
        sum += y[0];
    }

    // Check correctness
    if (static_cast<size_t>(sum) != NLOOPS) {printf("  !!   W R O N G  result sum = %f  !!\n", sum);}
}