This commit is contained in:
dino.celebic 2025-11-11 15:48:42 +01:00
commit f0e10857a1
5 changed files with 29 additions and 32 deletions

View file

@ -1,14 +1,17 @@
// Compile with "make" / "make run" or
// g++ *.cpp -O3 -ffast-math -lopenblas -llapacke -o main
#include "task_3.h" #include "task_3.h"
#include "task_4+6.h" #include "task_4+6.h"
#include "task_5.h" #include "task_5.h"
#include "task_7.h" #include "task_7.h"
#include "timing.h" #include "timing.h"
#include <cblas.h> // cBLAS Library
#include <lapacke.h>
#include <iomanip> #include <iomanip>
#include <iostream> #include <iostream>
#include <cblas.h> // cBLAS Library
#include <lapacke.h>
void task_1() { void task_1() {
printf("\n\n-------------- Task 1 --------------\n\n"); printf("\n\n-------------- Task 1 --------------\n\n");
@ -23,12 +26,12 @@ void task_1() {
// Memory per array = 610.4 MiB (= 0.6 GiB). // Memory per array = 610.4 MiB (= 0.6 GiB).
// Total memory required = 1831.1 MiB (= 1.8 GiB). // Total memory required = 1831.1 MiB (= 1.8 GiB).
// Each kernel will be executed 20 times. // Each kernel will be executed 20 times.
// The *best* time for each kernel (excluding the first iteration) // The *best* time for each kernel (excluding the first iteration)
// will be used to compute the reported bandwidth. // will be used to compute the reported bandwidth.
// ------------------------------------------------------------- // -------------------------------------------------------------
// Your clock granularity/precision appears to be 1 microseconds. // Your clock granularity/precision appears to be 1 microseconds.
// Each test below will take on the order of 116886 microseconds. // Each test below will take on the order of 116886 microseconds.
// (= 116886 clock ticks) // (= 116886 clock ticks)
// Increase the size of the arrays if this shows that // Increase the size of the arrays if this shows that
// you are not getting at least 20 clock ticks per test. // you are not getting at least 20 clock ticks per test.
// ------------------------------------------------------------- // -------------------------------------------------------------
@ -46,25 +49,25 @@ void task_1() {
// ------------------------------------------------------------- // -------------------------------------------------------------
// ./flops.exe // ./flops.exe
// FLOPS C Program (Double Precision), V2.0 18 Dec 1992 // FLOPS C Program (Double Precision), V2.0 18 Dec 1992
// Module Error RunTime MFLOPS // Module Error RunTime MFLOPS
// (usec) // (usec)
// 1 4.0146e-13 0.0024 5827.9076 // 1 4.0146e-13 0.0024 5827.9076
// 2 -1.4166e-13 0.0007 10037.8942 // 2 -1.4166e-13 0.0007 10037.8942
// 3 4.7184e-14 0.0039 4371.9185 // 3 4.7184e-14 0.0039 4371.9185
// 4 -1.2557e-13 0.0034 4355.5711 // 4 -1.2557e-13 0.0034 4355.5711
// 5 -1.3800e-13 0.0066 4415.6439 // 5 -1.3800e-13 0.0066 4415.6439
// 6 3.2380e-13 0.0065 4441.6299 // 6 3.2380e-13 0.0065 4441.6299
// 7 -8.4583e-11 0.0053 2277.1707 // 7 -8.4583e-11 0.0053 2277.1707
// 8 3.4867e-13 0.0069 4367.6094 // 8 3.4867e-13 0.0069 4367.6094
// Iterations = 512000000 // Iterations = 512000000
// NullTime (usec) = 0.0000 // NullTime (usec) = 0.0000
// MFLOPS(1) = 7050.6178 // MFLOPS(1) = 7050.6178
// MFLOPS(2) = 3461.6233 // MFLOPS(2) = 3461.6233
// MFLOPS(3) = 4175.0442 // MFLOPS(3) = 4175.0442
// MFLOPS(4) = 4389.7311 // MFLOPS(4) = 4389.7311
} }
void task_2() { void task_2() {
@ -231,7 +234,7 @@ void task_7() {
// ################################# // #################################
// Benchmark // Benchmark
cout << fixed << setprecision(4); // 4 digits after decimal cout << fixed << setprecision(4);
size_t NLOOPS = 1000; size_t NLOOPS = 1000;
cout << "N = " << " | 1 | 2 | 4 | 8 | 16 | 32 " << endl; cout << "N = " << " | 1 | 2 | 4 | 8 | 16 | 32 " << endl;
cout << "---------|--------|--------|--------|--------|--------|-------" << endl; cout << "---------|--------|--------|--------|--------|--------|-------" << endl;
@ -248,9 +251,6 @@ void task_7() {
cout << endl; cout << endl;
} }
printf("\nFor fixed n, the solution time per rhs does not slow down consistently and scales very well.\nIts faster than expected."); printf("\nFor fixed n, the solution time per rhs does not slow down consistently and scales very well.\nIts faster than expected.");
} }
int main() { int main() {

View file

@ -1,7 +1,6 @@
#include "task_3.h" #include "task_3.h"
#include <vector> #include <vector>
#include <cassert> #include <cassert>
#include <iostream>
#include <cmath> #include <cmath>
using namespace std; using namespace std;

View file

@ -1,9 +1,9 @@
#include "task_3.h" #include "task_3.h"
#include "task_4+6.h" #include "task_4+6.h"
#include "timing.h" #include "timing.h"
#include <cblas.h> // cBLAS Library
#include <vector> #include <vector>
#include <iostream> #include <iostream>
#include <cblas.h> // cBLAS Library
using namespace std; using namespace std;
void print_performance(double sec, size_t memory, size_t flops, unsigned int size) { void print_performance(double sec, size_t memory, size_t flops, unsigned int size) {

View file

@ -1,5 +1,4 @@
#pragma once #pragma once
#include <vector> #include <vector>
using namespace std; using namespace std;

View file

@ -1,9 +1,8 @@
#include "task_7.h" #include "task_7.h"
#include <iostream>
#include <vector>
#include <cmath>
#include <lapacke.h> #include <lapacke.h>
#include <vector>
#include <iostream>
#include <cmath>
using namespace std; using namespace std;
tuple<vector<double>, vector<double>> init_M(size_t N, size_t Nrhs) { tuple<vector<double>, vector<double>> init_M(size_t N, size_t Nrhs) {