task8
This commit is contained in:
parent
7a02dff345
commit
f0e10857a1
5 changed files with 29 additions and 32 deletions
|
|
@@ -1,14 +1,17 @@
|
||||||
|
// Compile with "make" / "make run" or
|
||||||
|
// g++ *.cpp -O3 -ffast-math -lopenblas -llapacke -o main
|
||||||
|
|
||||||
|
|
||||||
#include "task_3.h"
|
#include "task_3.h"
|
||||||
#include "task_4+6.h"
|
#include "task_4+6.h"
|
||||||
#include "task_5.h"
|
#include "task_5.h"
|
||||||
#include "task_7.h"
|
#include "task_7.h"
|
||||||
#include "timing.h"
|
#include "timing.h"
|
||||||
|
#include <cblas.h> // cBLAS Library
|
||||||
|
#include <lapacke.h>
|
||||||
|
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <cblas.h> // cBLAS Library
|
|
||||||
#include <lapacke.h>
|
|
||||||
|
|
||||||
void task_1() {
|
void task_1() {
|
||||||
printf("\n\n-------------- Task 1 --------------\n\n");
|
printf("\n\n-------------- Task 1 --------------\n\n");
|
||||||
|
|
@@ -23,12 +26,12 @@ void task_1() {
|
||||||
// Memory per array = 610.4 MiB (= 0.6 GiB).
|
// Memory per array = 610.4 MiB (= 0.6 GiB).
|
||||||
// Total memory required = 1831.1 MiB (= 1.8 GiB).
|
// Total memory required = 1831.1 MiB (= 1.8 GiB).
|
||||||
// Each kernel will be executed 20 times.
|
// Each kernel will be executed 20 times.
|
||||||
// The *best* time for each kernel (excluding the first iteration)
|
// The *best* time for each kernel (excluding the first iteration)
|
||||||
// will be used to compute the reported bandwidth.
|
// will be used to compute the reported bandwidth.
|
||||||
// -------------------------------------------------------------
|
// -------------------------------------------------------------
|
||||||
// Your clock granularity/precision appears to be 1 microseconds.
|
// Your clock granularity/precision appears to be 1 microseconds.
|
||||||
// Each test below will take on the order of 116886 microseconds.
|
// Each test below will take on the order of 116886 microseconds.
|
||||||
// (= 116886 clock ticks)
|
// (= 116886 clock ticks)
|
||||||
// Increase the size of the arrays if this shows that
|
// Increase the size of the arrays if this shows that
|
||||||
// you are not getting at least 20 clock ticks per test.
|
// you are not getting at least 20 clock ticks per test.
|
||||||
// -------------------------------------------------------------
|
// -------------------------------------------------------------
|
||||||
|
|
@@ -46,25 +49,25 @@ void task_1() {
|
||||||
// -------------------------------------------------------------
|
// -------------------------------------------------------------
|
||||||
// ./flops.exe
|
// ./flops.exe
|
||||||
|
|
||||||
// FLOPS C Program (Double Precision), V2.0 18 Dec 1992
|
// FLOPS C Program (Double Precision), V2.0 18 Dec 1992
|
||||||
|
|
||||||
// Module Error RunTime MFLOPS
|
// Module Error RunTime MFLOPS
|
||||||
// (usec)
|
// (usec)
|
||||||
// 1 4.0146e-13 0.0024 5827.9076
|
// 1 4.0146e-13 0.0024 5827.9076
|
||||||
// 2 -1.4166e-13 0.0007 10037.8942
|
// 2 -1.4166e-13 0.0007 10037.8942
|
||||||
// 3 4.7184e-14 0.0039 4371.9185
|
// 3 4.7184e-14 0.0039 4371.9185
|
||||||
// 4 -1.2557e-13 0.0034 4355.5711
|
// 4 -1.2557e-13 0.0034 4355.5711
|
||||||
// 5 -1.3800e-13 0.0066 4415.6439
|
// 5 -1.3800e-13 0.0066 4415.6439
|
||||||
// 6 3.2380e-13 0.0065 4441.6299
|
// 6 3.2380e-13 0.0065 4441.6299
|
||||||
// 7 -8.4583e-11 0.0053 2277.1707
|
// 7 -8.4583e-11 0.0053 2277.1707
|
||||||
// 8 3.4867e-13 0.0069 4367.6094
|
// 8 3.4867e-13 0.0069 4367.6094
|
||||||
|
|
||||||
// Iterations = 512000000
|
// Iterations = 512000000
|
||||||
// NullTime (usec) = 0.0000
|
// NullTime (usec) = 0.0000
|
||||||
// MFLOPS(1) = 7050.6178
|
// MFLOPS(1) = 7050.6178
|
||||||
// MFLOPS(2) = 3461.6233
|
// MFLOPS(2) = 3461.6233
|
||||||
// MFLOPS(3) = 4175.0442
|
// MFLOPS(3) = 4175.0442
|
||||||
// MFLOPS(4) = 4389.7311
|
// MFLOPS(4) = 4389.7311
|
||||||
}
|
}
|
||||||
|
|
||||||
void task_2() {
|
void task_2() {
|
||||||
|
|
@@ -231,7 +234,7 @@ void task_7() {
|
||||||
// #################################
|
// #################################
|
||||||
// Benchmark
|
// Benchmark
|
||||||
|
|
||||||
cout << fixed << setprecision(4); // 4 digits after decimal
|
cout << fixed << setprecision(4);
|
||||||
size_t NLOOPS = 1000;
|
size_t NLOOPS = 1000;
|
||||||
cout << "N = " << " | 1 | 2 | 4 | 8 | 16 | 32 " << endl;
|
cout << "N = " << " | 1 | 2 | 4 | 8 | 16 | 32 " << endl;
|
||||||
cout << "---------|--------|--------|--------|--------|--------|-------" << endl;
|
cout << "---------|--------|--------|--------|--------|--------|-------" << endl;
|
||||||
|
|
@@ -248,9 +251,6 @@ void task_7() {
|
||||||
cout << endl;
|
cout << endl;
|
||||||
}
|
}
|
||||||
printf("\nFor fixed n, the solution time per rhs does not slow down consistently and scales very well.\nIts faster than expected.");
|
printf("\nFor fixed n, the solution time per rhs does not slow down consistently and scales very well.\nIts faster than expected.");
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
|
|
|
||||||
|
|
@@ -1,7 +1,6 @@
|
||||||
#include "task_3.h"
|
#include "task_3.h"
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <iostream>
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -1,9 +1,9 @@
|
||||||
#include "task_3.h"
|
#include "task_3.h"
|
||||||
#include "task_4+6.h"
|
#include "task_4+6.h"
|
||||||
#include "timing.h"
|
#include "timing.h"
|
||||||
|
#include <cblas.h> // cBLAS Library
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <cblas.h> // cBLAS Library
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
void print_performance(double sec, size_t memory, size_t flops, unsigned int size) {
|
void print_performance(double sec, size_t memory, size_t flops, unsigned int size) {
|
||||||
|
|
|
||||||
|
|
@@ -1,5 +1,4 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -1,9 +1,8 @@
|
||||||
#include "task_7.h"
|
#include "task_7.h"
|
||||||
|
|
||||||
#include <iostream>
|
|
||||||
#include <vector>
|
|
||||||
#include <cmath>
|
|
||||||
#include <lapacke.h>
|
#include <lapacke.h>
|
||||||
|
#include <vector>
|
||||||
|
#include <iostream>
|
||||||
|
#include <cmath>
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
tuple<vector<double>, vector<double>> init_M(size_t N, size_t Nrhs) {
|
tuple<vector<double>, vector<double>> init_M(size_t N, size_t Nrhs) {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue