sheet3
This commit is contained in:
parent
56614805cf
commit
2195a9db0a
51 changed files with 13038 additions and 0 deletions
286
sheet3/345/main.cpp
Normal file
286
sheet3/345/main.cpp
Normal file
|
|
@ -0,0 +1,286 @@
|
|||
#include "mylib.h"
|
||||
#include <cassert>
|
||||
#include <chrono> // timing
|
||||
#include <cmath> // sqrt()
|
||||
#include <cstdlib> // atoi()
|
||||
#include <cstring> // strncmp()
|
||||
#include <ctime>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include "benchmark.h"
|
||||
using namespace std;
|
||||
using namespace std::chrono; // timing
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
const unsigned int NA = 1400000;
|
||||
const unsigned int NLOOPSA = 2000;
|
||||
//const unsigned int NLOOPS = 10;
|
||||
|
||||
const unsigned int MC = 1000;
|
||||
int const NLOOPSC = 5;
|
||||
// ---------- Benchmark A ----------
|
||||
|
||||
{
|
||||
|
||||
|
||||
vector<double> xA(NA), yA(NA);
|
||||
for (unsigned int i = 0; i < NA; ++i)
|
||||
{
|
||||
double xi= (i % 219) + 1;
|
||||
xA[i] = xi;
|
||||
yA[i] = 1.0 / xi;
|
||||
}
|
||||
|
||||
auto tA1 = system_clock::now();
|
||||
double sA = 0.0, sumA = 0.0;
|
||||
for (unsigned int loop = 0; loop < NLOOPSA; ++loop)
|
||||
{
|
||||
sA = benchmark_A(xA, yA);
|
||||
sumA += sA;
|
||||
}
|
||||
auto tA2 = system_clock::now();
|
||||
|
||||
auto durA = duration_cast<microseconds>(tA2 - tA1);
|
||||
double tA = static_cast<double>(durA.count()) / 1e6 / NLOOPSA; //duration per loop seconds
|
||||
|
||||
cout << "\n===== Benchmark A =====\n";
|
||||
cout << "<xA,yA> = " << sA << endl;
|
||||
cout << "Timing in sec. : " << tA << endl;
|
||||
cout << "GFLOPS : " << 2.0 * NA / tA / 1024 / 1024 / 1024 << endl;
|
||||
cout << "GiByte/s : "
|
||||
<< 2.0 * NA * sizeof(xA[0]) / tA / 1024 / 1024 / 1024 << endl;
|
||||
}
|
||||
|
||||
// ---------- Benchmark B----------
|
||||
|
||||
{
|
||||
const unsigned int MB = 1700;
|
||||
const unsigned int NB = MB;
|
||||
const unsigned int NLOOPSB = 200;//50;
|
||||
|
||||
vector<double> AB(MB * NB);
|
||||
vector<double> xB(NB);
|
||||
|
||||
for (unsigned int i = 0; i < MB; ++i)
|
||||
for (unsigned int j = 0; j < NB; ++j)
|
||||
AB[i * NB + j] = (i+j) %219 +1;
|
||||
|
||||
for (unsigned int j = 0; j < NB; ++j)
|
||||
{
|
||||
|
||||
xB[j] = 1.0 / AB[17*NB+j];
|
||||
}
|
||||
|
||||
vector<double> bB;
|
||||
auto tB1 = system_clock::now();
|
||||
double guardB = 0.0;
|
||||
for (unsigned int loop = 0; loop < NLOOPSB; ++loop)
|
||||
{
|
||||
bB = benchmark_B(AB, xB);
|
||||
guardB += bB[17];
|
||||
}
|
||||
auto tB2 = system_clock::now();
|
||||
|
||||
auto durB = duration_cast<microseconds>(tB2 - tB1);
|
||||
double tB = static_cast<double>(durB.count()) / 1e6 / NLOOPSB;
|
||||
|
||||
double flopsB = 2.0 * MB * NB;
|
||||
double bytesB = (MB * NB + NB + MB) * sizeof(double);
|
||||
|
||||
cout << "\n===== Benchmark B =====\n";
|
||||
cout << guardB << endl;
|
||||
cout << "bytes: " << bytesB << endl;
|
||||
cout << "Timing in sec. : " << tB << endl;
|
||||
cout << "GFLOPS : " << flopsB / tB / 1024 / 1024 / 1024 << endl;
|
||||
cout << "GiByte/s : " << bytesB / tB / 1024 / 1024 / 1024 << endl;
|
||||
}
|
||||
|
||||
// ---------- Benchmark C ----------
|
||||
|
||||
{
|
||||
|
||||
const unsigned int LC = MC;
|
||||
const unsigned int NC = MC;
|
||||
|
||||
|
||||
vector<double> AC(MC * LC), BC(LC * NC);
|
||||
|
||||
for (unsigned int i = 0; i < MC; ++i)
|
||||
for (unsigned int j = 0; j < LC; ++j)
|
||||
AC[i * LC + j] = (i+j) %219 +1;
|
||||
|
||||
for (unsigned int i = 0; i < LC; ++i)
|
||||
for (unsigned int j = 0; j < NC; ++j)
|
||||
BC[i * NC + j] = (i+j) %219 +1;
|
||||
|
||||
vector<double> CC;
|
||||
auto tC1 = system_clock::now();
|
||||
double guardC = 0.0;
|
||||
for (unsigned int loop = 0; loop < NLOOPSC; ++loop)
|
||||
{
|
||||
CC = benchmark_C(AC, BC, MC);
|
||||
guardC += CC[0];
|
||||
}
|
||||
auto tC2 = system_clock::now();
|
||||
|
||||
auto durC = duration_cast<microseconds>(tC2 - tC1);
|
||||
double tC = static_cast<double>(durC.count()) / 1e6 / NLOOPSC;
|
||||
|
||||
double flopsC = 2.0 * MC * LC * NC;
|
||||
double bytesC = (MC * LC + LC * NC + MC * NC)* sizeof(double);
|
||||
|
||||
cout << "\n===== Benchmark C =====\n";
|
||||
cout << "bytes: " << bytesC << endl;
|
||||
cout << "Timing in sec. : " << tC << endl;
|
||||
cout << "GFLOPS : " << flopsC / tC / 1024 / 1024 / 1024 << endl;
|
||||
cout << "GiByte/s : " << bytesC / tC / 1024 / 1024 / 1024 << endl;
|
||||
}
|
||||
|
||||
// ---------- Benchmark D----------
|
||||
|
||||
|
||||
{
|
||||
const unsigned int ND = 2000000;
|
||||
const unsigned int p = 14; // degree p-1 = 15
|
||||
const unsigned int NLOOPSD = 100;
|
||||
vector<double> coeff(p, 0.0);
|
||||
vector<double> xD(ND);
|
||||
|
||||
for (unsigned int k = 0; k < p; ++k)
|
||||
coeff[k] = k%219+1;
|
||||
|
||||
for (unsigned int i = 0; i < ND; ++i)
|
||||
xD[i] = i%219+1;
|
||||
|
||||
vector<double> yD;
|
||||
auto tD1 = system_clock::now();
|
||||
double guardD = 0.0;
|
||||
for (unsigned int loop = 0; loop < NLOOPSD; ++loop)
|
||||
{
|
||||
|
||||
yD = benchmark_D(coeff, xD);
|
||||
guardD += yD[0];
|
||||
}
|
||||
auto tD2 = system_clock::now();
|
||||
|
||||
auto durD = duration_cast<microseconds>(tD2 - tD1);
|
||||
double tD = static_cast<double>(durD.count()) / 1e6 / NLOOPSD;
|
||||
|
||||
|
||||
double flopsD = ND * 2 * p;
|
||||
double bytesD = (p + 2 * ND)*sizeof(double);
|
||||
|
||||
cout << "\n===== Benchmark D =====\n";
|
||||
cout << "bytes: " << bytesD << endl;
|
||||
cout << "Timing in sec. : " << tD << endl;
|
||||
cout << "GFLOPS : " << flopsD / tD / 1024 / 1024 / 1024 << endl;
|
||||
cout << "GiByte/s : " << bytesD / tD / 1024 / 1024 / 1024 << endl;
|
||||
}
|
||||
|
||||
|
||||
//-------------TASK 5
|
||||
{
|
||||
|
||||
|
||||
vector<double> xA(NA);
|
||||
for (unsigned int i = 0; i < NA; ++i)
|
||||
{
|
||||
double xi= (i % 219) + 1;
|
||||
xA[i] = xi;
|
||||
}
|
||||
|
||||
auto tA1 = system_clock::now();
|
||||
double sA = 0.0, sumA = 0.0;
|
||||
for (unsigned int loop = 0; loop < NLOOPSA; ++loop)
|
||||
{
|
||||
sA = norm2(xA);
|
||||
sumA += sA;
|
||||
}
|
||||
auto tA2 = system_clock::now();
|
||||
|
||||
auto durA = duration_cast<microseconds>(tA2 - tA1);
|
||||
double tA = static_cast<double>(durA.count()) / 1e6 / NLOOPSA; //duration per loop seconds
|
||||
|
||||
cout << "\n===== Benchmark 5A =====\n";
|
||||
cout << "NORM = " << sA << endl;
|
||||
cout << "Timing in sec. : " << tA << endl;
|
||||
cout << "GFLOPS : " << 2.0 * NA / tA / 1024 / 1024 / 1024 << endl;
|
||||
cout << "GiByte/s : "
|
||||
<< NA * sizeof(xA[0]) / tA / 1024 / 1024 / 1024 << endl;
|
||||
}
|
||||
|
||||
|
||||
{
|
||||
|
||||
|
||||
vector<double> xA(NA), yA(NA);
|
||||
for (unsigned int i = 0; i < NA; ++i)
|
||||
{
|
||||
double xi= (i % 219) + 1;
|
||||
xA[i] = xi;
|
||||
yA[i] = 1.0 / xi;
|
||||
}
|
||||
|
||||
auto tA1 = system_clock::now();
|
||||
double sA = 0.0, sumA = 0.0;
|
||||
for (unsigned int loop = 0; loop < NLOOPSA; ++loop)
|
||||
{
|
||||
sA = scalar_kahan(xA, yA);
|
||||
sumA += sA;
|
||||
}
|
||||
auto tA2 = system_clock::now();
|
||||
|
||||
auto durA = duration_cast<microseconds>(tA2 - tA1);
|
||||
double tA = static_cast<double>(durA.count()) / 1e6 / NLOOPSA; //duration per loop seconds
|
||||
|
||||
cout << "\n===== Benchmark 5B =====\n";
|
||||
cout << "<xA,yA> = " << sA << endl;
|
||||
cout << "Timing in sec. : " << tA << endl;
|
||||
cout << "GFLOPS : " << 5.0 * NA / tA / 1024 / 1024 / 1024 << endl;
|
||||
cout << "GiByte/s : "
|
||||
<< 2.0 * NA * sizeof(xA[0]) / tA / 1024 / 1024 / 1024 << endl;
|
||||
}
|
||||
|
||||
|
||||
|
||||
{
|
||||
|
||||
const unsigned int LC = MC;
|
||||
const unsigned int NC = MC;
|
||||
|
||||
vector<double> AC(MC * LC), BC(LC * NC);
|
||||
|
||||
for (unsigned int i = 0; i < MC; ++i)
|
||||
for (unsigned int j = 0; j < LC; ++j)
|
||||
AC[i * LC + j] = (i+j) %219 +1;
|
||||
|
||||
for (unsigned int i = 0; i < LC; ++i)
|
||||
for (unsigned int j = 0; j < NC; ++j)
|
||||
BC[i * NC + j] = (i+j) %219 +1;
|
||||
|
||||
vector<double> CC;
|
||||
auto tC1 = system_clock::now();
|
||||
double guardC = 0.0;
|
||||
for (unsigned int loop = 0; loop < NLOOPSC; ++loop)
|
||||
{
|
||||
CC = matrixMultColumnWise(AC, BC, MC);
|
||||
guardC += CC[0];
|
||||
}
|
||||
auto tC2 = system_clock::now();
|
||||
|
||||
auto durC = duration_cast<microseconds>(tC2 - tC1);
|
||||
double tC = static_cast<double>(durC.count()) / 1e6 / NLOOPSC;
|
||||
|
||||
double flopsC = 2.0 * MC * LC * NC;
|
||||
double bytesC = (MC * LC + LC * NC + MC * NC)* sizeof(double);
|
||||
|
||||
cout << "\n===== Benchmark 5C =====\n";
|
||||
cout << "bytes: " << bytesC << endl;
|
||||
cout << "Timing in sec. : " << tC << endl;
|
||||
cout << "GFLOPS : " << flopsC / tC / 1024 / 1024 / 1024 << endl;
|
||||
cout << "GiByte/s : " << bytesC / tC / 1024 / 1024 / 1024 << endl;
|
||||
}
|
||||
|
||||
return 0;
|
||||
} // memory for x and y will be deallocated by their destructors
|
||||
Loading…
Add table
Add a link
Reference in a new issue