Solutions

This commit is contained in:
Markus Schmidt 2025-10-21 19:36:38 +02:00
commit d3aa42a3e0
64 changed files with 2726 additions and 0 deletions

5
sheet1/A/.vscode/settings.json vendored Normal file
View file

@ -0,0 +1,5 @@
{
"files.associations": {
"vector": "cpp"
}
}

30
sheet1/A/Makefile Normal file
View file

@ -0,0 +1,30 @@
#
# use GNU-Compiler tools
COMPILER=GCC_
# alternatively from the shell
# export COMPILER=GCC_
# or, alternatively from the shell
# make COMPILER=GCC_
# use Intel compilers
#COMPILER=ICC_
# use PGI compilers
# COMPILER=PGI_
SOURCES = main.cpp means.cpp
OBJECTS = $(SOURCES:.cpp=.o)
PROGRAM = main.${COMPILER}
# uncomment the next two lines for debugging and detailed performance analysis
CXXFLAGS += -g
LINKFLAGS += -g
# do not use -pg with PGI compilers
ifndef COMPILER
COMPILER=GCC_
endif
include ../${COMPILER}default.mk

View file

BIN
sheet1/A/main.GCC_ Executable file

Binary file not shown.

42
sheet1/A/main.cpp Normal file
View file

@ -0,0 +1,42 @@
#include <iostream>
#include <cmath>
#include <vector>
#include "means.h"
using namespace std;
/**
 * Driver for exercise A: evaluates arithmetic, geometric and harmonic mean
 * for three integer triples (parts c, d, e) and for a small vector (part f)
 * and prints one result line per data set.
 *
 * @return always 0
 */
int main() {
    double ar, ge, ha;

    // All parts report their three means in the same one-line format.
    const auto report = [&ar, &ge, &ha]() {
        cout << "Arithmetic mean: " << ar
             << ", geometric mean: " << ge
             << ", harmonic mean: " << ha << endl;
    };

    // c)
    means(1, 4, 16, ar, ge, ha);
    report();

    // d)
    means(2, 3, 5, ar, ge, ha);
    report();

    // e)
    means(1000, 4000, 16000, ar, ge, ha);
    report();

    // f)
    const vector<int> input = {1, 2, 3, 4, 5, 6};
    means_vector(input, ar, ge, ha);
    report();

    return 0;
}

BIN
sheet1/A/main.o Normal file

Binary file not shown.

36
sheet1/A/means.cpp Normal file
View file

@ -0,0 +1,36 @@
#include <iostream>
#include <cmath>
#include <vector>
using namespace std;
/**
 * Computes the arithmetic, geometric and harmonic mean of three integers.
 *
 * @param[in]  a   first value
 * @param[in]  b   second value
 * @param[in]  c   third value
 * @param[out] ar  arithmetic mean (a+b+c)/3
 * @param[out] ge  geometric mean, evaluated as product of the three cube roots
 * @param[out] ha  harmonic mean 3/(1/a + 1/b + 1/c)
 */
void means(int a, int b, int c, double &ar, double &ge, double &ha) {
    // Accumulate in double: the int expression a+b+c overflows for large inputs.
    ar = (static_cast<double>(a) + b + c) / 3.0;
    // Multiply the cube roots instead of pow(a*b*c,1.0/3.0) to prevent integer overflow of a*b*c.
    ge = std::pow(a, 1.0 / 3.0) * std::pow(b, 1.0 / 3.0) * std::pow(c, 1.0 / 3.0);
    ha = 3.0 / (1.0 / a + 1.0 / b + 1.0 / c);
}
/**
 * Computes arithmetic, geometric and harmonic mean of all entries of a vector.
 *
 * For an empty vector the text "Empty input" is printed and the output
 * parameters are left untouched.
 *
 * @param[in]  input  values to average
 * @param[out] ar     arithmetic mean
 * @param[out] ge     geometric mean (product of the size-th roots of the entries)
 * @param[out] ha     harmonic mean
 */
void means_vector(const std::vector<int> &input, double &ar, double &ge, double &ha) {
    const int count = input.size();
    if (count == 0) {
        std::cout << "Empty input" << std::endl;
        return;
    }

    const double root_exponent = 1.0 / count;
    double sum = 0;
    double root_product = 1;
    double reciprocal_sum = 0;
    for (const int value : input) {
        sum += value;
        root_product *= std::pow(value, root_exponent);
        reciprocal_sum += 1.0 / value;
    }

    ar = sum / count;
    ge = root_product;
    ha = count / reciprocal_sum;
}

9
sheet1/A/means.h Normal file
View file

@ -0,0 +1,9 @@
#ifndef MEANS_H_INCLUDED
#define MEANS_H_INCLUDED
#include <vector>
/**
 * Computes arithmetic, geometric and harmonic mean of three integers.
 *
 * @param[in]  a   first value
 * @param[in]  b   second value
 * @param[in]  c   third value
 * @param[out] ar  arithmetic mean
 * @param[out] ge  geometric mean
 * @param[out] ha  harmonic mean
 */
void means(int a, int b, int c, double &ar, double &ge, double &ha);
/**
 * Computes arithmetic, geometric and harmonic mean of all entries of @p input.
 *
 * @param[in]  input  values to average
 * @param[out] ar     arithmetic mean
 * @param[out] ge     geometric mean
 * @param[out] ha     harmonic mean
 */
void means_vector(const std::vector<int> &input, double &ar, double &ge, double &ha);
#endif // MEANS_H_INCLUDED

BIN
sheet1/A/means.o Normal file

Binary file not shown.

7
sheet1/B/.vscode/settings.json vendored Normal file
View file

@ -0,0 +1,7 @@
{
"files.associations": {
"iostream": "cpp",
"ostream": "cpp",
"random": "cpp"
}
}

39
sheet1/B/Makefile Normal file
View file

@ -0,0 +1,39 @@
#
# use GNU-Compiler tools
COMPILER=GCC_
# alternatively from the shell
# export COMPILER=GCC_
# or, alternatively from the shell
# make COMPILER=GCC_
# use Intel compilers
#COMPILER=ICC_
# use PGI compilers
# COMPILER=PGI_
# use CLANG compilers
# COMPILER=CLANG_
SOURCES = main.cpp file_io.cpp means.cpp
OBJECTS = $(SOURCES:.cpp=.o)
PROGRAM = main.${COMPILER}
# uncomment the next two lines for debugging and detailed performance analysis
CXXFLAGS += -g
LINKFLAGS += -g
# do not use -pg with PGI compilers
ifndef COMPILER
COMPILER=GCC_
endif
include ../${COMPILER}default.mk
task:
@pdflatex task
@pdflatex task

500
sheet1/B/data_1.txt Normal file
View file

@ -0,0 +1,500 @@
141
261
87
430
258
298
425
120
496
707
244
786
75
394
4
221
2
190
143
269
175
139
599
902
940
222
483
377
524
265
69
437
174
27
955
431
962
763
8
681
706
646
553
219
773
229
371
891
857
403
319
609
911
910
592
333
854
443
905
34
533
717
180
337
188
322
404
549
49
553
275
242
244
155
957
936
819
729
176
361
189
2
317
700
626
544
440
288
502
762
763
577
748
646
124
505
348
93
148
199
673
432
695
257
10
533
280
947
907
393
25
672
838
972
57
451
583
687
720
651
727
374
582
117
58
980
285
595
963
186
194
342
933
391
274
152
398
375
132
436
92
615
11
574
790
236
449
570
62
497
643
222
838
972
847
506
279
747
237
958
621
601
173
91
256
859
912
700
726
230
577
811
404
989
90
321
512
61
726
557
530
830
859
790
318
453
753
110
110
270
525
973
711
312
292
851
912
640
256
89
839
585
949
62
585
286
828
191
443
394
827
677
208
319
134
672
571
170
148
477
909
553
33
54
806
452
383
790
365
533
712
872
329
651
975
76
588
414
310
264
759
996
187
782
196
993
803
425
729
499
809
357
74
591
911
194
433
750
40
947
764
559
184
498
518
995
855
963
679
404
935
480
232
397
706
559
757
996
963
536
964
116
52
305
581
531
902
541
432
543
713
17
801
143
479
257
370
662
170
279
199
196
327
881
472
404
180
969
408
845
616
377
878
785
465
814
899
430
335
597
902
703
378
735
955
543
541
312
72
182
93
464
10
916
643
2
31
209
455
128
9
728
355
781
437
437
50
50
92
595
242
842
858
964
489
221
227
537
763
348
462
640
918
162
716
578
434
885
394
179
634
625
328
803
1000
981
128
233
24
608
111
408
885
549
370
209
441
957
125
471
857
44
692
979
284
134
686
910
611
900
194
755
347
419
156
820
625
739
806
68
951
498
756
743
832
157
458
619
933
836
896
583
583
855
35
886
408
37
747
155
144
606
255
325
402
407
387
610
167
189
95
324
770
235
741
693
825
828
294
310
524
326
832
811
557
263
681
234
457
385
539
992
756
981
235
529
52
757
602
858
989
930
410
1
541
208
220
326
96
748
749
544
339
833
553
958
893
357
547
347
623
797
746
126
823
26
415
732
782
368

60
sheet1/B/file_io.cbp Normal file
View file

@ -0,0 +1,60 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
<CodeBlocks_project_file>
<FileVersion major="1" minor="6" />
<Project>
<Option title="file_io" />
<Option pch_mode="2" />
<Option compiler="gcc" />
<Build>
<Target title="Debug">
<Option output="bin/Debug/file_io" prefix_auto="1" extension_auto="1" />
<Option object_output="obj/Debug/" />
<Option type="1" />
<Option compiler="gcc" />
<Compiler>
<Add option="-g" />
</Compiler>
</Target>
<Target title="Release">
<Option output="bin/Release/file_io" prefix_auto="1" extension_auto="1" />
<Option object_output="obj/Release/" />
<Option type="1" />
<Option compiler="gcc" />
<Compiler>
<Add option="-O2" />
</Compiler>
<Linker>
<Add option="-s" />
</Linker>
</Target>
</Build>
<Compiler>
<Add option="-Wshadow" />
<Add option="-Winit-self" />
<Add option="-Wunreachable-code" />
<Add option="-pedantic" />
<Add option="-std=c++11" />
<Add option="-Wextra" />
<Add option="-Wall" />
<Add option="-fexceptions" />
</Compiler>
<Unit filename="file_io.cpp" />
<Unit filename="file_io.h" />
<Unit filename="main.cpp" />
<Extensions>
<envvars />
<code_completion />
<lib_finder disable_auto="1" />
<debugger />
<DoxyBlocks>
<comment_style block="0" line="0" />
<doxyfile_project />
<doxyfile_build extract_all="1" />
<doxyfile_warnings />
<doxyfile_output />
<doxyfile_dot />
<general use_at_in_tags="1" />
</DoxyBlocks>
</Extensions>
</Project>
</CodeBlocks_project_file>

65
sheet1/B/file_io.cpp Normal file
View file

@ -0,0 +1,65 @@
#include "file_io.h"
#include <cassert>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>
using namespace std;
// [Str10, p.364]
/**
 * Appends all numbers that can be extracted from @p istr to @p v.
 *
 * Each number is parsed as double and converted to short when stored.
 * If reading stops before end-of-file, a message is printed; an unrecoverable
 * stream (badbit) raises std::runtime_error, a mere format failure resets the
 * stream state. Finally the capacity of @p v is trimmed.
 *
 * @param[in,out] istr input stream
 * @param[in,out] v    vector receiving the values
 */
void fill_vector(std::istream& istr, std::vector<short>& v)
{
    // read until extraction fails
    for (double value = 0; istr >> value; )
    {
        v.push_back(value);
    }

    const bool reached_end = istr.eof();
    if (!reached_end)
    {   // error handling
        std::cout << " Error handling \n";
        if (istr.bad())
        {
            throw std::runtime_error("Schwerer Fehler in istr");
        }
        if (istr.fail())
        {   // try to clean up
            std::cout << " Failed in reading all data.\n";
            istr.clear();
        }
    }

    v.shrink_to_fit();   // C++11
}
/**
 * Opens the text file @p file_name and replaces the content of @p v by the
 * numbers stored in it.
 *
 * If the file cannot be opened a message is printed and an assertion fails.
 *
 * @param[in]  file_name name of the input file
 * @param[out] v         vector receiving the file content
 */
void read_vector_from_file(const string& file_name, vector<short>& v)
{
    ifstream fin(file_name);                  // open the file in text mode
    if (!fin.is_open())                       // file not found
    {
        cout << "\nFile " << file_name << " has not been found.\n\n" ;
        assert( fin.is_open() && "File not found." );   // stops a debug build
        return;
    }

    // file found: discard old content, then read
    v.clear();
    fill_vector(fin, v);
}
/**
 * Writes every entry of @p v on its own line into the text file @p file_name,
 * replacing any previous content.
 *
 * If the file cannot be opened a message is printed and an assertion fails.
 *
 * @param[in] file_name name of the output file
 * @param[in] v         values to be written
 */
void write_vector_to_file(const string& file_name, const vector<double>& v)
{
    ofstream fout(file_name);                 // open the file in text mode
    if (!fout.is_open())
    {
        cout << "\nFile " << file_name << " has not been opened.\n\n" ;
        assert( fout.is_open() && "File not opened." );  // stops a debug build
        return;
    }

    for (const double value : v)
    {
        fout << value << endl;
    }
}

40
sheet1/B/file_io.h Normal file
View file

@ -0,0 +1,40 @@
#ifndef FILE_IO_H_INCLUDED
#define FILE_IO_H_INCLUDED
#include <iosfwd>
#include <string>
#include <vector>
//using namespace std;
/**
    This function opens the ASCII-file named @p file_name and reads its
    numbers into the C++ vector @p v. Previous content of @p v is discarded.
    If the file @p file_name does not exist then the code stops with an appropriate message.
    @param[in]  file_name name of the ASCII-file
    @param[out] v         C++ vector with short values
*/
void read_vector_from_file(const std::string& file_name, std::vector<short>& v);
/**
    This function opens the ASCII-file named @p file_name and rewrites it with the
    double data from the C++ vector @p v, one value per line.
    If there are problems in opening/generating file @p file_name
    then the code stops with an appropriate message.
    @param[in] file_name name of the ASCII-file
    @param[in] v         C++ vector with double values
*/
void write_vector_to_file(const std::string& file_name, const std::vector<double>& v);
/**
    Appends the numbers read from input stream @p istr to the short-vector @p v until
    this input stream ends regularly. The vector is not cleared; its memory is
    allocated automatically.
    @param[in,out] istr input stream
    @param[in,out] v    C++ vector with short values
    @warning A std::runtime_error is thrown if the stream is corrupted (badbit).
             A wrong data format only stops the reading, prints a message and
             resets the stream state.
*/
void fill_vector(std::istream& istr, std::vector<short>& v);
#endif // FILE_IO_H_INCLUDED

BIN
sheet1/B/file_io.o Normal file

Binary file not shown.

BIN
sheet1/B/main.GCC_ Executable file

Binary file not shown.

48
sheet1/B/main.cpp Normal file
View file

@ -0,0 +1,48 @@
#include "file_io.h"
#include "means.h"
#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
#include <cmath>
using namespace std;
/**
 * Reads the numbers stored in "data_1.txt", determines minimum, maximum,
 * arithmetic/geometric/harmonic mean and the (population) standard deviation,
 * prints them and writes the six results to "out_1.txt".
 *
 * @return 0 on success, 1 if no data could be read from the input file.
 */
int main()
{
    cout << "File einlesen." << endl;
    const string name("data_1.txt");        // name of input file
    const string name2("out_1.txt");        // name of output file
    vector<short> a;                        //-2^15 to 2^15-1 fits the values from the file

    read_vector_from_file(name, a);
    if (a.empty())
    {
        // min_element/max_element return end() for an empty range and the
        // means would stay uninitialized, so stop before touching them.
        cout << "No data read from " << name << endl;
        return 1;
    }

    const double min_value = *min_element(a.begin(), a.end());
    const double max_value = *max_element(a.begin(), a.end());

    double ar, ge, ha;
    means_vector(a, ar, ge, ha);

    // standard deviation: square root of the mean squared distance to ar
    double squared_sum = 0;
    for (const short value : a)
    {
        const double deviation = value - ar;
        squared_sum += deviation * deviation;
    }
    const double stddev = sqrt(squared_sum / a.size());

    cout << "min: " << min_value << ", max: " << max_value << endl;
    cout << "Arithmetic mean: " << ar
         << ", geometric mean: " << ge
         << ", harmonic mean: " << ha << endl;
    cout << "std: " << stddev << endl;

    const vector<double> results = {min_value, max_value, ar, ge, ha, stddev};
    write_vector_to_file(name2, results);
    return 0;
}

BIN
sheet1/B/main.o Normal file

Binary file not shown.

36
sheet1/B/means.cpp Normal file
View file

@ -0,0 +1,36 @@
#include <iostream>
#include <cmath>
#include <vector>
using namespace std;
/**
 * Computes the arithmetic, geometric and harmonic mean of three integers.
 *
 * @param[in]  a   first value
 * @param[in]  b   second value
 * @param[in]  c   third value
 * @param[out] ar  arithmetic mean (a+b+c)/3
 * @param[out] ge  geometric mean, evaluated as product of the three cube roots
 * @param[out] ha  harmonic mean 3/(1/a + 1/b + 1/c)
 */
void means(int a, int b, int c, double &ar, double &ge, double &ha) {
    // Accumulate in double: the int expression a+b+c overflows for large inputs.
    ar = (static_cast<double>(a) + b + c) / 3.0;
    // Multiply the cube roots instead of pow(a*b*c,1.0/3.0) to prevent integer overflow of a*b*c.
    ge = std::pow(a, 1.0 / 3.0) * std::pow(b, 1.0 / 3.0) * std::pow(c, 1.0 / 3.0);
    ha = 3.0 / (1.0 / a + 1.0 / b + 1.0 / c);
}
/**
 * Computes arithmetic, geometric and harmonic mean of all entries of a vector.
 *
 * For an empty vector the text "Empty input" is printed and the output
 * parameters are left untouched.
 *
 * @param[in]  input  values to average
 * @param[out] ar     arithmetic mean
 * @param[out] ge     geometric mean (product of the size-th roots of the entries)
 * @param[out] ha     harmonic mean
 */
void means_vector(const std::vector<short> &input, double &ar, double &ge, double &ha) {
    const int count = input.size();
    if (count == 0) {
        std::cout << "Empty input" << std::endl;
        return;
    }

    const double root_exponent = 1.0 / count;
    double sum = 0;
    double root_product = 1;
    double reciprocal_sum = 0;
    for (const short value : input) {
        sum += value;
        root_product *= std::pow(value, root_exponent);
        reciprocal_sum += 1.0 / value;
    }

    ar = sum / count;
    ge = root_product;
    ha = count / reciprocal_sum;
}

9
sheet1/B/means.h Normal file
View file

@ -0,0 +1,9 @@
#ifndef MEANS_H_INCLUDED
#define MEANS_H_INCLUDED
#include <vector>
/**
 * Computes arithmetic, geometric and harmonic mean of three integers.
 *
 * @param[in]  a   first value
 * @param[in]  b   second value
 * @param[in]  c   third value
 * @param[out] ar  arithmetic mean
 * @param[out] ge  geometric mean
 * @param[out] ha  harmonic mean
 */
void means(int a, int b, int c, double &ar, double &ge, double &ha);
/**
 * Computes arithmetic, geometric and harmonic mean of all entries of @p input.
 *
 * @param[in]  input  values to average
 * @param[out] ar     arithmetic mean
 * @param[out] ge     geometric mean
 * @param[out] ha     harmonic mean
 */
void means_vector(const std::vector<short> &input, double &ar, double &ge, double &ha);
#endif // MEANS_H_INCLUDED

BIN
sheet1/B/means.o Normal file

Binary file not shown.

6
sheet1/B/out_1.txt Normal file
View file

@ -0,0 +1,6 @@
1
1000
498.184
364.412
95.6857
287.905

6
sheet1/C/.vscode/settings.json vendored Normal file
View file

@ -0,0 +1,6 @@
{
"files.associations": {
"iostream": "cpp",
"ostream": "cpp"
}
}

30
sheet1/C/Makefile Normal file
View file

@ -0,0 +1,30 @@
#
# use GNU-Compiler tools
COMPILER=GCC_
# alternatively from the shell
# export COMPILER=GCC_
# or, alternatively from the shell
# make COMPILER=GCC_
# use Intel compilers
#COMPILER=ICC_
# use PGI compilers
# COMPILER=PGI_
SOURCES = main.cpp
OBJECTS = $(SOURCES:.cpp=.o)
PROGRAM = main.${COMPILER}
# uncomment the next two lines for debugging and detailed performance analysis
CXXFLAGS += -g
LINKFLAGS += -g
# do not use -pg with PGI compilers
ifndef COMPILER
COMPILER=GCC_
endif
include ../${COMPILER}default.mk

View file

BIN
sheet1/C/main.GCC_ Executable file

Binary file not shown.

70
sheet1/C/main.cpp Normal file
View file

@ -0,0 +1,70 @@
#include <iostream>
#include "timing.h"
using namespace std;
/**
 * Sums all numbers in [1, n] that are divisible by 3 or by 5 by testing
 * every candidate.
 *
 * @param[in] n upper end of the interval (inclusive)
 * @return sum of the multiples of 3 or 5 in [1, n]
 */
unsigned long multiplesOf3Or5(unsigned int n)
{
    unsigned long total = 0;
    for (unsigned int candidate = 1; candidate <= n; ++candidate)
    {
        const bool divisible = (candidate % 3 == 0) || (candidate % 5 == 0);
        if (divisible)
        {
            total += candidate;
        }
    }
    return total;
}
/**
 * Gauss summation 1 + 2 + ... + n = n(n+1)/2.
 *
 * @param[in] n last summand
 * @return the sum of the first @p n positive integers
 */
unsigned long gauss_sum(unsigned long n)
{
    // Halve the even factor first: the product n*(n+1) would overflow although
    // the final sum is still representable.
    return (n % 2 == 0) ? (n / 2) * (n + 1)
                        : n * ((n + 1) / 2);
}
/**
 * Sums all numbers in [1, n] that are divisible by 3 or by 5 without a loop.
 *
 * In [1, n] there are floor(n/3) multiples of 3 and floor(n/5) multiples of 5.
 * Their sums follow from the Gauss summation; the multiples of 15 appear in
 * both sums and are therefore subtracted once.
 *
 * @param[in] n upper end of the interval (inclusive)
 * @return sum of the multiples of 3 or 5 in [1, n]
 */
unsigned long multiplesOf3Or5_noLoop(unsigned int n)
{
    const unsigned long sum_of_threes   = 3 * gauss_sum(n / 3);
    const unsigned long sum_of_fives    = 5 * gauss_sum(n / 5);
    const unsigned long sum_of_fifteens = 15 * gauss_sum(n / 15);
    return sum_of_threes + sum_of_fives - sum_of_fifteens;
}
/**
 * Compares the loop-based and the closed-form summation of the multiples of
 * 3 or 5: both variants are evaluated 10000 times for n = 15, 1001 and
 * 1432987, then the results and the measured times are printed.
 *
 * @return always 0
 */
int main() {
    const unsigned int repetitions = 10000;
    unsigned long mA = 0, mB = 0, mC = 0, mD = 0, mE = 0, mF = 0;

    // variant with loop
    tic();
    for (unsigned int run = 0; run < repetitions; ++run)
    {
        mA = multiplesOf3Or5(15);
        mB = multiplesOf3Or5(1001);
        mC = multiplesOf3Or5(1432987);
    }
    const double timeA = toc();

    // variant with Gauss summation
    tic();
    for (unsigned int run = 0; run < repetitions; ++run)
    {
        mD = multiplesOf3Or5_noLoop(15);
        mE = multiplesOf3Or5_noLoop(1001);
        mF = multiplesOf3Or5_noLoop(1432987);
    }
    const double timeB = toc();

    // identical report layout for both variants
    const auto report = [](unsigned long r15, unsigned long r1001,
                           unsigned long r1432987, double seconds) {
        cout << "n = 15, result = " << r15 << endl;
        cout << "n = 1001, result = " << r1001 << endl;
        cout << "n = 1432987, result = " << r1432987 << endl;
        cout << "time: " << seconds << endl;
    };

    report(mA, mB, mC, timeA);
    cout << "--------------------------------------------------------------" <<endl;
    report(mD, mE, mF, timeB);
    return 0;
}

BIN
sheet1/C/main.o Normal file

Binary file not shown.

51
sheet1/C/timing.h Normal file
View file

@ -0,0 +1,51 @@
//
// Gundolf Haase, Oct 18 2024
//
#pragma once
#include <chrono> // timing
#include <stack>
//using Clock = std::chrono::system_clock; //!< The wall clock timer chosen
using Clock = std::chrono::high_resolution_clock;
using TPoint= std::chrono::time_point<Clock>;
// [Galowicz, C++17 STL Cookbook, p. 29]
// 'inline' (C++17) keeps a single stopwatch even if several source files include this header.
inline std::stack<TPoint> MyStopWatch; //!< starting time of stopwatch
/** Starts stopwatch timer.
* Use as @code tic(); myfunction(...) ; double tsec = toc(); @endcode
*
* The timing can be nested and the recent time point is stored on top of the stack.
*
* @return recent time point
* @see toc
*/
inline auto tic()
{
    MyStopWatch.push(Clock::now());
    return MyStopWatch.top();
}
/** Returns the elapsed time from stopwatch.
*
* The time point from top of the stack is used
* if time point @p t_b is not passed as input parameter.
* Use as @code tic(); myfunction(...) ; double tsec = toc(); @endcode
* or as @code auto t_b = tic(); myfunction(...) ; double tsec = toc(t_b); @endcode
* The last option is to be used in the case of
* non-nested but overlapping time measurements.
*
* In both cases the top entry of the stack is removed, i.e., each call of
* toc has to be preceded by a matching call of tic.
*
* @param[in] t_b start time of some stop watch
* @return elapsed time in seconds.
*
*/
inline double toc(TPoint const &t_b = MyStopWatch.top())
{
    // https://en.cppreference.com/w/cpp/chrono/treat_as_floating_point
    using Unit = std::chrono::seconds;
    using FpSeconds = std::chrono::duration<double, Unit::period>;
    auto const t_e = Clock::now();
    // With the default argument t_b refers to the top stack entry, so the
    // difference has to be evaluated before that entry is popped.
    double const elapsed = FpSeconds(t_e-t_b).count();
    MyStopWatch.pop();
    return elapsed;
}

View file

123
sheet1/CLANG_default.mk Normal file
View file

@ -0,0 +1,123 @@
# Basic definitions for using the Clang compiler suite sequentially
# requires setting of COMPILER=CLANG_
#CLANGPATH=//usr/lib/llvm-10/bin/
CC = ${CLANGPATH}clang
CXX = ${CLANGPATH}clang++
#CXX = ${CLANGPATH}clang++ -lomptarget -fopenmp-targets=nvptx64-nvidia-cuda --cuda-path=/opt/pgi/linux86-64/2017/cuda/8.0
#F77 = gfortran
LINKER = ${CXX}
#http://clang.llvm.org/docs/UsersManual.html#options-to-control-error-and-warning-messages
WARNINGS += -Weverything -Wno-c++98-compat -Wno-sign-conversion -Wno-date-time -Wno-shorten-64-to-32 -Wno-padded -ferror-limit=1
WARNINGS += -Wdocumentation -Wconversion -Wshadow -Wfloat-conversion -pedantic
#-fsyntax-only -Wdocumentation -Wconversion -Wshadow -Wfloat-conversion -pedantic
CXXFLAGS += -O3 -std=c++17 -ferror-limit=1 ${WARNINGS}
# don't use -Ofast
# -ftrapv
LINKFLAGS += -O3
# different libraries in Ubuntu or Manjaro
ifndef UBUNTU
UBUNTU=1
endif
# BLAS, LAPACK
LINKFLAGS += -llapack -lblas
# -lopenblas
ifeq ($(UBUNTU),1)
# ubuntu
else
# on archlinux
LINKFLAGS += -lcblas
endif
# interprocedural optimization
CXXFLAGS += -flto
LINKFLAGS += -flto
# very good check
# http://clang.llvm.org/extra/clang-tidy/
# good check, see: http://llvm.org/docs/CodingStandards.html#include-style
SWITCH_OFF=,-readability-magic-numbers,-readability-redundant-control-flow,-readability-redundant-member-init
SWITCH_OFF+=,-readability-redundant-member-init,-readability-isolate-declaration
#READABILITY=,readability*${SWITCH_OFF}
#TIDYFLAGS = -checks=llvm-*,-llvm-header-guard -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp"
TIDYFLAGS = -checks=llvm-*,-llvm-header-guard${READABILITY} -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp"
#TIDYFLAGS += -checks='modernize*
# ???
#TIDYFLAGS = -checks='cert*' -header-filter=.*
# MPI checks ??
#TIDYFLAGS = -checks='mpi*'
# ??
#TIDYFLAGS = -checks='performance*' -header-filter=.*
#TIDYFLAGS = -checks='portability-*' -header-filter=.*
#TIDYFLAGS = -checks='readability-*' -header-filter=.*
default: ${PROGRAM}
${PROGRAM}: ${OBJECTS}
$(LINKER) $^ ${LINKFLAGS} -o $@
clean:
@rm -f ${PROGRAM} ${OBJECTS}
clean_all:: clean
@rm -f *_ *~ *.bak *.log *.out *.tar
codecheck: tidy_check
tidy_check:
clang-tidy ${SOURCES} ${TIDYFLAGS} -- ${SOURCES}
# see also http://clang-developers.42468.n3.nabble.com/Error-while-trying-to-load-a-compilation-database-td4049722.html
run: clean ${PROGRAM}
# time ./${PROGRAM} ${PARAMS}
./${PROGRAM} ${PARAMS}
# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *
doc:
doxygen Doxyfile
#########################################################################
.cpp.o:
$(CXX) -c $(CXXFLAGS) -o $@ $<
.c.o:
$(CC) -c $(CFLAGS) -o $@ $<
.f.o:
$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# some tools
# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags)
cache: ${PROGRAM}
valgrind --tool=callgrind --simulate-cache=yes ./$^ ${PARAMS}
# kcachegrind callgrind.out.<pid> &
kcachegrind `ls -1tr callgrind.out.* |tail -1`
# Check for wrong memory accesses, memory leaks, ...
# use smaller data sets
mem: ${PROGRAM}
valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^ ${PARAMS}
# Simple run time profiling of your code
# CXXFLAGS += -g -pg
# LINKFLAGS += -pg
prof: ${PROGRAM}
perf record ./$^ ${PARAMS}
perf report
# gprof -b ./$^ > gp.out
# kprof -f gp.out -p gprof &
codecheck: tidy_check

58
sheet1/E/.vscode/settings.json vendored Normal file
View file

@ -0,0 +1,58 @@
{
"files.associations": {
"array": "cpp",
"atomic": "cpp",
"bit": "cpp",
"cctype": "cpp",
"charconv": "cpp",
"chrono": "cpp",
"clocale": "cpp",
"cmath": "cpp",
"compare": "cpp",
"concepts": "cpp",
"cstdarg": "cpp",
"cstddef": "cpp",
"cstdint": "cpp",
"cstdio": "cpp",
"cstdlib": "cpp",
"ctime": "cpp",
"cwchar": "cpp",
"cwctype": "cpp",
"deque": "cpp",
"list": "cpp",
"string": "cpp",
"unordered_map": "cpp",
"vector": "cpp",
"exception": "cpp",
"algorithm": "cpp",
"functional": "cpp",
"iterator": "cpp",
"memory": "cpp",
"memory_resource": "cpp",
"numeric": "cpp",
"optional": "cpp",
"random": "cpp",
"ratio": "cpp",
"string_view": "cpp",
"system_error": "cpp",
"tuple": "cpp",
"type_traits": "cpp",
"utility": "cpp",
"format": "cpp",
"initializer_list": "cpp",
"iomanip": "cpp",
"iosfwd": "cpp",
"iostream": "cpp",
"istream": "cpp",
"limits": "cpp",
"new": "cpp",
"numbers": "cpp",
"ostream": "cpp",
"span": "cpp",
"sstream": "cpp",
"stdexcept": "cpp",
"streambuf": "cpp",
"typeinfo": "cpp",
"variant": "cpp"
}
}

30
sheet1/E/Makefile Normal file
View file

@ -0,0 +1,30 @@
#
# use GNU-Compiler tools
COMPILER=GCC_
# alternatively from the shell
# export COMPILER=GCC_
# or, alternatively from the shell
# make COMPILER=GCC_
# use Intel compilers
#COMPILER=ICC_
# use PGI compilers
# COMPILER=PGI_
SOURCES = main.cpp
OBJECTS = $(SOURCES:.cpp=.o)
PROGRAM = main.${COMPILER}
# uncomment the next two lines for debugging and detailed performance analysis
CXXFLAGS += -g
LINKFLAGS += -g
# do not use -pg with PGI compilers
ifndef COMPILER
COMPILER=GCC_
endif
include ../${COMPILER}default.mk

View file

BIN
sheet1/E/main.GCC_ Executable file

Binary file not shown.

74
sheet1/E/main.cpp Normal file
View file

@ -0,0 +1,74 @@
#include <iostream>
#include <vector>
#include <random>
#include <chrono>
#include <algorithm>
#include "timing.h"
#include <list>
using namespace std;
std::default_random_engine generator;   //!< shared random engine (default seed)
/**
 * Draws a uniformly distributed random integer from the interval [1, n].
 *
 * @param[in] n upper end of the interval (inclusive), has to be at least 1
 * @return random number in [1, n]
 */
unsigned int randomInt(unsigned int n)
{
    // The distribution uses the same unsigned type as parameter and result;
    // with <int> an upper bound above INT_MAX became an invalid (negative) limit.
    std::uniform_int_distribution<unsigned int> distribution(1, n);
    return distribution(generator);
}
void insertVector(vector<unsigned int>& vec)
{
unsigned int n = vec.size();
unsigned int random;
for(unsigned int i = 0; i<n; i++)
{
random = randomInt(n);
vector<unsigned int>::iterator it = lower_bound(vec.begin(), vec.end(), random);
vec.insert(it, random);
}
}
void insertList(list<unsigned int>& l)
{
unsigned int n = l.size();
unsigned int random;
for(unsigned int i = 0; i<n; i++)
{
random = randomInt(n);
list<unsigned int>::iterator it = lower_bound(l.begin(), l.end(), random);
l.insert(it, random);
}
}
/**
 * Fills a vector and a list with 1..n (n = 10000), times the insertion of n
 * random numbers into each container and prints whether both containers are
 * still sorted, followed by the two measured times.
 *
 * @return always 0
 */
int main()
{
    const unsigned int n = 10000;
    vector<unsigned int> vec;
    list<unsigned int> l;
    for (unsigned int value = 1; value <= n; ++value)
    {
        vec.push_back(value);
        l.push_back(value);
    }

    tic();
    insertVector(vec);
    const double vecTime = toc();

    tic();
    insertList(l);
    const double listTime = toc();

    cout << is_sorted(vec.begin(), vec.end())<< endl;
    cout << is_sorted(l.begin(), l.end())<< endl;
    cout << "vector time: " << vecTime << endl;
    cout << "list time: " << listTime << endl;
    return 0;
}

BIN
sheet1/E/main.o Normal file

Binary file not shown.

51
sheet1/E/timing.h Normal file
View file

@ -0,0 +1,51 @@
//
// Gundolf Haase, Oct 18 2024
//
#pragma once
#include <chrono> // timing
#include <stack>
//using Clock = std::chrono::system_clock; //!< The wall clock timer chosen
using Clock = std::chrono::high_resolution_clock;
using TPoint= std::chrono::time_point<Clock>;
// [Galowicz, C++17 STL Cookbook, p. 29]
// 'inline' (C++17) keeps a single stopwatch even if several source files include this header.
inline std::stack<TPoint> MyStopWatch; //!< starting time of stopwatch
/** Starts stopwatch timer.
* Use as @code tic(); myfunction(...) ; double tsec = toc(); @endcode
*
* The timing can be nested and the recent time point is stored on top of the stack.
*
* @return recent time point
* @see toc
*/
inline auto tic()
{
    MyStopWatch.push(Clock::now());
    return MyStopWatch.top();
}
/** Returns the elapsed time from stopwatch.
*
* The time point from top of the stack is used
* if time point @p t_b is not passed as input parameter.
* Use as @code tic(); myfunction(...) ; double tsec = toc(); @endcode
* or as @code auto t_b = tic(); myfunction(...) ; double tsec = toc(t_b); @endcode
* The last option is to be used in the case of
* non-nested but overlapping time measurements.
*
* In both cases the top entry of the stack is removed, i.e., each call of
* toc has to be preceded by a matching call of tic.
*
* @param[in] t_b start time of some stop watch
* @return elapsed time in seconds.
*
*/
inline double toc(TPoint const &t_b = MyStopWatch.top())
{
    // https://en.cppreference.com/w/cpp/chrono/treat_as_floating_point
    using Unit = std::chrono::seconds;
    using FpSeconds = std::chrono::duration<double, Unit::period>;
    auto const t_e = Clock::now();
    // With the default argument t_b refers to the top stack entry, so the
    // difference has to be evaluated before that entry is popped.
    double const elapsed = FpSeconds(t_e-t_b).count();
    MyStopWatch.pop();
    return elapsed;
}

View file

5
sheet1/F/.vscode/settings.json vendored Normal file
View file

@ -0,0 +1,5 @@
{
"files.associations": {
"ostream": "cpp"
}
}

30
sheet1/F/Makefile Normal file
View file

@ -0,0 +1,30 @@
#
# use GNU-Compiler tools
COMPILER=GCC_
# alternatively from the shell
# export COMPILER=GCC_
# or, alternatively from the shell
# make COMPILER=GCC_
# use Intel compilers
#COMPILER=ICC_
# use PGI compilers
# COMPILER=PGI_
SOURCES = main.cpp
OBJECTS = $(SOURCES:.cpp=.o)
PROGRAM = main.${COMPILER}
# uncomment the next two lines for debugging and detailed performance analysis
CXXFLAGS += -g
LINKFLAGS += -g
# do not use -pg with PGI compilers
ifndef COMPILER
COMPILER=GCC_
endif
include ../${COMPILER}default.mk

View file

BIN
sheet1/F/main.GCC_ Executable file

Binary file not shown.

130
sheet1/F/main.cpp Normal file
View file

@ -0,0 +1,130 @@
#include <iostream>
#include "mayer_primes.h"
#include <vector>
#include <algorithm>
#include "timing.h"
using namespace std;
/**
 * Counts the decompositions k = p + q into two primes with p <= q.
 *
 * All primes up to @p k are generated; for each prime p <= k/2 a binary
 * search decides whether the partner q = k - p is prime as well.
 *
 * @param[in] k number to decompose
 * @return number of prime pairs (p, q), p <= q, with p + q == k
 */
unsigned int single_goldbach(unsigned int k)
{
    const vector<unsigned int> primes = get_primes(k);
    unsigned int decompositions = 0;
    for (const unsigned int p : primes)
    {
        if (p > k/2)
        {
            break;                          // remaining pairs would repeat with p and q swapped
        }
        const unsigned int partner = k - p;
        if (binary_search(primes.begin(), primes.end(), partner))
        {
            ++decompositions;
        }
    }
    return decompositions;
}
//one can call single_goldbach but this way it is faster cause you dont have to regenerate primes and search in list
/**
 * Counts for every number in [0, n] its decompositions into a sum of two
 * primes p + q with p <= q.
 *
 * @param[in] n largest number to be considered
 * @return vector of length n+1; entry k holds the number of prime pairs with p + q == k
 */
vector<unsigned int> count_goldbach(unsigned int n)
{
    vector<unsigned int> counts(n+1,0);
    const vector<unsigned int> primes = get_primes(n);
    for(auto p_it = primes.begin(); p_it != primes.end(); ++p_it)
    {
        const unsigned int p = *p_it;
        const unsigned int q_max = n - p;   // largest admissible partner; p <= n, hence no wrap-around
        if(p > q_max)
        {
            break;                          // 2p > n: neither this nor any larger prime has a partner
        }
        for(auto q_it = p_it; q_it != primes.end(); ++q_it) //start at p, otherwise pairs are counted twice
        {
            const unsigned int q = *q_it;
            if(q > q_max)
            {
                break;                      // primes are ascending, all further sums exceed n
            }
            counts[p+q] += 1;
        }
    }
    return counts;
}
/**
 * Collects for every number in [0, n] all decompositions into a sum of two
 * primes p + q with p <= q.
 *
 * @param[in] n largest number to be considered
 * @return vector of length n+1; entry k holds the list of pairs {p, q} with
 *         p + q == k. A number without any decomposition keeps the single
 *         placeholder pair {0, 0}.
 */
vector<vector<vector<unsigned int>>> count_goldbach_all(unsigned int n)
{
    vector<vector<vector<unsigned int>>> counts(n+1,{{0,0}});
    const vector<unsigned int> primes = get_primes(n);
    for(auto p_it = primes.begin(); p_it != primes.end(); ++p_it)
    {
        const unsigned int p = *p_it;
        const unsigned int q_max = n - p;   // largest admissible partner; p <= n, hence no wrap-around
        if(p > q_max)
        {
            break;                          // 2p > n: neither this nor any larger prime has a partner
        }
        for(auto q_it = p_it; q_it != primes.end(); ++q_it)
        {
            const unsigned int q = *q_it;
            if(q > q_max)
            {
                break;                      // primes are ascending, all further sums exceed n
            }
            vector<vector<unsigned int>>& pairs = counts[p+q];
            if (pairs.at(0).at(0) == 0) //first pair found: replace the placeholder
            {
                pairs = {{p,q}};
            }
            else
            {
                pairs.push_back({p,q});
            }
        }
    }
    return counts;
}
/**
 * Driver for the Goldbach exercise: prints the number of decompositions of
 * 694, the number up to 100000 with the most decompositions, the run time of
 * count_goldbach for several n and finally a headline for every even number
 * in [4, 694).
 *
 * @return always 0
 */
int main()
{
    //2
    cout << single_goldbach(694) << endl;

    //3
    const vector<unsigned int> counts = count_goldbach(100000);
    cout << (max_element(counts.begin(), counts.end()) - counts.begin()) << endl;

    //4
    const vector<unsigned int> nvalues = {10000, 100000, 400000, 1000000, 2000000, 10000000};
    for(const unsigned int n : nvalues)
    {
        tic();
        count_goldbach(n);
        const double seconds = toc();
        cout << "Time for n=" << n << ": " << seconds << endl;
    }
    /*Time for n=10000: 0.0006853
    Time for n=100000: 0.0371858
    Time for n=400000: 0.505129
    Time for n=1000000: 2.85873
    Time for n=2000000: 15.0026
    Time for n=10000000: 549.658*/

    //*)
    const unsigned int n2 = 694;
    const vector<vector<vector<unsigned int>>> counts2 = count_goldbach_all(n2);
    for(unsigned int even = 4; even < n2; even += 2)
    {
        cout << "Decomp for " << even << ":"<<endl;
        for(unsigned int j = 0; j < counts2.at(even).size(); j++)
        {
            //cout << counts2.at(even).at(j).at(0) << "+" << counts2.at(even).at(j).at(1) << "=" << even << endl;
        }
    }
    return 0;
}

BIN
sheet1/F/main.o Normal file

Binary file not shown.

73
sheet1/F/mayer_primes.h Normal file
View file

@ -0,0 +1,73 @@
#pragma once
#include <cstring> //memset
#include <vector>
//using namespace std;
/** \brief Determines all prime numbers in interval [2, @p max].
*
* The sieve of Eratosthenes is used.
*
* The implementation originates from <a href="http://code.activestate.com/recipes/576559-fast-prime-generator/">Florian Mayer</a>.
*
* \param[in] max end of interval for the prime number search.
* \return vector of prime numbers @f$2,3,5, ..., p<=max @f$.
*
* \copyright
* Copyright (c) 2008 Florian Mayer (adapted by Gundolf Haase 2018)
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
template <class T>
std::vector<T> get_primes(T max)
{
    std::vector<T> primes;
    if (max < 2)
    {
        return primes;              // no primes below 2 (also covers a negative max)
    }
    // Position of number v inside the bit field: byte index and bit mask.
    const auto byte_of = [](T v) { return static_cast<std::size_t>(v / 8); };
    const auto mask_of = [](T v) { return static_cast<unsigned char>(1u << (v % 8)); };
    // One bit per number in [0, max], all set to 1 (= candidate for a prime).
    // The vector releases its memory also if push_back throws.
    std::vector<unsigned char> sieve(byte_of(max) + 1, 0xFF);
    // The loops stop by comparing against max before incrementing, so the
    // counters cannot wrap around when max is the largest value of T.
    for (T x = 2; ; ++x)
    {
        if (sieve[byte_of(x)] & mask_of(x))
        {
            primes.push_back(x);
            // Is prime. Mark multiples 2x, 3x, ... <= max.
            for (T j = x; j <= max - x; )
            {
                j += x;
                sieve[byte_of(j)] &= static_cast<unsigned char>(~mask_of(j));
            }
        }
        if (x == max)
        {
            break;
        }
    }
    return primes;
}
//---------------------------------------------------------------
//int main() // by Florian Mayer
//{g++ -O3 -std=c++14 -fopenmp main.cpp && ./a.out
// vector<unsigned long> primes;
// primes = get_primes(10000000);
// // return 0;
// // Print out result.
// vector<unsigned long>::iterator it;
// for(it=primes.begin(); it < primes.end(); it++)
// cout << *it << " ";
//
// cout << endl;
// return 0;
//}

View file

@ -0,0 +1,3 @@
[ZoneTransfer]
ZoneId=3
HostUrl=https://imsc.uni-graz.at/haasegu/Lectures/Math2CPP/Examples/goldbach/mayer_primes.h

51
sheet1/F/timing.h Normal file
View file

@ -0,0 +1,51 @@
//
// Gundolf Haase, Oct 18 2024
//
#pragma once
#include <chrono> // timing
#include <stack>
//using Clock = std::chrono::system_clock; //!< The wall clock timer chosen
using Clock = std::chrono::high_resolution_clock;
using TPoint= std::chrono::time_point<Clock>;
// [Galowicz, C++17 STL Cookbook, p. 29]
// 'inline' (C++17) keeps a single stopwatch even if several source files include this header.
inline std::stack<TPoint> MyStopWatch; //!< starting time of stopwatch
/** Starts stopwatch timer.
* Use as @code tic(); myfunction(...) ; double tsec = toc(); @endcode
*
* The timing can be nested and the recent time point is stored on top of the stack.
*
* @return recent time point
* @see toc
*/
inline auto tic()
{
    MyStopWatch.push(Clock::now());
    return MyStopWatch.top();
}
/** Returns the elapsed time from stopwatch.
*
* The time point from top of the stack is used
* if time point @p t_b is not passed as input parameter.
* Use as @code tic(); myfunction(...) ; double tsec = toc(); @endcode
* or as @code auto t_b = tic(); myfunction(...) ; double tsec = toc(t_b); @endcode
* The last option is to be used in the case of
* non-nested but overlapping time measurements.
*
* In both cases the top entry of the stack is removed, i.e., each call of
* toc has to be preceded by a matching call of tic.
*
* @param[in] t_b start time of some stop watch
* @return elapsed time in seconds.
*
*/
inline double toc(TPoint const &t_b = MyStopWatch.top())
{
    // https://en.cppreference.com/w/cpp/chrono/treat_as_floating_point
    using Unit = std::chrono::seconds;
    using FpSeconds = std::chrono::duration<double, Unit::period>;
    auto const t_e = Clock::now();
    // With the default argument t_b refers to the top stack entry, so the
    // difference has to be evaluated before that entry is popped.
    double const elapsed = FpSeconds(t_e-t_b).count();
    MyStopWatch.pop();
    return elapsed;
}

View file

6
sheet1/G/.vscode/settings.json vendored Normal file
View file

@ -0,0 +1,6 @@
{
"files.associations": {
"vector": "cpp",
"ostream": "cpp"
}
}

30
sheet1/G/Makefile Normal file
View file

@ -0,0 +1,30 @@
#
# use GNU-Compiler tools
COMPILER=GCC_
# alternatively from the shell
# export COMPILER=GCC_
# or, alternatively from the shell
# make COMPILER=GCC_
# use Intel compilers
#COMPILER=ICC_
# use PGI compilers
# COMPILER=PGI_
SOURCES = main.cpp mylib.cpp
OBJECTS = $(SOURCES:.cpp=.o)
PROGRAM = main.${COMPILER}
# the next two lines enable debugging and detailed performance analysis (comment them out to disable)
CXXFLAGS += -g
LINKFLAGS += -g
# do not use -pg with PGI compilers
ifndef COMPILER
COMPILER=GCC_
endif
include ../${COMPILER}default.mk

View file

BIN
sheet1/G/main.GCC_ Executable file

Binary file not shown.

109
sheet1/G/main.cpp Normal file
View file

@ -0,0 +1,109 @@
#include "mylib.h"
#include <cassert>
#include <cmath>
#include <ctime>
#include <iostream>
#include <vector>
using namespace std;
int main()
{
//b)
DenseMatrix const M(5,3); // Dense matrix, also initialized
vector<double> const u{{1,2,3}};
vector<double> f1 = M.Mult(u);
vector<double> const v{{-1,2,-3,4,-5}};
vector<double> f2 = M.MultT(v);
for(unsigned int i = 0; i < f1.size(); i++)
{
cout << f1.at(i) << " ";
}
cout << endl;
for(unsigned int i = 0; i < f2.size(); i++)
{
cout << f2.at(i) << " ";
}
cout << endl;
//c)
int n=5000;
DenseMatrix const A(n,n);
vector<double> w = {};
for(int i = -n/2; i < n/2; i++)
{
w.push_back(i);
}
int const NLOOPS=100;
double t1 = clock(); // start timer
vector<double> f3 = A.Mult(w);
for (int k=1; k<NLOOPS; ++k)
{
f3 = A.Mult(w);
}
t1 = (clock()-t1)/CLOCKS_PER_SEC/NLOOPS;
cout << "Time for Mult per it: " << t1 << endl;
double t2 = clock(); // start timer
vector<double> f4 = A.MultT(w);
for (int k=1; k<NLOOPS; ++k)
{
f4 = A.MultT(w);
}
t2 = (clock()-t2)/CLOCKS_PER_SEC/NLOOPS;
cout << "Time for MultT per it: " << t2 << endl;
//slower because consecutive calls to data vector of Matrix are more apart (+m) due to column wise access (non contiguous memory)
double maxDiff = 0.0;
for (size_t i = 0; i < f3.size(); ++i) {
double diff = abs(f3[i] - f4[i]);
if (diff > maxDiff) {
maxDiff = diff;
}
}
cout << "difference f3 and f4: " << maxDiff << endl;
//d)
cout << "-----------------DYADIC------------------" <<endl;
vector<double> x = {};
for(int k=0; k < n; k++)
{
x.push_back(f(k, n));
}
Dyadic const D(x,x);
double t3 = clock(); // start timer
vector<double> f5 = D.Mult(w);
for (int k=1; k<NLOOPS; ++k)
{
f5 = D.Mult(w);
}
t3 = (clock()-t3)/CLOCKS_PER_SEC/NLOOPS;
cout << "Time for Mult per it: " << t3 << endl;
double t4 = clock(); // start timer
vector<double> f6 = D.MultT(w);
for (int k=1; k<NLOOPS; ++k)
{
f6 = D.MultT(w);
}
t4 = (clock()-t4)/CLOCKS_PER_SEC/NLOOPS;
cout << "Time for MultT per it: " << t4 << endl;
//slower because consecutive calls to data vector of Matrix are more apart (+m) due to column wise access (non contiguous memory)
double maxDiff2 = 0.0;
for (size_t i = 0; i < f5.size(); ++i) {
double diff = abs(f5[i] - f6[i]);
if (diff > maxDiff2) {
maxDiff2 = diff;
}
}
cout << "difference f5 and f6: " << maxDiff2 << endl;
return 0;
}

BIN
sheet1/G/main.o Normal file

Binary file not shown.

109
sheet1/G/mylib.cpp Normal file
View file

@ -0,0 +1,109 @@
#include "mylib.h"
#include <cmath>
#include <iostream>
using namespace std;
/** Logistic (sigmoid) function evaluated at the k-th point of an equidistant grid.
 *
 *  The index @p k is mapped to t = 10*k/(nm-1) - 5 and 1/(1+exp(-t)) is returned.
 *
 *  @param[in] k   index of the grid point
 *  @param[in] nm  number of grid points; the expression divides by (nm-1),
 *                 so nm == 1 results in a division by zero
 *  @return value of the logistic function at the k-th grid point
 */
double f(unsigned int k, unsigned int nm) {
    double const t = 10.0 * k / (nm - 1) - 5;
    return 1.0 / (1.0 + std::exp(-t));
}
/** Constructs an n x m matrix stored row-wise with entries
 *  A(i,j) = f(i,nm) * f(j,nm), where nm = max(n,m).
 *
 *  @param[in] n  number of rows
 *  @param[in] m  number of columns
 */
DenseMatrix::DenseMatrix(unsigned int n, unsigned int m)
    : n_(n), m_(m), data_(static_cast<std::size_t>(n) * m)   // size computed in size_t to avoid 32-bit wrap-around
{
    unsigned int const nm = (n > m) ? n : m;

    // The column factors do not depend on the row: evaluate them once
    // instead of calling exp() for every single matrix entry.
    vector<double> colFactor(m_);
    for (unsigned int colIt = 0; colIt < m_; ++colIt)
    {
        colFactor[colIt] = f(colIt, nm);
    }

    std::size_t idx = 0;                        // running index rowIt*m_ + colIt
    for (unsigned int rowIt = 0; rowIt < n_; ++rowIt)
    {
        double const rowFactor = f(rowIt, nm);
        for (unsigned int colIt = 0; colIt < m_; ++colIt, ++idx)
        {
            data_[idx] = rowFactor * colFactor[colIt];
        }
    }
}
/** Matrix-vector product f = A*u.
 *
 *  @param[in] u  vector with m_ entries (number of columns)
 *  @return vector with n_ entries; an empty vector after printing a message
 *          to standard output if the length of @p u does not match
 */
vector<double> DenseMatrix::Mult(const vector<double> &u) const{
    if(u.size() != m_)
    {
        cout << "Dimension mismatch: expected " << m_ << " but got " << u.size() << "!" << endl;
        return {};
    }

    vector<double> f1;
    f1.reserve(n_);                             // final size is known: avoid repeated reallocation
    for(unsigned int rowIt = 0; rowIt < n_; rowIt++)
    {
        // row offset in size_t so that rowIt*m_ cannot wrap around in 32-bit arithmetic
        std::size_t const rowStart = static_cast<std::size_t>(rowIt) * m_;
        double sum = 0;
        for(unsigned int colIt = 0; colIt < m_; colIt++)
        {
            // u has exactly m_ entries (checked above), hence unchecked access is safe
            sum += data_.at(rowStart + colIt) * u[colIt];
        }
        f1.push_back(sum);
    }
    return f1;
}
/** Product with the transposed matrix, f = A^T*v.
 *
 *  The matrix is traversed column by column, i.e., consecutive accesses to the
 *  row-wise stored data are m_ entries apart.
 *
 *  @param[in] v  vector with n_ entries (number of rows)
 *  @return vector with m_ entries; an empty vector after printing a message
 *          to standard output if the length of @p v does not match
 */
vector<double> DenseMatrix::MultT(const vector<double> &v) const{
    if(v.size() != n_)
    {
        cout << "Dimension mismatch: expected " << n_ << " but got " << v.size() << "!" << endl;
        return {};
    }

    vector<double> f2;
    f2.reserve(m_);                             // final size is known: avoid repeated reallocation
    for(unsigned int colIt = 0; colIt < m_; colIt++)
    {
        double sum = 0;
        for(unsigned int rowIt = 0; rowIt < n_; rowIt++)
        {
            // index in size_t so that rowIt*m_ cannot wrap around in 32-bit arithmetic;
            // v has exactly n_ entries (checked above), hence unchecked access is safe
            sum += data_.at(static_cast<std::size_t>(rowIt) * m_ + colIt) * v[rowIt];
        }
        f2.push_back(sum);
    }
    return f2;
}
//-----------------------------------------------------------------------------------------------
/** Stores copies of the two vectors that define the dyadic product u * v^T.
 *
 *  @param[in] u  left vector, copied into u_
 *  @param[in] v  right vector, copied into v_
 */
Dyadic::Dyadic(vector<double>& u, vector<double>& v)
    : u_(u)
    , v_(v)
{
}
/** Product of the dyadic matrix with a vector, (u*v^T)*w = u * (v^T w).
 *
 *  @param[in] w  vector with as many entries as v_
 *  @return vector with as many entries as u_; an empty vector after printing
 *          a message to standard output if the length of @p w does not match
 */
vector<double> Dyadic::Mult(const vector<double> &w) const{
    // compare the sizes directly: no narrowing of size_t to unsigned int
    if(w.size() != v_.size())
    {
        cout << "Dimension mismatch!" << endl;
        return {};
    }

    // scalar product v^T w
    double skalar = 0;
    for(std::size_t vIt = 0; vIt < v_.size(); vIt++)
    {
        skalar += v_[vIt] * w[vIt];
    }

    // scale u by the scalar product
    vector<double> f1;
    f1.reserve(u_.size());                      // final size is known: avoid repeated reallocation
    for(double const ui : u_)
    {
        f1.push_back(ui * skalar);
    }
    return f1;
}
/** Product of the transposed dyadic matrix with a vector, (u*v^T)^T*w = v * (u^T w).
 *
 *  @param[in] w  vector with as many entries as u_
 *  @return vector with as many entries as v_; an empty vector after printing
 *          a message to standard output if the length of @p w does not match
 */
vector<double> Dyadic::MultT(const vector<double> &w) const{
    // compare the sizes directly: no narrowing of size_t to unsigned int
    if(w.size() != u_.size())
    {
        cout << "Dimension mismatch!" << endl;
        return {};
    }

    // scalar product u^T w
    double skalar = 0;
    for(std::size_t uIt = 0; uIt < u_.size(); uIt++)
    {
        skalar += u_[uIt] * w[uIt];
    }

    // scale v by the scalar product
    vector<double> f1;
    f1.reserve(v_.size());                      // final size is known: avoid repeated reallocation
    for(double const vi : v_)
    {
        f1.push_back(vi * skalar);
    }
    return f1;
}

36
sheet1/G/mylib.h Normal file
View file

@ -0,0 +1,36 @@
#ifndef MYLIB_H_INCLUDED
#define MYLIB_H_INCLUDED
#include <vector>
using namespace std;
double f(unsigned int k, unsigned int nm);
/** Dense n x m matrix with entries stored row-wise in one contiguous vector.
 *
 *  All standard library names are fully qualified so that the declaration
 *  does not depend on a using-directive being active in the including file.
 */
class DenseMatrix{
public:
    /** Creates an n x m matrix and initializes its entries.
     *  @param[in] n  number of rows
     *  @param[in] m  number of columns
     */
    DenseMatrix(unsigned int n, unsigned int m);

    /** Matrix-vector product A*u.
     *  @param[in] u  vector with m entries
     *  @return vector with n entries
     */
    std::vector<double> Mult(const std::vector<double> &u) const;

    /** Product with the transposed matrix, A^T*v.
     *  @param[in] v  vector with n entries
     *  @return vector with m entries
     */
    std::vector<double> MultT(const std::vector<double> &v) const;

private:
    unsigned int n_, m_;            //!< number of rows, number of columns
    std::vector<double> data_;      //!< matrix entries, entry (i,j) at index i*m_+j
};
/** Matrix given as dyadic product u*v^T; only the two vectors are stored.
 *
 *  All standard library names are fully qualified so that the declaration
 *  does not depend on a using-directive being active in the including file.
 */
class Dyadic{
public:
    /** Stores copies of both vectors.
     *  @param[in] u  left vector of the dyadic product
     *  @param[in] v  right vector of the dyadic product
     */
    Dyadic(std::vector<double>& u, std::vector<double>& v);

    /** Product (u*v^T)*w.
     *  @param[in] w  vector with as many entries as v
     *  @return vector with as many entries as u
     */
    std::vector<double> Mult(const std::vector<double> &w) const;

    /** Product (u*v^T)^T*w.
     *  @param[in] w  vector with as many entries as u
     *  @return vector with as many entries as v
     */
    std::vector<double> MultT(const std::vector<double> &w) const;

private:
    std::vector<double> u_;     //!< left vector
    std::vector<double> v_;     //!< right vector
};
#endif // MYLIB_H_INCLUDED

BIN
sheet1/G/mylib.o Normal file

Binary file not shown.

130
sheet1/GCC_AMD32_default.mk Normal file
View file

@ -0,0 +1,130 @@
# Basic Definitions for using GNU-compiler suite sequentially
# requires setting of COMPILER=GCC_
CC = gcc
CXX = g++
F77 = gfortran
LINKER = ${CXX}
# on mephisto:
#CXXFLAGS += -I/share/apps/atlas/include
#LINKFLAGS += -L/share/apps/atlas/lib
#LINKFLAGS += -lcblas -latlas
#LINKFLAGS += -lblas
# The <cblas.h> header has to be wrapped in extern "C" so that g++ finds all symbols.
#WARNINGS = -pedantic -pedantic-errors -Wall -Wextra -Werror -Wconversion -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow
WARNINGS = -pedantic -Wall -Wextra -Wconversion -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow \
-Wredundant-decls -Winline -fmax-errors=1
# -Wunreachable-code
# -Wunreachable-code
CXXFLAGS += -ffast-math -O3 -march=native -std=c++17 ${WARNINGS}
#CXXFLAGS += -Ofast -funroll-all-loops -std=c++17 ${WARNINGS}
#-msse3
# -ftree-vectorizer-verbose=2 -DNDEBUG
# -ftree-vectorizer-verbose=5
# -ftree-vectorize -fdump-tree-vect-blocks=foo.dump -fdump-tree-pre=stderr
# CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details
# CFLAGS = -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2
# #CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
# FFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
# LFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
LINKFLAGS += -O3
# BLAS, LAPACK
OPENBLAS_DIR = /opt/openblas_GCCseq
#OPENBLAS_DIR = /opt/openblas_GCC
OPENBLAS_LIBDIR = ${OPENBLAS_DIR}/lib
OPENBLAS_INCDIR = ${OPENBLAS_DIR}/include
CXXFLAGS += -I${OPENBLAS_INCDIR}
LINKFLAGS += -L${OPENBLAS_LIBDIR} -lopenblas
# interprocedural optimization
CXXFLAGS += -flto
LINKFLAGS += -flto
# profiling tools
#CXXFLAGS += -pg
#LINKFLAGS += -pg
default: ${PROGRAM}
${PROGRAM}: ${OBJECTS}
$(LINKER) $^ ${LINKFLAGS} -o $@
clean:
@rm -f ${PROGRAM} ${OBJECTS}
# Remove the build products and all generated auxiliary files, including the doxygen output.
# 'rm -rf' keeps the target silent when the html directory does not exist.
clean_all:: clean
	-@rm -f *_ *~ *.bak *.log *.out *.tar *.orig
	-@rm -rf html
run: clean ${PROGRAM}
# time ./${PROGRAM}
# ./${PROGRAM}
( export LD_LIBRARY_PATH=${OPENBLAS_LIBDIR}:${LD_LIBRARY_PATH} ; ./${PROGRAM} )
# or 'export LD_LIBRARY_PATH=/opt/openblas_gcc/lib:${LD_LIBRARY_PATH}' in your ~/.bashrc
# tar the current directory
MY_DIR = `basename ${PWD}`
tar:
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *
doc:
doxygen Doxyfile
#########################################################################
.cpp.o:
$(CXX) -c $(CXXFLAGS) -o $@ $<
.c.o:
$(CC) -c $(CFLAGS) -o $@ $<
.f.o:
$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# some tools
# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags)
cache: ${PROGRAM}
valgrind --tool=callgrind --simulate-cache=yes ./$^
# kcachegrind callgrind.out.<pid> &
kcachegrind `ls -1tr callgrind.out.* |tail -1`
# Check for wrong memory accesses, memory leaks, ...
# use smaller data sets
# no "-pg" in compile/link options
mem: ${PROGRAM}
valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
# Simple run time profiling of your code
# CXXFLAGS += -g -pg
# LINKFLAGS += -pg
prof: ${PROGRAM}
./$^
gprof -b ./$^ > gp.out
# kprof -f gp.out -p gprof &
#Trace your heap:
#> heaptrack ./main.GCC_
#> heaptrack_gui heaptrack.main.GCC_.<pid>.gz
heap: ${PROGRAM}
heaptrack ./$^ 11
heaptrack_gui `ls -1tr heaptrack.$^.* |tail -1` &
########################################################################
# get the detailed status of all optimization flags
info:
echo "detailed status of all optimization flags"
$(CXX) --version
$(CXX) -Q $(CXXFLAGS) --help=optimizers

183
sheet1/GCC_default.mk Normal file
View file

@ -0,0 +1,183 @@
# Basic Definitions for using GNU-compiler suite sequentially
# requires setting of COMPILER=GCC_
CC = gcc
CXX = g++
F77 = gfortran
LINKER = ${CXX}
WARNINGS = -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow \
-Wredundant-decls -Winline -fmax-errors=1
# -Wunreachable-code
CXXFLAGS += -ffast-math -O3 -march=native -std=c++17 ${WARNINGS}
#CXXFLAGS += -Ofast -funroll-all-loops -std=c++17 ${WARNINGS}
#-msse3
# -ftree-vectorizer-verbose=2 -DNDEBUG
# -ftree-vectorizer-verbose=5
# -ftree-vectorize -fdump-tree-vect-blocks=foo.dump -fdump-tree-pre=stderr
# CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details
# CFLAGS = -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2
# #CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
# FFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
# LFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp
LINKFLAGS += -O3
#architecture
#CPU = -march=znver2
CXXFLAGS += ${CPU}
LINKFLAGS += ${CPU}
# different libraries on Ubuntu or Manjaro
ifndef UBUNTU
UBUNTU=1
endif
# BLAS, LAPACK
ifeq ($(UBUNTU),1)
LINKFLAGS += -llapack -lblas
# -lopenblas
else
# on archlinux
LINKFLAGS += -llapack -lopenblas -lcblas
endif
# interprocedural optimization
CXXFLAGS += -flto
LINKFLAGS += -flto
# for debugging purposes (safe code)
# -fsanitize=leak # only one out the three can be used
# -fsanitize=address
# -fsanitize=thread
SANITARY = -fsanitize=address -fsanitize=undefined -fsanitize=null -fsanitize=return \
-fsanitize=bounds -fsanitize=alignment -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow \
-fsanitize=bool -fsanitize=enum -fsanitize=vptr
#CXXFLAGS += ${SANITARY}
#LINKFLAGS += ${SANITARY}
# profiling tools
#CXXFLAGS += -pg
#LINKFLAGS += -pg
default: ${PROGRAM}
${PROGRAM}: ${OBJECTS}
$(LINKER) $^ ${LINKFLAGS} -o $@
clean:
@rm -f ${PROGRAM} ${OBJECTS}
clean_all:: clean
-@rm -f *_ *~ *.bak *.log *.out *.tar *.orig *.optrpt
-@rm -rf html
run: clean ${PROGRAM}
#run: ${PROGRAM}
# time ./${PROGRAM} ${PARAMS}
./${PROGRAM} ${PARAMS}
# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *
#find . -size +10M > large_files
#--exclude-from ${MY_DIR}/large_files
zip: clean
@echo "Zip the directory: " ${MY_DIR}
@cd .. ;\
zip -r ${MY_DIR}.zip ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
doc:
doxygen Doxyfile
#########################################################################
.SUFFIXES: .f90
.cpp.o:
$(CXX) -c $(CXXFLAGS) -o $@ $<
# $(CXX) -c $(CXXFLAGS) -o $@ $< 2>&1 | tee -a $<.log
# $(CXX) -c $(CXXFLAGS) -o $@ $< 2>&1 | tee -a $(<:.cpp=.log)
.c.o:
$(CC) -c $(CFLAGS) -o $@ $<
.f.o:
$(F77) -c $(FFLAGS) -o $@ $<
.f90.o:
$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# some tools
# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags)
cache: ${PROGRAM}
valgrind --tool=callgrind --simulate-cache=yes ./$^ ${PARAMS}
# kcachegrind callgrind.out.<pid> &
kcachegrind `ls -1tr callgrind.out.* |tail -1`
# Check for wrong memory accesses, memory leaks, ...
# use smaller data sets
# no "-pg" in compile/link options
mem: ${PROGRAM}
valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^ ${PARAMS}
# Graphical interface
# valkyrie
# Simple run time profiling of your code
# CXXFLAGS += -g -pg
# LINKFLAGS += -pg
prof: ${PROGRAM}
perf record ./$^ ${PARAMS}
perf report
# gprof -b ./$^ > gp.out
# kprof -f gp.out -p gprof &
# perf in Ubuntu 20.04: https://www.howtoforge.com/how-to-install-perf-performance-analysis-tool-on-ubuntu-20-04/
# * install
# * sudo vi /etc/sysctl.conf
# add kernel.perf_event_paranoid = 0
#Trace your heap:
#> heaptrack ./main.GCC_
#> heaptrack_gui heaptrack.main.GCC_.<pid>.gz
heap: ${PROGRAM}
heaptrack ./$^ ${PARAMS}
heaptrack_gui `ls -1tr heaptrack.$^.* |tail -1` &
codecheck: $(SOURCES)
cppcheck --enable=all --inconclusive --std=c++17 --suppress=missingIncludeSystem $^
########################################################################
# get the detailed status of all optimization flags
info:
echo "detailed status of all optimization flags"
$(CXX) --version
$(CXX) -Q $(CXXFLAGS) --help=optimizers
lscpu
inxi -C
lstopo
# Excellent hardware info
# hardinfo
# Live monitoring of CPU frequency etc.
# sudo i7z
# Memory consumption
# vmstat -at -SM 3
# xfce4-taskmanager
# https://www.tecmint.com/check-linux-cpu-information/
#https://www.tecmint.com/monitor-cpu-and-gpu-temperature-in-ubuntu/
# Debugging:
# https://wiki.archlinux.org/index.php/Debugging

137
sheet1/ICC_default.mk Normal file
View file

@ -0,0 +1,137 @@
# Basic Definitions for using INTEL compiler suite sequentially
# requires setting of COMPILER=ICC_
#BINDIR = /opt/intel/bin/
# special on my sony [GH]
#BINDIR = /opt/save.intel/bin/
# very special on my sony [GH]
# FIND_LIBS = -L /opt/save.intel/composer_xe_2013.1.117/mkl/lib/intel64/libmkl_intel_lp64.so
# Error with g++-4.8 using icpc14.0,x:
# find directory wherein bits/c++config.h is located
# 'locate bits/c++config.h'
#FOUND_CONFIG = -I/usr/include/x86_64-linux-gnu/c++/4.8
CC = ${BINDIR}icc
CXX = ${BINDIR}icpc
F77 = ${BINDIR}ifort
LINKER = ${CXX}
WARNINGS = -Wall -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -wd2015,2012 -wn3
# -Winline -Wredundant-decls -Wunreachable-code
CXXFLAGS += -O3 -fargument-noalias -std=c++17 -DNDEBUG ${WARNINGS} -mkl ${FOUND_CONFIG}
# profiling tools
#CXXFLAGS += -pg
#LINKFLAGS += -pg
# -vec-report=3
# -qopt-report=5 -qopt-report-phase=vec
# -guide -parallel
# -guide-opts=string -guide-par[=n] -guide-vec[=n]
# -auto-p32 -simd
CXXFLAGS += -align
# use MKL by INTEL
# https://software.intel.com/content/www/us/en/develop/documentation/mkl-linux-developer-guide/top/linking-your-application-with-the-intel-math-kernel-library/linking-quick-start/using-the-mkl-compiler-option.html
# https://software.intel.com/content/www/us/en/develop/articles/intel-mkl-link-line-advisor.html
# LINKFLAGS += -L${BINDIR}../composer_xe_2013.1.117/mkl/lib/intel64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
#LINKFLAGS += -O3 -L/opt/intel/mkl/lib -mkl
LINKFLAGS += -O3 -mkl=sequential
# interprocedural optimization
CXXFLAGS += -ipo
LINKFLAGS += -ipo
# annotated assembler file
ANNOTED = -fsource-asm -S
default: ${PROGRAM}
${PROGRAM}: ${OBJECTS}
$(LINKER) $^ ${LINKFLAGS} -o $@
clean:
rm -f ${PROGRAM} ${OBJECTS}
clean_all:: clean
@rm -f *_ *~ *.bak *.log *.out *.tar
run: clean ${PROGRAM}
./${PROGRAM}
# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *
doc:
doxygen Doxyfile
#########################################################################
.cpp.o:
$(CXX) -c $(CXXFLAGS) -o $@ $<
.c.o:
$(CC) -c $(CFLAGS) -o $@ $<
.f.o:
$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# # some tools
# # Cache behaviour (CXXFLAGS += -g tracks down to source lines)
# cache: ${PROGRAM}
# valgrind --tool=callgrind --simulate-cache=yes ./$^
# # kcachegrind callgrind.out.<pid> &
#
# # Check for wrong memory accesses, memory leaks, ...
# # use smaller data sets
# mem: ${PROGRAM}
# valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^
#
# # Simple run time profiling of your code
# # CXXFLAGS += -g -pg
# # LINKFLAGS += -pg
# prof: ${PROGRAM}
# ./$^
# gprof -b ./$^ > gp.out
# # kprof -f gp.out -p gprof &
#
mem: inspector
prof: amplifier
cache: amplifier
gap_par_report:
${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt
# GUI for performance report
amplifier: ${PROGRAM}
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
# alternatively to the solution above:
#edit file /etc/sysctl.d/10-ptrace.conf and set variable kernel.yama.ptrace_scope variable to 0 .
amplxe-gui &
# GUI for Memory and Thread analyzer (race condition)
inspector: ${PROGRAM}
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
inspxe-gui &
advisor:
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
advixe-gui &
icc-info:
icpc -# main.cpp

176
sheet1/ONEAPI_default.mk Normal file
View file

@ -0,0 +1,176 @@
# Basic Definitions for using INTEL compiler suite sequentially
# requires setting of COMPILER=ONEAPI_
# https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onemkl/link-line-advisor.html
# requires
# source /opt/intel/oneapi/setvars.sh
# on AMD: export MKL_DEBUG_CPU_TYPE=5
#BINDIR = /opt/intel/oneapi/compiler/latest/linux/bin/
#MKL_ROOT = /opt/intel/oneapi/mkl/latest/
#export KMP_AFFINITY=verbose,compact
CC = ${BINDIR}icc
CXX = ${BINDIR}dpcpp
F77 = ${BINDIR}ifort
LINKER = ${CXX}
## Compiler flags
WARNINGS = -Wall -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -pedantic
WARNINGS += -Wpessimizing-move -Wredundant-move
#-wd2015,2012,2014 -wn3
# -Winline -Wredundant-decls -Wunreachable-code
# -qopt-subscript-in-range
# -vec-threshold0
CXXFLAGS += -O3 -std=c++17 ${WARNINGS}
#CXXFLAGS += -DMKL_ILP64 -I"${MKLROOT}/include"
#CXXFLAGS += -DMKL_ILP32 -I"${MKLROOT}/include"
LINKFLAGS += -O3
# interprocedural optimization
CXXFLAGS += -ipo
LINKFLAGS += -ipo
LINKFLAGS += -flto
# annotated Assembler file
ANNOTED = -fsource-asm -S
#architecture
CPU = -march=core-avx2
#CPU += -mtp=zen
# -xCORE-AVX2
# -axcode COMMON-AVX512 -axcode MIC-AVX512 -axcode CORE-AVX512 -axcode CORE-AVX2
CXXFLAGS += ${CPU}
LINKFLAGS += ${CPU}
# use MKL by INTEL
# https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onemkl/link-line-advisor.html
# sequential MKL
# use the 32 bit interface (LP64) instead of 64 bit interface (ILP64)
CXXFLAGS += -qmkl=sequential -UMKL_ILP64
LINKFLAGS += -O3 -qmkl=sequential -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
#LINKFLAGS += -O3 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
# shared libs: https://aur.archlinux.org/packages/intel-oneapi-compiler-static
# install intel-oneapi-compiler-static
# or
LINKFLAGS += -shared-intel
OPENMP = -qopenmp
CXXFLAGS += ${OPENMP}
LINKFLAGS += ${OPENMP}
# profiling tools
#CXXFLAGS += -pg
#LINKFLAGS += -pg
# -vec-report=3
# -qopt-report=5 -qopt-report-phase=vec -qopt-report-phase=openmp
# -guide -parallel
# -guide-opts=string -guide-par[=n] -guide-vec[=n]
# -auto-p32 -simd
# Reports: https://software.intel.com/en-us/articles/getting-the-most-out-of-your-intel-compiler-with-the-new-optimization-reports
#CXXFLAGS += -qopt-report=5 -qopt-report-phase=vec,par
#CXXFLAGS += -qopt-report=5 -qopt-report-phase=cg
# Redirect report from *.optrpt to stderr
# -qopt-report-file=stderr
# Guided parallelization
# -guide -parallel
# -guide-opts=string -guide-par[=n] -guide-vec[=n]
# -auto-p32 -simd
## run time checks
# https://www.intel.com/content/www/us/en/develop/documentation/fortran-compiler-oneapi-dev-guide-and-reference/top/compiler-reference/compiler-options/offload-openmp-and-parallel-processing-options/par-runtime-control-qpar-runtime-control.html
default: ${PROGRAM}
${PROGRAM}: ${OBJECTS}
$(LINKER) $^ ${LINKFLAGS} -o $@
clean:
rm -f ${PROGRAM} ${OBJECTS} *.optrpt
clean_all:: clean
@rm -f *_ *~ *.bak *.log *.out *.tar
run: clean ${PROGRAM}
./${PROGRAM}
# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *
doc:
doxygen Doxyfile
#########################################################################
.cpp.o:
$(CXX) -c $(CXXFLAGS) -o $@ $<
.c.o:
$(CC) -c $(CFLAGS) -o $@ $<
.f.o:
$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# some tools
# Cache behaviour (CXXFLAGS += -g tracks down to source lines)
# https://software.intel.com/content/www/us/en/develop/documentation/vtune-help/top/analyze-performance/microarchitecture-analysis-group/memory-access-analysis.html
mem: inspector
prof: vtune
cache: inspector
gap_par_report:
${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt
# GUI for performance report
amplifier: ${PROGRAM}
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
echo 0 | sudo tee /proc/sys/kernel/perf_event_paranoid
amplxe-gui &
# GUI for Memory and Thread analyzer (race condition)
inspector: ${PROGRAM}
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
# inspxe-gui &
vtune-gui ./${PROGRAM} &
# Survey analysis with Intel Advisor.
# Every recipe line runs in its own shell, so the environment script has to be
# sourced in the same shell invocation as the advisor commands that rely on it.
advisor: ${PROGRAM}
#	/opt/intel/oneapi/advisor/latest/bin64/advixe-gui &
#	advisor --collect=roofline ./${PROGRAM}
	bash -c 'source /opt/intel/oneapi/advisor/2021.2.0/advixe-vars.sh && \
	advisor --collect=survey ./${PROGRAM} && \
	advisor --report=survey --project-dir=./ src:r=./ --format=csv --report-output=./out/survey.csv'
# Hotspot analysis with Intel VTune; the report is written to vtune.out.
# The program has to be built before it can be profiled.
vtune: ${PROGRAM}
	echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
# https://software.intel.com/en-us/articles/intel-advisor-2017-update-1-what-s-new
# An 'export' on its own recipe line is lost because every line runs in a new shell;
# the variable is therefore set directly for the command that needs it.
	ADVIXE_EXPERIMENTAL=roofline vtune -collect hotspots ./${PROGRAM}
	vtune -report hotspots -r r000hs > vtune.out
#	vtune-gui ./${PROGRAM} &
icc-info:
icpc -# main.cpp
# MKL on AMD
# https://www.computerbase.de/2019-11/mkl-workaround-erhoeht-leistung-auf-amd-ryzen/
#
# https://sites.google.com/a/uci.edu/mingru-yang/programming/mkl-has-bad-performance-on-an-amd-cpu
# export MKL_DEBUG_CPU_TYPE=5
# export MKL_NUM_THREADS=1
# export MKL_DYNAMIC=false
# on Intel compiler
# http://publicclu2.blogspot.com/2013/05/intel-complier-suite-reference-card.html

93
sheet1/PGI_default.mk Normal file
View file

@ -0,0 +1,93 @@
# Basic Definitions for using PGI-compiler suite sequentially
# requires setting of COMPILER=PGI_
# OPTIRUN = optirun
CC = pgcc
CXX = pgc++
F77 = pgfortran
LINKER = ${CXX}
# on mephisto:
#CXXFLAGS += -I/share/apps/atlas/include
#LINKFLAGS += -L/share/apps/atlas/lib
#LINKFLAGS += -lcblas -latlas
#LINKFLAGS += -lblas
# The <cblas.h> header has to be wrapped in extern "C" so that g++ finds all symbols.
WARNINGS = -Minform=warn
# -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -W -Wfloat-equal -Wshadow -Wredundant-decls
# -pedantic -Wunreachable-code -Wextra -Winline
# -Wunreachable-code
#PGI_PROFILING = -Minfo=ccff,loop,vect,opt,intensity,mp,accel
PGI_PROFILING = -Minfo=ccff,accel,ipa,loop,lre,mp,opt,par,unified,vect,intensity
# -Minfo
# -Mprof=time
# -Mprof=lines
# take care with option -Msafeptr
CXXFLAGS += -O3 -std=c++17 ${WARNINGS}
#CXXFLAGS += -O3 -std=c++11 -DNDEBUG ${PGI_PROFILING} ${WARNINGS}
# -fastsse -fargument-noalias ${WARNINGS} -msse3 -vec-report=3
default: ${PROGRAM}
${PROGRAM}: ${OBJECTS}
$(LINKER) $^ ${LINKFLAGS} -o $@
clean:
@rm -f ${PROGRAM} ${OBJECTS}
clean_all:: clean
@rm -f *_ *~ *.bak *.log *.out *.tar
run: clean ${PROGRAM}
./${PROGRAM}
# tar the current directory
MY_DIR = `basename ${PWD}`
tar: clean_all
@echo "Tar the directory: " ${MY_DIR}
@cd .. ;\
tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\
cd ${MY_DIR}
# tar cf `basename ${PWD}`.tar *
doc:
doxygen Doxyfile
#########################################################################
.cpp.o:
$(CXX) -c $(CXXFLAGS) -o $@ $<
.c.o:
$(CC) -c $(CFLAGS) -o $@ $<
.f.o:
$(F77) -c $(FFLAGS) -o $@ $<
##################################################################################################
# # some tools
# # Simple run time profiling of your code
# # CXXFLAGS += -g -pg
# # LINKFLAGS += -pg
# Profiling options PGI, see: pgcollect -help
# CPU_PROF = -allcache
CPU_PROF = -time
# GPU_PROF = -cuda=gmem,branch,cc13 -cudainit
#GPU_PROF = -cuda=branch:cc20
#
PROF_FILE = pgprof.out
cache: prof
prof: ${PROGRAM}
${OPTIRUN} ${BINDIR}pgcollect $(CPU_PROF) ./$^
${OPTIRUN} ${BINDIR}pgprof -exe ./$^ $(PROF_FILE) &
info:
pgaccelinfo -v