diff --git a/sheet1/A/.vscode/settings.json b/sheet1/A/.vscode/settings.json new file mode 100644 index 0000000..a3b2b51 --- /dev/null +++ b/sheet1/A/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "files.associations": { + "vector": "cpp" + } +} \ No newline at end of file diff --git a/sheet1/A/Makefile b/sheet1/A/Makefile new file mode 100644 index 0000000..f8c4fd6 --- /dev/null +++ b/sheet1/A/Makefile @@ -0,0 +1,30 @@ +# +# use GNU-Compiler tools +COMPILER=GCC_ +# alternatively from the shell +# export COMPILER=GCC_ +# or, alternatively from the shell +# make COMPILER=GCC_ + +# use Intel compilers +#COMPILER=ICC_ + +# use PGI compilers +# COMPILER=PGI_ + + +SOURCES = main.cpp means.cpp +OBJECTS = $(SOURCES:.cpp=.o) + +PROGRAM = main.${COMPILER} + +# uncomment the next to lines for debugging and detailed performance analysis +CXXFLAGS += -g +LINKFLAGS += -g +# do not use -pg with PGI compilers + +ifndef COMPILER + COMPILER=GCC_ +endif + +include ../${COMPILER}default.mk diff --git a/sheet1/A/Makefile:Zone.Identifier b/sheet1/A/Makefile:Zone.Identifier new file mode 100644 index 0000000..e69de29 diff --git a/sheet1/A/main.GCC_ b/sheet1/A/main.GCC_ new file mode 100755 index 0000000..2291c10 Binary files /dev/null and b/sheet1/A/main.GCC_ differ diff --git a/sheet1/A/main.cpp b/sheet1/A/main.cpp new file mode 100644 index 0000000..97d6644 --- /dev/null +++ b/sheet1/A/main.cpp @@ -0,0 +1,42 @@ +#include +#include +#include +#include "means.h" + +using namespace std; + + +int main() { + int a = 1, b = 4, c = 16; + double ar, ge, ha; + + // c) + means(a, b, c, ar, ge, ha); + cout << "Arithmetic mean: " << ar + << ", geometric mean: " << ge + << ", harmonic mean: " << ha << endl; + + // d) + a = 2; b = 3; c = 5; + means(a, b, c, ar, ge, ha); + cout << "Arithmetic mean: " << ar + << ", geometric mean: " << ge + << ", harmonic mean: " << ha << endl; + + // e) + a = 1000; b = 4000; c = 16000; + means(a, b, c, ar, ge, ha); + + cout << "Arithmetic mean: " << ar + << ", geometric 
mean: " << ge + << ", harmonic mean: " << ha << endl; + + // f) + vector input = {1, 2, 3, 4, 5, 6}; + means_vector(input, ar, ge, ha); + cout << "Arithmetic mean: " << ar + << ", geometric mean: " << ge + << ", harmonic mean: " << ha << endl; + + return 0; +} diff --git a/sheet1/A/main.o b/sheet1/A/main.o new file mode 100644 index 0000000..c54977f Binary files /dev/null and b/sheet1/A/main.o differ diff --git a/sheet1/A/means.cpp b/sheet1/A/means.cpp new file mode 100644 index 0000000..ac0a945 --- /dev/null +++ b/sheet1/A/means.cpp @@ -0,0 +1,36 @@ +#include +#include +#include + +using namespace std; + +void means(int a, int b, int c, double &ar, double &ge, double &ha) { + ar = (a+b+c) / 3.0; + + + ge = pow(a,1.0/3.0) * pow(b,1.0/3.0) * pow(c,1.0/3.0); //do it instead of pow(a*b*c,1.0/3.0) to prevent integer overflow + + ha = 3.0 / (1.0/a +1.0/b +1.0/c); +} + +void means_vector(const vector &input, double &ar, double &ge, double &ha) { + int size = input.size(); + + if (size == 0) { + cout << "Empty input" << endl; + return; + } + + ar = 0; + ge = 1; + ha = 0; + + for (int i = 0; i < size; i++) { + ar += input.at(i); + ge *= pow(input.at(i), 1.0 / size); + ha += 1.0 / input.at(i); + } + + ar /= size; + ha = size / ha; +} \ No newline at end of file diff --git a/sheet1/A/means.h b/sheet1/A/means.h new file mode 100644 index 0000000..5235659 --- /dev/null +++ b/sheet1/A/means.h @@ -0,0 +1,9 @@ +#ifndef MEANS_H_INCLUDED +#define MEANS_H_INCLUDED + +#include + +void means(int a, int b, int c, double &ar, double &ge, double &ha); +void means_vector(const std::vector &input, double &ar, double &ge, double &ha); + +#endif // MEANS_H_INCLUDED diff --git a/sheet1/A/means.o b/sheet1/A/means.o new file mode 100644 index 0000000..a5915c5 Binary files /dev/null and b/sheet1/A/means.o differ diff --git a/sheet1/B/.vscode/settings.json b/sheet1/B/.vscode/settings.json new file mode 100644 index 0000000..db8069e --- /dev/null +++ b/sheet1/B/.vscode/settings.json @@ -0,0 +1,7 
@@ +{ + "files.associations": { + "iostream": "cpp", + "ostream": "cpp", + "random": "cpp" + } +} \ No newline at end of file diff --git a/sheet1/B/Makefile b/sheet1/B/Makefile new file mode 100644 index 0000000..fffe26c --- /dev/null +++ b/sheet1/B/Makefile @@ -0,0 +1,39 @@ +# +# use GNU-Compiler tools +COMPILER=GCC_ +# alternatively from the shell +# export COMPILER=GCC_ +# or, alternatively from the shell +# make COMPILER=GCC_ + +# use Intel compilers +#COMPILER=ICC_ + +# use PGI compilers +# COMPILER=PGI_ + +# use CLANG compilers +# COMPILER=CLANG_ + + +SOURCES = main.cpp file_io.cpp means.cpp +OBJECTS = $(SOURCES:.cpp=.o) + +PROGRAM = main.${COMPILER} + +# uncomment the next to lines for debugging and detailed performance analysis +CXXFLAGS += -g +LINKFLAGS += -g +# do not use -pg with PGI compilers + +ifndef COMPILER + COMPILER=GCC_ +endif + +include ../${COMPILER}default.mk + + +task: + @pdflatex task + @pdflatex task + diff --git a/sheet1/B/data_1.txt b/sheet1/B/data_1.txt new file mode 100644 index 0000000..49315c2 --- /dev/null +++ b/sheet1/B/data_1.txt @@ -0,0 +1,500 @@ +141 +261 +87 +430 +258 +298 +425 +120 +496 +707 +244 +786 +75 +394 +4 +221 +2 +190 +143 +269 +175 +139 +599 +902 +940 +222 +483 +377 +524 +265 +69 +437 +174 +27 +955 +431 +962 +763 +8 +681 +706 +646 +553 +219 +773 +229 +371 +891 +857 +403 +319 +609 +911 +910 +592 +333 +854 +443 +905 +34 +533 +717 +180 +337 +188 +322 +404 +549 +49 +553 +275 +242 +244 +155 +957 +936 +819 +729 +176 +361 +189 +2 +317 +700 +626 +544 +440 +288 +502 +762 +763 +577 +748 +646 +124 +505 +348 +93 +148 +199 +673 +432 +695 +257 +10 +533 +280 +947 +907 +393 +25 +672 +838 +972 +57 +451 +583 +687 +720 +651 +727 +374 +582 +117 +58 +980 +285 +595 +963 +186 +194 +342 +933 +391 +274 +152 +398 +375 +132 +436 +92 +615 +11 +574 +790 +236 +449 +570 +62 +497 +643 +222 +838 +972 +847 +506 +279 +747 +237 +958 +621 +601 +173 +91 +256 +859 +912 +700 +726 +230 +577 +811 +404 +989 +90 +321 +512 +61 +726 +557 +530 +830 +859 +790 +318 
+453 +753 +110 +110 +270 +525 +973 +711 +312 +292 +851 +912 +640 +256 +89 +839 +585 +949 +62 +585 +286 +828 +191 +443 +394 +827 +677 +208 +319 +134 +672 +571 +170 +148 +477 +909 +553 +33 +54 +806 +452 +383 +790 +365 +533 +712 +872 +329 +651 +975 +76 +588 +414 +310 +264 +759 +996 +187 +782 +196 +993 +803 +425 +729 +499 +809 +357 +74 +591 +911 +194 +433 +750 +40 +947 +764 +559 +184 +498 +518 +995 +855 +963 +679 +404 +935 +480 +232 +397 +706 +559 +757 +996 +963 +536 +964 +116 +52 +305 +581 +531 +902 +541 +432 +543 +713 +17 +801 +143 +479 +257 +370 +662 +170 +279 +199 +196 +327 +881 +472 +404 +180 +969 +408 +845 +616 +377 +878 +785 +465 +814 +899 +430 +335 +597 +902 +703 +378 +735 +955 +543 +541 +312 +72 +182 +93 +464 +10 +916 +643 +2 +31 +209 +455 +128 +9 +728 +355 +781 +437 +437 +50 +50 +92 +595 +242 +842 +858 +964 +489 +221 +227 +537 +763 +348 +462 +640 +918 +162 +716 +578 +434 +885 +394 +179 +634 +625 +328 +803 +1000 +981 +128 +233 +24 +608 +111 +408 +885 +549 +370 +209 +441 +957 +125 +471 +857 +44 +692 +979 +284 +134 +686 +910 +611 +900 +194 +755 +347 +419 +156 +820 +625 +739 +806 +68 +951 +498 +756 +743 +832 +157 +458 +619 +933 +836 +896 +583 +583 +855 +35 +886 +408 +37 +747 +155 +144 +606 +255 +325 +402 +407 +387 +610 +167 +189 +95 +324 +770 +235 +741 +693 +825 +828 +294 +310 +524 +326 +832 +811 +557 +263 +681 +234 +457 +385 +539 +992 +756 +981 +235 +529 +52 +757 +602 +858 +989 +930 +410 +1 +541 +208 +220 +326 +96 +748 +749 +544 +339 +833 +553 +958 +893 +357 +547 +347 +623 +797 +746 +126 +823 +26 +415 +732 +782 +368 diff --git a/sheet1/B/file_io.cbp b/sheet1/B/file_io.cbp new file mode 100644 index 0000000..7ffa582 --- /dev/null +++ b/sheet1/B/file_io.cbp @@ -0,0 +1,60 @@ + + + + + + diff --git a/sheet1/B/file_io.cpp b/sheet1/B/file_io.cpp new file mode 100644 index 0000000..4d78dab --- /dev/null +++ b/sheet1/B/file_io.cpp @@ -0,0 +1,65 @@ +#include "file_io.h" +#include +#include +#include +#include +#include +#include +using namespace std; + +// [Str10, p.364] 
+void fill_vector(istream& istr, vector& v) +{ + double d=0; + while ( istr >> d) v.push_back(d); // Einlesen + if (!istr.eof()) + { // Fehlerbehandlung + cout << " Error handling \n"; + if ( istr.bad() ) throw runtime_error("Schwerer Fehler in istr"); + if ( istr.fail() ) // Versuch des Aufraeumens + { + cout << " Failed in reading all data.\n"; + istr.clear(); + } + } + v.shrink_to_fit(); // C++11 + return; +} + + +void read_vector_from_file(const string& file_name, vector& v) +{ + ifstream fin(file_name); // Oeffne das File im ASCII-Modus + if( fin.is_open() ) // File gefunden: + { + v.clear(); // Vektor leeren + fill_vector(fin, v); + } + else // File nicht gefunden: + { + cout << "\nFile " << file_name << " has not been found.\n\n" ; + assert( fin.is_open() && "File not found." ); // exeption handling for the poor programmer + } + + return; +} + +void write_vector_to_file(const string& file_name, const vector& v) +{ + ofstream fout(file_name); // Oeffne das File im ASCII-Modus + if( fout.is_open() ) + { + for (unsigned int k=0; k +#include +//using namespace std; + + +/** + This function opens the ASCII-file named @p file_name and reads the + double data into the C++ vector @p v. + If the file @p file_name does not exist then the code stops with an appropriate message. + @param[in] file_name name of the ASCII-file + @param[out] v C++ vector with double values +*/ + +void read_vector_from_file(const std::string& file_name, std::vector& v); + + +/** + This function opens the ASCII-file named @p file_name and rewrites its with the + double data from the C++ vector @p v. + If there are problems in opening/generating file @p file_name + then the code stops with an appropriate message. 
+ @param[in] file_name name of the ASCII-file + @param[in] v C++ vector with double values +*/ + +void write_vector_to_file(const std::string& file_name, const std::vector& v); + +/** + Fills the double-vector @p v with data from an input stream @p istr until this input stream + ends regularily. The vector is cleared and its memory is automatically allocated. + @param[in] istr input stream + @param[out] v C++ vector with double values + @warning An exception is thrown in case of wrong data format or corrupted data. +*/ +void fill_vector(std::istream& istr, std::vector& v); + +#endif // FILE_IO_H_INCLUDED diff --git a/sheet1/B/file_io.o b/sheet1/B/file_io.o new file mode 100644 index 0000000..13990e8 Binary files /dev/null and b/sheet1/B/file_io.o differ diff --git a/sheet1/B/main.GCC_ b/sheet1/B/main.GCC_ new file mode 100755 index 0000000..d512706 Binary files /dev/null and b/sheet1/B/main.GCC_ differ diff --git a/sheet1/B/main.cpp b/sheet1/B/main.cpp new file mode 100644 index 0000000..b3c2bf1 --- /dev/null +++ b/sheet1/B/main.cpp @@ -0,0 +1,48 @@ +#include "file_io.h" +#include "means.h" +#include +#include +#include +#include +#include +using namespace std; + +int main() +{ + cout << "File einlesen." 
<< endl; + + const string name("data_1.txt"); // name of input file + const string name2("out_1.txt"); // name of output file + vector a; //-2^15 to 2^15-1 fits the values from the file + double min, max, ar, ge, ha, std; + + + read_vector_from_file(name, a); + const unsigned size = a.size(); + + min = *min_element(a.begin(), a.end()); + max = *max_element(a.begin(), a.end()); + + + + means_vector(a, ar, ge, ha); + + std = 0; + for(unsigned int i = 0; i < size; i++) + { + std += pow(a.at(i)-ar,2); + } + std = sqrt(std/size); + + cout << "min: " << min << ", max: " << max << endl; + cout << "Arithmetic mean: " << ar + << ", geometric mean: " << ge + << ", harmonic mean: " << ha << endl; + cout << "std: " << std << endl; + + vector results = {min, max, ar, ge, ha, std}; + write_vector_to_file(name2, results); + + + return 0; +} diff --git a/sheet1/B/main.o b/sheet1/B/main.o new file mode 100644 index 0000000..4e4ac76 Binary files /dev/null and b/sheet1/B/main.o differ diff --git a/sheet1/B/means.cpp b/sheet1/B/means.cpp new file mode 100644 index 0000000..1b4b6a6 --- /dev/null +++ b/sheet1/B/means.cpp @@ -0,0 +1,36 @@ +#include +#include +#include + +using namespace std; + +void means(int a, int b, int c, double &ar, double &ge, double &ha) { + ar = (a+b+c) / 3.0; + + + ge = pow(a,1.0/3.0) * pow(b,1.0/3.0) * pow(c,1.0/3.0); //do it instead of pow(a*b*c,1.0/3.0) to prevent integer overflow + + ha = 3.0 / (1.0/a +1.0/b +1.0/c); +} + +void means_vector(const vector &input, double &ar, double &ge, double &ha) { + int size = input.size(); + + if (size == 0) { + cout << "Empty input" << endl; + return; + } + + ar = 0; + ge = 1; + ha = 0; + + for (int i = 0; i < size; i++) { + ar += input.at(i); + ge *= pow(input.at(i), 1.0 / size); + ha += 1.0 / input.at(i); + } + + ar /= size; + ha = size / ha; +} \ No newline at end of file diff --git a/sheet1/B/means.h b/sheet1/B/means.h new file mode 100644 index 0000000..985f236 --- /dev/null +++ b/sheet1/B/means.h @@ -0,0 +1,9 @@ 
+#ifndef MEANS_H_INCLUDED +#define MEANS_H_INCLUDED + +#include + +void means(int a, int b, int c, double &ar, double &ge, double &ha); +void means_vector(const std::vector &input, double &ar, double &ge, double &ha); + +#endif // MEANS_H_INCLUDED diff --git a/sheet1/B/means.o b/sheet1/B/means.o new file mode 100644 index 0000000..3d8f21a Binary files /dev/null and b/sheet1/B/means.o differ diff --git a/sheet1/B/out_1.txt b/sheet1/B/out_1.txt new file mode 100644 index 0000000..95f3165 --- /dev/null +++ b/sheet1/B/out_1.txt @@ -0,0 +1,6 @@ +1 +1000 +498.184 +364.412 +95.6857 +287.905 diff --git a/sheet1/C/.vscode/settings.json b/sheet1/C/.vscode/settings.json new file mode 100644 index 0000000..0c83701 --- /dev/null +++ b/sheet1/C/.vscode/settings.json @@ -0,0 +1,6 @@ +{ + "files.associations": { + "iostream": "cpp", + "ostream": "cpp" + } +} \ No newline at end of file diff --git a/sheet1/C/Makefile b/sheet1/C/Makefile new file mode 100644 index 0000000..6506518 --- /dev/null +++ b/sheet1/C/Makefile @@ -0,0 +1,30 @@ +# +# use GNU-Compiler tools +COMPILER=GCC_ +# alternatively from the shell +# export COMPILER=GCC_ +# or, alternatively from the shell +# make COMPILER=GCC_ + +# use Intel compilers +#COMPILER=ICC_ + +# use PGI compilers +# COMPILER=PGI_ + + +SOURCES = main.cpp +OBJECTS = $(SOURCES:.cpp=.o) + +PROGRAM = main.${COMPILER} + +# uncomment the next to lines for debugging and detailed performance analysis +CXXFLAGS += -g +LINKFLAGS += -g +# do not use -pg with PGI compilers + +ifndef COMPILER + COMPILER=GCC_ +endif + +include ../${COMPILER}default.mk diff --git a/sheet1/C/Makefile:Zone.Identifier b/sheet1/C/Makefile:Zone.Identifier new file mode 100644 index 0000000..e69de29 diff --git a/sheet1/C/main.GCC_ b/sheet1/C/main.GCC_ new file mode 100755 index 0000000..ab92bf3 Binary files /dev/null and b/sheet1/C/main.GCC_ differ diff --git a/sheet1/C/main.cpp b/sheet1/C/main.cpp new file mode 100644 index 0000000..4d73a12 --- /dev/null +++ b/sheet1/C/main.cpp @@ 
-0,0 +1,70 @@ +#include +#include "timing.h" + +using namespace std; + + +unsigned long multiplesOf3Or5(unsigned int n) +{ + unsigned long sum = 0; + for(unsigned int i=1; i<=n; i++) + { + if(i % 3 == 0 || i % 5 == 0) + { + sum += i; + } + } + return sum; +} + +unsigned long gauss_sum(unsigned long n) +{ + return n*(n+1)/2; +} + +unsigned long multiplesOf3Or5_noLoop(unsigned int n) +{ + /*for n there are floor(n/3) multiples of 3 and floor(n/5) multiples of 5. Calculate them using Gauss summation and subtract the + multiples of 15 (counted twice)*/ + unsigned long multiples3 = gauss_sum(n/3)*3; + unsigned long multiples5 = gauss_sum(n/5)*5; + unsigned long multiples15 = gauss_sum(n/15)*15; + + return multiples3 + multiples5 - multiples15; + +} + + +int main() { + + unsigned long mA,mB,mC,mD,mE,mF; + + tic(); + for(unsigned int i = 0; i< 10000; i++) + { + mA = multiplesOf3Or5(15); + mB = multiplesOf3Or5(1001); + mC = multiplesOf3Or5(1432987); + } + double timeA = toc(); + + tic(); + for(unsigned int i = 0; i< 10000; i++) + { + mD = multiplesOf3Or5_noLoop(15); + mE = multiplesOf3Or5_noLoop(1001); + mF = multiplesOf3Or5_noLoop(1432987); + } + double timeB = toc(); + + cout << "n = 15, result = " << mA << endl; + cout << "n = 1001, result = " << mB << endl; + cout << "n = 1432987, result = " << mC << endl; + cout << "time: " << timeA << endl; + cout << "--------------------------------------------------------------" < // timing +#include + +//using Clock = std::chrono::system_clock; //!< The wall clock timer chosen +using Clock = std::chrono::high_resolution_clock; +using TPoint= std::chrono::time_point; + +// [Galowicz, C++17 STL Cookbook, p. 29] + +std::stack MyStopWatch; //!< starting time of stopwatch + +/** Starts stopwatch timer. + * Use as @code tic(); myfunction(...) ; double tsec = toc(); @endcode + * + * The timining can be nested and the recent time point is stored on top of the stack. 
+ * + * @return recent time point + * @see toc + */ +auto tic() +{ + MyStopWatch.push(Clock::now()); + return MyStopWatch.top(); +} + +/** Returns the elapsed time from stopwatch. + * + * The time point from top of the stack is used + * if time point @p t_b is not passed as input parameter. + * Use as @code tic(); myfunction(...) ; double tsec = toc(); @endcode + * or as @code auto t_b = tic(); myfunction(...) ; double tsec = toc(t_b); @endcode + * The last option is to be used in the case of + * non-nested but overlapping time measurements. + * + * @param[in] t_b start time of some stop watch + * @return elapsed time in seconds. + * +*/ +double toc(TPoint const &t_b = MyStopWatch.top()) +{ + // https://en.cppreference.com/w/cpp/chrono/treat_as_floating_point + using Unit = std::chrono::seconds; + using FpSeconds = std::chrono::duration; + auto t_e = Clock::now(); + MyStopWatch.pop(); + return FpSeconds(t_e-t_b).count(); +} diff --git a/sheet1/C/timing.h:Zone.Identifier b/sheet1/C/timing.h:Zone.Identifier new file mode 100644 index 0000000..e69de29 diff --git a/sheet1/CLANG_default.mk b/sheet1/CLANG_default.mk new file mode 100644 index 0000000..4bc290d --- /dev/null +++ b/sheet1/CLANG_default.mk @@ -0,0 +1,123 @@ +# Basic Defintions for using GNU-compiler suite sequentially +# requires setting of COMPILER=CLANG_ + +#CLANGPATH=//usr/lib/llvm-10/bin/ +CC = ${CLANGPATH}clang +CXX = ${CLANGPATH}clang++ +#CXX = ${CLANGPATH}clang++ -lomptarget -fopenmp-targets=nvptx64-nvidia-cuda --cuda-path=/opt/pgi/linux86-64/2017/cuda/8.0 +#F77 = gfortran +LINKER = ${CXX} + +#http://clang.llvm.org/docs/UsersManual.html#options-to-control-error-and-warning-messages +WARNINGS += -Weverything -Wno-c++98-compat -Wno-sign-conversion -Wno-date-time -Wno-shorten-64-to-32 -Wno-padded -ferror-limit=1 +WARNINGS += -Wdocumentation -Wconversion -Wshadow -Wfloat-conversion -pedantic +#-fsyntax-only -Wdocumentation -Wconversion -Wshadow -Wfloat-conversion -pedantic + +CXXFLAGS += -O3 -std=c++17 
-ferror-limit=1 ${WARNINGS} +# don't use -Ofast +# -ftrapv +LINKFLAGS += -O3 + +# different libraries in Ubuntu or manajaró +ifndef UBUNTU +UBUNTU=1 +endif + +# BLAS, LAPACK +LINKFLAGS += -llapack -lblas +# -lopenblas +ifeq ($(UBUNTU),1) +# ubuntu +else +# on archlinux +LINKFLAGS += -lcblas +endif + +# interprocedural optimization +CXXFLAGS += -flto +LINKFLAGS += -flto + +# very good check +# http://clang.llvm.org/extra/clang-tidy/ +# good check, see: http://llvm.org/docs/CodingStandards.html#include-style +SWITCH_OFF=,-readability-magic-numbers,-readability-redundant-control-flow,-readability-redundant-member-init +SWITCH_OFF+=,-readability-redundant-member-init,-readability-isolate-declaration +#READABILITY=,readability*${SWITCH_OFF} +#TIDYFLAGS = -checks=llvm-*,-llvm-header-guard -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp" +TIDYFLAGS = -checks=llvm-*,-llvm-header-guard${READABILITY} -header-filter=.* -enable-check-profile -extra-arg="-std=c++17" -extra-arg="-fopenmp" +#TIDYFLAGS += -checks='modernize* +# ??? +#TIDYFLAGS = -checks='cert*' -header-filter=.* +# MPI checks ?? +#TIDYFLAGS = -checks='mpi*' +# ?? +#TIDYFLAGS = -checks='performance*' -header-filter=.* +#TIDYFLAGS = -checks='portability-*' -header-filter=.* +#TIDYFLAGS = -checks='readability-*' -header-filter=.* + +default: ${PROGRAM} + +${PROGRAM}: ${OBJECTS} + $(LINKER) $^ ${LINKFLAGS} -o $@ + +clean: + @rm -f ${PROGRAM} ${OBJECTS} + +clean_all:: clean + @rm -f *_ *~ *.bak *.log *.out *.tar + +codecheck: tidy_check +tidy_check: + clang-tidy ${SOURCES} ${TIDYFLAGS} -- ${SOURCES} +# see also http://clang-developers.42468.n3.nabble.com/Error-while-trying-to-load-a-compilation-database-td4049722.html + +run: clean ${PROGRAM} +# time ./${PROGRAM} ${PARAMS} + ./${PROGRAM} ${PARAMS} + +# tar the current directory +MY_DIR = `basename ${PWD}` +tar: clean_all + @echo "Tar the directory: " ${MY_DIR} + @cd .. 
;\ + tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\ + cd ${MY_DIR} +# tar cf `basename ${PWD}`.tar * + +doc: + doxygen Doxyfile + +######################################################################### + +.cpp.o: + $(CXX) -c $(CXXFLAGS) -o $@ $< + +.c.o: + $(CC) -c $(CFLAGS) -o $@ $< + +.f.o: + $(F77) -c $(FFLAGS) -o $@ $< + +################################################################################################## +# some tools +# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags) +cache: ${PROGRAM} + valgrind --tool=callgrind --simulate-cache=yes ./$^ ${PARAMS} +# kcachegrind callgrind.out. & + kcachegrind `ls -1tr callgrind.out.* |tail -1` + +# Check for wrong memory accesses, memory leaks, ... +# use smaller data sets +mem: ${PROGRAM} + valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^ ${PARAMS} + +# Simple run time profiling of your code +# CXXFLAGS += -g -pg +# LINKFLAGS += -pg +prof: ${PROGRAM} + perf record ./$^ ${PARAMS} + perf report +# gprof -b ./$^ > gp.out +# kprof -f gp.out -p gprof & + +codecheck: tidy_check diff --git a/sheet1/E/.vscode/settings.json b/sheet1/E/.vscode/settings.json new file mode 100644 index 0000000..d1429ba --- /dev/null +++ b/sheet1/E/.vscode/settings.json @@ -0,0 +1,58 @@ +{ + "files.associations": { + "array": "cpp", + "atomic": "cpp", + "bit": "cpp", + "cctype": "cpp", + "charconv": "cpp", + "chrono": "cpp", + "clocale": "cpp", + "cmath": "cpp", + "compare": "cpp", + "concepts": "cpp", + "cstdarg": "cpp", + "cstddef": "cpp", + "cstdint": "cpp", + "cstdio": "cpp", + "cstdlib": "cpp", + "ctime": "cpp", + "cwchar": "cpp", + "cwctype": "cpp", + "deque": "cpp", + "list": "cpp", + "string": "cpp", + "unordered_map": "cpp", + "vector": "cpp", + "exception": "cpp", + "algorithm": "cpp", + "functional": "cpp", + "iterator": "cpp", + "memory": "cpp", + "memory_resource": "cpp", + "numeric": "cpp", + 
"optional": "cpp", + "random": "cpp", + "ratio": "cpp", + "string_view": "cpp", + "system_error": "cpp", + "tuple": "cpp", + "type_traits": "cpp", + "utility": "cpp", + "format": "cpp", + "initializer_list": "cpp", + "iomanip": "cpp", + "iosfwd": "cpp", + "iostream": "cpp", + "istream": "cpp", + "limits": "cpp", + "new": "cpp", + "numbers": "cpp", + "ostream": "cpp", + "span": "cpp", + "sstream": "cpp", + "stdexcept": "cpp", + "streambuf": "cpp", + "typeinfo": "cpp", + "variant": "cpp" + } +} \ No newline at end of file diff --git a/sheet1/E/Makefile b/sheet1/E/Makefile new file mode 100644 index 0000000..6506518 --- /dev/null +++ b/sheet1/E/Makefile @@ -0,0 +1,30 @@ +# +# use GNU-Compiler tools +COMPILER=GCC_ +# alternatively from the shell +# export COMPILER=GCC_ +# or, alternatively from the shell +# make COMPILER=GCC_ + +# use Intel compilers +#COMPILER=ICC_ + +# use PGI compilers +# COMPILER=PGI_ + + +SOURCES = main.cpp +OBJECTS = $(SOURCES:.cpp=.o) + +PROGRAM = main.${COMPILER} + +# uncomment the next to lines for debugging and detailed performance analysis +CXXFLAGS += -g +LINKFLAGS += -g +# do not use -pg with PGI compilers + +ifndef COMPILER + COMPILER=GCC_ +endif + +include ../${COMPILER}default.mk diff --git a/sheet1/E/Makefile:Zone.Identifier b/sheet1/E/Makefile:Zone.Identifier new file mode 100644 index 0000000..e69de29 diff --git a/sheet1/E/main.GCC_ b/sheet1/E/main.GCC_ new file mode 100755 index 0000000..f087ff9 Binary files /dev/null and b/sheet1/E/main.GCC_ differ diff --git a/sheet1/E/main.cpp b/sheet1/E/main.cpp new file mode 100644 index 0000000..6d9772b --- /dev/null +++ b/sheet1/E/main.cpp @@ -0,0 +1,74 @@ +#include +#include +#include +#include +#include +#include "timing.h" +#include +using namespace std; +std::default_random_engine generator; + + +unsigned int randomInt(unsigned int n) +{ + + std::uniform_int_distribution distribution(1,n); + return distribution(generator); + +} + +void insertVector(vector& vec) +{ + unsigned int n = 
vec.size(); + unsigned int random; + + for(unsigned int i = 0; i::iterator it = lower_bound(vec.begin(), vec.end(), random); + vec.insert(it, random); + } + +} +void insertList(list& l) +{ + unsigned int n = l.size(); + unsigned int random; + + for(unsigned int i = 0; i::iterator it = lower_bound(l.begin(), l.end(), random); + l.insert(it, random); + } + +} + + + +int main() +{ + unsigned int n = 10000; + vector vec = {}; + list l = {}; + for(unsigned int i=1; i<=n; i++) + { + vec.push_back(i); + l.push_back(i); + } + tic(); + insertVector(vec); + double vecTime = toc(); + + tic(); + insertList(l); + double listTime = toc(); + + cout << is_sorted(vec.begin(), vec.end())<< endl; + cout << is_sorted(l.begin(), l.end())<< endl; + + cout << "vector time: " << vecTime << endl; + cout << "list time: " << listTime << endl; + + return 0; +} diff --git a/sheet1/E/main.o b/sheet1/E/main.o new file mode 100644 index 0000000..1db2400 Binary files /dev/null and b/sheet1/E/main.o differ diff --git a/sheet1/E/timing.h b/sheet1/E/timing.h new file mode 100644 index 0000000..1c79e9d --- /dev/null +++ b/sheet1/E/timing.h @@ -0,0 +1,51 @@ +// +// Gundolf Haase, Oct 18 2024 +// +#pragma once +#include // timing +#include + +//using Clock = std::chrono::system_clock; //!< The wall clock timer chosen +using Clock = std::chrono::high_resolution_clock; +using TPoint= std::chrono::time_point; + +// [Galowicz, C++17 STL Cookbook, p. 29] + +std::stack MyStopWatch; //!< starting time of stopwatch + +/** Starts stopwatch timer. + * Use as @code tic(); myfunction(...) ; double tsec = toc(); @endcode + * + * The timining can be nested and the recent time point is stored on top of the stack. + * + * @return recent time point + * @see toc + */ + auto tic() +{ + MyStopWatch.push(Clock::now()); + return MyStopWatch.top(); +} + +/** Returns the elapsed time from stopwatch. + * + * The time point from top of the stack is used + * if time point @p t_b is not passed as input parameter. 
+ * Use as @code tic(); myfunction(...) ; double tsec = toc(); @endcode + * or as @code auto t_b = tic(); myfunction(...) ; double tsec = toc(t_b); @endcode + * The last option is to be used in the case of + * non-nested but overlapping time measurements. + * + * @param[in] t_b start time of some stop watch + * @return elapsed time in seconds. + * +*/ + double toc(TPoint const &t_b = MyStopWatch.top()) +{ + // https://en.cppreference.com/w/cpp/chrono/treat_as_floating_point + using Unit = std::chrono::seconds; + using FpSeconds = std::chrono::duration; + auto t_e = Clock::now(); + MyStopWatch.pop(); + return FpSeconds(t_e-t_b).count(); +} diff --git a/sheet1/E/timing.h:Zone.Identifier b/sheet1/E/timing.h:Zone.Identifier new file mode 100644 index 0000000..e69de29 diff --git a/sheet1/F/.vscode/settings.json b/sheet1/F/.vscode/settings.json new file mode 100644 index 0000000..e2c18d1 --- /dev/null +++ b/sheet1/F/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "files.associations": { + "ostream": "cpp" + } +} \ No newline at end of file diff --git a/sheet1/F/Makefile b/sheet1/F/Makefile new file mode 100644 index 0000000..6506518 --- /dev/null +++ b/sheet1/F/Makefile @@ -0,0 +1,30 @@ +# +# use GNU-Compiler tools +COMPILER=GCC_ +# alternatively from the shell +# export COMPILER=GCC_ +# or, alternatively from the shell +# make COMPILER=GCC_ + +# use Intel compilers +#COMPILER=ICC_ + +# use PGI compilers +# COMPILER=PGI_ + + +SOURCES = main.cpp +OBJECTS = $(SOURCES:.cpp=.o) + +PROGRAM = main.${COMPILER} + +# uncomment the next to lines for debugging and detailed performance analysis +CXXFLAGS += -g +LINKFLAGS += -g +# do not use -pg with PGI compilers + +ifndef COMPILER + COMPILER=GCC_ +endif + +include ../${COMPILER}default.mk diff --git a/sheet1/F/Makefile:Zone.Identifier b/sheet1/F/Makefile:Zone.Identifier new file mode 100644 index 0000000..e69de29 diff --git a/sheet1/F/main.GCC_ b/sheet1/F/main.GCC_ new file mode 100755 index 0000000..8c5ad2c Binary files /dev/null and 
b/sheet1/F/main.GCC_ differ diff --git a/sheet1/F/main.cpp b/sheet1/F/main.cpp new file mode 100644 index 0000000..6a6cd97 --- /dev/null +++ b/sheet1/F/main.cpp @@ -0,0 +1,130 @@ +#include +#include "mayer_primes.h" +#include +#include +#include "timing.h" +using namespace std; + +unsigned int single_goldbach(unsigned int k) +{ + unsigned int decomp = 0; + unsigned int p,q; + vector primes = get_primes(k); + for(unsigned i=0; i< primes.size(); i++) + { + p = primes.at(i); + if(p > k/2) + { + break; + } + + q = k-p; + vector::iterator it = lower_bound(primes.begin(), primes.end(), q); + if (it != primes.end() && *it == q) + { + decomp++; + } + + } + + return decomp; +} + +//one can call single_goldbach but this way it is faster cause you dont have to regenerate primes and search in list +vector count_goldbach(unsigned int n) +{ + vector counts(n+1,0); + unsigned int p,q; + vector primes = get_primes(n); + for(unsigned int j=0; j n) + { + continue; + } + counts[p+q] += 1; + + } + } + return counts; +} + +vector>> count_goldbach_all(unsigned int n) +{ + vector>> counts(n+1,{{0,0}}); + unsigned int p,q; + vector primes = get_primes(n); + for(unsigned int j=0; j n) + { + continue; + } + if (counts[p+q].at(0).at(0) == 0) //first pair found + { + counts[p+q]={{p,q}}; + } + else{ + counts[p+q].push_back({p,q}); + } + + } + } + return counts; +} + +int main() +{ + + //2 + cout << single_goldbach(694) << endl; + + //3 + vector counts = count_goldbach(100000); + cout << (max_element(counts.begin(), counts.end()) - counts.begin()) << endl; + + //4 + vector nvalues = {10000, 100000, 400000, 1000000, 2000000, 10000000}; + double time; + unsigned int n; + for(unsigned int i = 0; i< nvalues.size(); i++) + { + n = nvalues.at(i); + tic(); + count_goldbach(n); + time = toc(); + cout << "Time for n=" << n << ": " << time << endl; + } + /*Time for n=10000: 0.0006853 + Time for n=100000: 0.0371858 + Time for n=400000: 0.505129 + Time for n=1000000: 2.85873 + Time for n=2000000: 15.0026 
+ Time for n=10000000: 549.658*/ + + + //*) + unsigned int n2 = 694; + vector>> counts2 = count_goldbach_all(n2); + for(unsigned int i=4; iFlorian Mayer. + * + * \param[in] max end of interval for the prime number search. + * \return vector of prime numbers @f$2,3,5, ..., p<=max @f$. + * + * \copyright + * Copyright (c) 2008 Florian Mayer (adapted by Gundolf Haase 2018) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ +template +std::vector get_primes(T max) +{ + std::vector primes; + char *sieve; + sieve = new char[max / 8 + 1]; + // Fill sieve with 1 + memset(sieve, 0xFF, (max / 8 + 1) * sizeof(char)); + for (T x = 2; x <= max; x++) + { + if (sieve[x / 8] & (0x01 << (x % 8))) { + primes.push_back(x); + // Is prime. Mark multiplicates. 
+ for (T j = 2 * x; j <= max; j += x) + { + sieve[j / 8] &= ~(0x01 << (j % 8)); + } + } + } + delete[] sieve; + return primes; +} + +//--------------------------------------------------------------- +//int main() // by Florian Mayer +//{g++ -O3 -std=c++14 -fopenmp main.cpp && ./a.out +// vector primes; +// primes = get_primes(10000000); +// // return 0; +// // Print out result. +// vector::iterator it; +// for(it=primes.begin(); it < primes.end(); it++) +// cout << *it << " "; +// +// cout << endl; +// return 0; +//} diff --git a/sheet1/F/mayer_primes.h:Zone.Identifier b/sheet1/F/mayer_primes.h:Zone.Identifier new file mode 100644 index 0000000..b9647de --- /dev/null +++ b/sheet1/F/mayer_primes.h:Zone.Identifier @@ -0,0 +1,3 @@ +[ZoneTransfer] +ZoneId=3 +HostUrl=https://imsc.uni-graz.at/haasegu/Lectures/Math2CPP/Examples/goldbach/mayer_primes.h diff --git a/sheet1/F/timing.h b/sheet1/F/timing.h new file mode 100644 index 0000000..1c79e9d --- /dev/null +++ b/sheet1/F/timing.h @@ -0,0 +1,51 @@ +// +// Gundolf Haase, Oct 18 2024 +// +#pragma once +#include // timing +#include + +//using Clock = std::chrono::system_clock; //!< The wall clock timer chosen +using Clock = std::chrono::high_resolution_clock; +using TPoint= std::chrono::time_point; + +// [Galowicz, C++17 STL Cookbook, p. 29] + +std::stack MyStopWatch; //!< starting time of stopwatch + +/** Starts stopwatch timer. + * Use as @code tic(); myfunction(...) ; double tsec = toc(); @endcode + * + * The timining can be nested and the recent time point is stored on top of the stack. + * + * @return recent time point + * @see toc + */ + auto tic() +{ + MyStopWatch.push(Clock::now()); + return MyStopWatch.top(); +} + +/** Returns the elapsed time from stopwatch. + * + * The time point from top of the stack is used + * if time point @p t_b is not passed as input parameter. + * Use as @code tic(); myfunction(...) ; double tsec = toc(); @endcode + * or as @code auto t_b = tic(); myfunction(...) 
; double tsec = toc(t_b); @endcode + * The last option is to be used in the case of + * non-nested but overlapping time measurements. + * + * @param[in] t_b start time of some stop watch + * @return elapsed time in seconds. + * +*/ + double toc(TPoint const &t_b = MyStopWatch.top()) +{ + // https://en.cppreference.com/w/cpp/chrono/treat_as_floating_point + using Unit = std::chrono::seconds; + using FpSeconds = std::chrono::duration; + auto t_e = Clock::now(); + MyStopWatch.pop(); + return FpSeconds(t_e-t_b).count(); +} diff --git a/sheet1/F/timing.h:Zone.Identifier b/sheet1/F/timing.h:Zone.Identifier new file mode 100644 index 0000000..e69de29 diff --git a/sheet1/G/.vscode/settings.json b/sheet1/G/.vscode/settings.json new file mode 100644 index 0000000..77853c5 --- /dev/null +++ b/sheet1/G/.vscode/settings.json @@ -0,0 +1,6 @@ +{ + "files.associations": { + "vector": "cpp", + "ostream": "cpp" + } +} \ No newline at end of file diff --git a/sheet1/G/Makefile b/sheet1/G/Makefile new file mode 100644 index 0000000..f5bc097 --- /dev/null +++ b/sheet1/G/Makefile @@ -0,0 +1,30 @@ +# +# use GNU-Compiler tools +COMPILER=GCC_ +# alternatively from the shell +# export COMPILER=GCC_ +# or, alternatively from the shell +# make COMPILER=GCC_ + +# use Intel compilers +#COMPILER=ICC_ + +# use PGI compilers +# COMPILER=PGI_ + + +SOURCES = main.cpp mylib.cpp +OBJECTS = $(SOURCES:.cpp=.o) + +PROGRAM = main.${COMPILER} + +# uncomment the next to lines for debugging and detailed performance analysis +CXXFLAGS += -g +LINKFLAGS += -g +# do not use -pg with PGI compilers + +ifndef COMPILER + COMPILER=GCC_ +endif + +include ../${COMPILER}default.mk diff --git a/sheet1/G/Makefile:Zone.Identifier b/sheet1/G/Makefile:Zone.Identifier new file mode 100644 index 0000000..e69de29 diff --git a/sheet1/G/main.GCC_ b/sheet1/G/main.GCC_ new file mode 100755 index 0000000..8da8f6a Binary files /dev/null and b/sheet1/G/main.GCC_ differ diff --git a/sheet1/G/main.cpp b/sheet1/G/main.cpp new file mode 
100644 index 0000000..d344a23 --- /dev/null +++ b/sheet1/G/main.cpp @@ -0,0 +1,109 @@ +#include "mylib.h" +#include +#include +#include +using namespace std; +int main() +{ + + //b) + DenseMatrix const M(5,3); // Dense matrix, also initialized + vector const u{{1,2,3}}; + vector f1 = M.Mult(u); + vector const v{{-1,2,-3,4,-5}}; + vector f2 = M.MultT(v); + + for(unsigned int i = 0; i < f1.size(); i++) + { + cout << f1.at(i) << " "; + } + cout << endl; + for(unsigned int i = 0; i < f2.size(); i++) + { + cout << f2.at(i) << " "; + } + cout << endl; + + + //c) + int n=5000; + DenseMatrix const A(n,n); + vector w = {}; + for(int i = -n/2; i < n/2; i++) + { + w.push_back(i); + } + + + int const NLOOPS=100; + + double t1 = clock(); // start timer + vector f3 = A.Mult(w); + for (int k=1; k f4 = A.MultT(w); + for (int k=1; k maxDiff) { + maxDiff = diff; + } + } + cout << "difference f3 and f4: " << maxDiff << endl; + + + //d) + cout << "-----------------DYADIC------------------" < x = {}; + for(int k=0; k < n; k++) + { + x.push_back(f(k, n)); + } + Dyadic const D(x,x); + + double t3 = clock(); // start timer + vector f5 = D.Mult(w); + for (int k=1; k f6 = D.MultT(w); + for (int k=1; k maxDiff2) { + maxDiff2 = diff; + } + } + cout << "difference f5 and f6: " << maxDiff2 << endl; + + + + return 0; +} diff --git a/sheet1/G/main.o b/sheet1/G/main.o new file mode 100644 index 0000000..1ca5573 Binary files /dev/null and b/sheet1/G/main.o differ diff --git a/sheet1/G/mylib.cpp b/sheet1/G/mylib.cpp new file mode 100644 index 0000000..8592c4c --- /dev/null +++ b/sheet1/G/mylib.cpp @@ -0,0 +1,109 @@ +#include "mylib.h" +#include +#include +using namespace std; + +double f(unsigned int k, unsigned int nm) { + + return 1.0 / (1.0 + exp(-(10.0*k/(nm-1)-5))); +} + +DenseMatrix::DenseMatrix(unsigned int n, unsigned int m): n_(n), m_(m), data_(n*m) +{ + unsigned int nm = max(n,m); + for(unsigned int rowIt = 0; rowIt < n_; rowIt++) + { + for(unsigned int colIt=0; colIt 
DenseMatrix::Mult(const vector &u) const{ + if(u.size() != m_) + { + cout << "Dimension mismatch: expected " << m_ << " but got " << u.size() << "!" << endl; + return {}; + } + vector f1 = {}; + double sum; + for(unsigned int rowIt = 0; rowIt < n_; rowIt++) + { + sum = 0; + for(unsigned int colIt=0; colIt DenseMatrix::MultT(const vector &v) const{ + if(v.size() != n_) + { + cout << "Dimension mismatch: expected " << n_ << " but got " << v.size() << "!" << endl; + return {}; + } + vector f2 = {}; + double sum; + for(unsigned int colIt = 0; colIt < m_; colIt++) + { + sum = 0; + for(unsigned int rowIt=0; rowIt & u, vector& v):u_(u), v_(v){ + +} + +vector Dyadic::Mult(const vector &w) const{ + unsigned int m = v_.size(); + if(w.size() != m) + { + cout << "Dimension mismatch!" << endl; + return {}; + } + vector f1 = {}; + double skalar = 0; + for(unsigned int vIt = 0; vIt < m; vIt++) + { + skalar += v_.at(vIt)*w.at(vIt); + } + for(unsigned int uIt=0; uIt < u_.size(); uIt++) + { + f1.push_back(u_.at(uIt)*skalar); + } + + return f1; +} + +vector Dyadic::MultT(const vector &w) const{ + unsigned int n = u_.size(); + if(w.size() != n) + { + cout << "Dimension mismatch!" 
<< endl; + return {}; + } + vector f1 = {}; + double skalar = 0; + for(unsigned int uIt = 0; uIt < n; uIt++) + { + skalar += u_.at(uIt)*w.at(uIt); + } + for(unsigned int vIt=0; vIt < v_.size(); vIt++) + { + f1.push_back(v_.at(vIt)*skalar); + } + + return f1; +} \ No newline at end of file diff --git a/sheet1/G/mylib.h b/sheet1/G/mylib.h new file mode 100644 index 0000000..5357490 --- /dev/null +++ b/sheet1/G/mylib.h @@ -0,0 +1,36 @@ +#ifndef MYLIB_H_INCLUDED +#define MYLIB_H_INCLUDED + +#include +using namespace std; + +double f(unsigned int k, unsigned int nm); + +class DenseMatrix{ + + public: + DenseMatrix(unsigned int n, unsigned int m); + + vector Mult(const vector &u) const; + vector MultT(const vector &v) const; + + + private: + unsigned int n_,m_; + vector data_; +}; + +class Dyadic{ + public: + Dyadic(vector& u, vector& v); + + vector Mult(const vector &u) const; + vector MultT(const vector &v) const; + + + private: + vector u_; + vector v_; +}; + +#endif // MYLIB_H_INCLUDED diff --git a/sheet1/G/mylib.o b/sheet1/G/mylib.o new file mode 100644 index 0000000..dbce3aa Binary files /dev/null and b/sheet1/G/mylib.o differ diff --git a/sheet1/GCC_AMD32_default.mk b/sheet1/GCC_AMD32_default.mk new file mode 100644 index 0000000..a911b6b --- /dev/null +++ b/sheet1/GCC_AMD32_default.mk @@ -0,0 +1,130 @@ +# Basic Defintions for using GNU-compiler suite sequentially +# requires setting of COMPILER=GCC_ + +CC = gcc +CXX = g++ +F77 = gfortran +LINKER = ${CXX} + +# on mephisto: +#CXXFLAGS += -I/share/apps/atlas/include +#LINKFLAGS += -L/share/apps/atlas/lib +#LINKFLAGS += -lcblas -latlas + +#LINKFLAGS += -lblas +# Der Header muss mit extern "C" versehen werden, damit g++ alles findet. 
+ + +#WARNINGS = -pedantic -pedantic-errors -Wall -Wextra -Werror -Wconversion -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow +WARNINGS = -pedantic -Wall -Wextra -Wconversion -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow \ + -Wredundant-decls -Winline -fmax-errors=1 +# -Wunreachable-code +# -Wunreachable-code +CXXFLAGS += -ffast-math -O3 -march=native -std=c++17 ${WARNINGS} +#CXXFLAGS += -Ofast -funroll-all-loops -std=c++17 ${WARNINGS} +#-msse3 +# -ftree-vectorizer-verbose=2 -DNDEBUG +# -ftree-vectorizer-verbose=5 +# -ftree-vectorize -fdump-tree-vect-blocks=foo.dump -fdump-tree-pre=stderr + +# CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details +# CFLAGS = -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2 +# #CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp +# FFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp +# LFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp +LINKFLAGS += -O3 + +# BLAS, LAPACK +OPENBLAS_DIR = /opt/openblas_GCCseq +#OPENBLAS_DIR = /opt/openblas_GCC +OPENBLAS_LIBDIR = ${OPENBLAS_DIR}/lib +OPENBLAS_INCDIR = ${OPENBLAS_DIR}/include +CXXFLAGS += -I${OPENBLAS_INCDIR} +LINKFLAGS += -L${OPENBLAS_LIBDIR} -lopenblas + +# interprocedural optimization +CXXFLAGS += -flto +LINKFLAGS += -flto + +# profiling tools +#CXXFLAGS += -pg +#LINKFLAGS += -pg + +default: ${PROGRAM} + +${PROGRAM}: ${OBJECTS} + $(LINKER) $^ ${LINKFLAGS} -o $@ + +clean: + @rm -f ${PROGRAM} ${OBJECTS} + +clean_all:: clean + -@rm -f *_ *~ *.bak *.log *.out *.tar *.orig + -@rm -r html + +run: clean ${PROGRAM} +# time ./${PROGRAM} +# ./${PROGRAM} + ( export LD_LIBRARY_PATH=${OPENBLAS_LIBDIR}:${LD_LIBRARY_PATH} ; ./${PROGRAM} ) +# or 'export LD_LIBRARY_PATH=/opt/openblas_gcc/lib:${LD_LIBRARY_PATH}' in your ~/.bashrc + +# tar the current directory +MY_DIR = `basename ${PWD}` +tar: + @echo "Tar the directory: " ${MY_DIR} + @cd .. 
;\ + tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\ + cd ${MY_DIR} +# tar cf `basename ${PWD}`.tar * + +doc: + doxygen Doxyfile + +######################################################################### + +.cpp.o: + $(CXX) -c $(CXXFLAGS) -o $@ $< + +.c.o: + $(CC) -c $(CFLAGS) -o $@ $< + +.f.o: + $(F77) -c $(FFLAGS) -o $@ $< + +################################################################################################## +# some tools +# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags) +cache: ${PROGRAM} + valgrind --tool=callgrind --simulate-cache=yes ./$^ +# kcachegrind callgrind.out. & + kcachegrind `ls -1tr callgrind.out.* |tail -1` + +# Check for wrong memory accesses, memory leaks, ... +# use smaller data sets +# no "-pg" in compile/link options +mem: ${PROGRAM} + valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^ + +# Simple run time profiling of your code +# CXXFLAGS += -g -pg +# LINKFLAGS += -pg +prof: ${PROGRAM} + ./$^ + gprof -b ./$^ > gp.out +# kprof -f gp.out -p gprof & + +#Trace your heap: +#> heaptrack ./main.GCC_ +#> heaptrack_gui heaptrack.main.GCC_..gz +heap: ${PROGRAM} + heaptrack ./$^ 11 + heaptrack_gui `ls -1tr heaptrack.$^.* |tail -1` & + + + +######################################################################## +# get the detailed status of all optimization flags +info: + echo "detailed status of all optimization flags" + $(CXX) --version + $(CXX) -Q $(CXXFLAGS) --help=optimizers diff --git a/sheet1/GCC_default.mk b/sheet1/GCC_default.mk new file mode 100644 index 0000000..803c060 --- /dev/null +++ b/sheet1/GCC_default.mk @@ -0,0 +1,183 @@ +# Basic Defintions for using GNU-compiler suite sequentially +# requires setting of COMPILER=GCC_ + +CC = gcc +CXX = g++ +F77 = gfortran +LINKER = ${CXX} + +WARNINGS = -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow \ + -Wredundant-decls -Winline 
-fmax-errors=1 +# -Wunreachable-code +CXXFLAGS += -ffast-math -O3 -march=native -std=c++17 ${WARNINGS} +#CXXFLAGS += -Ofast -funroll-all-loops -std=c++17 ${WARNINGS} +#-msse3 +# -ftree-vectorizer-verbose=2 -DNDEBUG +# -ftree-vectorizer-verbose=5 +# -ftree-vectorize -fdump-tree-vect-blocks=foo.dump -fdump-tree-pre=stderr + +# CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp -fdump-tree-vect-details +# CFLAGS = -ffast-math -O3 -funroll-loops -DNDEBUG -msse3 -fopenmp -ftree-vectorizer-verbose=2 +# #CFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp +# FFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp +# LFLAGS = -ffast-math -O3 -DNDEBUG -msse3 -fopenmp +LINKFLAGS += -O3 + +#architecture +#CPU = -march=znver2 +CXXFLAGS += ${CPU} +LINKFLAGS += ${CPU} + +# different libraries in Ubuntu or manajaró +ifndef UBUNTU +UBUNTU=1 +endif + +# BLAS, LAPACK +ifeq ($(UBUNTU),1) +LINKFLAGS += -llapack -lblas +# -lopenblas +else +# on archlinux +LINKFLAGS += -llapack -lopenblas -lcblas +endif + +# interprocedural optimization +CXXFLAGS += -flto +LINKFLAGS += -flto + +# for debugging purpose (save code) +# -fsanitize=leak # only one out the three can be used +# -fsanitize=address +# -fsanitize=thread +SANITARY = -fsanitize=address -fsanitize=undefined -fsanitize=null -fsanitize=return \ + -fsanitize=bounds -fsanitize=alignment -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow \ + -fsanitize=bool -fsanitize=enum -fsanitize=vptr +#CXXFLAGS += ${SANITARY} +#LINKFLAGS += ${SANITARY} + +# profiling tools +#CXXFLAGS += -pg +#LINKFLAGS += -pg + + +default: ${PROGRAM} + +${PROGRAM}: ${OBJECTS} + $(LINKER) $^ ${LINKFLAGS} -o $@ + +clean: + @rm -f ${PROGRAM} ${OBJECTS} + +clean_all:: clean + -@rm -f *_ *~ *.bak *.log *.out *.tar *.orig *.optrpt + -@rm -rf html + +run: clean ${PROGRAM} +#run: ${PROGRAM} +# time ./${PROGRAM} ${PARAMS} + ./${PROGRAM} ${PARAMS} + +# tar the current directory +MY_DIR = `basename ${PWD}` +tar: clean_all + @echo "Tar the directory: " ${MY_DIR} + @cd .. 
;\ + tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\ + cd ${MY_DIR} +# tar cf `basename ${PWD}`.tar * +#find . -size +10M > large_files +#--exclude-from ${MY_DIR}/large_files + +zip: clean + @echo "Zip the directory: " ${MY_DIR} + @cd .. ;\ + zip -r ${MY_DIR}.zip ${MY_DIR} *default.mk ;\ + cd ${MY_DIR} + +doc: + doxygen Doxyfile + +######################################################################### +.SUFFIXES: .f90 + +.cpp.o: + $(CXX) -c $(CXXFLAGS) -o $@ $< +# $(CXX) -c $(CXXFLAGS) -o $@ $< 2>&1 | tee -a $<.log +# $(CXX) -c $(CXXFLAGS) -o $@ $< 2>&1 | tee -a $(<:.cpp=.log) + +.c.o: + $(CC) -c $(CFLAGS) -o $@ $< + +.f.o: + $(F77) -c $(FFLAGS) -o $@ $< + +.f90.o: + $(F77) -c $(FFLAGS) -o $@ $< + +################################################################################################## +# some tools +# Cache behaviour (CXXFLAGS += -g tracks down to source lines; no -pg in linkflags) +cache: ${PROGRAM} + valgrind --tool=callgrind --simulate-cache=yes ./$^ ${PARAMS} +# kcachegrind callgrind.out. & + kcachegrind `ls -1tr callgrind.out.* |tail -1` + +# Check for wrong memory accesses, memory leaks, ... 
+# use smaller data sets +# no "-pg" in compile/link options +mem: ${PROGRAM} + valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^ ${PARAMS} +# Graphical interface +# valkyrie + +# Simple run time profiling of your code +# CXXFLAGS += -g -pg +# LINKFLAGS += -pg +prof: ${PROGRAM} + perf record ./$^ ${PARAMS} + perf report +# gprof -b ./$^ > gp.out +# kprof -f gp.out -p gprof & + +# perf in Ubuntu 20.04: https://www.howtoforge.com/how-to-install-perf-performance-analysis-tool-on-ubuntu-20-04/ +# * install +# * sudo vi /etc/sysctl.conf +# add kernel.perf_event_paranoid = 0 + +#Trace your heap: +#> heaptrack ./main.GCC_ +#> heaptrack_gui heaptrack.main.GCC_..gz +heap: ${PROGRAM} + heaptrack ./$^ ${PARAMS} + heaptrack_gui `ls -1tr heaptrack.$^.* |tail -1` & + +codecheck: $(SOURCES) + cppcheck --enable=all --inconclusive --std=c++17 --suppress=missingIncludeSystem $^ + + +######################################################################## +# get the detailed status of all optimization flags +info: + echo "detailed status of all optimization flags" + $(CXX) --version + $(CXX) -Q $(CXXFLAGS) --help=optimizers + lscpu + inxi -C + lstopo + +# Excellent hardware info +# hardinfo +# Live monitoring of CPU frequency etc. 
+# sudo i7z + +# Memory consumption +# vmstat -at -SM 3 +# xfce4-taskmanager + + +# https://www.tecmint.com/check-linux-cpu-information/ +#https://www.tecmint.com/monitor-cpu-and-gpu-temperature-in-ubuntu/ + +# Debugging: +# https://wiki.archlinux.org/index.php/Debugging diff --git a/sheet1/ICC_default.mk b/sheet1/ICC_default.mk new file mode 100644 index 0000000..d4bd4db --- /dev/null +++ b/sheet1/ICC_default.mk @@ -0,0 +1,137 @@ +# Basic Defintions for using INTEL compiler suite sequentially +# requires setting of COMPILER=ICC_ + +#BINDIR = /opt/intel/bin/ + +# special on my sony [GH] +#BINDIR = /opt/save.intel/bin/ +# very special on my sony [GH] +# FIND_LIBS = -L /opt/save.intel/composer_xe_2013.1.117/mkl/lib/intel64/libmkl_intel_lp64.so + +# Error with g++-4.8 using icpc14.0,x: +# find directory wherein bits/c++config.h is located +# 'locate bits/c++config.h' +#FOUND_CONFIG = -I/usr/include/x86_64-linux-gnu/c++/4.8 + + +CC = ${BINDIR}icc +CXX = ${BINDIR}icpc +F77 = ${BINDIR}ifort +LINKER = ${CXX} + + +WARNINGS = -Wall -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -wd2015,2012 -wn3 +# -Winline -Wredundant-decls -Wunreachable-code +CXXFLAGS += -O3 -fargument-noalias -std=c++17 -DNDEBUG ${WARNINGS} -mkl ${FOUND_CONFIG} +# profiling tools +#CXXFLAGS += -pg +#LINKFLAGS += -pg +# -vec-report=3 +# -qopt-report=5 -qopt-report-phase=vec +# -guide -parallel +# -guide-opts=string -guide-par[=n] -guide-vec[=n] +# -auto-p32 -simd +CXXFLAGS += -align + +# use MKL by INTEL +# https://software.intel.com/content/www/us/en/develop/documentation/mkl-linux-developer-guide/top/linking-your-application-with-the-intel-math-kernel-library/linking-quick-start/using-the-mkl-compiler-option.html +# https://software.intel.com/content/www/us/en/develop/articles/intel-mkl-link-line-advisor.html +# LINKFLAGS += -L${BINDIR}../composer_xe_2013.1.117/mkl/lib/intel64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread +#LINKFLAGS += -O3 -L/opt/intel/mkl/lib -mkl +LINKFLAGS += -O3 
-mkl=sequential + +# interprocedural optimization +CXXFLAGS += -ipo +LINKFLAGS += -ipo + +# annotated assembler file +ANNOTED = -fsource-asm -S + +default: ${PROGRAM} + +${PROGRAM}: ${OBJECTS} + $(LINKER) $^ ${LINKFLAGS} -o $@ + +clean: + rm -f ${PROGRAM} ${OBJECTS} + +clean_all:: clean + @rm -f *_ *~ *.bak *.log *.out *.tar + +run: clean ${PROGRAM} + ./${PROGRAM} + +# tar the current directory +MY_DIR = `basename ${PWD}` +tar: clean_all + @echo "Tar the directory: " ${MY_DIR} + @cd .. ;\ + tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\ + cd ${MY_DIR} +# tar cf `basename ${PWD}`.tar * + +doc: + doxygen Doxyfile + +######################################################################### + +.cpp.o: + $(CXX) -c $(CXXFLAGS) -o $@ $< + +.c.o: + $(CC) -c $(CFLAGS) -o $@ $< + +.f.o: + $(F77) -c $(FFLAGS) -o $@ $< + +################################################################################################## +# # some tools +# # Cache behaviour (CXXFLAGS += -g tracks down to source lines) +# cache: ${PROGRAM} +# valgrind --tool=callgrind --simulate-cache=yes ./$^ +# # kcachegrind callgrind.out. & +# +# # Check for wrong memory accesses, memory leaks, ... +# # use smaller data sets +# mem: ${PROGRAM} +# valgrind -v --leak-check=yes --tool=memcheck --undef-value-errors=yes --track-origins=yes --log-file=$^.addr.out --show-reachable=yes ./$^ +# +# # Simple run time profiling of your code +# # CXXFLAGS += -g -pg +# # LINKFLAGS += -pg +# prof: ${PROGRAM} +# ./$^ +# gprof -b ./$^ > gp.out +# # kprof -f gp.out -p gprof & +# + + +mem: inspector +prof: amplifier +cache: amplifier + +gap_par_report: + ${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt + +# GUI for performance report +amplifier: ${PROGRAM} + echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope +# alternatively to the solution above: + #edit file /etc/sysctl.d/10-ptrace.conf and set variable kernel.yama.ptrace_scope variable to 0 . 
+ amplxe-gui & + +# GUI for Memory and Thread analyzer (race condition) +inspector: ${PROGRAM} + echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope + inspxe-gui & + +advisor: + echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope + advixe-gui & + +icc-info: + icpc -# main.cpp + + + + diff --git a/sheet1/ONEAPI_default.mk b/sheet1/ONEAPI_default.mk new file mode 100644 index 0000000..fe7b3fe --- /dev/null +++ b/sheet1/ONEAPI_default.mk @@ -0,0 +1,176 @@ +# Basic Definitions for using INTEL compiler suite sequentially +# requires setting of COMPILER=ONEAPI_ + +# https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onemkl/link-line-advisor.html +# requires +# source /opt/intel/oneapi/setvars.sh +# on AMD: export MKL_DEBUG_CPU_TYPE=5 + +#BINDIR = /opt/intel/oneapi/compiler/latest/linux/bin/ +#MKL_ROOT = /opt/intel/oneapi/mkl/latest/ +#export KMP_AFFINITY=verbose,compact + +CC = ${BINDIR}icc +CXX = ${BINDIR}dpcpp +F77 = ${BINDIR}ifort +LINKER = ${CXX} + +## Compiler flags +WARNINGS = -Wall -Weffc++ -Woverloaded-virtual -Wfloat-equal -Wshadow -pedantic +WARNINGS += -Wpessimizing-move -Wredundant-move +#-wd2015,2012,2014 -wn3 +# -Winline -Wredundant-decls -Wunreachable-code +# -qopt-subscript-in-range +# -vec-threshold0 + +CXXFLAGS += -O3 -std=c++17 ${WARNINGS} +#CXXFLAGS += -DMKL_ILP64 -I"${MKLROOT}/include" +#CXXFLAGS += -DMKL_ILP32 -I"${MKLROOT}/include" +LINKFLAGS += -O3 + +# interprocedural optimization +CXXFLAGS += -ipo +LINKFLAGS += -ipo +LINKFLAGS += -flto + +# annotated Assembler file +ANNOTED = -fsource-asm -S + +#architecture +CPU = -march=core-avx2 +#CPU += -mtp=zen +# -xCORE-AVX2 +# -axcode COMMON-AVX512 -axcode MIC-AVX512 -axcode CORE-AVX512 -axcode CORE-AVX2 +CXXFLAGS += ${CPU} +LINKFLAGS += ${CPU} + +# use MKL by INTEL +# https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onemkl/link-line-advisor.html +# sequential MKL +# use the 32 bit interface (LP64) instead of 64 bit interface (ILP64) +CXXFLAGS += 
-qmkl=sequential -UMKL_ILP64 +LINKFLAGS += -O3 -qmkl=sequential -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread +#LINKFLAGS += -O3 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread + +# shared libs: https://aur.archlinux.org/packages/intel-oneapi-compiler-static +# install intel-oneapi-compiler-static +# or +LINKFLAGS += -shared-intel + + +OPENMP = -qopenmp +CXXFLAGS += ${OPENMP} +LINKFLAGS += ${OPENMP} + + +# profiling tools +#CXXFLAGS += -pg +#LINKFLAGS += -pg +# -vec-report=3 +# -qopt-report=5 -qopt-report-phase=vec -qopt-report-phase=openmp +# -guide -parallel +# -guide-opts=string -guide-par[=n] -guide-vec[=n] +# -auto-p32 -simd + +# Reports: https://software.intel.com/en-us/articles/getting-the-most-out-of-your-intel-compiler-with-the-new-optimization-reports +#CXXFLAGS += -qopt-report=5 -qopt-report-phase=vec,par +#CXXFLAGS += -qopt-report=5 -qopt-report-phase=cg +# Redirect report from *.optrpt to stderr +# -qopt-report-file=stderr +# Guided paralellization +# -guide -parallel +# -guide-opts=string -guide-par[=n] -guide-vec[=n] +# -auto-p32 -simd + +## run time checks +# https://www.intel.com/content/www/us/en/develop/documentation/fortran-compiler-oneapi-dev-guide-and-reference/top/compiler-reference/compiler-options/offload-openmp-and-parallel-processing-options/par-runtime-control-qpar-runtime-control.html + + +default: ${PROGRAM} + +${PROGRAM}: ${OBJECTS} + $(LINKER) $^ ${LINKFLAGS} -o $@ + +clean: + rm -f ${PROGRAM} ${OBJECTS} *.optrpt + +clean_all:: clean + @rm -f *_ *~ *.bak *.log *.out *.tar + +run: clean ${PROGRAM} + ./${PROGRAM} + +# tar the current directory +MY_DIR = `basename ${PWD}` +tar: clean_all + @echo "Tar the directory: " ${MY_DIR} + @cd .. 
;\ + tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\ + cd ${MY_DIR} +# tar cf `basename ${PWD}`.tar * + +doc: + doxygen Doxyfile + +######################################################################### + +.cpp.o: + $(CXX) -c $(CXXFLAGS) -o $@ $< + +.c.o: + $(CC) -c $(CFLAGS) -o $@ $< + +.f.o: + $(F77) -c $(FFLAGS) -o $@ $< + +################################################################################################## +# some tools +# Cache behaviour (CXXFLAGS += -g tracks down to source lines) +# https://software.intel.com/content/www/us/en/develop/documentation/vtune-help/top/analyze-performance/microarchitecture-analysis-group/memory-access-analysis.html + +mem: inspector +prof: vtune +cache: inspector + +gap_par_report: + ${CXX} -c -guide -parallel $(SOURCES) 2> gap.txt + +# GUI for performance report +amplifier: ${PROGRAM} + echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope + echo 0 | sudo tee /proc/sys/kernel/perf_event_paranoid + amplxe-gui & + +# GUI for Memory and Thread analyzer (race condition) +inspector: ${PROGRAM} + echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope +# inspxe-gui & + vtune-gui ./${PROGRAM} & + +advisor: + source /opt/intel/oneapi/advisor/2021.2.0/advixe-vars.sh +# /opt/intel/oneapi/advisor/latest/bin64/advixe-gui & + advisor --collect=survey ./${PROGRAM} +# advisor --collect=roofline ./${PROGRAM} + advisor --report=survey --project-dir=./ src:r=./ --format=csv --report-output=./out/survey.csv + +vtune: + echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope +# https://software.intel.com/en-us/articles/intel-advisor-2017-update-1-what-s-new + export ADVIXE_EXPERIMENTAL=roofline + vtune -collect hotspots ./${PROGRAM} + vtune -report hotspots -r r000hs > vtune.out +# vtune-gui ./${PROGRAM} & + +icc-info: + icpc -# main.cpp + +# MKL on AMD +# https://www.computerbase.de/2019-11/mkl-workaround-erhoeht-leistung-auf-amd-ryzen/ +# +# https://sites.google.com/a/uci.edu/mingru-yang/programming/mkl-has-bad-performance-on-an-amd-cpu +# 
export MKL_DEBUG_CPU_TYPE=5 +# export MKL_NUM_THREADS=1 +# export MKL_DYNAMIC=false +# on Intel compiler +# http://publicclu2.blogspot.com/2013/05/intel-complier-suite-reference-card.html diff --git a/sheet1/PGI_default.mk b/sheet1/PGI_default.mk new file mode 100644 index 0000000..40760e5 --- /dev/null +++ b/sheet1/PGI_default.mk @@ -0,0 +1,93 @@ +# Basic Definitions for using PGI-compiler suite sequentially +# requires setting of COMPILER=PGI_ +# OPTIRUN = optirun + + +CC = pgcc +CXX = pgc++ +F77 = pgfortran +LINKER = ${CXX} + +# on mephisto: +#CXXFLAGS += -I/share/apps/atlas/include +#LINKFLAGS += -L/share/apps/atlas/lib +#LINKFLAGS += -lcblas -latlas + +#LINKFLAGS += -lblas +# The header must be wrapped in extern "C" so that g++ finds everything. + +WARNINGS = -Minform=warn +# -Wall -pedantic -Wextra -Weffc++ -Woverloaded-virtual -W -Wfloat-equal -Wshadow -Wredundant-decls +# -pedantic -Wunreachable-code -Wextra -Winline +# -Wunreachable-code + +#PGI_PROFILING = -Minfo=ccff,loop,vect,opt,intensity,mp,accel +PGI_PROFILING = -Minfo=ccff,accel,ipa,loop,lre,mp,opt,par,unified,vect,intensity +# -Minfo +# -Mprof=time +# -Mprof=lines +# take care with option -Msafeptr +CXXFLAGS += -O3 -std=c++17 ${WARNINGS} +#CXXFLAGS += -O3 -std=c++11 -DNDEBUG ${PGI_PROFILING} ${WARNINGS} +# -fastsse -fargument-noalias ${WARNINGS} -msse3 -vec-report=3 + +default: ${PROGRAM} + +${PROGRAM}: ${OBJECTS} + $(LINKER) $^ ${LINKFLAGS} -o $@ + +clean: + @rm -f ${PROGRAM} ${OBJECTS} + +clean_all:: clean + @rm -f *_ *~ *.bak *.log *.out *.tar + +run: clean ${PROGRAM} + ./${PROGRAM} + +# tar the current directory +MY_DIR = `basename ${PWD}` +tar: clean_all + @echo "Tar the directory: " ${MY_DIR} + @cd .. 
;\ + tar cf ${MY_DIR}.tar ${MY_DIR} *default.mk ;\ + cd ${MY_DIR} +# tar cf `basename ${PWD}`.tar * + +doc: + doxygen Doxyfile + +######################################################################### + +.cpp.o: + $(CXX) -c $(CXXFLAGS) -o $@ $< + +.c.o: + $(CC) -c $(CFLAGS) -o $@ $< + +.f.o: + $(F77) -c $(FFLAGS) -o $@ $< + +################################################################################################## +# # some tools +# # Simple run time profiling of your code +# # CXXFLAGS += -g -pg +# # LINKFLAGS += -pg + + +# Profiling options PGI, see: pgcollect -help +# CPU_PROF = -allcache +CPU_PROF = -time +# GPU_PROF = -cuda=gmem,branch,cc13 -cudainit +#GPU_PROF = -cuda=branch:cc20 +# +PROF_FILE = pgprof.out + +cache: prof + +prof: ${PROGRAM} + ${OPTIRUN} ${BINDIR}pgcollect $(CPU_PROF) ./$^ + ${OPTIRUN} ${BINDIR}pgprof -exe ./$^ $(PROF_FILE) & + +info: + pgaccelinfo -v