#include "check_env.h"
#include "mylib.h"
#include <cstdlib>          // atoi()
#include <cstring>          // strncmp()
#include <ctime>
#include <iostream>
#include <limits>           // numeric_limits
#include <omp.h>            // OpenMP
#include <sstream>
#include <string>
#include <vector>
using namespace std;

/**
 * @brief OpenMP benchmark driver.
 *
 * Configures the OpenMP runtime (schedule, thread count), reads the vector
 * length N from the command line (`-n <number>`), reports the threads that
 * were started, fills two vectors with x_i = i+1 and y_i = 1/x_i, times
 * NLOOPS calls of scalar_parallel(x, y) and prints timing figures. Finally
 * it prints the result of reduction_vec_append(5).
 *
 * @param argc  number of command line arguments
 * @param argv  command line arguments; `-n <number>` overrides the default N
 * @return 0 always
 */
int main(int argc, char const *argv[])
{
    omp_set_schedule(omp_sched_static, 2000000);
    //omp_set_schedule(omp_sched_dynamic, 1000000);
    //omp_set_schedule(omp_sched_guided, 1000000);
    //omp_set_schedule(omp_sched_auto, 1);  // chunk size does not matter for auto

    // Speedup for different number of cores (incl. hyperthreading)
    omp_set_num_threads(8);

    // Print number of available processors
    cout << "Number of available processors: " << omp_get_num_procs() << endl;
    // Currently executing parallel code? -> no
    cout << "Currently in parallel? " << omp_in_parallel() << endl;

    int const NLOOPS = 10;          // choose a value such that the benchmark runs at least 10 sec.
    unsigned int N = 500000001;

//##########################################################################
//   Read parameter from command line (C++ style)
    cout << "Checking command line parameters for: -n <number> " << endl;
    for (int i = 1; i < argc; i++)
    {
        cout << " arg[" << i << "] = " << argv[i] << endl;
        string const arg(argv[i]);
        if ("-n" == arg && i + 1 < argc)    // found "-n" followed by another parameter
        {
            // Consume the value as well, so that it is not reported as an
            // unknown argument in the next iteration.
            ++i;
            cout << " arg[" << i << "] = " << argv[i] << endl;

            // Checked conversion: the whole token must be a positive integer
            // that fits into N; otherwise the default N is kept.
            istringstream parser(argv[i]);
            long long value = 0;
            bool const valid = (parser >> value) && parser.eof()
                               && value > 0
                               && value <= static_cast<long long>(numeric_limits<unsigned int>::max());
            if (valid)
            {
                N = static_cast<unsigned int>(value);
            }
            else
            {
                cout << "Invalid value for -n: " << argv[i] << " (keeping N = " << N << ")\n";
                cout << "Correct call: " << argv[0] << " -n <number>\n";
            }
        }
        else
        {
            cout << "Correct call: " << argv[0] << " -n <number>\n";
        }
    }

    cout << "\nN = " << N << endl;

    check_env(argc, argv);
//########################################################################
    int nthreads = 0;                               // OpenMP: set by the master thread below
    #pragma omp parallel default(none) shared(cout,nthreads)
    {
        // Build the per-thread messages in private buffers first, so that
        // only the final output needs the critical section.
        stringstream inparallel;
        inparallel << "Currently in parallel? " << omp_in_parallel() << endl;

        int const th_id  = omp_get_thread_num();    // OpenMP
        int const nthrds = omp_get_num_threads();   // OpenMP
        stringstream ss;
        ss << "C++: Hello World from thread " << th_id << " / " << nthrds << endl;
        #pragma omp critical
        {
            cout << ss.str();                       // output to a shared resource
            cout << inparallel.str() << endl;
        }
        #pragma omp master
        nthreads = nthrds;                          // only the master thread stores the team size
    }
    cout << "   " << nthreads << "   threads have been started." << endl;

//##########################################################################
//  Memory allocation
    cout << "Memory allocation\n";

    vector<double> x(N), y(N);

    cout.precision(2);
    cout << 2.0 * N *sizeof(x[0]) / 1024 / 1024 / 1024 << " GByte Memory allocated\n";
    cout.precision(6);

//##########################################################################
//  Data initialization
//  Special: x_i = i+1;  y_i = 1/x_i  ==>  <x,y> == N
    for (unsigned int i = 0; i < N; ++i)
    {
        x[i] = i + 1;
        y[i] = 1.0 / x[i];
    }

//##########################################################################
    cout << "\nStart Benchmarking\n";

//  Do calculation
    double tstart = omp_get_wtime();                // OpenMP

    double sk(0.0);
    for (int i = 0; i < NLOOPS; ++i)
    {
        //sk = scalar(x, y);
        sk = scalar_parallel(x, y);
        //sk = scalar_trans(x, y);
        //sk = norm(x);
    }

    double t1 = omp_get_wtime() - tstart;           // OpenMP
    t1 /= NLOOPS;                                   // divide by number of function calls

//##########################################################################
//  Check the correct result
    cout << "\n = " << sk << endl;
    // NOTE(review): the cast truncates, so a result marginally below N
    // (floating point rounding) is reported as wrong -- confirm this is intended.
    if (static_cast<unsigned int>(sk) != N)
    {
        cout << "  !!   W R O N G  result   !!\n";
    }
    cout << endl;

//##########################################################################
//  Timings and Performance
    cout << endl;
    cout.precision(2);
    cout << "Total benchmarking time: " << t1*NLOOPS << endl;
    cout << "Timing in sec. : " << t1 << endl;
    cout << "GFLOPS : " << 2.0 * N / t1 / 1024 / 1024 / 1024 << endl;
    cout << "GiByte/s : " << 2.0 * N / t1 / 1024 / 1024 / 1024 * sizeof(x[0]) << endl;

//#########################################################################

    cout << "\n  Try the reduction with an STL-vektor \n";

    auto vr = reduction_vec_append(5);
    cout << "done\n";
    cout << vr << endl;

    return 0;
}   // memory for x and y will be deallocated by their destructors