Compiling Code:
> pgcc -fast -acc -ta=nvidia:cc2+,cuda5.5,fastmath skalar.cc -o main.PGI_
> ./main.PGI_
> optirun -v ./main.PGI_
How to parallelize the inner product with OpenACC:
// Original code for inner product:
//
// Sequential inner product of two arrays.
//   N    : number of leading elements of x and y that are combined
//   x, y : input arrays, each read at indices 0 .. N-1
// Returns the sum of x[i]*y[i] for i = 0 .. N-1; the result is 0.0 when the
// loop body never executes (N <= 0).
double scalar(const int N, const double x[], const double y[])
{
    double sum = 0.0;
    for (int i = 0; i < N; ++i)
    {
        sum += x[i] * y[i];
    }
    return sum;
}
// Caller sketch for the plain (non-OpenACC) version.
// The `...` lines are placeholders for code that is not shown here.
int main()
{
...
// Inner product of a and b over n entries; n, a and b are not declared in the
// visible lines.
double s = scalar(n,a,b);
...
}
// Local inner product whose accumulation loop is annotated for OpenACC.
//   N    : number of leading elements of x and y that are combined
//   x, y : input arrays, each read at indices 0 .. N-1
// Returns the sum of x[k]*y[k] for k = 0 .. N-1; the result is 0.0 when the
// loop body never executes.
double scalar(const int N, const double x[], const double y[])
{
    double dot = 0.0;
    // The directive applies to the for loop that follows it:
    //  - present_or_copyin: use x[0:N] and y[0:N] if they are already on the
    //    device, otherwise copy them in for this region;
    //  - independent: asserts that the iterations may run in any order;
    //  - reduction(+:dot): the partial sums are combined into dot.
#pragma acc kernels loop present_or_copyin(x[0:N], y[0:N]) independent reduction(+:dot)
    for (int k = 0; k < N; ++k)
    {
        dot += x[k] * y[k];
    }
    return dot;
}
// Caller sketch for the OpenACC version.
// The `...` lines are placeholders for code that is not shown here.
int main(int argc, char* argv[])
{
...
// Data region: the copyin clause names the first nnode entries of a and b, so
// they are copied to the device once on entry to the braces below.
// NOTE(review): the clause uses nnode while the call below passes n; neither
// declaration is visible here -- confirm both denote the same length.
#pragma acc data copyin(a[0:nnode],b[0:nnode])
{
...
// Called inside the data region; scalar() declares its arrays with
// present_or_copyin, which reuses data that is already on the device.
double s = scalar(n,a,b);
...
}
...
}
pgc++ -fast -acc -ta=nvidia:cc2+,cuda5.5,fastmath skalar.cpp -o main.PGI_