#include <mpi.h>
Go to the source code of this file.
Functions | |
void | JacobiSolve_GPU_4 (const int nnode, const int nx, const int ny, const int neigh[], const int color, const MPI::Intracomm &icomm, const int id_d[], const int ik_d[], const float sk_d[], const float f_d[], float u_d[], float r_d[], float w_d[], float dd_d[], const dim3 dimGrid, const dim3 dimBlock) |
void | JacobiSolve_GPU (const int nnode, const int id[], const int ik[], const float sk[], const float f[], float u[]) |
float | dscapr_GPU (const int n, const float x[], const float y[]) |
Calculates the Euclidean inner product of two vectors on GPU. More... | |
void | vddiv_GPU (const int n, float x[], const float y[], const float z[]) |
Element-wise vector division x = y/z. More... | |
void | vdaxpy_GPU (const int n, float x[], const float y[], const float a, const float z[]) |
Element-wise vector multiplication x = y*z. A stride iy=0 for the source stride adds a constant value stored in y[0]. The same holds for vector z. More... | |
void | set_value_GPU (const int n, float x[], const float a) |
void | Defect_GPU (float w[], const float f[], const float u[], const int nnode, const int id[], const int ik[], const float sk[]) |
void | GetDiag_GPU (const int nnode, const int id[], const int ik[], const float sk[], float d[]) |
void Defect_GPU | ( | float | w[], |
const float | f[], | ||
const float | u[], | ||
const int | nnode, | ||
const int | id[], | ||
const int | ik[], | ||
const float | sk[] | ||
) |
float dscapr_GPU | ( | const int | n, |
const float | x[], | ||
const float | y[] | ||
) |
Calculates the Euclidean inner product of two vectors on GPU.
[in] | n | number of elements in both vectors. |
[in] | x | source vector on GPU |
[in] | y | source vector on GPU |
void GetDiag_GPU | ( | const int | nnode, |
const int | id[], | ||
const int | ik[], | ||
const float | sk[], | ||
float | d[] | ||
) |
Copies the diagonal entries of a CSR matrix (sk, id, ik) onto a vector d.
[in] | nnode | number of rows |
[in] | id | index vector containing the last entry in a CSR row |
[in] | ik | column index vector of CSR matrix |
[in] | sk | vector of non-zero entries of CSR matrix |
[out] | d | accumulated diagonal entries of CSR matrix |
void JacobiSolve_GPU | ( | const int | nnode, |
const int | id[], | ||
const int | ik[], | ||
const float | sk[], | ||
const float | f[], | ||
float | u[] | ||
) |
Solves the linear system of equations K u = f via the Jacobi iteration. We use a distributed symmetric CSR matrix K(sk, id, ik), and the initial guess of the solution is set to 0.
[in] | nnode | number of nodes and number of rows |
[in] | sk | vector of non-zero entries of CSR matrix |
[in] | id | index vector containing the last entry in a CSR row |
[in] | ik | column index vector of CSR matrix |
[in] | f | distributed local vector storing the right hand side |
[out] | u | accumulated local vector storing the solution. |
void JacobiSolve_GPU_4 | ( | const int | nnode, |
const int | nx, | ||
const int | ny, | ||
const int | neigh[], | ||
const int | color, | ||
const MPI::Intracomm & | icomm, | ||
const int | id_d[], | ||
const int | ik_d[], | ||
const float | sk_d[], | ||
const float | f_d[], | ||
float | u_d[], | ||
float | r_d[], | ||
float | w_d[], | ||
float | dd_d[], | ||
const dim3 | dimGrid, | ||
const dim3 | dimBlock | ||
) |
Solves the linear system of equations K u = f via the Jacobi iteration. We use a distributed symmetric CSR matrix K(sk, id, ik), and the initial guess of the solution is set to 0.
[in] | nnode | number of nodes and number of rows |
[in] | nx | local number of intervals in x-direction |
[in] | ny | local number of intervals in y-direction |
[in] | neigh | neighbors in parallel |
[in] | color | my color in parallel |
[in] | sk_d | on GPU: vector of non-zero entries of CSR matrix |
[in] | id_d | on GPU: index vector containing the last entry in a CSR row |
[in] | ik_d | on GPU: column index vector of CSR matrix |
[in] | f_d | on GPU: distributed local vector storing the right hand side |
[out] | u_d | on GPU: accumulated local vector storing the solution. |
[in,out] | r_d | on GPU: residuum (aux. vector) |
[in,out] | w_d | on GPU: correction (aux. vector) |
[in,out] | dd_d | on GPU: diagonal values (aux. vector) |
[in] | dimGrid | number of thread blocks in the grid |
[in] | dimBlock | number of threads per block |
void set_value_GPU | ( | const int | n, |
float | x[], | ||
const float | a | ||
) |
void vdaxpy_GPU | ( | const int | n, |
float | x[], | ||
const float | y[], | ||
const float | a, | ||
const float | z[] | ||
) |
Element-wise vector multiplication x = y*z. A stride iy=0 for the source stride adds a constant value stored in y[0]. The same holds for vector z.
[in] | n | number of elements to multiply |
[out] | x | target vector |
[in] | ix | stride for target vector x |
[in] | y | source vector |
[in] | iy | stride for source vector y |
[in] | z | source vector |
[in] | iz | stride for source vector z |
void vddiv_GPU | ( | const int | n, |
float | x[], | ||
const float | y[], | ||
const float | z[] | ||
) |
Element-wise vector division x = y/z.
[in] | n | number of elements to divide |
[out] | x | target vector |
[in] | y | source vector |
[in] | z | source vector |