cuLite v0.3.1
A lite CUDA C++ Interface
Loading...
Searching...
No Matches
culite::CuBlasHandler Class Reference

The cuBLAS handler class. More...

Public Member Functions

 CuBlasHandler ()
 Constructor.
 ~CuBlasHandler ()
 Destructor.
cublasHandle_t handle ()
 Get the cuBLAS handle.
cublasPointerMode_t setPointerMode (cublasPointerMode_t mode)
 Set the pointer mode for cuBLAS operations.
cublasPointerMode_t pointerMode ()
 Get the current pointer mode.
template<typename T_Scalar>
void iamax (int_t n, const T_Scalar *x, int_t incx, int_t *result)
 Finds the index of the maximum absolute value element.
template<typename T_Scalar>
void asum (int_t n, const T_Scalar *x, int_t incx, typename TypeTraits< T_Scalar >::real_type *result)
 Computes the sum of absolute values.
template<typename T_Scalar>
void nrm2 (int_t n, const T_Scalar *x, int_t incx, typename TypeTraits< T_Scalar >::real_type *result)
 Computes the Euclidean norm (L2 norm).
template<typename T_Scalar>
void copy (int_t n, const T_Scalar *x, int_t incx, T_Scalar *y, int_t incy)
 Copies a vector to another vector.
template<typename T_Scalar>
void scal (int_t n, const T_Scalar *alpha, T_Scalar *x, int_t incx)
 Scales a vector by a scalar.
template<typename T_Scalar>
void dot (int_t n, const T_Scalar *x, int_t incx, const T_Scalar *y, int_t incy, T_Scalar *result)
 Computes the dot product of two vectors.
template<typename T_Scalar>
void dotc (int_t n, const T_Scalar *x, int_t incx, const T_Scalar *y, int_t incy, T_Scalar *result)
 Computes the conjugate dot product of two vectors.
template<typename T_Scalar>
void axpy (int_t n, const T_Scalar *alpha, const T_Scalar *x, int_t incx, T_Scalar *y, int_t incy)
 Computes a scalar times a vector plus a vector.
template<typename T_Scalar>
void geam (op_t opA, op_t opB, int_t m, int_t n, const T_Scalar *alpha, const T_Scalar *a, int_t lda, const T_Scalar *beta, const T_Scalar *b, int_t ldb, T_Scalar *c, int_t ldc)
 Performs parametrized matrix addition.
template<typename T_Scalar>
void dgmm (side_t side, int_t m, int_t n, const T_Scalar *a, int_t lda, const T_Scalar *x, int_t incx, T_Scalar *c, int_t ldc)
 Performs matrix-diagonal matrix multiplication.
template<typename T_Scalar>
void ger (int_t m, int_t n, const T_Scalar *alpha, const T_Scalar *x, int_t incx, const T_Scalar *y, int_t incy, T_Scalar *a, int_t lda)
 Performs general rank-1 update (unconjugated).
template<typename T_Scalar>
void gerc (int_t m, int_t n, const T_Scalar *alpha, const T_Scalar *x, int_t incx, const T_Scalar *y, int_t incy, T_Scalar *a, int_t lda)
 Performs general rank-1 update (conjugated).
template<typename T_Scalar>
void syr (uplo_t uplo, int_t n, const T_Scalar *alpha, const T_Scalar *x, int_t incx, T_Scalar *a, int_t lda)
 Performs symmetric rank-1 update.
template<typename T_Scalar>
void her (uplo_t uplo, int_t n, const typename TypeTraits< T_Scalar >::real_type *alpha, const T_Scalar *x, int_t incx, T_Scalar *a, int_t lda)
 Performs Hermitian rank-1 update.
template<typename T_Scalar>
void gemv (op_t op, int_t m, int_t n, const T_Scalar *alpha, const T_Scalar *a, int_t lda, const T_Scalar *x, int_t incx, const T_Scalar *beta, T_Scalar *y, int_t incy)
 Performs general matrix-vector multiplication.
template<typename T_Scalar>
void symv (uplo_t uplo, int_t n, const T_Scalar *alpha, const T_Scalar *a, int_t lda, const T_Scalar *x, int_t incx, const T_Scalar *beta, T_Scalar *y, int_t incy)
 Performs symmetric matrix-vector multiplication.
template<typename T_Scalar>
void hemv (uplo_t uplo, int_t n, const T_Scalar *alpha, const T_Scalar *a, int_t lda, const T_Scalar *x, int_t incx, const T_Scalar *beta, T_Scalar *y, int_t incy)
 Performs Hermitian matrix-vector multiplication.
template<typename T_Scalar>
void trmv (uplo_t uplo, op_t trans, diag_t diag, int_t n, const T_Scalar *a, int_t lda, T_Scalar *x, int_t incx)
 Performs triangular matrix-vector multiplication.
template<typename T_Scalar>
void trsv (uplo_t uplo, op_t trans, diag_t diag, int_t n, const T_Scalar *a, int_t lda, T_Scalar *x, int_t incx)
 Solves a triangular system of equations.
template<typename T_Scalar>
void gemm (op_t transa, op_t transb, int_t m, int_t n, int_t k, const T_Scalar *alpha, const T_Scalar *a, int_t lda, const T_Scalar *b, int_t ldb, const T_Scalar *beta, T_Scalar *c, int_t ldc)
 Performs general matrix-matrix multiplication.
template<typename T_Scalar>
void symm (side_t side, uplo_t uplo, int_t m, int_t n, const T_Scalar *alpha, const T_Scalar *a, int_t lda, const T_Scalar *b, int_t ldb, const T_Scalar *beta, T_Scalar *c, int_t ldc)
 Performs symmetric matrix-matrix multiplication.
template<typename T_Scalar>
void hemm (side_t side, uplo_t uplo, int_t m, int_t n, const T_Scalar *alpha, const T_Scalar *a, int_t lda, const T_Scalar *b, int_t ldb, const T_Scalar *beta, T_Scalar *c, int_t ldc)
 Performs Hermitian matrix-matrix multiplication.
template<typename T_Scalar>
void trmm (side_t side, uplo_t uplo, op_t trans, diag_t diag, int_t m, int_t n, const T_Scalar *alpha, const T_Scalar *a, int_t lda, const T_Scalar *b, int_t ldb, T_Scalar *c, int_t ldc)
 Performs triangular matrix-matrix multiplication.
template<typename T_Scalar>
void trsm (side_t side, uplo_t uplo, op_t trans, diag_t diag, int_t m, int_t n, const T_Scalar *alpha, const T_Scalar *a, int_t lda, T_Scalar *b, int_t ldb)
 Solves a triangular matrix equation.
template<typename T_Scalar>
void syrk (uplo_t uplo, op_t trans, int_t n, int_t k, const T_Scalar *alpha, const T_Scalar *a, int_t lda, const T_Scalar *beta, T_Scalar *c, int_t ldc)
 Performs symmetric rank-k update.
template<typename T_Scalar>
void herk (uplo_t uplo, op_t trans, int_t n, int_t k, const typename TypeTraits< T_Scalar >::real_type *alpha, const T_Scalar *a, int_t lda, const typename TypeTraits< T_Scalar >::real_type *beta, T_Scalar *c, int_t ldc)
 Performs Hermitian rank-k update.
template<typename T_Scalar>
void syrkx (uplo_t uplo, op_t trans, int_t n, int_t k, const T_Scalar *alpha, const T_Scalar *a, int_t lda, const T_Scalar *b, int_t ldb, const T_Scalar *beta, T_Scalar *c, int_t ldc)
 Performs symmetric rank-k update with two matrices.
template<typename T_Scalar>
void herkx (uplo_t uplo, op_t trans, int_t n, int_t k, const T_Scalar *alpha, const T_Scalar *a, int_t lda, const T_Scalar *b, int_t ldb, const typename TypeTraits< T_Scalar >::real_type *beta, T_Scalar *c, int_t ldc)
 Performs Hermitian rank-k update with two matrices.

Detailed Description

The cuBLAS handler class.

This class provides a wrapper around the cuBLAS library for performing Basic Linear Algebra Subprograms (BLAS) operations on GPU devices. It manages the cuBLAS handle and provides convenient interfaces for vector and matrix operations.

Constructor & Destructor Documentation

◆ CuBlasHandler()

culite::CuBlasHandler::CuBlasHandler ( )

Constructor.

Initializes the cuBLAS handle and internal state.

◆ ~CuBlasHandler()

culite::CuBlasHandler::~CuBlasHandler ( )

Destructor.

Destroys the cuBLAS handle and releases all allocated resources.

Member Function Documentation

◆ handle()

cublasHandle_t culite::CuBlasHandler::handle ( )

Get the cuBLAS handle.

Returns
The cuBLAS handle.

◆ setPointerMode()

cublasPointerMode_t culite::CuBlasHandler::setPointerMode ( cublasPointerMode_t mode)

Set the pointer mode for cuBLAS operations.

Changes the pointer mode and returns the previous mode. The pointer mode determines whether scalar arguments (such as alpha and beta, as well as scalar results such as those of dot or nrm2) are read from and written to host memory or device memory.

Parameters
[in]modeThe new pointer mode to set.
Returns
The pointer mode that was active before this call.

◆ pointerMode()

cublasPointerMode_t culite::CuBlasHandler::pointerMode ( )

Get the current pointer mode.

Returns the current pointer mode setting for cuBLAS operations.

Returns
The current pointer mode.

◆ iamax()

template<typename T_Scalar>
void culite::CuBlasHandler::iamax ( int_t n,
const T_Scalar * x,
int_t incx,
int_t * result )

Finds the index of the maximum absolute value element.

Computes the index of the element with the maximum absolute value in vector x.

Template Parameters
T_ScalarThe scalar type of the vector elements.
Parameters
[in]nThe number of elements in the vector.
[in]xPointer to the device vector.
[in]incxThe stride between consecutive elements of x.
[out]resultPointer to store the result index (1-based).

◆ asum()

template<typename T_Scalar>
void culite::CuBlasHandler::asum ( int_t n,
const T_Scalar * x,
int_t incx,
typename TypeTraits< T_Scalar >::real_type * result )

Computes the sum of absolute values.

Computes \( \sum_{i=1}^{n} |x_i| \) for the elements of vector x.

Template Parameters
T_ScalarThe scalar type of the vector elements.
Parameters
[in]nThe number of elements in the vector.
[in]xPointer to the device vector.
[in]incxThe stride between consecutive elements of x.
[out]resultPointer to store the sum of absolute values.

◆ nrm2()

template<typename T_Scalar>
void culite::CuBlasHandler::nrm2 ( int_t n,
const T_Scalar * x,
int_t incx,
typename TypeTraits< T_Scalar >::real_type * result )

Computes the Euclidean norm (L2 norm).

Computes \( \sqrt{\sum_{i=1}^{n} |x_i|^2} \) for the elements of vector x.

Template Parameters
T_ScalarThe scalar type of the vector elements.
Parameters
[in]nThe number of elements in the vector.
[in]xPointer to the device vector.
[in]incxThe stride between consecutive elements of x.
[out]resultPointer to store the Euclidean norm.

◆ copy()

template<typename T_Scalar>
void culite::CuBlasHandler::copy ( int_t n,
const T_Scalar * x,
int_t incx,
T_Scalar * y,
int_t incy )

Copies a vector to another vector.

Copies vector x to vector y, computing \( y = x \).

Template Parameters
T_ScalarThe scalar type of the vector elements.
Parameters
[in]nThe number of elements in the vectors.
[in]xPointer to the source device vector.
[in]incxThe stride between consecutive elements of x.
[out]yPointer to the destination device vector.
[in]incyThe stride between consecutive elements of y.

◆ scal()

template<typename T_Scalar>
void culite::CuBlasHandler::scal ( int_t n,
const T_Scalar * alpha,
T_Scalar * x,
int_t incx )

Scales a vector by a scalar.

Computes \( x = \alpha \cdot x \) for vector x.

Template Parameters
T_ScalarThe scalar type of the vector elements.
Parameters
[in]nThe number of elements in the vector.
[in]alphaPointer to the scalar multiplier.
[in,out]xPointer to the device vector (modified in-place).
[in]incxThe stride between consecutive elements of x.

◆ dot()

template<typename T_Scalar>
void culite::CuBlasHandler::dot ( int_t n,
const T_Scalar * x,
int_t incx,
const T_Scalar * y,
int_t incy,
T_Scalar * result )

Computes the dot product of two vectors.

Computes \( result = x^T \cdot y = \sum_{i=1}^{n} x_i \cdot y_i \).

Template Parameters
T_ScalarThe scalar type of the vector elements.
Parameters
[in]nThe number of elements in the vectors.
[in]xPointer to the first vector on device.
[in]incxThe stride between consecutive elements of x.
[in]yPointer to the second vector on device.
[in]incyThe stride between consecutive elements of y.
[out]resultPointer to store the dot product result.

◆ dotc()

template<typename T_Scalar>
void culite::CuBlasHandler::dotc ( int_t n,
const T_Scalar * x,
int_t incx,
const T_Scalar * y,
int_t incy,
T_Scalar * result )

Computes the conjugate dot product of two vectors.

Computes \( result = x^H \cdot y = \sum_{i=1}^{n} \overline{x_i} \cdot y_i \), where \( \overline{x_i} \) denotes the complex conjugate of \( x_i \).

Template Parameters
T_ScalarThe scalar type of the vector elements.
Parameters
[in]nThe number of elements in the vectors.
[in]xPointer to the first vector on device (will be conjugated).
[in]incxThe stride between consecutive elements of x.
[in]yPointer to the second vector on device.
[in]incyThe stride between consecutive elements of y.
[out]resultPointer to store the conjugate dot product result.

◆ axpy()

template<typename T_Scalar>
void culite::CuBlasHandler::axpy ( int_t n,
const T_Scalar * alpha,
const T_Scalar * x,
int_t incx,
T_Scalar * y,
int_t incy )

Computes a scalar times a vector plus a vector.

Computes \( y = \alpha \cdot x + y \), updating vector y in-place.

Template Parameters
T_ScalarThe scalar type of the vector elements.
Parameters
[in]nThe number of elements in the vectors.
[in]alphaPointer to the scalar multiplier.
[in]xPointer to the input vector on device.
[in]incxThe stride between consecutive elements of x.
[in,out]yPointer to the vector on device (modified in-place).
[in]incyThe stride between consecutive elements of y.

◆ geam()

template<typename T_Scalar>
void culite::CuBlasHandler::geam ( op_t opA,
op_t opB,
int_t m,
int_t n,
const T_Scalar * alpha,
const T_Scalar * a,
int_t lda,
const T_Scalar * beta,
const T_Scalar * b,
int_t ldb,
T_Scalar * c,
int_t ldc )

Performs parametrized matrix addition.

Computes \( C = \alpha \cdot op_A(A) + \beta \cdot op_B(B) \), where \( op_A \) and \( op_B \) can independently be no-transpose, transpose, or conjugate transpose operations.

Template Parameters
T_ScalarThe scalar type of the matrix elements.
Parameters
[in]opAThe operation to apply to matrix a (no-transpose, transpose, or conjugate transpose).
[in]opBThe operation to apply to matrix b (no-transpose, transpose, or conjugate transpose).
[in]mThe number of rows in matrices \( op_A(a) \), \( op_B(b) \), and c.
[in]nThe number of columns in matrices \( op_A(a) \), \( op_B(b) \), and c.
[in]alphaPointer to the scalar multiplier for matrix a.
[in]aPointer to matrix a on device.
[in]ldaThe leading dimension of matrix a.
[in]betaPointer to the scalar multiplier for matrix b.
[in]bPointer to matrix b on device.
[in]ldbThe leading dimension of matrix b.
[out]cPointer to matrix c on device (result).
[in]ldcThe leading dimension of matrix c.

◆ dgmm()

template<typename T_Scalar>
void culite::CuBlasHandler::dgmm ( side_t side,
int_t m,
int_t n,
const T_Scalar * a,
int_t lda,
const T_Scalar * x,
int_t incx,
T_Scalar * c,
int_t ldc )

Performs matrix-diagonal matrix multiplication.

Computes \( C = A \cdot diag(x) \) if side is side_t::Right, or \( C = diag(x) \cdot A \) if side is side_t::Left, where \( diag(x) \) is a diagonal matrix constructed from vector x.

Template Parameters
T_ScalarThe scalar type of the matrix and vector elements.
Parameters
[in]sideSpecifies whether the diagonal matrix multiplies from the left or right.
[in]mThe number of rows in matrices a and c.
[in]nThe number of columns in matrices a and c.
[in]aPointer to matrix a on device.
[in]ldaThe leading dimension of matrix a.
[in]xPointer to the diagonal vector on device.
[in]incxThe stride between consecutive elements of x.
[out]cPointer to matrix c on device (result).
[in]ldcThe leading dimension of matrix c.

◆ ger()

template<typename T_Scalar>
void culite::CuBlasHandler::ger ( int_t m,
int_t n,
const T_Scalar * alpha,
const T_Scalar * x,
int_t incx,
const T_Scalar * y,
int_t incy,
T_Scalar * a,
int_t lda )

Performs general rank-1 update (unconjugated).

Computes \( A = \alpha \cdot x \cdot y^T + A \).

Template Parameters
T_ScalarThe scalar type of the vector and matrix elements.
Parameters
[in]mThe number of rows in matrix a.
[in]nThe number of columns in matrix a.
[in]alphaPointer to the scalar multiplier.
[in]xPointer to the first vector on device.
[in]incxThe stride between consecutive elements of x.
[in]yPointer to the second vector on device.
[in]incyThe stride between consecutive elements of y.
[in,out]aPointer to matrix a on device (modified in-place).
[in]ldaThe leading dimension of matrix a.

◆ gerc()

template<typename T_Scalar>
void culite::CuBlasHandler::gerc ( int_t m,
int_t n,
const T_Scalar * alpha,
const T_Scalar * x,
int_t incx,
const T_Scalar * y,
int_t incy,
T_Scalar * a,
int_t lda )

Performs general rank-1 update (conjugated).

Computes \( A = \alpha \cdot x \cdot y^H + A \), where \( y^H \) denotes the conjugate transpose of vector y.

Template Parameters
T_ScalarThe scalar type of the vector and matrix elements.
Parameters
[in]mThe number of rows in matrix a.
[in]nThe number of columns in matrix a.
[in]alphaPointer to the scalar multiplier.
[in]xPointer to the first vector on device.
[in]incxThe stride between consecutive elements of x.
[in]yPointer to the second vector on device (will be conjugated).
[in]incyThe stride between consecutive elements of y.
[in,out]aPointer to matrix a on device (modified in-place).
[in]ldaThe leading dimension of matrix a.

◆ syr()

template<typename T_Scalar>
void culite::CuBlasHandler::syr ( uplo_t uplo,
int_t n,
const T_Scalar * alpha,
const T_Scalar * x,
int_t incx,
T_Scalar * a,
int_t lda )

Performs symmetric rank-1 update.

Computes \( A = \alpha \cdot x \cdot x^T + A \), where \( A \) is symmetric.

Template Parameters
T_ScalarThe scalar type of the vector and matrix elements.
Parameters
[in]uploSpecifies whether the upper or lower triangular part of a is referenced.
[in]nThe number of rows and columns in matrix a.
[in]alphaPointer to the scalar multiplier.
[in]xPointer to the vector on device.
[in]incxThe stride between consecutive elements of x.
[in,out]aPointer to the symmetric matrix a on device (modified in-place).
[in]ldaThe leading dimension of matrix a.

◆ her()

template<typename T_Scalar>
void culite::CuBlasHandler::her ( uplo_t uplo,
int_t n,
const typename TypeTraits< T_Scalar >::real_type * alpha,
const T_Scalar * x,
int_t incx,
T_Scalar * a,
int_t lda )

Performs Hermitian rank-1 update.

Computes \( A = \alpha \cdot x \cdot x^H + A \), where \( A \) is Hermitian.

Template Parameters
T_ScalarThe scalar type of the vector and matrix elements.
Parameters
[in]uploSpecifies whether the upper or lower triangular part of a is referenced.
[in]nThe number of rows and columns in matrix a.
[in]alphaPointer to the real scalar multiplier.
[in]xPointer to the vector on device.
[in]incxThe stride between consecutive elements of x.
[in,out]aPointer to the Hermitian matrix a on device (modified in-place).
[in]ldaThe leading dimension of matrix a.

◆ gemv()

template<typename T_Scalar>
void culite::CuBlasHandler::gemv ( op_t op,
int_t m,
int_t n,
const T_Scalar * alpha,
const T_Scalar * a,
int_t lda,
const T_Scalar * x,
int_t incx,
const T_Scalar * beta,
T_Scalar * y,
int_t incy )

Performs general matrix-vector multiplication.

Computes \( y = \alpha \cdot op(A) \cdot x + \beta \cdot y \).

Template Parameters
T_ScalarThe scalar type of the matrix and vector elements.
Parameters
[in]opThe operation to apply to matrix a (no-transpose, transpose, or conjugate transpose).
[in]mThe number of rows in matrix a.
[in]nThe number of columns in matrix a.
[in]alphaPointer to the scalar multiplier.
[in]aPointer to matrix a on device.
[in]ldaThe leading dimension of matrix a.
[in]xPointer to the input vector on device.
[in]incxThe stride between consecutive elements of x.
[in]betaPointer to the scalar multiplier for y.
[in,out]yPointer to the result vector on device (modified in-place).
[in]incyThe stride between consecutive elements of y.

◆ symv()

template<typename T_Scalar>
void culite::CuBlasHandler::symv ( uplo_t uplo,
int_t n,
const T_Scalar * alpha,
const T_Scalar * a,
int_t lda,
const T_Scalar * x,
int_t incx,
const T_Scalar * beta,
T_Scalar * y,
int_t incy )

Performs symmetric matrix-vector multiplication.

Computes \( y = \alpha \cdot A \cdot x + \beta \cdot y \), where \( A \) is symmetric.

Template Parameters
T_ScalarThe scalar type of the matrix and vector elements.
Parameters
[in]uploSpecifies whether the upper or lower triangular part of a is referenced.
[in]nThe number of rows and columns in matrix a.
[in]alphaPointer to the scalar multiplier.
[in]aPointer to the symmetric matrix a on device.
[in]ldaThe leading dimension of matrix a.
[in]xPointer to the input vector on device.
[in]incxThe stride between consecutive elements of x.
[in]betaPointer to the scalar multiplier for y.
[in,out]yPointer to the result vector on device (modified in-place).
[in]incyThe stride between consecutive elements of y.

◆ hemv()

template<typename T_Scalar>
void culite::CuBlasHandler::hemv ( uplo_t uplo,
int_t n,
const T_Scalar * alpha,
const T_Scalar * a,
int_t lda,
const T_Scalar * x,
int_t incx,
const T_Scalar * beta,
T_Scalar * y,
int_t incy )

Performs Hermitian matrix-vector multiplication.

Computes \( y = \alpha \cdot A \cdot x + \beta \cdot y \), where \( A \) is Hermitian.

Template Parameters
T_ScalarThe scalar type of the matrix and vector elements.
Parameters
[in]uploSpecifies whether the upper or lower triangular part of a is referenced.
[in]nThe number of rows and columns in matrix a.
[in]alphaPointer to the scalar multiplier.
[in]aPointer to the Hermitian matrix a on device.
[in]ldaThe leading dimension of matrix a.
[in]xPointer to the input vector on device.
[in]incxThe stride between consecutive elements of x.
[in]betaPointer to the scalar multiplier for y.
[in,out]yPointer to the result vector on device (modified in-place).
[in]incyThe stride between consecutive elements of y.

◆ trmv()

template<typename T_Scalar>
void culite::CuBlasHandler::trmv ( uplo_t uplo,
op_t trans,
diag_t diag,
int_t n,
const T_Scalar * a,
int_t lda,
T_Scalar * x,
int_t incx )

Performs triangular matrix-vector multiplication.

Computes \( x = op(A) \cdot x \), where \( A \) is triangular.

Template Parameters
T_ScalarThe scalar type of the matrix and vector elements.
Parameters
[in]uploSpecifies whether the upper or lower triangular part of a is used.
[in]transThe operation to apply to matrix a (no-transpose, transpose, or conjugate transpose).
[in]diagSpecifies whether the matrix is unit triangular or not.
[in]nThe number of rows and columns in matrix a.
[in]aPointer to the triangular matrix a on device.
[in]ldaThe leading dimension of matrix a.
[in,out]xPointer to the vector on device (modified in-place).
[in]incxThe stride between consecutive elements of x.

◆ trsv()

template<typename T_Scalar>
void culite::CuBlasHandler::trsv ( uplo_t uplo,
op_t trans,
diag_t diag,
int_t n,
const T_Scalar * a,
int_t lda,
T_Scalar * x,
int_t incx )

Solves a triangular system of equations.

Solves \( op(A) \cdot x = b \), where \( A \) is triangular and x contains \( b \) on entry.

Template Parameters
T_ScalarThe scalar type of the matrix and vector elements.
Parameters
[in]uploSpecifies whether the upper or lower triangular part of a is used.
[in]transThe operation to apply to matrix a (no-transpose, transpose, or conjugate transpose).
[in]diagSpecifies whether the matrix is unit triangular or not.
[in]nThe number of rows and columns in matrix a.
[in]aPointer to the triangular matrix a on device.
[in]ldaThe leading dimension of matrix a.
[in,out]xPointer to the vector on device containing \( b \) on entry and the solution on exit.
[in]incxThe stride between consecutive elements of x.

◆ gemm()

template<typename T_Scalar>
void culite::CuBlasHandler::gemm ( op_t transa,
op_t transb,
int_t m,
int_t n,
int_t k,
const T_Scalar * alpha,
const T_Scalar * a,
int_t lda,
const T_Scalar * b,
int_t ldb,
const T_Scalar * beta,
T_Scalar * c,
int_t ldc )

Performs general matrix-matrix multiplication.

Computes \( C = \alpha \cdot op(A) \cdot op(B) + \beta \cdot C \).

Template Parameters
T_ScalarThe scalar type of the matrix elements.
Parameters
[in]transaThe operation to apply to matrix a (no-transpose, transpose, or conjugate transpose).
[in]transbThe operation to apply to matrix b (no-transpose, transpose, or conjugate transpose).
[in]mThe number of rows in matrices \( op(A) \) and c.
[in]nThe number of columns in matrices \( op(B) \) and c.
[in]kThe number of columns in \( op(A) \) and rows in \( op(B) \).
[in]alphaPointer to the scalar multiplier.
[in]aPointer to matrix a on device.
[in]ldaThe leading dimension of matrix a.
[in]bPointer to matrix b on device.
[in]ldbThe leading dimension of matrix b.
[in]betaPointer to the scalar multiplier for c.
[in,out]cPointer to matrix c on device (modified in-place).
[in]ldcThe leading dimension of matrix c.

◆ symm()

template<typename T_Scalar>
void culite::CuBlasHandler::symm ( side_t side,
uplo_t uplo,
int_t m,
int_t n,
const T_Scalar * alpha,
const T_Scalar * a,
int_t lda,
const T_Scalar * b,
int_t ldb,
const T_Scalar * beta,
T_Scalar * c,
int_t ldc )

Performs symmetric matrix-matrix multiplication.

Computes \( C = \alpha \cdot A \cdot B + \beta \cdot C \) if side is Left, or \( C = \alpha \cdot B \cdot A + \beta \cdot C \) if side is Right, where \( A \) is symmetric.

Template Parameters
T_ScalarThe scalar type of the matrix elements.
Parameters
[in]sideSpecifies whether the symmetric matrix a multiplies from the left or right.
[in]uploSpecifies whether the upper or lower triangular part of a is referenced.
[in]mThe number of rows in matrices b and c.
[in]nThe number of columns in matrices b and c.
[in]alphaPointer to the scalar multiplier.
[in]aPointer to the symmetric matrix a on device.
[in]ldaThe leading dimension of matrix a.
[in]bPointer to matrix b on device.
[in]ldbThe leading dimension of matrix b.
[in]betaPointer to the scalar multiplier for c.
[in,out]cPointer to matrix c on device (modified in-place).
[in]ldcThe leading dimension of matrix c.

◆ hemm()

template<typename T_Scalar>
void culite::CuBlasHandler::hemm ( side_t side,
uplo_t uplo,
int_t m,
int_t n,
const T_Scalar * alpha,
const T_Scalar * a,
int_t lda,
const T_Scalar * b,
int_t ldb,
const T_Scalar * beta,
T_Scalar * c,
int_t ldc )

Performs Hermitian matrix-matrix multiplication.

Computes \( C = \alpha \cdot A \cdot B + \beta \cdot C \) if side is Left, or \( C = \alpha \cdot B \cdot A + \beta \cdot C \) if side is Right, where \( A \) is Hermitian.

Template Parameters
T_ScalarThe scalar type of the matrix elements.
Parameters
[in]sideSpecifies whether the Hermitian matrix a multiplies from the left or right.
[in]uploSpecifies whether the upper or lower triangular part of a is referenced.
[in]mThe number of rows in matrices b and c.
[in]nThe number of columns in matrices b and c.
[in]alphaPointer to the scalar multiplier.
[in]aPointer to the Hermitian matrix a on device.
[in]ldaThe leading dimension of matrix a.
[in]bPointer to matrix b on device.
[in]ldbThe leading dimension of matrix b.
[in]betaPointer to the scalar multiplier for c.
[in,out]cPointer to matrix c on device (modified in-place).
[in]ldcThe leading dimension of matrix c.

◆ trmm()

template<typename T_Scalar>
void culite::CuBlasHandler::trmm ( side_t side,
uplo_t uplo,
op_t trans,
diag_t diag,
int_t m,
int_t n,
const T_Scalar * alpha,
const T_Scalar * a,
int_t lda,
const T_Scalar * b,
int_t ldb,
T_Scalar * c,
int_t ldc )

Performs triangular matrix-matrix multiplication.

Computes \( C = \alpha \cdot op(A) \cdot B \) if side is Left, or \( C = \alpha \cdot B \cdot op(A) \) if side is Right, where \( A \) is triangular.

Template Parameters
T_ScalarThe scalar type of the matrix elements.
Parameters
[in]sideSpecifies whether the triangular matrix a multiplies from the left or right.
[in]uploSpecifies whether the upper or lower triangular part of a is used.
[in]transThe operation to apply to matrix a (no-transpose, transpose, or conjugate transpose).
[in]diagSpecifies whether the matrix is unit triangular or not.
[in]mThe number of rows in matrices b and c.
[in]nThe number of columns in matrices b and c.
[in]alphaPointer to the scalar multiplier.
[in]aPointer to the triangular matrix a on device.
[in]ldaThe leading dimension of matrix a.
[in]bPointer to matrix b on device.
[in]ldbThe leading dimension of matrix b.
[out]cPointer to matrix c on device (result).
[in]ldcThe leading dimension of matrix c.

◆ trsm()

template<typename T_Scalar>
void culite::CuBlasHandler::trsm ( side_t side,
uplo_t uplo,
op_t trans,
diag_t diag,
int_t m,
int_t n,
const T_Scalar * alpha,
const T_Scalar * a,
int_t lda,
T_Scalar * b,
int_t ldb )

Solves a triangular matrix equation.

Solves \( op(A) \cdot X = \alpha \cdot B \) if side is Left, or \( X \cdot op(A) = \alpha \cdot B \) if side is Right, where \( A \) is triangular. The solution \( X \) overwrites b on exit.

Template Parameters
T_ScalarThe scalar type of the matrix elements.
Parameters
[in]sideSpecifies whether the triangular matrix a is on the left or right.
[in]uploSpecifies whether the upper or lower triangular part of a is used.
[in]transThe operation to apply to matrix a (no-transpose, transpose, or conjugate transpose).
[in]diagSpecifies whether the matrix is unit triangular or not.
[in]mThe number of rows in matrix b.
[in]nThe number of columns in matrix b.
[in]alphaPointer to the scalar multiplier.
[in]aPointer to the triangular matrix a on device.
[in]ldaThe leading dimension of matrix a.
[in,out]bPointer to matrix b on device (contains right-hand side on entry, solution on exit).
[in]ldbThe leading dimension of matrix b.

◆ syrk()

template<typename T_Scalar>
void culite::CuBlasHandler::syrk ( uplo_t uplo,
op_t trans,
int_t n,
int_t k,
const T_Scalar * alpha,
const T_Scalar * a,
int_t lda,
const T_Scalar * beta,
T_Scalar * c,
int_t ldc )

Performs symmetric rank-k update.

Computes \( C = \alpha \cdot A \cdot A^T + \beta \cdot C \) if trans is NoTrans, or \( C = \alpha \cdot A^T \cdot A + \beta \cdot C \) if trans is Trans, where \( C \) is symmetric.

Template Parameters
T_ScalarThe scalar type of the matrix elements.
Parameters
[in]uploSpecifies whether the upper or lower triangular part of c is updated.
[in]transThe operation to apply to matrix a (no-transpose or transpose).
[in]nThe number of rows and columns in matrix c.
[in]kThe inner dimension of the multiplication.
[in]alphaPointer to the scalar multiplier.
[in]aPointer to matrix a on device.
[in]ldaThe leading dimension of matrix a.
[in]betaPointer to the scalar multiplier for c.
[in,out]cPointer to the symmetric matrix c on device (modified in-place).
[in]ldcThe leading dimension of matrix c.

◆ herk()

template<typename T_Scalar>
void culite::CuBlasHandler::herk ( uplo_t uplo,
op_t trans,
int_t n,
int_t k,
const typename TypeTraits< T_Scalar >::real_type * alpha,
const T_Scalar * a,
int_t lda,
const typename TypeTraits< T_Scalar >::real_type * beta,
T_Scalar * c,
int_t ldc )

Performs Hermitian rank-k update.

Computes \( C = \alpha \cdot A \cdot A^H + \beta \cdot C \) if trans is NoTrans, or \( C = \alpha \cdot A^H \cdot A + \beta \cdot C \) if trans is ConjTrans, where \( C \) is Hermitian.

Template Parameters
T_ScalarThe scalar type of the matrix elements.
Parameters
[in]uploSpecifies whether the upper or lower triangular part of c is updated.
[in]transThe operation to apply to matrix a (no-transpose or conjugate transpose).
[in]nThe number of rows and columns in matrix c.
[in]kThe inner dimension of the multiplication.
[in]alphaPointer to the real scalar multiplier.
[in]aPointer to matrix a on device.
[in]ldaThe leading dimension of matrix a.
[in]betaPointer to the real scalar multiplier for c.
[in,out]cPointer to the Hermitian matrix c on device (modified in-place).
[in]ldcThe leading dimension of matrix c.

◆ syrkx()

template<typename T_Scalar>
void culite::CuBlasHandler::syrkx ( uplo_t uplo,
op_t trans,
int_t n,
int_t k,
const T_Scalar * alpha,
const T_Scalar * a,
int_t lda,
const T_Scalar * b,
int_t ldb,
const T_Scalar * beta,
T_Scalar * c,
int_t ldc )

Performs symmetric rank-k update with two matrices.

Computes \( C = \alpha \cdot A \cdot B^T + \beta \cdot C \) if trans is NoTrans, or \( C = \alpha \cdot A^T \cdot B + \beta \cdot C \) if trans is Trans, where \( C \) is symmetric.

Template Parameters
T_ScalarThe scalar type of the matrix elements.
Parameters
[in]uploSpecifies whether the upper or lower triangular part of c is updated.
[in]transThe operation to apply to matrices a and b (no-transpose or transpose).
[in]nThe number of rows and columns in matrix c.
[in]kThe inner dimension of the multiplication.
[in]alphaPointer to the scalar multiplier.
[in]aPointer to matrix a on device.
[in]ldaThe leading dimension of matrix a.
[in]bPointer to matrix b on device.
[in]ldbThe leading dimension of matrix b.
[in]betaPointer to the scalar multiplier for c.
[in,out]cPointer to the symmetric matrix c on device (modified in-place).
[in]ldcThe leading dimension of matrix c.

◆ herkx()

template<typename T_Scalar>
void culite::CuBlasHandler::herkx ( uplo_t uplo,
op_t trans,
int_t n,
int_t k,
const T_Scalar * alpha,
const T_Scalar * a,
int_t lda,
const T_Scalar * b,
int_t ldb,
const typename TypeTraits< T_Scalar >::real_type * beta,
T_Scalar * c,
int_t ldc )

Performs Hermitian rank-k update with two matrices.

Computes \( C = \alpha \cdot A \cdot B^H + \beta \cdot C \) if trans is NoTrans, or \( C = \alpha \cdot A^H \cdot B + \beta \cdot C \) if trans is ConjTrans, where \( C \) is Hermitian.

Template Parameters
T_ScalarThe scalar type of the matrix elements.
Parameters
[in]uploSpecifies whether the upper or lower triangular part of c is updated.
[in]transThe operation to apply to matrices a and b (no-transpose or conjugate transpose).
[in]nThe number of rows and columns in matrix c.
[in]kThe inner dimension of the multiplication.
[in]alphaPointer to the scalar multiplier.
[in]aPointer to matrix a on device.
[in]ldaThe leading dimension of matrix a.
[in]bPointer to matrix b on device.
[in]ldbThe leading dimension of matrix b.
[in]betaPointer to the real scalar multiplier for c.
[in,out]cPointer to the Hermitian matrix c on device (modified in-place).
[in]ldcThe leading dimension of matrix c.