![]() |
cuLite v0.3.1
A lite CUDA C++ Interface
|
The cuBLAS handler class. More...
Public Member Functions | |
| CuBlasHandler () | |
| Constructor. | |
| ~CuBlasHandler () | |
| Destructor. | |
| cublasHandle_t | handle () |
| Get the cuBLAS handle. | |
| cublasPointerMode_t | setPointerMode (cublasPointerMode_t mode) |
| Set the pointer mode for cuBLAS operations. | |
| cublasPointerMode_t | pointerMode () |
| Get the current pointer mode. | |
| template<typename T_Scalar> | |
| void | iamax (int_t n, const T_Scalar *x, int_t incx, int_t *result) |
| Finds the index of the maximum absolute value element. | |
| template<typename T_Scalar> | |
| void | asum (int_t n, const T_Scalar *x, int_t incx, typename TypeTraits< T_Scalar >::real_type *result) |
| Computes the sum of absolute values. | |
| template<typename T_Scalar> | |
| void | nrm2 (int_t n, const T_Scalar *x, int_t incx, typename TypeTraits< T_Scalar >::real_type *result) |
| Computes the Euclidean norm (L2 norm). | |
| template<typename T_Scalar> | |
| void | copy (int_t n, const T_Scalar *x, int_t incx, T_Scalar *y, int_t incy) |
| Copies a vector to another vector. | |
| template<typename T_Scalar> | |
| void | scal (int_t n, const T_Scalar *alpha, T_Scalar *x, int_t incx) |
| Scales a vector by a scalar. | |
| template<typename T_Scalar> | |
| void | dot (int_t n, const T_Scalar *x, int_t incx, const T_Scalar *y, int_t incy, T_Scalar *result) |
| Computes the dot product of two vectors. | |
| template<typename T_Scalar> | |
| void | dotc (int_t n, const T_Scalar *x, int_t incx, const T_Scalar *y, int_t incy, T_Scalar *result) |
| Computes the conjugate dot product of two vectors. | |
| template<typename T_Scalar> | |
| void | axpy (int_t n, const T_Scalar *alpha, const T_Scalar *x, int_t incx, T_Scalar *y, int_t incy) |
| Computes a vector plus scalar times a vector. | |
| template<typename T_Scalar> | |
| void | geam (op_t opA, op_t opB, int_t m, int_t n, const T_Scalar *alpha, const T_Scalar *a, int_t lda, const T_Scalar *beta, const T_Scalar *b, int_t ldb, T_Scalar *c, int_t ldc) |
| Performs parametrized matrix addition. | |
| template<typename T_Scalar> | |
| void | dgmm (side_t side, int_t m, int_t n, const T_Scalar *a, int_t lda, const T_Scalar *x, int_t incx, T_Scalar *c, int_t ldc) |
| Performs matrix-diagonal matrix multiplication. | |
| template<typename T_Scalar> | |
| void | ger (int_t m, int_t n, const T_Scalar *alpha, const T_Scalar *x, int_t incx, const T_Scalar *y, int_t incy, T_Scalar *a, int_t lda) |
| Performs general rank-1 update (unconjugated). | |
| template<typename T_Scalar> | |
| void | gerc (int_t m, int_t n, const T_Scalar *alpha, const T_Scalar *x, int_t incx, const T_Scalar *y, int_t incy, T_Scalar *a, int_t lda) |
| Performs general rank-1 update (conjugated). | |
| template<typename T_Scalar> | |
| void | syr (uplo_t uplo, int_t n, const T_Scalar *alpha, const T_Scalar *x, int_t incx, T_Scalar *a, int_t lda) |
| Performs symmetric rank-1 update. | |
| template<typename T_Scalar> | |
| void | her (uplo_t uplo, int_t n, const typename TypeTraits< T_Scalar >::real_type *alpha, const T_Scalar *x, int_t incx, T_Scalar *a, int_t lda) |
| Performs Hermitian rank-1 update. | |
| template<typename T_Scalar> | |
| void | gemv (op_t op, int_t m, int_t n, const T_Scalar *alpha, const T_Scalar *a, int_t lda, const T_Scalar *x, int_t incx, const T_Scalar *beta, T_Scalar *y, int_t incy) |
| Performs general matrix-vector multiplication. | |
| template<typename T_Scalar> | |
| void | symv (uplo_t uplo, int_t n, const T_Scalar *alpha, const T_Scalar *a, int_t lda, const T_Scalar *x, int_t incx, const T_Scalar *beta, T_Scalar *y, int_t incy) |
| Performs symmetric matrix-vector multiplication. | |
| template<typename T_Scalar> | |
| void | hemv (uplo_t uplo, int_t n, const T_Scalar *alpha, const T_Scalar *a, int_t lda, const T_Scalar *x, int_t incx, const T_Scalar *beta, T_Scalar *y, int_t incy) |
| Performs Hermitian matrix-vector multiplication. | |
| template<typename T_Scalar> | |
| void | trmv (uplo_t uplo, op_t trans, diag_t diag, int_t n, const T_Scalar *a, int_t lda, T_Scalar *x, int_t incx) |
| Performs triangular matrix-vector multiplication. | |
| template<typename T_Scalar> | |
| void | trsv (uplo_t uplo, op_t trans, diag_t diag, int_t n, const T_Scalar *a, int_t lda, T_Scalar *x, int_t incx) |
| Solves a triangular system of equations. | |
| template<typename T_Scalar> | |
| void | gemm (op_t transa, op_t transb, int_t m, int_t n, int_t k, const T_Scalar *alpha, const T_Scalar *a, int_t lda, const T_Scalar *b, int_t ldb, const T_Scalar *beta, T_Scalar *c, int_t ldc) |
| Performs general matrix-matrix multiplication. | |
| template<typename T_Scalar> | |
| void | symm (side_t side, uplo_t uplo, int_t m, int_t n, const T_Scalar *alpha, const T_Scalar *a, int_t lda, const T_Scalar *b, int_t ldb, const T_Scalar *beta, T_Scalar *c, int_t ldc) |
| Performs symmetric matrix-matrix multiplication. | |
| template<typename T_Scalar> | |
| void | hemm (side_t side, uplo_t uplo, int_t m, int_t n, const T_Scalar *alpha, const T_Scalar *a, int_t lda, const T_Scalar *b, int_t ldb, const T_Scalar *beta, T_Scalar *c, int_t ldc) |
| Performs Hermitian matrix-matrix multiplication. | |
| template<typename T_Scalar> | |
| void | trmm (side_t side, uplo_t uplo, op_t trans, diag_t diag, int_t m, int_t n, const T_Scalar *alpha, const T_Scalar *a, int_t lda, const T_Scalar *b, int_t ldb, T_Scalar *c, int_t ldc) |
| Performs triangular matrix-matrix multiplication. | |
| template<typename T_Scalar> | |
| void | trsm (side_t side, uplo_t uplo, op_t trans, diag_t diag, int_t m, int_t n, const T_Scalar *alpha, const T_Scalar *a, int_t lda, T_Scalar *b, int_t ldb) |
| Solves a triangular matrix equation. | |
| template<typename T_Scalar> | |
| void | syrk (uplo_t uplo, op_t trans, int_t n, int_t k, const T_Scalar *alpha, const T_Scalar *a, int_t lda, const T_Scalar *beta, T_Scalar *c, int_t ldc) |
| Performs symmetric rank-k update. | |
| template<typename T_Scalar> | |
| void | herk (uplo_t uplo, op_t trans, int_t n, int_t k, const typename TypeTraits< T_Scalar >::real_type *alpha, const T_Scalar *a, int_t lda, const typename TypeTraits< T_Scalar >::real_type *beta, T_Scalar *c, int_t ldc) |
| Performs Hermitian rank-k update. | |
| template<typename T_Scalar> | |
| void | syrkx (uplo_t uplo, op_t trans, int_t n, int_t k, const T_Scalar *alpha, const T_Scalar *a, int_t lda, const T_Scalar *b, int_t ldb, const T_Scalar *beta, T_Scalar *c, int_t ldc) |
| Performs symmetric rank-k update with two matrices. | |
| template<typename T_Scalar> | |
| void | herkx (uplo_t uplo, op_t trans, int_t n, int_t k, const T_Scalar *alpha, const T_Scalar *a, int_t lda, const T_Scalar *b, int_t ldb, const typename TypeTraits< T_Scalar >::real_type *beta, T_Scalar *c, int_t ldc) |
| Performs Hermitian rank-k update with two matrices. | |
The cuBLAS handler class.
This class provides a wrapper around the cuBLAS library for performing Basic Linear Algebra Subprograms (BLAS) operations on GPU devices. It manages the cuBLAS handle and provides convenient interfaces for vector and matrix operations.
| culite::CuBlasHandler::CuBlasHandler | ( | ) |
Constructor.
Initializes the cuBLAS handle and internal state.
| culite::CuBlasHandler::~CuBlasHandler | ( | ) |
Destructor.
Destroys the cuBLAS handle and releases all allocated resources.
| cublasHandle_t culite::CuBlasHandler::handle | ( | ) |
Get the cuBLAS handle.
| cublasPointerMode_t culite::CuBlasHandler::setPointerMode | ( | cublasPointerMode_t | mode | ) |
Set the pointer mode for cuBLAS operations.
Changes the pointer mode and returns the previous mode. The pointer mode determines whether pointers to scalar values (such as alpha and beta) refer to host memory or to device memory.
| [in] | mode | The new pointer mode to set. |
| cublasPointerMode_t culite::CuBlasHandler::pointerMode | ( | ) |
Get the current pointer mode.
Returns the current pointer mode setting for cuBLAS operations.
| void culite::CuBlasHandler::iamax | ( | int_t | n, |
| const T_Scalar * | x, | ||
| int_t | incx, | ||
| int_t * | result ) |
Finds the index of the maximum absolute value element.
Computes the index of the element with the maximum absolute value in vector x.
| T_Scalar | The scalar type of the vector elements. |
| [in] | n | The number of elements in the vector. |
| [in] | x | Pointer to the device vector. |
| [in] | incx | The stride between consecutive elements of x. |
| [out] | result | Pointer to store the result index (1-based). |
| void culite::CuBlasHandler::asum | ( | int_t | n, |
| const T_Scalar * | x, | ||
| int_t | incx, | ||
| typename TypeTraits< T_Scalar >::real_type * | result ) |
Computes the sum of absolute values.
Computes \( \sum_{i=1}^{n} |x_i| \) for the elements of vector x.
| T_Scalar | The scalar type of the vector elements. |
| [in] | n | The number of elements in the vector. |
| [in] | x | Pointer to the device vector. |
| [in] | incx | The stride between consecutive elements of x. |
| [out] | result | Pointer to store the sum of absolute values. |
| void culite::CuBlasHandler::nrm2 | ( | int_t | n, |
| const T_Scalar * | x, | ||
| int_t | incx, | ||
| typename TypeTraits< T_Scalar >::real_type * | result ) |
Computes the Euclidean norm (L2 norm).
Computes \( \sqrt{\sum_{i=1}^{n} |x_i|^2} \) for the elements of vector x.
| T_Scalar | The scalar type of the vector elements. |
| [in] | n | The number of elements in the vector. |
| [in] | x | Pointer to the device vector. |
| [in] | incx | The stride between consecutive elements of x. |
| [out] | result | Pointer to store the Euclidean norm. |
| void culite::CuBlasHandler::copy | ( | int_t | n, |
| const T_Scalar * | x, | ||
| int_t | incx, | ||
| T_Scalar * | y, | ||
| int_t | incy ) |
Copies a vector to another vector.
Copies vector x to vector y, computing \( y = x \).
| T_Scalar | The scalar type of the vector elements. |
| [in] | n | The number of elements in the vectors. |
| [in] | x | Pointer to the source device vector. |
| [in] | incx | The stride between consecutive elements of x. |
| [out] | y | Pointer to the destination device vector. |
| [in] | incy | The stride between consecutive elements of y. |
| void culite::CuBlasHandler::scal | ( | int_t | n, |
| const T_Scalar * | alpha, | ||
| T_Scalar * | x, | ||
| int_t | incx ) |
Scales a vector by a scalar.
Computes \( x = \alpha \cdot x \) for vector x.
| T_Scalar | The scalar type of the vector elements. |
| [in] | n | The number of elements in the vector. |
| [in] | alpha | Pointer to the scalar multiplier. |
| [in,out] | x | Pointer to the device vector (modified in-place). |
| [in] | incx | The stride between consecutive elements of x. |
| void culite::CuBlasHandler::dot | ( | int_t | n, |
| const T_Scalar * | x, | ||
| int_t | incx, | ||
| const T_Scalar * | y, | ||
| int_t | incy, | ||
| T_Scalar * | result ) |
Computes the dot product of two vectors.
Computes \( result = x^T \cdot y = \sum_{i=1}^{n} x_i \cdot y_i \).
| T_Scalar | The scalar type of the vector elements. |
| [in] | n | The number of elements in the vectors. |
| [in] | x | Pointer to the first vector on device. |
| [in] | incx | The stride between consecutive elements of x. |
| [in] | y | Pointer to the second vector on device. |
| [in] | incy | The stride between consecutive elements of y. |
| [out] | result | Pointer to store the dot product result. |
| void culite::CuBlasHandler::dotc | ( | int_t | n, |
| const T_Scalar * | x, | ||
| int_t | incx, | ||
| const T_Scalar * | y, | ||
| int_t | incy, | ||
| T_Scalar * | result ) |
Computes the conjugate dot product of two vectors.
Computes \( result = x^H \cdot y = \sum_{i=1}^{n} \overline{x_i} \cdot y_i \), where \( \overline{x_i} \) denotes the complex conjugate of \( x_i \).
| T_Scalar | The scalar type of the vector elements. |
| [in] | n | The number of elements in the vectors. |
| [in] | x | Pointer to the first vector on device (will be conjugated). |
| [in] | incx | The stride between consecutive elements of x. |
| [in] | y | Pointer to the second vector on device. |
| [in] | incy | The stride between consecutive elements of y. |
| [out] | result | Pointer to store the conjugate dot product result. |
| void culite::CuBlasHandler::axpy | ( | int_t | n, |
| const T_Scalar * | alpha, | ||
| const T_Scalar * | x, | ||
| int_t | incx, | ||
| T_Scalar * | y, | ||
| int_t | incy ) |
Computes a vector plus scalar times a vector.
Computes \( y = \alpha \cdot x + y \), updating vector y in-place.
| T_Scalar | The scalar type of the vector elements. |
| [in] | n | The number of elements in the vectors. |
| [in] | alpha | Pointer to the scalar multiplier. |
| [in] | x | Pointer to the input vector on device. |
| [in] | incx | The stride between consecutive elements of x. |
| [in,out] | y | Pointer to the vector on device (modified in-place). |
| [in] | incy | The stride between consecutive elements of y. |
| void culite::CuBlasHandler::geam | ( | op_t | opA, |
| op_t | opB, | ||
| int_t | m, | ||
| int_t | n, | ||
| const T_Scalar * | alpha, | ||
| const T_Scalar * | a, | ||
| int_t | lda, | ||
| const T_Scalar * | beta, | ||
| const T_Scalar * | b, | ||
| int_t | ldb, | ||
| T_Scalar * | c, | ||
| int_t | ldc ) |
Performs parametrized matrix addition.
Computes \( C = \alpha \cdot op_A(A) + \beta \cdot op_B(B) \), where \( op_A \) and \( op_B \) can independently be no-transpose, transpose, or conjugate transpose operations.
| T_Scalar | The scalar type of the matrix elements. |
| [in] | opA | The operation to apply to matrix a (no-transpose, transpose, or conjugate transpose). |
| [in] | opB | The operation to apply to matrix b (no-transpose, transpose, or conjugate transpose). |
| [in] | m | The number of rows in matrices \( op_A(a) \), \( op_B(b) \), and c. |
| [in] | n | The number of columns in matrices \( op_A(a) \), \( op_B(b) \), and c. |
| [in] | alpha | Pointer to the scalar multiplier for matrix a. |
| [in] | a | Pointer to matrix a on device. |
| [in] | lda | The leading dimension of matrix a. |
| [in] | beta | Pointer to the scalar multiplier for matrix b. |
| [in] | b | Pointer to matrix b on device. |
| [in] | ldb | The leading dimension of matrix b. |
| [out] | c | Pointer to matrix c on device (result). |
| [in] | ldc | The leading dimension of matrix c. |
| void culite::CuBlasHandler::dgmm | ( | side_t | side, |
| int_t | m, | ||
| int_t | n, | ||
| const T_Scalar * | a, | ||
| int_t | lda, | ||
| const T_Scalar * | x, | ||
| int_t | incx, | ||
| T_Scalar * | c, | ||
| int_t | ldc ) |
Performs matrix-diagonal matrix multiplication.
Computes \( C = A \cdot diag(x) \) if side is side_t::Right, or \( C = diag(x) \cdot A \) if side is side_t::Left, where \( diag(x) \) is a diagonal matrix constructed from vector x.
| T_Scalar | The scalar type of the matrix and vector elements. |
| [in] | side | Specifies whether the diagonal matrix multiplies from the left or right. |
| [in] | m | The number of rows in matrices a and c. |
| [in] | n | The number of columns in matrices a and c. |
| [in] | a | Pointer to matrix a on device. |
| [in] | lda | The leading dimension of matrix a. |
| [in] | x | Pointer to the diagonal vector on device. |
| [in] | incx | The stride between consecutive elements of x. |
| [out] | c | Pointer to matrix c on device (result). |
| [in] | ldc | The leading dimension of matrix c. |
| void culite::CuBlasHandler::ger | ( | int_t | m, |
| int_t | n, | ||
| const T_Scalar * | alpha, | ||
| const T_Scalar * | x, | ||
| int_t | incx, | ||
| const T_Scalar * | y, | ||
| int_t | incy, | ||
| T_Scalar * | a, | ||
| int_t | lda ) |
Performs general rank-1 update (unconjugated).
Computes \( A = \alpha \cdot x \cdot y^T + A \).
| T_Scalar | The scalar type of the vector and matrix elements. |
| [in] | m | The number of rows in matrix a. |
| [in] | n | The number of columns in matrix a. |
| [in] | alpha | Pointer to the scalar multiplier. |
| [in] | x | Pointer to the first vector on device. |
| [in] | incx | The stride between consecutive elements of x. |
| [in] | y | Pointer to the second vector on device. |
| [in] | incy | The stride between consecutive elements of y. |
| [in,out] | a | Pointer to matrix a on device (modified in-place). |
| [in] | lda | The leading dimension of matrix a. |
| void culite::CuBlasHandler::gerc | ( | int_t | m, |
| int_t | n, | ||
| const T_Scalar * | alpha, | ||
| const T_Scalar * | x, | ||
| int_t | incx, | ||
| const T_Scalar * | y, | ||
| int_t | incy, | ||
| T_Scalar * | a, | ||
| int_t | lda ) |
Performs general rank-1 update (conjugated).
Computes \( A = \alpha \cdot x \cdot y^H + A \), where \( y^H \) denotes the conjugate transpose of vector y.
| T_Scalar | The scalar type of the vector and matrix elements. |
| [in] | m | The number of rows in matrix a. |
| [in] | n | The number of columns in matrix a. |
| [in] | alpha | Pointer to the scalar multiplier. |
| [in] | x | Pointer to the first vector on device. |
| [in] | incx | The stride between consecutive elements of x. |
| [in] | y | Pointer to the second vector on device (will be conjugated). |
| [in] | incy | The stride between consecutive elements of y. |
| [in,out] | a | Pointer to matrix a on device (modified in-place). |
| [in] | lda | The leading dimension of matrix a. |
| void culite::CuBlasHandler::syr | ( | uplo_t | uplo, |
| int_t | n, | ||
| const T_Scalar * | alpha, | ||
| const T_Scalar * | x, | ||
| int_t | incx, | ||
| T_Scalar * | a, | ||
| int_t | lda ) |
Performs symmetric rank-1 update.
Computes \( A = \alpha \cdot x \cdot x^T + A \), where \( A \) is symmetric.
| T_Scalar | The scalar type of the vector and matrix elements. |
| [in] | uplo | Specifies whether the upper or lower triangular part of a is referenced. |
| [in] | n | The number of rows and columns in matrix a. |
| [in] | alpha | Pointer to the scalar multiplier. |
| [in] | x | Pointer to the vector on device. |
| [in] | incx | The stride between consecutive elements of x. |
| [in,out] | a | Pointer to the symmetric matrix a on device (modified in-place). |
| [in] | lda | The leading dimension of matrix a. |
| void culite::CuBlasHandler::her | ( | uplo_t | uplo, |
| int_t | n, | ||
| const typename TypeTraits< T_Scalar >::real_type * | alpha, | ||
| const T_Scalar * | x, | ||
| int_t | incx, | ||
| T_Scalar * | a, | ||
| int_t | lda ) |
Performs Hermitian rank-1 update.
Computes \( A = \alpha \cdot x \cdot x^H + A \), where \( A \) is Hermitian.
| T_Scalar | The scalar type of the vector and matrix elements. |
| [in] | uplo | Specifies whether the upper or lower triangular part of a is referenced. |
| [in] | n | The number of rows and columns in matrix a. |
| [in] | alpha | Pointer to the real scalar multiplier. |
| [in] | x | Pointer to the vector on device. |
| [in] | incx | The stride between consecutive elements of x. |
| [in,out] | a | Pointer to the Hermitian matrix a on device (modified in-place). |
| [in] | lda | The leading dimension of matrix a. |
| void culite::CuBlasHandler::gemv | ( | op_t | op, |
| int_t | m, | ||
| int_t | n, | ||
| const T_Scalar * | alpha, | ||
| const T_Scalar * | a, | ||
| int_t | lda, | ||
| const T_Scalar * | x, | ||
| int_t | incx, | ||
| const T_Scalar * | beta, | ||
| T_Scalar * | y, | ||
| int_t | incy ) |
Performs general matrix-vector multiplication.
Computes \( y = \alpha \cdot op(A) \cdot x + \beta \cdot y \).
| T_Scalar | The scalar type of the matrix and vector elements. |
| [in] | op | The operation to apply to matrix a (no-transpose, transpose, or conjugate transpose). |
| [in] | m | The number of rows in matrix a (before op is applied). |
| [in] | n | The number of columns in matrix a (before op is applied). |
| [in] | alpha | Pointer to the scalar multiplier. |
| [in] | a | Pointer to matrix a on device. |
| [in] | lda | The leading dimension of matrix a. |
| [in] | x | Pointer to the input vector on device. |
| [in] | incx | The stride between consecutive elements of x. |
| [in] | beta | Pointer to the scalar multiplier for y. |
| [in,out] | y | Pointer to the result vector on device (modified in-place). |
| [in] | incy | The stride between consecutive elements of y. |
| void culite::CuBlasHandler::symv | ( | uplo_t | uplo, |
| int_t | n, | ||
| const T_Scalar * | alpha, | ||
| const T_Scalar * | a, | ||
| int_t | lda, | ||
| const T_Scalar * | x, | ||
| int_t | incx, | ||
| const T_Scalar * | beta, | ||
| T_Scalar * | y, | ||
| int_t | incy ) |
Performs symmetric matrix-vector multiplication.
Computes \( y = \alpha \cdot A \cdot x + \beta \cdot y \), where \( A \) is symmetric.
| T_Scalar | The scalar type of the matrix and vector elements. |
| [in] | uplo | Specifies whether the upper or lower triangular part of a is referenced. |
| [in] | n | The number of rows and columns in matrix a. |
| [in] | alpha | Pointer to the scalar multiplier. |
| [in] | a | Pointer to the symmetric matrix a on device. |
| [in] | lda | The leading dimension of matrix a. |
| [in] | x | Pointer to the input vector on device. |
| [in] | incx | The stride between consecutive elements of x. |
| [in] | beta | Pointer to the scalar multiplier for y. |
| [in,out] | y | Pointer to the result vector on device (modified in-place). |
| [in] | incy | The stride between consecutive elements of y. |
| void culite::CuBlasHandler::hemv | ( | uplo_t | uplo, |
| int_t | n, | ||
| const T_Scalar * | alpha, | ||
| const T_Scalar * | a, | ||
| int_t | lda, | ||
| const T_Scalar * | x, | ||
| int_t | incx, | ||
| const T_Scalar * | beta, | ||
| T_Scalar * | y, | ||
| int_t | incy ) |
Performs Hermitian matrix-vector multiplication.
Computes \( y = \alpha \cdot A \cdot x + \beta \cdot y \), where \( A \) is Hermitian.
| T_Scalar | The scalar type of the matrix and vector elements. |
| [in] | uplo | Specifies whether the upper or lower triangular part of a is referenced. |
| [in] | n | The number of rows and columns in matrix a. |
| [in] | alpha | Pointer to the scalar multiplier. |
| [in] | a | Pointer to the Hermitian matrix a on device. |
| [in] | lda | The leading dimension of matrix a. |
| [in] | x | Pointer to the input vector on device. |
| [in] | incx | The stride between consecutive elements of x. |
| [in] | beta | Pointer to the scalar multiplier for y. |
| [in,out] | y | Pointer to the result vector on device (modified in-place). |
| [in] | incy | The stride between consecutive elements of y. |
| void culite::CuBlasHandler::trmv | ( | uplo_t | uplo, |
| op_t | trans, | ||
| diag_t | diag, | ||
| int_t | n, | ||
| const T_Scalar * | a, | ||
| int_t | lda, | ||
| T_Scalar * | x, | ||
| int_t | incx ) |
Performs triangular matrix-vector multiplication.
Computes \( x = op(A) \cdot x \), where \( A \) is triangular.
| T_Scalar | The scalar type of the matrix and vector elements. |
| [in] | uplo | Specifies whether the upper or lower triangular part of a is used. |
| [in] | trans | The operation to apply to matrix a (no-transpose, transpose, or conjugate transpose). |
| [in] | diag | Specifies whether the matrix is unit triangular or not. |
| [in] | n | The number of rows and columns in matrix a. |
| [in] | a | Pointer to the triangular matrix a on device. |
| [in] | lda | The leading dimension of matrix a. |
| [in,out] | x | Pointer to the vector on device (modified in-place). |
| [in] | incx | The stride between consecutive elements of x. |
| void culite::CuBlasHandler::trsv | ( | uplo_t | uplo, |
| op_t | trans, | ||
| diag_t | diag, | ||
| int_t | n, | ||
| const T_Scalar * | a, | ||
| int_t | lda, | ||
| T_Scalar * | x, | ||
| int_t | incx ) |
Solves a triangular system of equations.
Solves \( op(A) \cdot x = b \), where \( A \) is triangular and x contains \( b \) on entry.
| T_Scalar | The scalar type of the matrix and vector elements. |
| [in] | uplo | Specifies whether the upper or lower triangular part of a is used. |
| [in] | trans | The operation to apply to matrix a (no-transpose, transpose, or conjugate transpose). |
| [in] | diag | Specifies whether the matrix is unit triangular or not. |
| [in] | n | The number of rows and columns in matrix a. |
| [in] | a | Pointer to the triangular matrix a on device. |
| [in] | lda | The leading dimension of matrix a. |
| [in,out] | x | Pointer to the vector on device containing \( b \) on entry and the solution on exit. |
| [in] | incx | The stride between consecutive elements of x. |
| void culite::CuBlasHandler::gemm | ( | op_t | transa, |
| op_t | transb, | ||
| int_t | m, | ||
| int_t | n, | ||
| int_t | k, | ||
| const T_Scalar * | alpha, | ||
| const T_Scalar * | a, | ||
| int_t | lda, | ||
| const T_Scalar * | b, | ||
| int_t | ldb, | ||
| const T_Scalar * | beta, | ||
| T_Scalar * | c, | ||
| int_t | ldc ) |
Performs general matrix-matrix multiplication.
Computes \( C = \alpha \cdot op(A) \cdot op(B) + \beta \cdot C \).
| T_Scalar | The scalar type of the matrix elements. |
| [in] | transa | The operation to apply to matrix a (no-transpose, transpose, or conjugate transpose). |
| [in] | transb | The operation to apply to matrix b (no-transpose, transpose, or conjugate transpose). |
| [in] | m | The number of rows in matrices \( op(A) \) and c. |
| [in] | n | The number of columns in matrices \( op(B) \) and c. |
| [in] | k | The number of columns in \( op(A) \) and rows in \( op(B) \). |
| [in] | alpha | Pointer to the scalar multiplier. |
| [in] | a | Pointer to matrix a on device. |
| [in] | lda | The leading dimension of matrix a. |
| [in] | b | Pointer to matrix b on device. |
| [in] | ldb | The leading dimension of matrix b. |
| [in] | beta | Pointer to the scalar multiplier for c. |
| [in,out] | c | Pointer to matrix c on device (modified in-place). |
| [in] | ldc | The leading dimension of matrix c. |
| void culite::CuBlasHandler::symm | ( | side_t | side, |
| uplo_t | uplo, | ||
| int_t | m, | ||
| int_t | n, | ||
| const T_Scalar * | alpha, | ||
| const T_Scalar * | a, | ||
| int_t | lda, | ||
| const T_Scalar * | b, | ||
| int_t | ldb, | ||
| const T_Scalar * | beta, | ||
| T_Scalar * | c, | ||
| int_t | ldc ) |
Performs symmetric matrix-matrix multiplication.
Computes \( C = \alpha \cdot A \cdot B + \beta \cdot C \) if side is Left, or \( C = \alpha \cdot B \cdot A + \beta \cdot C \) if side is Right, where \( A \) is symmetric.
| T_Scalar | The scalar type of the matrix elements. |
| [in] | side | Specifies whether the symmetric matrix a multiplies from the left or right. |
| [in] | uplo | Specifies whether the upper or lower triangular part of a is referenced. |
| [in] | m | The number of rows in matrices b and c. |
| [in] | n | The number of columns in matrices b and c. |
| [in] | alpha | Pointer to the scalar multiplier. |
| [in] | a | Pointer to the symmetric matrix a on device. |
| [in] | lda | The leading dimension of matrix a. |
| [in] | b | Pointer to matrix b on device. |
| [in] | ldb | The leading dimension of matrix b. |
| [in] | beta | Pointer to the scalar multiplier for c. |
| [in,out] | c | Pointer to matrix c on device (modified in-place). |
| [in] | ldc | The leading dimension of matrix c. |
| void culite::CuBlasHandler::hemm | ( | side_t | side, |
| uplo_t | uplo, | ||
| int_t | m, | ||
| int_t | n, | ||
| const T_Scalar * | alpha, | ||
| const T_Scalar * | a, | ||
| int_t | lda, | ||
| const T_Scalar * | b, | ||
| int_t | ldb, | ||
| const T_Scalar * | beta, | ||
| T_Scalar * | c, | ||
| int_t | ldc ) |
Performs Hermitian matrix-matrix multiplication.
Computes \( C = \alpha \cdot A \cdot B + \beta \cdot C \) if side is Left, or \( C = \alpha \cdot B \cdot A + \beta \cdot C \) if side is Right, where \( A \) is Hermitian.
| T_Scalar | The scalar type of the matrix elements. |
| [in] | side | Specifies whether the Hermitian matrix a multiplies from the left or right. |
| [in] | uplo | Specifies whether the upper or lower triangular part of a is referenced. |
| [in] | m | The number of rows in matrices b and c. |
| [in] | n | The number of columns in matrices b and c. |
| [in] | alpha | Pointer to the scalar multiplier. |
| [in] | a | Pointer to the Hermitian matrix a on device. |
| [in] | lda | The leading dimension of matrix a. |
| [in] | b | Pointer to matrix b on device. |
| [in] | ldb | The leading dimension of matrix b. |
| [in] | beta | Pointer to the scalar multiplier for c. |
| [in,out] | c | Pointer to matrix c on device (modified in-place). |
| [in] | ldc | The leading dimension of matrix c. |
| void culite::CuBlasHandler::trmm | ( | side_t | side, |
| uplo_t | uplo, | ||
| op_t | trans, | ||
| diag_t | diag, | ||
| int_t | m, | ||
| int_t | n, | ||
| const T_Scalar * | alpha, | ||
| const T_Scalar * | a, | ||
| int_t | lda, | ||
| const T_Scalar * | b, | ||
| int_t | ldb, | ||
| T_Scalar * | c, | ||
| int_t | ldc ) |
Performs triangular matrix-matrix multiplication.
Computes \( C = \alpha \cdot op(A) \cdot B \) if side is Left, or \( C = \alpha \cdot B \cdot op(A) \) if side is Right, where \( A \) is triangular.
| T_Scalar | The scalar type of the matrix elements. |
| [in] | side | Specifies whether the triangular matrix a multiplies from the left or right. |
| [in] | uplo | Specifies whether the upper or lower triangular part of a is used. |
| [in] | trans | The operation to apply to matrix a (no-transpose, transpose, or conjugate transpose). |
| [in] | diag | Specifies whether the matrix is unit triangular or not. |
| [in] | m | The number of rows in matrices b and c. |
| [in] | n | The number of columns in matrices b and c. |
| [in] | alpha | Pointer to the scalar multiplier. |
| [in] | a | Pointer to the triangular matrix a on device. |
| [in] | lda | The leading dimension of matrix a. |
| [in] | b | Pointer to matrix b on device. |
| [in] | ldb | The leading dimension of matrix b. |
| [out] | c | Pointer to matrix c on device (result). |
| [in] | ldc | The leading dimension of matrix c. |
| void culite::CuBlasHandler::trsm | ( | side_t | side, |
| uplo_t | uplo, | ||
| op_t | trans, | ||
| diag_t | diag, | ||
| int_t | m, | ||
| int_t | n, | ||
| const T_Scalar * | alpha, | ||
| const T_Scalar * | a, | ||
| int_t | lda, | ||
| T_Scalar * | b, | ||
| int_t | ldb ) |
Solves a triangular matrix equation.
Solves \( op(A) \cdot X = \alpha \cdot B \) if side is Left, or \( X \cdot op(A) = \alpha \cdot B \) if side is Right, where \( A \) is triangular. The solution \( X \) overwrites b on exit.
| T_Scalar | The scalar type of the matrix elements. |
| [in] | side | Specifies whether the triangular matrix a is on the left or right. |
| [in] | uplo | Specifies whether the upper or lower triangular part of a is used. |
| [in] | trans | The operation to apply to matrix a (no-transpose, transpose, or conjugate transpose). |
| [in] | diag | Specifies whether the matrix is unit triangular or not. |
| [in] | m | The number of rows in matrix b. |
| [in] | n | The number of columns in matrix b. |
| [in] | alpha | Pointer to the scalar multiplier. |
| [in] | a | Pointer to the triangular matrix a on device. |
| [in] | lda | The leading dimension of matrix a. |
| [in,out] | b | Pointer to matrix b on device (contains right-hand side on entry, solution on exit). |
| [in] | ldb | The leading dimension of matrix b. |
| void culite::CuBlasHandler::syrk | ( | uplo_t | uplo, |
| op_t | trans, | ||
| int_t | n, | ||
| int_t | k, | ||
| const T_Scalar * | alpha, | ||
| const T_Scalar * | a, | ||
| int_t | lda, | ||
| const T_Scalar * | beta, | ||
| T_Scalar * | c, | ||
| int_t | ldc ) |
Performs symmetric rank-k update.
Computes \( C = \alpha \cdot A \cdot A^T + \beta \cdot C \) if trans is NoTrans, or \( C = \alpha \cdot A^T \cdot A + \beta \cdot C \) if trans is Trans, where \( C \) is symmetric.
| T_Scalar | The scalar type of the matrix elements. |
| [in] | uplo | Specifies whether the upper or lower triangular part of c is updated. |
| [in] | trans | The operation to apply to matrix a (no-transpose or transpose). |
| [in] | n | The number of rows and columns in matrix c. |
| [in] | k | The inner dimension of the multiplication. |
| [in] | alpha | Pointer to the scalar multiplier. |
| [in] | a | Pointer to matrix a on device. |
| [in] | lda | The leading dimension of matrix a. |
| [in] | beta | Pointer to the scalar multiplier for c. |
| [in,out] | c | Pointer to the symmetric matrix c on device (modified in-place). |
| [in] | ldc | The leading dimension of matrix c. |
| void culite::CuBlasHandler::herk | ( | uplo_t | uplo, |
| op_t | trans, | ||
| int_t | n, | ||
| int_t | k, | ||
| const typename TypeTraits< T_Scalar >::real_type * | alpha, | ||
| const T_Scalar * | a, | ||
| int_t | lda, | ||
| const typename TypeTraits< T_Scalar >::real_type * | beta, | ||
| T_Scalar * | c, | ||
| int_t | ldc ) |
Performs Hermitian rank-k update.
Computes \( C = \alpha \cdot A \cdot A^H + \beta \cdot C \) if trans is NoTrans, or \( C = \alpha \cdot A^H \cdot A + \beta \cdot C \) if trans is ConjTrans, where \( C \) is Hermitian.
| T_Scalar | The scalar type of the matrix elements. |
| [in] | uplo | Specifies whether the upper or lower triangular part of c is updated. |
| [in] | trans | The operation to apply to matrix a (no-transpose or conjugate transpose). |
| [in] | n | The number of rows and columns in matrix c. |
| [in] | k | The number of columns of op(a), i.e. the inner dimension of the multiplication. |
| [in] | alpha | Pointer to the real scalar multiplier. |
| [in] | a | Pointer to matrix a on device. |
| [in] | lda | The leading dimension of matrix a. |
| [in] | beta | Pointer to the real scalar multiplier for c. |
| [in,out] | c | Pointer to the Hermitian matrix c on device (modified in-place). |
| [in] | ldc | The leading dimension of matrix c. |
| void culite::CuBlasHandler::syrkx | ( | uplo_t | uplo, |
| op_t | trans, | ||
| int_t | n, | ||
| int_t | k, | ||
| const T_Scalar * | alpha, | ||
| const T_Scalar * | a, | ||
| int_t | lda, | ||
| const T_Scalar * | b, | ||
| int_t | ldb, | ||
| const T_Scalar * | beta, | ||
| T_Scalar * | c, | ||
| int_t | ldc ) |
Performs symmetric rank-k update with two matrices.
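Computes \( C = \alpha \cdot A \cdot B^T + \beta \cdot C \) if trans is NoTrans, or \( C = \alpha \cdot A^T \cdot B + \beta \cdot C \) if trans is Trans, where \( C \) is symmetric. Only the triangle of c selected by uplo is updated, so the result is meaningful only when b is such that the product is guaranteed to be symmetric (for example, when b is a scaled form of a).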
| T_Scalar | The scalar type of the matrix elements. |
| [in] | uplo | Specifies whether the upper or lower triangular part of c is updated. |
| [in] | trans | The operation to apply to matrices a and b (no-transpose or transpose). |
| [in] | n | The number of rows and columns in matrix c. |
| [in] | k | The number of columns of op(a) and op(b), i.e. the inner dimension of the multiplication. |
| [in] | alpha | Pointer to the scalar multiplier. |
| [in] | a | Pointer to matrix a on device. |
| [in] | lda | The leading dimension of matrix a. |
| [in] | b | Pointer to matrix b on device. |
| [in] | ldb | The leading dimension of matrix b. |
| [in] | beta | Pointer to the scalar multiplier for c. |
| [in,out] | c | Pointer to the symmetric matrix c on device (modified in-place). |
| [in] | ldc | The leading dimension of matrix c. |
| void culite::CuBlasHandler::herkx | ( | uplo_t | uplo, |
| op_t | trans, | ||
| int_t | n, | ||
| int_t | k, | ||
| const T_Scalar * | alpha, | ||
| const T_Scalar * | a, | ||
| int_t | lda, | ||
| const T_Scalar * | b, | ||
| int_t | ldb, | ||
| const typename TypeTraits< T_Scalar >::real_type * | beta, | ||
| T_Scalar * | c, | ||
| int_t | ldc ) |
Performs Hermitian rank-k update with two matrices.
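Computes \( C = \alpha \cdot A \cdot B^H + \beta \cdot C \) if trans is NoTrans, or \( C = \alpha \cdot A^H \cdot B + \beta \cdot C \) if trans is ConjTrans, where \( C \) is Hermitian. Only the triangle of c selected by uplo is updated, so the result is meaningful only when b is such that the product is guaranteed to be Hermitian (for example, when b is a scaled form of a).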
| T_Scalar | The scalar type of the matrix elements. |
| [in] | uplo | Specifies whether the upper or lower triangular part of c is updated. |
| [in] | trans | The operation to apply to matrices a and b (no-transpose or conjugate transpose). |
| [in] | n | The number of rows and columns in matrix c. |
| [in] | k | The number of columns of op(a) and op(b), i.e. the inner dimension of the multiplication. |
| [in] | alpha | Pointer to the scalar multiplier. |
| [in] | a | Pointer to matrix a on device. |
| [in] | lda | The leading dimension of matrix a. |
| [in] | b | Pointer to matrix b on device. |
| [in] | ldb | The leading dimension of matrix b. |
| [in] | beta | Pointer to the real scalar multiplier for c. |
| [in,out] | c | Pointer to the Hermitian matrix c on device (modified in-place). |
| [in] | ldc | The leading dimension of matrix c. |