The cuBLAS handler class. More...

Public Member Functions
	CuBlasHandler ()
	Constructor.
	~CuBlasHandler ()
	Destructor.
cublasHandle_t	handle ()
	Get the cuBLAS handle.
cublasPointerMode_t	setPointerMode (cublasPointerMode_t mode)
	Set the pointer mode for cuBLAS operations.
cublasPointerMode_t	pointerMode ()
	Get the current pointer mode.
template<typename T_Scalar>
void	iamax (int_t n, const T_Scalar *x, int_t incx, int_t *result)
	Finds the index of the maximum absolute value element.
template<typename T_Scalar>
void	asum (int_t n, const T_Scalar *x, int_t incx, typename TypeTraits< T_Scalar >::real_type *result)
	Computes the sum of absolute values.
template<typename T_Scalar>
void	nrm2 (int_t n, const T_Scalar *x, int_t incx, typename TypeTraits< T_Scalar >::real_type *result)
	Computes the Euclidean norm (L2 norm).
template<typename T_Scalar>
void	copy (int_t n, const T_Scalar *x, int_t incx, T_Scalar *y, int_t incy)
	Copies a vector to another vector.
template<typename T_Scalar>
void	scal (int_t n, const T_Scalar *alpha, T_Scalar *x, int_t incx)
	Scales a vector by a scalar.
template<typename T_Scalar>
void	dot (int_t n, const T_Scalar *x, int_t incx, const T_Scalar *y, int_t incy, T_Scalar *result)
	Computes the dot product of two vectors.
template<typename T_Scalar>
void	dotc (int_t n, const T_Scalar *x, int_t incx, const T_Scalar *y, int_t incy, T_Scalar *result)
	Computes the conjugate dot product of two vectors.
template<typename T_Scalar>
void	axpy (int_t n, const T_Scalar *alpha, const T_Scalar *x, int_t incx, T_Scalar *y, int_t incy)
	Computes a vector plus scalar times a vector.
template<typename T_Scalar>
void	geam (op_t opA, op_t opB, int_t m, int_t n, const T_Scalar *alpha, const T_Scalar *a, int_t lda, const T_Scalar *beta, const T_Scalar *b, int_t ldb, T_Scalar *c, int_t ldc)
	Performs parametrized matrix addition.
template<typename T_Scalar>
void	dgmm (side_t side, int_t m, int_t n, const T_Scalar *a, int_t lda, const T_Scalar *x, int_t incx, T_Scalar *c, int_t ldc)
	Performs matrix-diagonal matrix multiplication.
template<typename T_Scalar>
void	ger (int_t m, int_t n, const T_Scalar *alpha, const T_Scalar *x, int_t incx, const T_Scalar *y, int_t incy, T_Scalar *a, int_t lda)
	Performs general rank-1 update (unconjugated).
template<typename T_Scalar>
void	gerc (int_t m, int_t n, const T_Scalar *alpha, const T_Scalar *x, int_t incx, const T_Scalar *y, int_t incy, T_Scalar *a, int_t lda)
	Performs general rank-1 update (conjugated).
template<typename T_Scalar>
void	syr (uplo_t uplo, int_t n, const T_Scalar *alpha, const T_Scalar *x, int_t incx, T_Scalar *a, int_t lda)
	Performs symmetric rank-1 update.
template<typename T_Scalar>
void	her (uplo_t uplo, int_t n, const typename TypeTraits< T_Scalar >::real_type *alpha, const T_Scalar *x, int_t incx, T_Scalar *a, int_t lda)
	Performs Hermitian rank-1 update.
template<typename T_Scalar>
void	gemv (op_t op, int_t m, int_t n, const T_Scalar *alpha, const T_Scalar *a, int_t lda, const T_Scalar *x, int_t incx, const T_Scalar *beta, T_Scalar *y, int_t incy)
	Performs general matrix-vector multiplication.
template<typename T_Scalar>
void	symv (uplo_t uplo, int_t n, const T_Scalar *alpha, const T_Scalar *a, int_t lda, const T_Scalar *x, int_t incx, const T_Scalar *beta, T_Scalar *y, int_t incy)
	Performs symmetric matrix-vector multiplication.
template<typename T_Scalar>
void	hemv (uplo_t uplo, int_t n, const T_Scalar *alpha, const T_Scalar *a, int_t lda, const T_Scalar *x, int_t incx, const T_Scalar *beta, T_Scalar *y, int_t incy)
	Performs Hermitian matrix-vector multiplication.
template<typename T_Scalar>
void	trmv (uplo_t uplo, op_t trans, diag_t diag, int_t n, const T_Scalar *a, int_t lda, T_Scalar *x, int_t incx)
	Performs triangular matrix-vector multiplication.
template<typename T_Scalar>
void	trsv (uplo_t uplo, op_t trans, diag_t diag, int_t n, const T_Scalar *a, int_t lda, T_Scalar *x, int_t incx)
	Solves a triangular system of equations.
template<typename T_Scalar>
void	gemm (op_t transa, op_t transb, int_t m, int_t n, int_t k, const T_Scalar *alpha, const T_Scalar *a, int_t lda, const T_Scalar *b, int_t ldb, const T_Scalar *beta, T_Scalar *c, int_t ldc)
	Performs general matrix-matrix multiplication.
template<typename T_Scalar>
void	symm (side_t side, uplo_t uplo, int_t m, int_t n, const T_Scalar *alpha, const T_Scalar *a, int_t lda, const T_Scalar *b, int_t ldb, const T_Scalar *beta, T_Scalar *c, int_t ldc)
	Performs symmetric matrix-matrix multiplication.
template<typename T_Scalar>
void	hemm (side_t side, uplo_t uplo, int_t m, int_t n, const T_Scalar *alpha, const T_Scalar *a, int_t lda, const T_Scalar *b, int_t ldb, const T_Scalar *beta, T_Scalar *c, int_t ldc)
	Performs Hermitian matrix-matrix multiplication.
template<typename T_Scalar>
void	trmm (side_t side, uplo_t uplo, op_t trans, diag_t diag, int_t m, int_t n, const T_Scalar *alpha, const T_Scalar *a, int_t lda, const T_Scalar *b, int_t ldb, T_Scalar *c, int_t ldc)
	Performs triangular matrix-matrix multiplication.
template<typename T_Scalar>
void	trsm (side_t side, uplo_t uplo, op_t trans, diag_t diag, int_t m, int_t n, const T_Scalar *alpha, const T_Scalar *a, int_t lda, T_Scalar *b, int_t ldb)
	Solves a triangular matrix equation.
template<typename T_Scalar>
void	syrk (uplo_t uplo, op_t trans, int_t n, int_t k, const T_Scalar *alpha, const T_Scalar *a, int_t lda, const T_Scalar *beta, T_Scalar *c, int_t ldc)
	Performs symmetric rank-k update.
template<typename T_Scalar>
void	herk (uplo_t uplo, op_t trans, int_t n, int_t k, const typename TypeTraits< T_Scalar >::real_type *alpha, const T_Scalar *a, int_t lda, const typename TypeTraits< T_Scalar >::real_type *beta, T_Scalar *c, int_t ldc)
	Performs Hermitian rank-k update.
template<typename T_Scalar>
void	syrkx (uplo_t uplo, op_t trans, int_t n, int_t k, const T_Scalar *alpha, const T_Scalar *a, int_t lda, const T_Scalar *b, int_t ldb, const T_Scalar *beta, T_Scalar *c, int_t ldc)
	Performs symmetric rank-k update with two matrices.
template<typename T_Scalar>
void	herkx (uplo_t uplo, op_t trans, int_t n, int_t k, const T_Scalar *alpha, const T_Scalar *a, int_t lda, const T_Scalar *b, int_t ldb, const typename TypeTraits< T_Scalar >::real_type *beta, T_Scalar *c, int_t ldc)
	Performs Hermitian rank-k update with two matrices.

Detailed Description

The cuBLAS handler class.

This class provides a wrapper around the cuBLAS library for performing Basic Linear Algebra Subprograms (BLAS) operations on GPU devices. It manages the cuBLAS handle and provides convenient interfaces for vector and matrix operations.

Constructor & Destructor Documentation

◆ CuBlasHandler()

culite::CuBlasHandler::CuBlasHandler ( )

Constructor.

Initializes the cuBLAS handle and internal state.

◆ ~CuBlasHandler()

culite::CuBlasHandler::~CuBlasHandler ( )

Destructor.

Destroys the cuBLAS handle and releases all allocated resources.

Member Function Documentation

◆ handle()

cublasHandle_t culite::CuBlasHandler::handle ( )

Get the cuBLAS handle.

Returns: The cuBLAS handle.

◆ setPointerMode()

cublasPointerMode_t culite::CuBlasHandler::setPointerMode ( cublasPointerMode_t mode )

Set the pointer mode for cuBLAS operations.

Changes the pointer mode and returns the previous mode. The pointer mode determines whether scalar values (alpha, beta) are passed by reference on the host or device.

Parameters

[in] mode The new pointer mode to set.

Returns: The pointer mode that was active before this call.

◆ pointerMode()

cublasPointerMode_t culite::CuBlasHandler::pointerMode ( )

Get the current pointer mode.

Returns the current pointer mode setting for cuBLAS operations.

Returns: The current pointer mode.

◆ iamax()

template<typename T_Scalar>

void culite::CuBlasHandler::iamax	(	int_t	n,
		const T_Scalar *	x,
		int_t	incx,
		int_t *	result )

Finds the index of the maximum absolute value element.

Computes the index of the element with the maximum absolute value in vector x.

Template Parameters

T_Scalar The scalar type of the vector elements.

Parameters

[in]	n	The number of elements in the vector.
[in]	x	Pointer to the device vector.
[in]	incx	The stride between consecutive elements of `x`.
[out]	result	Pointer to store the result index (1-based).

◆ asum()

template<typename T_Scalar>

void culite::CuBlasHandler::asum	(	int_t	n,
		const T_Scalar *	x,
		int_t	incx,
		typename TypeTraits< T_Scalar >::real_type *	result )

Computes the sum of absolute values.

Computes \( \sum_{i=1}^{n} |x_i| \) for the elements of vector x.

Template Parameters

T_Scalar The scalar type of the vector elements.

Parameters

[in]	n	The number of elements in the vector.
[in]	x	Pointer to the device vector.
[in]	incx	The stride between consecutive elements of `x`.
[out]	result	Pointer to store the sum of absolute values.

◆ nrm2()

template<typename T_Scalar>

void culite::CuBlasHandler::nrm2	(	int_t	n,
		const T_Scalar *	x,
		int_t	incx,
		typename TypeTraits< T_Scalar >::real_type *	result )

Computes the Euclidean norm (L2 norm).

Computes \( \sqrt{\sum_{i=1}^{n} |x_i|^2} \) for the elements of vector x.

Template Parameters

T_Scalar The scalar type of the vector elements.

Parameters

[in]	n	The number of elements in the vector.
[in]	x	Pointer to the device vector.
[in]	incx	The stride between consecutive elements of `x`.
[out]	result	Pointer to store the Euclidean norm.

◆ copy()

template<typename T_Scalar>

void culite::CuBlasHandler::copy	(	int_t	n,
		const T_Scalar *	x,
		int_t	incx,
		T_Scalar *	y,
		int_t	incy )

Copies a vector to another vector.

Copies vector x to vector y, computing \( y = x \).

Template Parameters

T_Scalar The scalar type of the vector elements.

Parameters

[in]	n	The number of elements in the vectors.
[in]	x	Pointer to the source device vector.
[in]	incx	The stride between consecutive elements of `x`.
[out]	y	Pointer to the destination device vector.
[in]	incy	The stride between consecutive elements of `y`.

◆ scal()

template<typename T_Scalar>

void culite::CuBlasHandler::scal	(	int_t	n,
		const T_Scalar *	alpha,
		T_Scalar *	x,
		int_t	incx )

Scales a vector by a scalar.

Computes \( x = \alpha \cdot x \) for vector x.

Template Parameters

T_Scalar The scalar type of the vector elements.

Parameters

[in]	n	The number of elements in the vector.
[in]	alpha	Pointer to the scalar multiplier.
[in,out]	x	Pointer to the device vector (modified in-place).
[in]	incx	The stride between consecutive elements of `x`.

◆ dot()

template<typename T_Scalar>

void culite::CuBlasHandler::dot	(	int_t	n,
		const T_Scalar *	x,
		int_t	incx,
		const T_Scalar *	y,
		int_t	incy,
		T_Scalar *	result )

Computes the dot product of two vectors.

Computes \( result = x^T \cdot y = \sum_{i=1}^{n} x_i \cdot y_i \).

Template Parameters

T_Scalar The scalar type of the vector elements.

Parameters

[in]	n	The number of elements in the vectors.
[in]	x	Pointer to the first vector on device.
[in]	incx	The stride between consecutive elements of `x`.
[in]	y	Pointer to the second vector on device.
[in]	incy	The stride between consecutive elements of `y`.
[out]	result	Pointer to store the dot product result.

◆ dotc()

template<typename T_Scalar>

void culite::CuBlasHandler::dotc	(	int_t	n,
		const T_Scalar *	x,
		int_t	incx,
		const T_Scalar *	y,
		int_t	incy,
		T_Scalar *	result )

Computes the conjugate dot product of two vectors.

Computes \( result = x^H \cdot y = \sum_{i=1}^{n} \overline{x_i} \cdot y_i \), where \( \overline{x_i} \) denotes the complex conjugate of \( x_i \).

Template Parameters

T_Scalar The scalar type of the vector elements.

Parameters

[in]	n	The number of elements in the vectors.
[in]	x	Pointer to the first vector on device (will be conjugated).
[in]	incx	The stride between consecutive elements of `x`.
[in]	y	Pointer to the second vector on device.
[in]	incy	The stride between consecutive elements of `y`.
[out]	result	Pointer to store the conjugate dot product result.

◆ axpy()

template<typename T_Scalar>

void culite::CuBlasHandler::axpy	(	int_t	n,
		const T_Scalar *	alpha,
		const T_Scalar *	x,
		int_t	incx,
		T_Scalar *	y,
		int_t	incy )

Computes a vector plus scalar times a vector.

Computes \( y = \alpha \cdot x + y \), updating vector y in-place.

Template Parameters

T_Scalar The scalar type of the vector elements.

Parameters

[in]	n	The number of elements in the vectors.
[in]	alpha	Pointer to the scalar multiplier.
[in]	x	Pointer to the input vector on device.
[in]	incx	The stride between consecutive elements of `x`.
[in,out]	y	Pointer to the vector on device (modified in-place).
[in]	incy	The stride between consecutive elements of `y`.

◆ geam()

template<typename T_Scalar>

void culite::CuBlasHandler::geam	(	op_t	opA,
		op_t	opB,
		int_t	m,
		int_t	n,
		const T_Scalar *	alpha,
		const T_Scalar *	a,
		int_t	lda,
		const T_Scalar *	beta,
		const T_Scalar *	b,
		int_t	ldb,
		T_Scalar *	c,
		int_t	ldc )

Performs parametrized matrix addition.

Computes \( C = \alpha \cdot op_A(A) + \beta \cdot op_B(B) \), where \( op_A \) and \( op_B \) can independently be no-transpose, transpose, or conjugate transpose operations.

Template Parameters

T_Scalar The scalar type of the matrix elements.

Parameters

[in]	opA	The operation to apply to matrix `a` (no-transpose, transpose, or conjugate transpose).
[in]	opB	The operation to apply to matrix `b` (no-transpose, transpose, or conjugate transpose).
[in]	m	The number of rows in matrices \( op_A(a) \), \( op_B(b) \), and `c`.
[in]	n	The number of columns in matrices \( op_A(a) \), \( op_B(b) \), and `c`.
[in]	alpha	Pointer to the scalar multiplier for matrix `a`.
[in]	a	Pointer to matrix `a` on device.
[in]	lda	The leading dimension of matrix `a`.
[in]	beta	Pointer to the scalar multiplier for matrix `b`.
[in]	b	Pointer to matrix `b` on device.
[in]	ldb	The leading dimension of matrix `b`.
[out]	c	Pointer to matrix `c` on device (result).
[in]	ldc	The leading dimension of matrix `c`.

◆ dgmm()

template<typename T_Scalar>

void culite::CuBlasHandler::dgmm	(	side_t	side,
		int_t	m,
		int_t	n,
		const T_Scalar *	a,
		int_t	lda,
		const T_Scalar *	x,
		int_t	incx,
		T_Scalar *	c,
		int_t	ldc )

Performs matrix-diagonal matrix multiplication.

Computes \( C = A \cdot diag(x) \) if side is cla3p::side_t::Right, or \( C = diag(x) \cdot A \) if side is cla3p::side_t::Left, where \( diag(x) \) is a diagonal matrix constructed from vector x.

Template Parameters

T_Scalar The scalar type of the matrix and vector elements.

Parameters

[in]	side	Specifies whether the diagonal matrix multiplies from the left or right.
[in]	m	The number of rows in matrices `a` and `c`.
[in]	n	The number of columns in matrices `a` and `c`.
[in]	a	Pointer to matrix `a` on device.
[in]	lda	The leading dimension of matrix `a`.
[in]	x	Pointer to the diagonal vector on device.
[in]	incx	The stride between consecutive elements of `x`.
[out]	c	Pointer to matrix `c` on device (result).
[in]	ldc	The leading dimension of matrix `c`.

◆ ger()

template<typename T_Scalar>

void culite::CuBlasHandler::ger	(	int_t	m,
		int_t	n,
		const T_Scalar *	alpha,
		const T_Scalar *	x,
		int_t	incx,
		const T_Scalar *	y,
		int_t	incy,
		T_Scalar *	a,
		int_t	lda )

Performs general rank-1 update (unconjugated).

Computes \( A = \alpha \cdot x \cdot y^T + A \).

Template Parameters

T_Scalar The scalar type of the vector and matrix elements.

Parameters

[in]	m	The number of rows in matrix `a`.
[in]	n	The number of columns in matrix `a`.
[in]	alpha	Pointer to the scalar multiplier.
[in]	x	Pointer to the first vector on device.
[in]	incx	The stride between consecutive elements of `x`.
[in]	y	Pointer to the second vector on device.
[in]	incy	The stride between consecutive elements of `y`.
[in,out]	a	Pointer to matrix `a` on device (modified in-place).
[in]	lda	The leading dimension of matrix `a`.

◆ gerc()

template<typename T_Scalar>

void culite::CuBlasHandler::gerc	(	int_t	m,
		int_t	n,
		const T_Scalar *	alpha,
		const T_Scalar *	x,
		int_t	incx,
		const T_Scalar *	y,
		int_t	incy,
		T_Scalar *	a,
		int_t	lda )

Performs general rank-1 update (conjugated).

Computes \( A = \alpha \cdot x \cdot y^H + A \), where \( y^H \) denotes the conjugate transpose of vector y.

Template Parameters

T_Scalar The scalar type of the vector and matrix elements.

Parameters

[in]	m	The number of rows in matrix `a`.
[in]	n	The number of columns in matrix `a`.
[in]	alpha	Pointer to the scalar multiplier.
[in]	x	Pointer to the first vector on device.
[in]	incx	The stride between consecutive elements of `x`.
[in]	y	Pointer to the second vector on device (will be conjugated).
[in]	incy	The stride between consecutive elements of `y`.
[in,out]	a	Pointer to matrix `a` on device (modified in-place).
[in]	lda	The leading dimension of matrix `a`.

◆ syr()

template<typename T_Scalar>

void culite::CuBlasHandler::syr	(	uplo_t	uplo,
		int_t	n,
		const T_Scalar *	alpha,
		const T_Scalar *	x,
		int_t	incx,
		T_Scalar *	a,
		int_t	lda )

Performs symmetric rank-1 update.

Computes \( A = \alpha \cdot x \cdot x^T + A \), where \( A \) is symmetric.

Template Parameters

T_Scalar The scalar type of the vector and matrix elements.

Parameters

[in]	uplo	Specifies whether the upper or lower triangular part of `a` is referenced.
[in]	n	The number of rows and columns in matrix `a`.
[in]	alpha	Pointer to the scalar multiplier.
[in]	x	Pointer to the vector on device.
[in]	incx	The stride between consecutive elements of `x`.
[in,out]	a	Pointer to the symmetric matrix `a` on device (modified in-place).
[in]	lda	The leading dimension of matrix `a`.

◆ her()

template<typename T_Scalar>

void culite::CuBlasHandler::her	(	uplo_t	uplo,
		int_t	n,
		const typename TypeTraits< T_Scalar >::real_type *	alpha,
		const T_Scalar *	x,
		int_t	incx,
		T_Scalar *	a,
		int_t	lda )

Performs Hermitian rank-1 update.

Computes \( A = \alpha \cdot x \cdot x^H + A \), where \( A \) is Hermitian.

Template Parameters

T_Scalar The scalar type of the vector and matrix elements.

Parameters

[in]	uplo	Specifies whether the upper or lower triangular part of `a` is referenced.
[in]	n	The number of rows and columns in matrix `a`.
[in]	alpha	Pointer to the real scalar multiplier.
[in]	x	Pointer to the vector on device.
[in]	incx	The stride between consecutive elements of `x`.
[in,out]	a	Pointer to the Hermitian matrix `a` on device (modified in-place).
[in]	lda	The leading dimension of matrix `a`.

◆ gemv()

template<typename T_Scalar>

void culite::CuBlasHandler::gemv	(	op_t	op,
		int_t	m,
		int_t	n,
		const T_Scalar *	alpha,
		const T_Scalar *	a,
		int_t	lda,
		const T_Scalar *	x,
		int_t	incx,
		const T_Scalar *	beta,
		T_Scalar *	y,
		int_t	incy )

Performs general matrix-vector multiplication.

Computes \( y = \alpha \cdot op(A) \cdot x + \beta \cdot y \).

Template Parameters

T_Scalar The scalar type of the matrix and vector elements.

Parameters

[in]	op	The operation to apply to matrix `a` (no-transpose, transpose, or conjugate transpose).
[in]	m	The number of rows in matrix \( op(A) \).
[in]	n	The number of columns in matrix \( op(A) \).
[in]	alpha	Pointer to the scalar multiplier.
[in]	a	Pointer to matrix `a` on device.
[in]	lda	The leading dimension of matrix `a`.
[in]	x	Pointer to the input vector on device.
[in]	incx	The stride between consecutive elements of `x`.
[in]	beta	Pointer to the scalar multiplier for `y`.
[in,out]	y	Pointer to the result vector on device (modified in-place).
[in]	incy	The stride between consecutive elements of `y`.

◆ symv()

template<typename T_Scalar>

void culite::CuBlasHandler::symv	(	uplo_t	uplo,
		int_t	n,
		const T_Scalar *	alpha,
		const T_Scalar *	a,
		int_t	lda,
		const T_Scalar *	x,
		int_t	incx,
		const T_Scalar *	beta,
		T_Scalar *	y,
		int_t	incy )

Performs symmetric matrix-vector multiplication.

Computes \( y = \alpha \cdot A \cdot x + \beta \cdot y \), where \( A \) is symmetric.

Template Parameters

T_Scalar The scalar type of the matrix and vector elements.

Parameters

[in]	uplo	Specifies whether the upper or lower triangular part of `a` is referenced.
[in]	n	The number of rows and columns in matrix `a`.
[in]	alpha	Pointer to the scalar multiplier.
[in]	a	Pointer to the symmetric matrix `a` on device.
[in]	lda	The leading dimension of matrix `a`.
[in]	x	Pointer to the input vector on device.
[in]	incx	The stride between consecutive elements of `x`.
[in]	beta	Pointer to the scalar multiplier for `y`.
[in,out]	y	Pointer to the result vector on device (modified in-place).
[in]	incy	The stride between consecutive elements of `y`.

◆ hemv()

template<typename T_Scalar>

void culite::CuBlasHandler::hemv	(	uplo_t	uplo,
		int_t	n,
		const T_Scalar *	alpha,
		const T_Scalar *	a,
		int_t	lda,
		const T_Scalar *	x,
		int_t	incx,
		const T_Scalar *	beta,
		T_Scalar *	y,
		int_t	incy )

Performs Hermitian matrix-vector multiplication.

Computes \( y = \alpha \cdot A \cdot x + \beta \cdot y \), where \( A \) is Hermitian.

Template Parameters

T_Scalar The scalar type of the matrix and vector elements.

Parameters

[in]	uplo	Specifies whether the upper or lower triangular part of `a` is referenced.
[in]	n	The number of rows and columns in matrix `a`.
[in]	alpha	Pointer to the scalar multiplier.
[in]	a	Pointer to the Hermitian matrix `a` on device.
[in]	lda	The leading dimension of matrix `a`.
[in]	x	Pointer to the input vector on device.
[in]	incx	The stride between consecutive elements of `x`.
[in]	beta	Pointer to the scalar multiplier for `y`.
[in,out]	y	Pointer to the result vector on device (modified in-place).
[in]	incy	The stride between consecutive elements of `y`.

◆ trmv()

template<typename T_Scalar>

void culite::CuBlasHandler::trmv	(	uplo_t	uplo,
		op_t	trans,
		diag_t	diag,
		int_t	n,
		const T_Scalar *	a,
		int_t	lda,
		T_Scalar *	x,
		int_t	incx )

Performs triangular matrix-vector multiplication.

Computes \( x = op(A) \cdot x \), where \( A \) is triangular.

Template Parameters

T_Scalar The scalar type of the matrix and vector elements.

Parameters

[in]	uplo	Specifies whether the upper or lower triangular part of `a` is used.
[in]	trans	The operation to apply to matrix `a` (no-transpose, transpose, or conjugate transpose).
[in]	diag	Specifies whether the matrix is unit triangular or not.
[in]	n	The number of rows and columns in matrix `a`.
[in]	a	Pointer to the triangular matrix `a` on device.
[in]	lda	The leading dimension of matrix `a`.
[in,out]	x	Pointer to the vector on device (modified in-place).
[in]	incx	The stride between consecutive elements of `x`.

◆ trsv()

template<typename T_Scalar>

void culite::CuBlasHandler::trsv	(	uplo_t	uplo,
		op_t	trans,
		diag_t	diag,
		int_t	n,
		const T_Scalar *	a,
		int_t	lda,
		T_Scalar *	x,
		int_t	incx )

Solves a triangular system of equations.

Solves \( op(A) \cdot x = b \), where \( A \) is triangular and x contains \( b \) on entry.

Template Parameters

T_Scalar The scalar type of the matrix and vector elements.

Parameters

[in]	uplo	Specifies whether the upper or lower triangular part of `a` is used.
[in]	trans	The operation to apply to matrix `a` (no-transpose, transpose, or conjugate transpose).
[in]	diag	Specifies whether the matrix is unit triangular or not.
[in]	n	The number of rows and columns in matrix `a`.
[in]	a	Pointer to the triangular matrix `a` on device.
[in]	lda	The leading dimension of matrix `a`.
[in,out]	x	Pointer to the vector on device containing \( b \) on entry and the solution on exit.
[in]	incx	The stride between consecutive elements of `x`.

◆ gemm()

template<typename T_Scalar>

void culite::CuBlasHandler::gemm	(	op_t	transa,
		op_t	transb,
		int_t	m,
		int_t	n,
		int_t	k,
		const T_Scalar *	alpha,
		const T_Scalar *	a,
		int_t	lda,
		const T_Scalar *	b,
		int_t	ldb,
		const T_Scalar *	beta,
		T_Scalar *	c,
		int_t	ldc )

Performs general matrix-matrix multiplication.

Computes \( C = \alpha \cdot op(A) \cdot op(B) + \beta \cdot C \).

Template Parameters

T_Scalar The scalar type of the matrix elements.

Parameters

[in]	transa	The operation to apply to matrix `a` (no-transpose, transpose, or conjugate transpose).
[in]	transb	The operation to apply to matrix `b` (no-transpose, transpose, or conjugate transpose).
[in]	m	The number of rows in matrices \( op(A) \) and `c`.
[in]	n	The number of columns in matrices \( op(B) \) and `c`.
[in]	k	The number of columns in \( op(A) \) and rows in \( op(B) \).
[in]	alpha	Pointer to the scalar multiplier.
[in]	a	Pointer to matrix `a` on device.
[in]	lda	The leading dimension of matrix `a`.
[in]	b	Pointer to matrix `b` on device.
[in]	ldb	The leading dimension of matrix `b`.
[in]	beta	Pointer to the scalar multiplier for `c`.
[in,out]	c	Pointer to matrix `c` on device (modified in-place).
[in]	ldc	The leading dimension of matrix `c`.

◆ symm()

template<typename T_Scalar>

void culite::CuBlasHandler::symm	(	side_t	side,
		uplo_t	uplo,
		int_t	m,
		int_t	n,
		const T_Scalar *	alpha,
		const T_Scalar *	a,
		int_t	lda,
		const T_Scalar *	b,
		int_t	ldb,
		const T_Scalar *	beta,
		T_Scalar *	c,
		int_t	ldc )

Performs symmetric matrix-matrix multiplication.

Computes \( C = \alpha \cdot A \cdot B + \beta \cdot C \) if side is Left, or \( C = \alpha \cdot B \cdot A + \beta \cdot C \) if side is Right, where \( A \) is symmetric.

Template Parameters

T_Scalar The scalar type of the matrix elements.

Parameters

[in]	side	Specifies whether the symmetric matrix `a` multiplies from the left or right.
[in]	uplo	Specifies whether the upper or lower triangular part of `a` is referenced.
[in]	m	The number of rows in matrices `b` and `c`.
[in]	n	The number of columns in matrices `b` and `c`.
[in]	alpha	Pointer to the scalar multiplier.
[in]	a	Pointer to the symmetric matrix `a` on device.
[in]	lda	The leading dimension of matrix `a`.
[in]	b	Pointer to matrix `b` on device.
[in]	ldb	The leading dimension of matrix `b`.
[in]	beta	Pointer to the scalar multiplier for `c`.
[in,out]	c	Pointer to matrix `c` on device (modified in-place).
[in]	ldc	The leading dimension of matrix `c`.

◆ hemm()

template<typename T_Scalar>

void culite::CuBlasHandler::hemm	(	side_t	side,
		uplo_t	uplo,
		int_t	m,
		int_t	n,
		const T_Scalar *	alpha,
		const T_Scalar *	a,
		int_t	lda,
		const T_Scalar *	b,
		int_t	ldb,
		const T_Scalar *	beta,
		T_Scalar *	c,
		int_t	ldc )

Performs Hermitian matrix-matrix multiplication.

Computes \( C = \alpha \cdot A \cdot B + \beta \cdot C \) if side is Left, or \( C = \alpha \cdot B \cdot A + \beta \cdot C \) if side is Right, where \( A \) is Hermitian.

Template Parameters

T_Scalar The scalar type of the matrix elements.

Parameters

[in]	side	Specifies whether the Hermitian matrix `a` multiplies from the left or right.
[in]	uplo	Specifies whether the upper or lower triangular part of `a` is referenced.
[in]	m	The number of rows in matrices `b` and `c`.
[in]	n	The number of columns in matrices `b` and `c`.
[in]	alpha	Pointer to the scalar multiplier.
[in]	a	Pointer to the Hermitian matrix `a` on device.
[in]	lda	The leading dimension of matrix `a`.
[in]	b	Pointer to matrix `b` on device.
[in]	ldb	The leading dimension of matrix `b`.
[in]	beta	Pointer to the scalar multiplier for `c`.
[in,out]	c	Pointer to matrix `c` on device (modified in-place).
[in]	ldc	The leading dimension of matrix `c`.

◆ trmm()

template<typename T_Scalar>

void culite::CuBlasHandler::trmm	(	side_t	side,
		uplo_t	uplo,
		op_t	trans,
		diag_t	diag,
		int_t	m,
		int_t	n,
		const T_Scalar *	alpha,
		const T_Scalar *	a,
		int_t	lda,
		const T_Scalar *	b,
		int_t	ldb,
		T_Scalar *	c,
		int_t	ldc )

Performs triangular matrix-matrix multiplication.

Computes \( C = \alpha \cdot op(A) \cdot B \) if side is Left, or \( C = \alpha \cdot B \cdot op(A) \) if side is Right, where \( A \) is triangular.

Template Parameters

T_Scalar The scalar type of the matrix elements.

Parameters

[in]	side	Specifies whether the triangular matrix `a` multiplies from the left or right.
[in]	uplo	Specifies whether the upper or lower triangular part of `a` is used.
[in]	trans	The operation to apply to matrix `a` (no-transpose, transpose, or conjugate transpose).
[in]	diag	Specifies whether the matrix is unit triangular or not.
[in]	m	The number of rows in matrices `b` and `c`.
[in]	n	The number of columns in matrices `b` and `c`.
[in]	alpha	Pointer to the scalar multiplier.
[in]	a	Pointer to the triangular matrix `a` on device.
[in]	lda	The leading dimension of matrix `a`.
[in]	b	Pointer to matrix `b` on device.
[in]	ldb	The leading dimension of matrix `b`.
[out]	c	Pointer to matrix `c` on device (result).
[in]	ldc	The leading dimension of matrix `c`.

◆ trsm()

template<typename T_Scalar>

void culite::CuBlasHandler::trsm	(	side_t	side,
		uplo_t	uplo,
		op_t	trans,
		diag_t	diag,
		int_t	m,
		int_t	n,
		const T_Scalar *	alpha,
		const T_Scalar *	a,
		int_t	lda,
		T_Scalar *	b,
		int_t	ldb )

Solves a triangular matrix equation.

Solves \( op(A) \cdot X = \alpha \cdot B \) if side is Left, or \( X \cdot op(A) = \alpha \cdot B \) if side is Right, where \( A \) is triangular. The solution \( X \) overwrites b on exit.

Template Parameters

T_Scalar The scalar type of the matrix elements.

Parameters

[in]	side	Specifies whether the triangular matrix `a` is on the left or right.
[in]	uplo	Specifies whether the upper or lower triangular part of `a` is used.
[in]	trans	The operation to apply to matrix `a` (no-transpose, transpose, or conjugate transpose).
[in]	diag	Specifies whether the matrix is unit triangular or not.
[in]	m	The number of rows in matrix `b`.
[in]	n	The number of columns in matrix `b`.
[in]	alpha	Pointer to the scalar multiplier.
[in]	a	Pointer to the triangular matrix `a` on device.
[in]	lda	The leading dimension of matrix `a`.
[in,out]	b	Pointer to matrix `b` on device (contains right-hand side on entry, solution on exit).
[in]	ldb	The leading dimension of matrix `b`.

◆ syrk()

template<typename T_Scalar>

void culite::CuBlasHandler::syrk	(	uplo_t	uplo,
		op_t	trans,
		int_t	n,
		int_t	k,
		const T_Scalar *	alpha,
		const T_Scalar *	a,
		int_t	lda,
		const T_Scalar *	beta,
		T_Scalar *	c,
		int_t	ldc )

Performs symmetric rank-k update.

Computes \( C = \alpha \cdot A \cdot A^T + \beta \cdot C \) if trans is NoTrans, or \( C = \alpha \cdot A^T \cdot A + \beta \cdot C \) if trans is Trans, where \( C \) is symmetric.

Template Parameters

T_Scalar The scalar type of the matrix elements.

Parameters

[in]	uplo	Specifies whether the upper or lower triangular part of `c` is updated.
[in]	trans	The operation to apply to matrix `a` (no-transpose or transpose).
[in]	n	The number of rows and columns in matrix `c`.
[in]	k	The inner dimension of the multiplication.
[in]	alpha	Pointer to the scalar multiplier.
[in]	a	Pointer to matrix `a` on device.
[in]	lda	The leading dimension of matrix `a`.
[in]	beta	Pointer to the scalar multiplier for `c`.
[in,out]	c	Pointer to the symmetric matrix `c` on device (modified in-place).
[in]	ldc	The leading dimension of matrix `c`.

◆ herk()

template<typename T_Scalar>

void culite::CuBlasHandler::herk	(	uplo_t	uplo,
		op_t	trans,
		int_t	n,
		int_t	k,
		const typename TypeTraits< T_Scalar >::real_type *	alpha,
		const T_Scalar *	a,
		int_t	lda,
		const typename TypeTraits< T_Scalar >::real_type *	beta,
		T_Scalar *	c,
		int_t	ldc )

Performs Hermitian rank-k update.

Computes \( C = \alpha \cdot A \cdot A^H + \beta \cdot C \) if trans is NoTrans, or \( C = \alpha \cdot A^H \cdot A + \beta \cdot C \) if trans is ConjTrans, where \( C \) is Hermitian.

Template Parameters

T_Scalar The scalar type of the matrix elements.

Parameters

[in]	uplo	Specifies whether the upper or lower triangular part of `c` is updated.
[in]	trans	The operation to apply to matrix `a` (no-transpose or conjugate transpose).
[in]	n	The number of rows and columns in matrix `c`.
[in]	k	The inner dimension of the multiplication.
[in]	alpha	Pointer to the real scalar multiplier.
[in]	a	Pointer to matrix `a` on device.
[in]	lda	The leading dimension of matrix `a`.
[in]	beta	Pointer to the real scalar multiplier for `c`.
[in,out]	c	Pointer to the Hermitian matrix `c` on device (modified in-place).
[in]	ldc	The leading dimension of matrix `c`.

◆ syrkx()

template<typename T_Scalar>

void culite::CuBlasHandler::syrkx	(	uplo_t	uplo,
		op_t	trans,
		int_t	n,
		int_t	k,
		const T_Scalar *	alpha,
		const T_Scalar *	a,
		int_t	lda,
		const T_Scalar *	b,
		int_t	ldb,
		const T_Scalar *	beta,
		T_Scalar *	c,
		int_t	ldc )

Performs symmetric rank-k update with two matrices.

Computes \( C = \alpha \cdot A \cdot B^T + \beta \cdot C \) if trans is NoTrans, or \( C = \alpha \cdot A^T \cdot B + \beta \cdot C \) if trans is Trans, where \( C \) is symmetric.

Template Parameters

T_Scalar The scalar type of the matrix elements.

Parameters

[in]	uplo	Specifies whether the upper or lower triangular part of `c` is updated.
[in]	trans	The operation to apply to matrices `a` and `b` (no-transpose or transpose).
[in]	n	The number of rows and columns in matrix `c`.
[in]	k	The inner dimension of the multiplication.
[in]	alpha	Pointer to the scalar multiplier.
[in]	a	Pointer to matrix `a` on device.
[in]	lda	The leading dimension of matrix `a`.
[in]	b	Pointer to matrix `b` on device.
[in]	ldb	The leading dimension of matrix `b`.
[in]	beta	Pointer to the scalar multiplier for `c`.
[in,out]	c	Pointer to the symmetric matrix `c` on device (modified in-place).
[in]	ldc	The leading dimension of matrix `c`.

◆ herkx()

template<typename T_Scalar>

void culite::CuBlasHandler::herkx	(	uplo_t	uplo,
		op_t	trans,
		int_t	n,
		int_t	k,
		const T_Scalar *	alpha,
		const T_Scalar *	a,
		int_t	lda,
		const T_Scalar *	b,
		int_t	ldb,
		const typename TypeTraits< T_Scalar >::real_type *	beta,
		T_Scalar *	c,
		int_t	ldc )

Performs Hermitian rank-k update with two matrices.

Computes \( C = \alpha \cdot A \cdot B^H + \beta \cdot C \) if trans is NoTrans, or \( C = \alpha \cdot A^H \cdot B + \beta \cdot C \) if trans is ConjTrans, where \( C \) is Hermitian.

Template Parameters

T_Scalar The scalar type of the matrix elements.

Parameters

[in]	uplo	Specifies whether the upper or lower triangular part of `c` is updated.
[in]	trans	The operation to apply to matrices `a` and `b` (no-transpose or conjugate transpose).
[in]	n	The number of rows and columns in matrix `c`.
[in]	k	The inner dimension of the multiplication.
[in]	alpha	Pointer to the scalar multiplier.
[in]	a	Pointer to matrix `a` on device.
[in]	lda	The leading dimension of matrix `a`.
[in]	b	Pointer to matrix `b` on device.
[in]	ldb	The leading dimension of matrix `b`.
[in]	beta	Pointer to the real scalar multiplier for `c`.
[in,out]	c	Pointer to the Hermitian matrix `c` on device (modified in-place).
[in]	ldc	The leading dimension of matrix `c`.

Public Member Functions

Detailed Description

Constructor & Destructor Documentation

◆ CuBlasHandler()

◆ ~CuBlasHandler()

Member Function Documentation

◆ handle()

◆ setPointerMode()

◆ pointerMode()

◆ iamax()

◆ asum()

◆ nrm2()

◆ copy()

◆ scal()

◆ dot()

◆ dotc()

◆ axpy()

◆ geam()

◆ dgmm()

◆ ger()

◆ gerc()

◆ syr()

◆ her()

◆ gemv()

◆ symv()

◆ hemv()

◆ trmv()

◆ trsv()

◆ gemm()

◆ symm()

◆ hemm()

◆ trmm()

◆ trsm()

◆ syrk()

◆ herk()

◆ syrkx()

◆ herkx()