#!/usr/bin/env python
"""
Python interface to CUBLAS functions.
Note: this module does not explicitly depend on PyCUDA.
"""
from __future__ import absolute_import
import re
import os
import sys
import warnings
import ctypes
import ctypes.util
import atexit
import numpy as np
from string import Template
from . import cuda
from . import utils
# Load library:
# Candidate names are tried in order: the unversioned name first, then
# versioned names from the newest to the oldest supported release.
_version_list = [10.0, 9.2, 9.1, 9.0, 8.0, 7.5, 7.0, 6.5, 6.0, 5.5, 5.0, 4.0]
if 'linux' in sys.platform:
    _libcublas_libname_list = ['libcublas.so'] + \
        ['libcublas.so.%s' % v for v in _version_list]
elif sys.platform == 'darwin':
    _libcublas_libname_list = ['libcublas.dylib']
elif sys.platform == 'win32':
    # DLL names embed the pointer width and ten times the version number.
    # round() guards against a float product landing just below an integer.
    if sys.maxsize > 2**32:
        _libcublas_libname_list = ['cublas.dll'] + \
            ['cublas64_%s.dll' % int(round(10*v)) for v in _version_list]
    else:
        _libcublas_libname_list = ['cublas.dll'] + \
            ['cublas32_%s.dll' % int(round(10*v)) for v in _version_list]
else:
    raise RuntimeError('unsupported platform')

# Print understandable error message when library cannot be found:
_libcublas = None
for _libcublas_libname in _libcublas_libname_list:
    try:
        if sys.platform == 'win32':
            _libcublas = ctypes.windll.LoadLibrary(_libcublas_libname)
        else:
            _libcublas = ctypes.cdll.LoadLibrary(_libcublas_libname)
    except OSError:
        # This candidate is not installed; try the next one.
        continue
    break
if _libcublas is None:
    raise OSError('cublas library not found')
# Base class for every exception raised on behalf of CUBLAS:
class cublasError(Exception):
    """CUBLAS error"""


# One subclass per CUBLAS status code that signals a failure:
class cublasNotInitialized(cublasError):
    """CUBLAS library not initialized."""


class cublasAllocFailed(cublasError):
    """Resource allocation failed."""


class cublasInvalidValue(cublasError):
    """Unsupported numerical value was passed to function."""


class cublasArchMismatch(cublasError):
    """Function requires an architectural feature absent from the device."""


class cublasMappingError(cublasError):
    """Access to GPU memory space failed."""


class cublasExecutionFailed(cublasError):
    """GPU program failed to execute."""


class cublasInternalError(cublasError):
    """An internal CUBLAS operation failed."""


class cublasNotSupported(cublasError):
    """Not supported."""


class cublasLicenseError(cublasError):
    """License error."""


# Lookup table from numeric CUBLAS status code to exception class:
cublasExceptions = {
    1: cublasNotInitialized,
    3: cublasAllocFailed,
    7: cublasInvalidValue,
    8: cublasArchMismatch,
    11: cublasMappingError,
    13: cublasExecutionFailed,
    14: cublasInternalError,
    15: cublasNotSupported,
    16: cublasLicenseError,
}
# Each table maps either the numeric enum value or its upper/lower-case
# letter abbreviation onto the numeric enum value.
_CUBLAS_OP = {
    0: 0, 'n': 0, 'N': 0,  # CUBLAS_OP_N
    1: 1, 't': 1, 'T': 1,  # CUBLAS_OP_T
    2: 2, 'c': 2, 'C': 2,  # CUBLAS_OP_C
}

_CUBLAS_FILL_MODE = {
    0: 0, 'l': 0, 'L': 0,  # CUBLAS_FILL_MODE_LOWER
    1: 1, 'u': 1, 'U': 1,  # CUBLAS_FILL_MODE_UPPER
}

_CUBLAS_DIAG = {
    0: 0, 'n': 0, 'N': 0,  # CUBLAS_DIAG_NON_UNIT
    1: 1, 'u': 1, 'U': 1,  # CUBLAS_DIAG_UNIT
}

_CUBLAS_SIDE_MODE = {
    0: 0, 'l': 0, 'L': 0,  # CUBLAS_SIDE_LEFT
    1: 1, 'r': 1, 'R': 1,  # CUBLAS_SIDE_RIGHT
}
class _types:
    """Some alias types."""
    # CUBLAS context handle; used below as the first argtype of every call.
    handle = ctypes.c_void_p
    # Stream identifier as passed to cublasSetStream/cublasGetStream.
    stream = ctypes.c_void_p
def cublasCheckStatus(status):
    """
    Raise CUBLAS exception

    Raise an exception corresponding to the specified CUBLAS error
    code. A status of 0 is treated as success and nothing is raised.

    Parameters
    ----------
    status : int
        CUBLAS error code.

    Raises
    ------
    cublasError
        The subclass registered for `status` in `cublasExceptions`, or
        `cublasError` itself (with the numeric code in its message) when
        the code is not in that table.

    See Also
    --------
    cublasExceptions
    """
    if status == 0:
        return
    exc_type = cublasExceptions.get(status)
    if exc_type is None:
        # Keep the numeric code so an unrecognized failure stays diagnosable.
        raise cublasError('unknown CUBLAS status code: %r' % (status,))
    raise exc_type
# Helper functions:
_libcublas.cublasCreate_v2.restype = int
_libcublas.cublasCreate_v2.argtypes = [_types.handle]


def cublasCreate():
    """
    Initialize CUBLAS.

    Creates a CUBLAS library context and returns a handle to it.

    Returns
    -------
    handle : int
        CUBLAS context.

    References
    ----------
    `cublasCreate <http://docs.nvidia.com/cuda/cublas/#cublascreate>`_
    """
    # The library fills in the handle through the pointer passed to it.
    ctx = _types.handle()
    cublasCheckStatus(_libcublas.cublasCreate_v2(ctypes.byref(ctx)))
    return ctx.value
_libcublas.cublasDestroy_v2.restype = int
_libcublas.cublasDestroy_v2.argtypes = [_types.handle]


def cublasDestroy(handle):
    """
    Release CUBLAS resources.

    Destroys the CUBLAS context identified by `handle`.

    Parameters
    ----------
    handle : int
        CUBLAS context.

    References
    ----------
    `cublasDestroy <http://docs.nvidia.com/cuda/cublas/#cublasdestroy>`_
    """
    cublasCheckStatus(_libcublas.cublasDestroy_v2(handle))
_libcublas.cublasGetVersion_v2.restype = int
_libcublas.cublasGetVersion_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_void_p,  # version (output)
]


def cublasGetVersion(handle):
    """
    Get CUBLAS version.

    Queries the loaded CUBLAS library for its version number.

    Parameters
    ----------
    handle : int
        CUBLAS context.

    Returns
    -------
    version : int
        CUBLAS version.

    References
    ----------
    `cublasGetVersion <http://docs.nvidia.com/cuda/cublas/#cublasgetversion>`_
    """
    ver = ctypes.c_int()
    cublasCheckStatus(
        _libcublas.cublasGetVersion_v2(handle, ctypes.byref(ver)))
    return ver.value
def _get_cublas_version():
    """
    Get and save CUBLAS version using the CUBLAS library's SONAME.

    This function tries to avoid calling cublasGetVersion because creating a
    CUBLAS context can subtly affect the performance of subsequent
    CUDA operations in certain circumstances.

    Returns
    -------
    version : str
        Zeros are appended to match format of version returned
        by cublasGetVersion() (e.g., '6050' corresponds to version 6.5).

    Notes
    -----
    Since the version number does not appear to be obtainable from the
    MacOSX CUBLAS library, this function must call cublasGetVersion() on
    MacOSX (but raises a warning to let the user know).
    """
    cublas_path = utils.find_lib_path('cublas')
    try:
        major, minor = re.search(r'[\D\.]+\.+(\d+)\.(\d+)',
                                 utils.get_soname(cublas_path)).groups()
    except Exception:
        # Any failure to parse the SONAME falls back to asking the library.
        # Only Exception is caught so that KeyboardInterrupt and SystemExit
        # still propagate.
        warnings.warn('creating CUBLAS context to get version number')
        h = cublasCreate()
        try:
            version = cublasGetVersion(h)
        finally:
            # Release the temporary context even if the query fails.
            cublasDestroy(h)
        return str(version)
    else:
        return major.ljust(len(major)+1, '0')+minor.ljust(2, '0')


_cublas_version = int(_get_cublas_version())
class _cublas_version_req(object):
    """
    Decorator that gates a function on a minimum CUBLAS version.

    If the installed CUBLAS version is lower than `v`, the decorated
    function is replaced with a placeholder that raises
    NotImplementedError; otherwise the function is returned unchanged.

    Parameters
    ----------
    v : int, float or str
        Minimum required version, e.g. ``7``, ``6.5`` or ``'6.5'``.
    """

    def __init__(self, v):
        self.vs = str(v)
        if isinstance(v, int):
            major, minor = self.vs, '0'
        else:
            found = re.search(r'(\d+)\.(\d+)', self.vs)
            if found is not None:
                major, minor = found.groups()
            else:
                # Accept a bare major version such as '8'.
                found = re.search(r'\d+', self.vs)
                if found is None:
                    raise ValueError('invalid CUBLAS version: %r' % (v,))
                major, minor = found.group(0), '0'
        # Same digit layout as cublasGetVersion(), e.g. 6.5 -> '6050'.
        self.vi = major.ljust(len(major)+1, '0')+minor.ljust(2, '0')

    def __call__(self, f):
        if _cublas_version >= int(self.vi):
            return f

        def f_new(*args, **kwargs):
            raise NotImplementedError('CUBLAS '+self.vs+' required')
        # Keep the placeholder recognizable in help() and tracebacks.
        f_new.__doc__ = f.__doc__
        f_new.__name__ = getattr(f, '__name__', f_new.__name__)
        return f_new
_libcublas.cublasSetStream_v2.restype = int
_libcublas.cublasSetStream_v2.argtypes = [_types.handle,
                                          _types.stream]


def cublasSetStream(handle, id):
    """
    Set current CUBLAS library stream.

    Parameters
    ----------
    handle : int
        CUBLAS context.
    id : int
        Stream ID.

    References
    ----------
    `cublasSetStream <http://docs.nvidia.com/cuda/cublas/#cublassetstream>`_
    """
    # The parameter name `id` shadows the builtin but is part of the public
    # signature, so it is left unchanged.
    status = _libcublas.cublasSetStream_v2(handle, id)
    cublasCheckStatus(status)
_libcublas.cublasGetStream_v2.restype = int
_libcublas.cublasGetStream_v2.argtypes = [_types.handle,
                                          ctypes.c_void_p]


def cublasGetStream(handle):
    """
    Get current CUBLAS library stream.

    Parameters
    ----------
    handle : int
        CUBLAS context.

    Returns
    -------
    id : int
        Stream ID.

    References
    ----------
    `cublasGetStream <http://docs.nvidia.com/cuda/cublas/#cublasgetstream>`_
    """
    # The library writes the stream through the pointer passed to it.
    stream = _types.stream()
    status = _libcublas.cublasGetStream_v2(handle, ctypes.byref(stream))
    cublasCheckStatus(status)
    return stream.value
try:
    _libcublas.cublasGetCurrentCtx.restype = int
except AttributeError:
    # The loaded library does not export the symbol: expose a stub that
    # explains the likely cause instead of failing at import time.
    def cublasGetCurrentCtx():
        raise NotImplementedError(
            'cublasGetCurrentCtx() not found; CULA CUBLAS library probably\n'
            'precedes NVIDIA CUBLAS library in library search path')
else:
    def cublasGetCurrentCtx():
        current = _libcublas.cublasGetCurrentCtx()
        return current

# Both variants share the same documentation:
cublasGetCurrentCtx.__doc__ = """
    Get current CUBLAS context.

    Returns the current context used by CUBLAS.

    Returns
    -------
    handle : int
        CUBLAS context.
    """
### BLAS Level 1 Functions ###
# ISAMAX, IDAMAX, ICAMAX, IZAMAX
# Docstring template shared by cublasIsamax, cublasIdamax, cublasIcamax and
# cublasIzamax; placeholders: ${precision}, ${real}, ${data}, ${func}.
I_AMAX_doc = Template(
"""
Index of maximum magnitude element.
Finds the smallest index of the maximum magnitude element of a
${precision} ${real} vector.
Note: for complex arguments x, the "magnitude" is defined as
`abs(x.real) + abs(x.imag)`, *not* as `abs(x)`.
Parameters
----------
handle : int
CUBLAS context.
n : int
Number of elements in input vector.
x : ctypes.c_void_p
Pointer to ${precision} ${real} input vector.
incx : int
Storage spacing between elements of `x`.
Returns
-------
idx : int
Index of maximum magnitude element.
Examples
--------
>>> import pycuda.autoinit
>>> import pycuda.gpuarray as gpuarray
>>> import numpy as np
>>> x = ${data}
>>> x_gpu = gpuarray.to_gpu(x)
>>> h = cublasCreate()
>>> m = ${func}(h, x_gpu.size, x_gpu.gpudata, 1)
>>> cublasDestroy(h)
>>> np.allclose(m, np.argmax(abs(x.real) + abs(x.imag)))
True
Notes
-----
This function returns a 0-based index.
References
----------
`cublasI<t>amax <http://docs.nvidia.com/cuda/cublas/#cublasi-lt-t-gt-amax>`_
""")
_libcublas.cublasIsamax_v2.restype = int
_libcublas.cublasIsamax_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # result (output)
]


def cublasIsamax(handle, n, x, incx):
    # The library writes its answer through the final pointer argument.
    idx = ctypes.c_int()
    cublasCheckStatus(
        _libcublas.cublasIsamax_v2(handle, n, int(x), incx,
                                   ctypes.byref(idx)))
    # Shift by one so the returned index is 0-based, as documented.
    return idx.value - 1


cublasIsamax.__doc__ = I_AMAX_doc.substitute(
    precision='single precision',
    real='real',
    data='np.random.rand(5).astype(np.float32)',
    func='cublasIsamax')
_libcublas.cublasIdamax_v2.restype = int
_libcublas.cublasIdamax_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # result (output)
]


def cublasIdamax(handle, n, x, incx):
    # The library writes its answer through the final pointer argument.
    idx = ctypes.c_int()
    cublasCheckStatus(
        _libcublas.cublasIdamax_v2(handle, n, int(x), incx,
                                   ctypes.byref(idx)))
    # Shift by one so the returned index is 0-based, as documented.
    return idx.value - 1


cublasIdamax.__doc__ = I_AMAX_doc.substitute(
    precision='double precision',
    real='real',
    data='np.random.rand(5).astype(np.float64)',
    func='cublasIdamax')
_libcublas.cublasIcamax_v2.restype = int
_libcublas.cublasIcamax_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # result (output)
]


def cublasIcamax(handle, n, x, incx):
    # The library writes its answer through the final pointer argument.
    idx = ctypes.c_int()
    cublasCheckStatus(
        _libcublas.cublasIcamax_v2(handle, n, int(x), incx,
                                   ctypes.byref(idx)))
    # Shift by one so the returned index is 0-based, as documented.
    return idx.value - 1


cublasIcamax.__doc__ = I_AMAX_doc.substitute(
    precision='single precision',
    real='complex',
    data='(np.random.rand(5)+1j*np.random.rand(5)).astype(np.complex64)',
    func='cublasIcamax')
_libcublas.cublasIzamax_v2.restype = int
_libcublas.cublasIzamax_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # result (output)
]


def cublasIzamax(handle, n, x, incx):
    # The library writes its answer through the final pointer argument.
    idx = ctypes.c_int()
    cublasCheckStatus(
        _libcublas.cublasIzamax_v2(handle, n, int(x), incx,
                                   ctypes.byref(idx)))
    # Shift by one so the returned index is 0-based, as documented.
    return idx.value - 1


cublasIzamax.__doc__ = I_AMAX_doc.substitute(
    precision='double precision',
    real='complex',
    data='(np.random.rand(5)+1j*np.random.rand(5)).astype(np.complex128)',
    func='cublasIzamax')
# ISAMIN, IDAMIN, ICAMIN, IZAMIN
# Docstring template shared by cublasIsamin, cublasIdamin, cublasIcamin and
# cublasIzamin; placeholders: ${precision}, ${real}, ${data}, ${func}.
I_AMIN_doc = Template(
"""
Index of minimum magnitude element (${precision} ${real}).
Finds the smallest index of the minimum magnitude element of a
${precision} ${real} vector.
Note: for complex arguments x, the "magnitude" is defined as
`abs(x.real) + abs(x.imag)`, *not* as `abs(x)`.
Parameters
----------
handle : int
CUBLAS context.
n : int
Number of elements in input vector.
x : ctypes.c_void_p
Pointer to ${precision} ${real} input vector.
incx : int
Storage spacing between elements of `x`.
Returns
-------
idx : int
Index of minimum magnitude element.
Examples
--------
>>> import pycuda.autoinit
>>> import pycuda.gpuarray as gpuarray
>>> import numpy as np
>>> x = ${data}
>>> x_gpu = gpuarray.to_gpu(x)
>>> h = cublasCreate()
>>> m = ${func}(h, x_gpu.size, x_gpu.gpudata, 1)
>>> cublasDestroy(h)
>>> np.allclose(m, np.argmin(abs(x.real) + abs(x.imag)))
True
Notes
-----
This function returns a 0-based index.
References
----------
`cublasI<t>amin <http://docs.nvidia.com/cuda/cublas/#cublasi-lt-t-gt-amin>`_
"""
)
_libcublas.cublasIsamin_v2.restype = int
_libcublas.cublasIsamin_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # result (output)
]


def cublasIsamin(handle, n, x, incx):
    # The library writes its answer through the final pointer argument.
    idx = ctypes.c_int()
    cublasCheckStatus(
        _libcublas.cublasIsamin_v2(handle, n, int(x), incx,
                                   ctypes.byref(idx)))
    # Shift by one so the returned index is 0-based, as documented.
    return idx.value - 1


cublasIsamin.__doc__ = I_AMIN_doc.substitute(
    precision='single precision',
    real='real',
    data='np.random.rand(5).astype(np.float32)',
    func='cublasIsamin')
_libcublas.cublasIdamin_v2.restype = int
_libcublas.cublasIdamin_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # result (output)
]


def cublasIdamin(handle, n, x, incx):
    # The library writes its answer through the final pointer argument.
    idx = ctypes.c_int()
    cublasCheckStatus(
        _libcublas.cublasIdamin_v2(handle, n, int(x), incx,
                                   ctypes.byref(idx)))
    # Shift by one so the returned index is 0-based, as documented.
    return idx.value - 1


cublasIdamin.__doc__ = I_AMIN_doc.substitute(
    precision='double precision',
    real='real',
    data='np.random.rand(5).astype(np.float64)',
    func='cublasIdamin')
_libcublas.cublasIcamin_v2.restype = int
_libcublas.cublasIcamin_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # result (output)
]


def cublasIcamin(handle, n, x, incx):
    # The library writes its answer through the final pointer argument.
    idx = ctypes.c_int()
    cublasCheckStatus(
        _libcublas.cublasIcamin_v2(handle, n, int(x), incx,
                                   ctypes.byref(idx)))
    # Shift by one so the returned index is 0-based, as documented.
    return idx.value - 1


cublasIcamin.__doc__ = I_AMIN_doc.substitute(
    precision='single precision',
    real='complex',
    data='(np.random.rand(5)+1j*np.random.rand(5)).astype(np.complex64)',
    func='cublasIcamin')
_libcublas.cublasIzamin_v2.restype = int
_libcublas.cublasIzamin_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # result (output)
]


def cublasIzamin(handle, n, x, incx):
    # The library writes its answer through the final pointer argument.
    idx = ctypes.c_int()
    cublasCheckStatus(
        _libcublas.cublasIzamin_v2(handle, n, int(x), incx,
                                   ctypes.byref(idx)))
    # Shift by one so the returned index is 0-based, as documented.
    return idx.value - 1


cublasIzamin.__doc__ = I_AMIN_doc.substitute(
    precision='double precision',
    real='complex',
    data='(np.random.rand(5)+1j*np.random.rand(5)).astype(np.complex128)',
    func='cublasIzamin')
# SASUM, DASUM, SCASUM, DZASUM
# Docstring template shared by cublasSasum, cublasDasum, cublasScasum and
# cublasDzasum; placeholders: ${precision}, ${real}, ${data}, ${func},
# ${ret_type}.
_ASUM_doc = Template(
"""
Sum of absolute values of ${precision} ${real} vector.
Computes the sum of the absolute values of the elements of a
${precision} ${real} vector.
Note: if the vector is complex, then this computes the sum
`sum(abs(x.real)) + sum(abs(x.imag))`
Parameters
----------
handle : int
CUBLAS context.
n : int
Number of elements in input vector.
x : ctypes.c_void_p
Pointer to ${precision} ${real} input vector.
incx : int
Storage spacing between elements of `x`.
Examples
--------
>>> import pycuda.autoinit
>>> import pycuda.gpuarray as gpuarray
>>> import numpy as np
>>> x = ${data}
>>> x_gpu = gpuarray.to_gpu(x)
>>> h = cublasCreate()
>>> s = ${func}(h, x_gpu.size, x_gpu.gpudata, 1)
>>> cublasDestroy(h)
>>> np.allclose(s, abs(x.real).sum() + abs(x.imag).sum())
True
Returns
-------
s : ${ret_type}
Sum of absolute values.
References
----------
`cublas<t>asum <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-asum>`_
"""
)
_libcublas.cublasSasum_v2.restype = int
_libcublas.cublasSasum_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # result (output)
]


def cublasSasum(handle, n, x, incx):
    # The sum comes back through the final pointer argument.
    total = ctypes.c_float()
    cublasCheckStatus(
        _libcublas.cublasSasum_v2(handle, n, int(x), incx,
                                  ctypes.byref(total)))
    return np.float32(total.value)


cublasSasum.__doc__ = _ASUM_doc.substitute(
    precision='single precision',
    real='real',
    data='np.random.rand(5).astype(np.float32)',
    func='cublasSasum',
    ret_type='numpy.float32')
_libcublas.cublasDasum_v2.restype = int
_libcublas.cublasDasum_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # result (output)
]


def cublasDasum(handle, n, x, incx):
    # The sum comes back through the final pointer argument.
    total = ctypes.c_double()
    cublasCheckStatus(
        _libcublas.cublasDasum_v2(handle, n, int(x), incx,
                                  ctypes.byref(total)))
    return np.float64(total.value)


cublasDasum.__doc__ = _ASUM_doc.substitute(
    precision='double precision',
    real='real',
    data='np.random.rand(5).astype(np.float64)',
    func='cublasDasum',
    ret_type='numpy.float64')
_libcublas.cublasScasum_v2.restype = int
_libcublas.cublasScasum_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # result (output)
]


def cublasScasum(handle, n, x, incx):
    # The sum is real-valued even though the input vector is complex.
    total = ctypes.c_float()
    cublasCheckStatus(
        _libcublas.cublasScasum_v2(handle, n, int(x), incx,
                                   ctypes.byref(total)))
    return np.float32(total.value)


cublasScasum.__doc__ = _ASUM_doc.substitute(
    precision='single precision',
    real='complex',
    data='(np.random.rand(5)+1j*np.random.rand(5)).astype(np.complex64)',
    func='cublasScasum',
    ret_type='numpy.float32')
_libcublas.cublasDzasum_v2.restype = int
_libcublas.cublasDzasum_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # result (output)
]


def cublasDzasum(handle, n, x, incx):
    # The sum is real-valued even though the input vector is complex.
    total = ctypes.c_double()
    cublasCheckStatus(
        _libcublas.cublasDzasum_v2(handle, n, int(x), incx,
                                   ctypes.byref(total)))
    return np.float64(total.value)


cublasDzasum.__doc__ = _ASUM_doc.substitute(
    precision='double precision',
    real='complex',
    data='(np.random.rand(5)+1j*np.random.rand(5)).astype(np.complex128)',
    func='cublasDzasum',
    ret_type='numpy.float64')
# SAXPY, DAXPY, CAXPY, ZAXPY
# Docstring template shared by cublasSaxpy, cublasDaxpy, cublasCaxpy and
# cublasZaxpy; placeholders: ${precision}, ${real}, ${type}, ${alpha},
# ${data}, ${func}.
_AXPY_doc = Template(
"""
Vector addition (${precision} ${real}).
Computes the sum of a ${precision} ${real} vector scaled by a
${precision} ${real} scalar and another ${precision} ${real} vector.
Parameters
----------
handle : int
CUBLAS context.
n : int
Number of elements in input vectors.
alpha : ${type}
Scalar.
x : ctypes.c_void_p
Pointer to ${precision} ${real} input vector.
incx : int
Storage spacing between elements of `x`.
y : ctypes.c_void_p
Pointer to ${precision} ${real} input/output vector.
incy : int
Storage spacing between elements of `y`.
Examples
--------
>>> import pycuda.autoinit
>>> import pycuda.gpuarray as gpuarray
>>> import numpy as np
>>> alpha = ${alpha}
>>> x = ${data}
>>> y = ${data}
>>> x_gpu = gpuarray.to_gpu(x)
>>> y_gpu = gpuarray.to_gpu(y)
>>> h = cublasCreate()
>>> ${func}(h, x_gpu.size, alpha, x_gpu.gpudata, 1, y_gpu.gpudata, 1)
>>> cublasDestroy(h)
>>> np.allclose(y_gpu.get(), alpha*x+y)
True
Notes
-----
Both `x` and `y` must contain `n` elements.
References
----------
`cublas<t>axpy <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-axpy>`_
"""
)
_libcublas.cublasSaxpy_v2.restype = int
_libcublas.cublasSaxpy_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # alpha
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
]


def cublasSaxpy(handle, n, alpha, x, incx, y, incy):
    # The scalar is passed to the library by reference.
    alpha_c = ctypes.c_float(alpha)
    cublasCheckStatus(
        _libcublas.cublasSaxpy_v2(handle, n, ctypes.byref(alpha_c),
                                  int(x), incx, int(y), incy))


cublasSaxpy.__doc__ = _AXPY_doc.substitute(
    precision='single precision',
    real='real',
    type='numpy.float32',
    alpha='np.float32(np.random.rand())',
    data='np.random.rand(5).astype(np.float32)',
    func='cublasSaxpy')
_libcublas.cublasDaxpy_v2.restype = int
_libcublas.cublasDaxpy_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # alpha
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
]


def cublasDaxpy(handle, n, alpha, x, incx, y, incy):
    # The scalar is passed to the library by reference.
    alpha_c = ctypes.c_double(alpha)
    cublasCheckStatus(
        _libcublas.cublasDaxpy_v2(handle, n, ctypes.byref(alpha_c),
                                  int(x), incx, int(y), incy))


cublasDaxpy.__doc__ = _AXPY_doc.substitute(
    precision='double precision',
    real='real',
    type='numpy.float64',
    alpha='np.float64(np.random.rand())',
    data='np.random.rand(5).astype(np.float64)',
    func='cublasDaxpy')
_libcublas.cublasCaxpy_v2.restype = int
_libcublas.cublasCaxpy_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # alpha
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
]


def cublasCaxpy(handle, n, alpha, x, incx, y, incy):
    # Repack the Python complex scalar and pass it by reference.
    alpha_c = cuda.cuFloatComplex(alpha.real, alpha.imag)
    cublasCheckStatus(
        _libcublas.cublasCaxpy_v2(handle, n, ctypes.byref(alpha_c),
                                  int(x), incx, int(y), incy))


cublasCaxpy.__doc__ = _AXPY_doc.substitute(
    precision='single precision',
    real='complex',
    type='numpy.complex64',
    alpha='np.complex64(np.random.rand()+1j*np.random.rand())',
    data='(np.random.rand(5)+1j*np.random.rand(5)).astype(np.complex64)',
    func='cublasCaxpy')
_libcublas.cublasZaxpy_v2.restype = int
_libcublas.cublasZaxpy_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # alpha
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
]


def cublasZaxpy(handle, n, alpha, x, incx, y, incy):
    # Repack the Python complex scalar and pass it by reference.
    alpha_c = cuda.cuDoubleComplex(alpha.real, alpha.imag)
    cublasCheckStatus(
        _libcublas.cublasZaxpy_v2(handle, n, ctypes.byref(alpha_c),
                                  int(x), incx, int(y), incy))


cublasZaxpy.__doc__ = _AXPY_doc.substitute(
    precision='double precision',
    real='complex',
    type='numpy.complex128',
    alpha='np.complex128(np.random.rand()+1j*np.random.rand())',
    data='(np.random.rand(5)+1j*np.random.rand(5)).astype(np.complex128)',
    func='cublasZaxpy')
# SCOPY, DCOPY, CCOPY, ZCOPY
# Docstring template shared by cublasScopy, cublasDcopy, cublasCcopy and
# cublasZcopy; placeholders: ${precision}, ${real}, ${data}, ${func}.
_COPY_doc = Template(
"""
Vector copy (${precision} ${real})
Copies a ${precision} ${real} vector to another ${precision} ${real}
vector.
Parameters
----------
handle : int
CUBLAS context.
n : int
Number of elements in input vectors.
x : ctypes.c_void_p
Pointer to ${precision} ${real} input vector.
incx : int
Storage spacing between elements of `x`.
y : ctypes.c_void_p
Pointer to ${precision} ${real} output vector.
incy : int
Storage spacing between elements of `y`.
Examples
--------
>>> import pycuda.autoinit
>>> import pycuda.gpuarray as gpuarray
>>> import numpy as np
>>> x = ${data}
>>> x_gpu = gpuarray.to_gpu(x)
>>> y_gpu = gpuarray.zeros_like(x_gpu)
>>> h = cublasCreate()
>>> ${func}(h, x_gpu.size, x_gpu.gpudata, 1, y_gpu.gpudata, 1)
>>> cublasDestroy(h)
>>> np.allclose(y_gpu.get(), x_gpu.get())
True
Notes
-----
Both `x` and `y` must contain `n` elements.
References
----------
`cublas<t>copy <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-copy>`_
""")
_libcublas.cublasScopy_v2.restype = int
_libcublas.cublasScopy_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
]


def cublasScopy(handle, n, x, incx, y, incy):
    # Device pointers are converted to plain integers for ctypes.
    cublasCheckStatus(
        _libcublas.cublasScopy_v2(handle, n, int(x), incx, int(y), incy))


cublasScopy.__doc__ = _COPY_doc.substitute(
    precision='single precision',
    real='real',
    data='np.random.rand(5).astype(np.float32)',
    func='cublasScopy')
_libcublas.cublasDcopy_v2.restype = int
_libcublas.cublasDcopy_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
]


def cublasDcopy(handle, n, x, incx, y, incy):
    # Device pointers are converted to plain integers for ctypes.
    cublasCheckStatus(
        _libcublas.cublasDcopy_v2(handle, n, int(x), incx, int(y), incy))


cublasDcopy.__doc__ = _COPY_doc.substitute(
    precision='double precision',
    real='real',
    data='np.random.rand(5).astype(np.float64)',
    func='cublasDcopy')
_libcublas.cublasCcopy_v2.restype = int
_libcublas.cublasCcopy_v2.argtypes = [_types.handle,
                                      ctypes.c_int,
                                      ctypes.c_void_p,
                                      ctypes.c_int,
                                      ctypes.c_void_p,
                                      ctypes.c_int]


def cublasCcopy(handle, n, x, incx, y, incy):
    # Device pointers are converted to plain integers for ctypes.
    status = _libcublas.cublasCcopy_v2(handle,
                                       n, int(x), incx, int(y), incy)
    cublasCheckStatus(status)


# The example data carries a nonzero imaginary part so that the doctest
# really exercises complex values, like the other complex examples here.
cublasCcopy.__doc__ = \
    _COPY_doc.substitute(precision='single precision',
                         real='complex',
                         data='(np.random.rand(5)+1j*np.random.rand(5)).astype(np.complex64)',
                         func='cublasCcopy')
_libcublas.cublasZcopy_v2.restype = int
_libcublas.cublasZcopy_v2.argtypes = [_types.handle,
                                      ctypes.c_int,
                                      ctypes.c_void_p,
                                      ctypes.c_int,
                                      ctypes.c_void_p,
                                      ctypes.c_int]


def cublasZcopy(handle, n, x, incx, y, incy):
    # Device pointers are converted to plain integers for ctypes.
    status = _libcublas.cublasZcopy_v2(handle,
                                       n, int(x), incx, int(y), incy)
    cublasCheckStatus(status)


# The example data carries a nonzero imaginary part so that the doctest
# really exercises complex values, like the other complex examples here.
cublasZcopy.__doc__ = \
    _COPY_doc.substitute(precision='double precision',
                         real='complex',
                         data='(np.random.rand(5)+1j*np.random.rand(5)).astype(np.complex128)',
                         func='cublasZcopy')
# SDOT, DDOT, CDOTU, CDOTC, ZDOTU, ZDOTC
# Docstring template shared by the six dot-product wrappers; placeholders:
# ${precision}, ${real}, ${data}, ${ret_type}, ${func}, ${check}.
_DOT_doc = Template(
"""
Vector dot product (${precision} ${real})
Computes the dot product of two ${precision} ${real} vectors.
cublasCdotc and cublasZdotc use the conjugate of the first vector
when computing the dot product.
Parameters
----------
handle : int
CUBLAS context.
n : int
Number of elements in input vectors.
x : ctypes.c_void_p
Pointer to ${precision} ${real} input vector.
incx : int
Storage spacing between elements of `x`.
y : ctypes.c_void_p
Pointer to ${precision} ${real} input vector.
incy : int
Storage spacing between elements of `y`.
Returns
-------
d : ${ret_type}
Dot product of `x` and `y`.
Examples
--------
>>> import pycuda.autoinit
>>> import pycuda.gpuarray as gpuarray
>>> import numpy as np
>>> x = ${data}
>>> y = ${data}
>>> x_gpu = gpuarray.to_gpu(x)
>>> y_gpu = gpuarray.to_gpu(y)
>>> h = cublasCreate()
>>> d = ${func}(h, x_gpu.size, x_gpu.gpudata, 1, y_gpu.gpudata, 1)
>>> cublasDestroy(h)
>>> ${check}
True
Notes
-----
Both `x` and `y` must contain `n` elements.
References
----------
`cublas<t>dot <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-dot>`_
""")
_libcublas.cublasSdot_v2.restype = int
_libcublas.cublasSdot_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
    ctypes.c_void_p,  # result (output)
]


def cublasSdot(handle, n, x, incx, y, incy):
    # The dot product comes back through the final pointer argument.
    dot = ctypes.c_float()
    cublasCheckStatus(
        _libcublas.cublasSdot_v2(handle, n, int(x), incx, int(y), incy,
                                 ctypes.byref(dot)))
    return np.float32(dot.value)


cublasSdot.__doc__ = _DOT_doc.substitute(
    precision='single precision',
    real='real',
    data='np.float32(np.random.rand(5))',
    ret_type='np.float32',
    func='cublasSdot',
    check='np.allclose(d, np.dot(x, y))')
_libcublas.cublasDdot_v2.restype = int
_libcublas.cublasDdot_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
    ctypes.c_void_p,  # result (output)
]


def cublasDdot(handle, n, x, incx, y, incy):
    # The dot product comes back through the final pointer argument.
    dot = ctypes.c_double()
    cublasCheckStatus(
        _libcublas.cublasDdot_v2(handle, n, int(x), incx, int(y), incy,
                                 ctypes.byref(dot)))
    return np.float64(dot.value)


cublasDdot.__doc__ = _DOT_doc.substitute(
    precision='double precision',
    real='real',
    data='np.float64(np.random.rand(5))',
    ret_type='np.float64',
    func='cublasDdot',
    check='np.allclose(d, np.dot(x, y))')
_libcublas.cublasCdotu_v2.restype = int
_libcublas.cublasCdotu_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
    ctypes.c_void_p,  # result (output)
]


def cublasCdotu(handle, n, x, incx, y, incy):
    # The complex result comes back through the final pointer argument.
    dot = cuda.cuFloatComplex()
    cublasCheckStatus(
        _libcublas.cublasCdotu_v2(handle, n, int(x), incx, int(y), incy,
                                  ctypes.byref(dot)))
    return np.complex64(dot.value)


cublasCdotu.__doc__ = _DOT_doc.substitute(
    precision='single precision',
    real='complex',
    data='(np.random.rand(5)+1j*np.random.rand(5)).astype(np.complex64)',
    ret_type='np.complex64',
    func='cublasCdotu',
    check='np.allclose(d, np.dot(x, y))')
_libcublas.cublasCdotc_v2.restype = int
_libcublas.cublasCdotc_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
    ctypes.c_void_p,  # result (output)
]


def cublasCdotc(handle, n, x, incx, y, incy):
    # The complex result comes back through the final pointer argument.
    dot = cuda.cuFloatComplex()
    cublasCheckStatus(
        _libcublas.cublasCdotc_v2(handle, n, int(x), incx, int(y), incy,
                                  ctypes.byref(dot)))
    return np.complex64(dot.value)


cublasCdotc.__doc__ = _DOT_doc.substitute(
    precision='single precision',
    real='complex',
    data='(np.random.rand(5)+1j*np.random.rand(5)).astype(np.complex64)',
    ret_type='np.complex64',
    func='cublasCdotc',
    check='np.allclose(d, np.dot(np.conj(x), y))')
_libcublas.cublasZdotu_v2.restype = int
_libcublas.cublasZdotu_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
    ctypes.c_void_p,  # result (output)
]


def cublasZdotu(handle, n, x, incx, y, incy):
    # The complex result comes back through the final pointer argument.
    dot = cuda.cuDoubleComplex()
    cublasCheckStatus(
        _libcublas.cublasZdotu_v2(handle, n, int(x), incx, int(y), incy,
                                  ctypes.byref(dot)))
    return np.complex128(dot.value)


cublasZdotu.__doc__ = _DOT_doc.substitute(
    precision='double precision',
    real='complex',
    data='(np.random.rand(5)+1j*np.random.rand(5)).astype(np.complex128)',
    ret_type='np.complex128',
    func='cublasZdotu',
    check='np.allclose(d, np.dot(x, y))')
_libcublas.cublasZdotc_v2.restype = int
_libcublas.cublasZdotc_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
    ctypes.c_void_p,  # result (output)
]


def cublasZdotc(handle, n, x, incx, y, incy):
    # The complex result comes back through the final pointer argument.
    dot = cuda.cuDoubleComplex()
    cublasCheckStatus(
        _libcublas.cublasZdotc_v2(handle, n, int(x), incx, int(y), incy,
                                  ctypes.byref(dot)))
    return np.complex128(dot.value)


cublasZdotc.__doc__ = _DOT_doc.substitute(
    precision='double precision',
    real='complex',
    data='(np.random.rand(5)+1j*np.random.rand(5)).astype(np.complex128)',
    ret_type='np.complex128',
    func='cublasZdotc',
    check='np.allclose(d, np.dot(np.conj(x), y))')
# SNRM2, DNRM2, SCNRM2, DZNRM2
# Docstring template shared by cublasSnrm2, cublasDnrm2, cublasScnrm2 and
# cublasDznrm2; placeholders: ${precision}, ${real}, ${data}, ${ret_type},
# ${func}.
_NRM2_doc = Template(
"""
Euclidean norm (2-norm) of ${precision} ${real} vector.
Computes the Euclidean norm of a ${precision} ${real} vector.
Parameters
----------
handle : int
CUBLAS context.
n : int
Number of elements in input vector.
x : ctypes.c_void_p
Pointer to ${precision} ${real} input vector.
incx : int
Storage spacing between elements of `x`.
Returns
-------
nrm : ${ret_type}
Euclidean norm of `x`.
Examples
--------
>>> import pycuda.autoinit
>>> import pycuda.gpuarray as gpuarray
>>> import numpy as np
>>> x = ${data}
>>> x_gpu = gpuarray.to_gpu(x)
>>> h = cublasCreate()
>>> nrm = ${func}(h, x.size, x_gpu.gpudata, 1)
>>> cublasDestroy(h)
>>> np.allclose(nrm, np.linalg.norm(x))
True
References
----------
`cublas<t>nrm2 <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-nrm2>`_
""")
_libcublas.cublasSnrm2_v2.restype = int
_libcublas.cublasSnrm2_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # result (output)
]


def cublasSnrm2(handle, n, x, incx):
    # The norm comes back through the final pointer argument.
    norm = ctypes.c_float()
    cublasCheckStatus(
        _libcublas.cublasSnrm2_v2(handle, n, int(x), incx,
                                  ctypes.byref(norm)))
    return np.float32(norm.value)


cublasSnrm2.__doc__ = _NRM2_doc.substitute(
    precision='single precision',
    real='real',
    data='np.float32(np.random.rand(5))',
    ret_type='numpy.float32',
    func='cublasSnrm2')
_libcublas.cublasDnrm2_v2.restype = int
_libcublas.cublasDnrm2_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # result (output)
]


def cublasDnrm2(handle, n, x, incx):
    # The norm comes back through the final pointer argument.
    norm = ctypes.c_double()
    cublasCheckStatus(
        _libcublas.cublasDnrm2_v2(handle, n, int(x), incx,
                                  ctypes.byref(norm)))
    return np.float64(norm.value)


cublasDnrm2.__doc__ = _NRM2_doc.substitute(
    precision='double precision',
    real='real',
    data='np.float64(np.random.rand(5))',
    ret_type='numpy.float64',
    func='cublasDnrm2')
_libcublas.cublasScnrm2_v2.restype = int
_libcublas.cublasScnrm2_v2.argtypes = [_types.handle,
                                       ctypes.c_int,
                                       ctypes.c_void_p,
                                       ctypes.c_int,
                                       ctypes.c_void_p]


def cublasScnrm2(handle, n, x, incx):
    # The norm of a complex vector is real, hence the c_float result.
    result = ctypes.c_float()
    status = _libcublas.cublasScnrm2_v2(handle,
                                        n, int(x), incx,
                                        ctypes.byref(result))
    cublasCheckStatus(status)
    return np.float32(result.value)


# ret_type must name the real type actually returned above (np.float32).
cublasScnrm2.__doc__ = \
    _NRM2_doc.substitute(precision='single precision',
                         real='complex',
                         data='(np.random.rand(5)+1j*np.random.rand(5)).astype(np.complex64)',
                         ret_type='numpy.float32',
                         func='cublasScnrm2')
_libcublas.cublasDznrm2_v2.restype = int
_libcublas.cublasDznrm2_v2.argtypes = [_types.handle,
                                       ctypes.c_int,
                                       ctypes.c_void_p,
                                       ctypes.c_int,
                                       ctypes.c_void_p]


def cublasDznrm2(handle, n, x, incx):
    # The norm of a complex vector is real, hence the c_double result.
    result = ctypes.c_double()
    status = _libcublas.cublasDznrm2_v2(handle,
                                        n, int(x), incx,
                                        ctypes.byref(result))
    cublasCheckStatus(status)
    return np.float64(result.value)


# ret_type must name the real type actually returned above (np.float64).
cublasDznrm2.__doc__ = \
    _NRM2_doc.substitute(precision='double precision',
                         real='complex',
                         data='(np.random.rand(5)+1j*np.random.rand(5)).astype(np.complex128)',
                         ret_type='numpy.float64',
                         func='cublasDznrm2')
# SROT, DROT, CROT, CSROT, ZROT, ZDROT
# Shared docstring template for the cublas<t>rot wrappers.
# Placeholders filled in through substitute(): real, precision, c_type,
# s_type, s_val, c_val, data, func.
_ROT_doc = Template(
"""
Apply a ${real} rotation to ${real} vectors (${precision})
Multiplies the ${precision} matrix `[[c, s], [-s.conj(), c]]`
with the 2 x `n` ${precision} matrix `[[x.T], [y.T]]`.
Parameters
----------
handle : int
CUBLAS context.
n : int
Number of elements in input vectors.
x : ctypes.c_void_p
Pointer to ${precision} ${real} input/output vector.
incx : int
Storage spacing between elements of `x`.
y : ctypes.c_void_p
Pointer to ${precision} ${real} input/output vector.
incy : int
Storage spacing between elements of `y`.
c : ${c_type}
Element of rotation matrix.
s : ${s_type}
Element of rotation matrix.
Notes
-----
Both `x` and `y` must contain `n` elements.
Examples
--------
>>> import pycuda.autoinit
>>> import pycuda.gpuarray as gpuarray
>>> import numpy as np
>>> s = ${s_val}; c = ${c_val};
>>> x = ${data}
>>> y = ${data}
>>> x_gpu = gpuarray.to_gpu(x)
>>> y_gpu = gpuarray.to_gpu(y)
>>> h = cublasCreate()
>>> ${func}(h, x.size, x_gpu.gpudata, 1, y_gpu.gpudata, 1, c, s)
>>> cublasDestroy(h)
>>> np.allclose(x_gpu.get(), c*x+s*y)
True
>>> np.allclose(y_gpu.get(), -s.conj()*x+c*y)
True
References
----------
`cublas<t>rot <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-rot>`_
""")
# ctypes prototype for cublasSrot_v2; the call returns a status code.
_libcublas.cublasSrot_v2.restype = int
_libcublas.cublasSrot_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
    ctypes.c_void_p,  # c
    ctypes.c_void_p,  # s
]


def cublasSrot(handle, n, x, incx, y, incy, c, s):
    x_ptr, y_ptr = int(x), int(y)
    # Both rotation coefficients are passed by reference as C floats.
    cos_arg = ctypes.c_float(c)
    sin_arg = ctypes.c_float(s)
    rc = _libcublas.cublasSrot_v2(handle, n,
                                  x_ptr, incx,
                                  y_ptr, incy,
                                  ctypes.byref(cos_arg),
                                  ctypes.byref(sin_arg))
    cublasCheckStatus(rc)


cublasSrot.__doc__ = _ROT_doc.substitute(
    func='cublasSrot',
    precision='single precision',
    real='real',
    c_type='numpy.float32',
    s_type='numpy.float32',
    c_val='np.float32(np.random.rand())',
    s_val='np.float32(np.random.rand())',
    data='np.random.rand(5).astype(np.float32)',
)
# ctypes prototype for cublasDrot_v2; the call returns a status code.
_libcublas.cublasDrot_v2.restype = int
_libcublas.cublasDrot_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
    ctypes.c_void_p,  # c
    ctypes.c_void_p,  # s
]


def cublasDrot(handle, n, x, incx, y, incy, c, s):
    x_ptr, y_ptr = int(x), int(y)
    # Both rotation coefficients are passed by reference as C doubles.
    cos_arg = ctypes.c_double(c)
    sin_arg = ctypes.c_double(s)
    rc = _libcublas.cublasDrot_v2(handle, n,
                                  x_ptr, incx,
                                  y_ptr, incy,
                                  ctypes.byref(cos_arg),
                                  ctypes.byref(sin_arg))
    cublasCheckStatus(rc)


cublasDrot.__doc__ = _ROT_doc.substitute(
    func='cublasDrot',
    precision='double precision',
    real='real',
    c_type='numpy.float64',
    s_type='numpy.float64',
    c_val='np.float64(np.random.rand())',
    s_val='np.float64(np.random.rand())',
    data='np.random.rand(5).astype(np.float64)',
)
# ctypes prototype for cublasCrot_v2; the call returns a status code.
_libcublas.cublasCrot_v2.restype = int
_libcublas.cublasCrot_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
    ctypes.c_void_p,  # c
    ctypes.c_void_p,  # s
]


def cublasCrot(handle, n, x, incx, y, incy, c, s):
    x_ptr, y_ptr = int(x), int(y)
    # c is packed as a real float, s as a single precision complex struct.
    cos_arg = ctypes.c_float(c)
    sin_arg = cuda.cuFloatComplex(s.real, s.imag)
    rc = _libcublas.cublasCrot_v2(handle, n,
                                  x_ptr, incx,
                                  y_ptr, incy,
                                  ctypes.byref(cos_arg),
                                  ctypes.byref(sin_arg))
    cublasCheckStatus(rc)


cublasCrot.__doc__ = _ROT_doc.substitute(
    func='cublasCrot',
    precision='single precision',
    real='complex',
    c_type='numpy.float32',
    s_type='numpy.complex64',
    c_val='np.float32(np.random.rand())',
    s_val='np.complex64(np.random.rand()+1j*np.random.rand())',
    data='(np.random.rand(5)+1j*np.random.rand(5)).astype(np.complex64)',
)
# ctypes prototype for cublasCsrot_v2; the call returns a status code.
_libcublas.cublasCsrot_v2.restype = int
_libcublas.cublasCsrot_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
    ctypes.c_void_p,  # c
    ctypes.c_void_p,  # s
]


def cublasCsrot(handle, n, x, incx, y, incy, c, s):
    x_ptr, y_ptr = int(x), int(y)
    # Both rotation coefficients are passed by reference as real C floats.
    cos_arg = ctypes.c_float(c)
    sin_arg = ctypes.c_float(s)
    rc = _libcublas.cublasCsrot_v2(handle, n,
                                   x_ptr, incx,
                                   y_ptr, incy,
                                   ctypes.byref(cos_arg),
                                   ctypes.byref(sin_arg))
    cublasCheckStatus(rc)


cublasCsrot.__doc__ = _ROT_doc.substitute(
    func='cublasCsrot',
    precision='single precision',
    real='complex',
    c_type='numpy.float32',
    s_type='numpy.float32',
    c_val='np.float32(np.random.rand())',
    s_val='np.float32(np.random.rand())',
    data='(np.random.rand(5)+1j*np.random.rand(5)).astype(np.complex64)',
)
# ctypes prototype for cublasZrot_v2; the call returns a status code.
_libcublas.cublasZrot_v2.restype = int
_libcublas.cublasZrot_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
    ctypes.c_void_p,  # c
    ctypes.c_void_p,  # s
]


def cublasZrot(handle, n, x, incx, y, incy, c, s):
    x_ptr, y_ptr = int(x), int(y)
    # c is packed as a real double, s as a double precision complex struct.
    cos_arg = ctypes.c_double(c)
    sin_arg = cuda.cuDoubleComplex(s.real, s.imag)
    rc = _libcublas.cublasZrot_v2(handle, n,
                                  x_ptr, incx,
                                  y_ptr, incy,
                                  ctypes.byref(cos_arg),
                                  ctypes.byref(sin_arg))
    cublasCheckStatus(rc)


cublasZrot.__doc__ = _ROT_doc.substitute(
    func='cublasZrot',
    precision='double precision',
    real='complex',
    c_type='numpy.float64',
    s_type='numpy.complex128',
    c_val='np.float64(np.random.rand())',
    s_val='np.complex128(np.random.rand()+1j*np.random.rand())',
    data='(np.random.rand(5)+1j*np.random.rand(5)).astype(np.complex128)',
)
# ctypes prototype for cublasZdrot_v2; the call returns a status code.
_libcublas.cublasZdrot_v2.restype = int
_libcublas.cublasZdrot_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
    ctypes.c_void_p,  # c
    ctypes.c_void_p,  # s
]


def cublasZdrot(handle, n, x, incx, y, incy, c, s):
    x_ptr, y_ptr = int(x), int(y)
    # Both rotation coefficients are passed by reference as real C doubles.
    cos_arg = ctypes.c_double(c)
    sin_arg = ctypes.c_double(s)
    rc = _libcublas.cublasZdrot_v2(handle, n,
                                   x_ptr, incx,
                                   y_ptr, incy,
                                   ctypes.byref(cos_arg),
                                   ctypes.byref(sin_arg))
    cublasCheckStatus(rc)


cublasZdrot.__doc__ = _ROT_doc.substitute(
    func='cublasZdrot',
    precision='double precision',
    real='complex',
    c_type='numpy.float64',
    s_type='numpy.float64',
    c_val='np.float64(np.random.rand())',
    s_val='np.float64(np.random.rand())',
    data='(np.random.rand(5)+1j*np.random.rand(5)).astype(np.complex128)',
)
# SROTG, DROTG, CROTG, ZROTG
# Shared docstring template for the cublas<t>rotg wrappers.
# Placeholders filled in through substitute(): precision, real, type,
# c_type, s_type, a_val, b_val, func.
# The real-case relation is stated on abs(r): with c**2+s**2 == 1 the
# rotation preserves length, so only the magnitude of r is fixed by a and b.
_ROTG_doc = Template(
"""
Construct a ${precision} ${real} Givens rotation matrix.
Constructs the ${precision} ${real} Givens rotation matrix
`G = [[c, s], [-s.conj(), c]]` such that
`dot(G, [[a], [b]]) == [[r], [0]]`, where
`c**2+s**2 == 1` and `abs(r) == sqrt(a**2+b**2)` for real numbers and
`c**2+(conj(s)*s) == 1` and `r ==
(a/abs(a))*sqrt(abs(a)**2+abs(b)**2)` for `a != 0` and `r == b`
for `a == 0`.
Parameters
----------
handle : int
CUBLAS context.
a, b : ${type}
Entries of vector whose second entry should be zeroed
out by the rotation.
Returns
-------
r : ${type}
Defined above.
c : ${c_type}
Cosine component of rotation matrix.
s : ${s_type}
Sine component of rotation matrix.
Examples
--------
>>> import pycuda.autoinit
>>> import pycuda.gpuarray as gpuarray
>>> import numpy as np
>>> a = ${a_val}
>>> b = ${b_val}
>>> h = cublasCreate()
>>> r, c, s = ${func}(h, a, b)
>>> cublasDestroy(h)
>>> np.allclose(np.dot(np.array([[c, s], [-np.conj(s), c]]), np.array([[a], [b]])), np.array([[r], [0.0]]), atol=1e-6)
True
References
----------
`cublas<t>rotg <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-rotg>`_
""")
# ctypes prototype for cublasSrotg_v2; the call returns a status code.
_libcublas.cublasSrotg_v2.restype = int
_libcublas.cublasSrotg_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_void_p,  # a
    ctypes.c_void_p,  # b
    ctypes.c_void_p,  # c
    ctypes.c_void_p,  # s
]


def cublasSrotg(handle, a, b):
    # a and b are copied into ctypes floats; c and s start out empty.
    a_io, b_io = ctypes.c_float(a), ctypes.c_float(b)
    cos_out, sin_out = ctypes.c_float(), ctypes.c_float()
    rc = _libcublas.cublasSrotg_v2(handle,
                                   ctypes.byref(a_io),
                                   ctypes.byref(b_io),
                                   ctypes.byref(cos_out),
                                   ctypes.byref(sin_out))
    cublasCheckStatus(rc)
    # The value left in the first slot is what gets returned as r.
    return (np.float32(a_io.value),
            np.float32(cos_out.value),
            np.float32(sin_out.value))


cublasSrotg.__doc__ = _ROTG_doc.substitute(
    func='cublasSrotg',
    precision='single precision',
    real='real',
    type='numpy.float32',
    c_type='numpy.float32',
    s_type='numpy.float32',
    a_val='np.float32(np.random.rand())',
    b_val='np.float32(np.random.rand())',
)
# ctypes prototype for cublasDrotg_v2; the call returns a status code.
_libcublas.cublasDrotg_v2.restype = int
_libcublas.cublasDrotg_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_void_p,  # a
    ctypes.c_void_p,  # b
    ctypes.c_void_p,  # c
    ctypes.c_void_p,  # s
]


def cublasDrotg(handle, a, b):
    # a and b are copied into ctypes doubles; c and s start out empty.
    a_io, b_io = ctypes.c_double(a), ctypes.c_double(b)
    cos_out, sin_out = ctypes.c_double(), ctypes.c_double()
    rc = _libcublas.cublasDrotg_v2(handle,
                                   ctypes.byref(a_io),
                                   ctypes.byref(b_io),
                                   ctypes.byref(cos_out),
                                   ctypes.byref(sin_out))
    cublasCheckStatus(rc)
    # The value left in the first slot is what gets returned as r.
    return (np.float64(a_io.value),
            np.float64(cos_out.value),
            np.float64(sin_out.value))


cublasDrotg.__doc__ = _ROTG_doc.substitute(
    func='cublasDrotg',
    precision='double precision',
    real='real',
    type='numpy.float64',
    c_type='numpy.float64',
    s_type='numpy.float64',
    a_val='np.float64(np.random.rand())',
    b_val='np.float64(np.random.rand())',
)
# ctypes prototype for cublasCrotg_v2; the call returns a status code.
_libcublas.cublasCrotg_v2.restype = int
_libcublas.cublasCrotg_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_void_p,  # a
    ctypes.c_void_p,  # b
    ctypes.c_void_p,  # c
    ctypes.c_void_p,  # s
]


def cublasCrotg(handle, a, b):
    # a, b and s travel as single precision complex structs; c is a real float.
    a_io = cuda.cuFloatComplex(a.real, a.imag)
    b_io = cuda.cuFloatComplex(b.real, b.imag)
    cos_out = ctypes.c_float()
    sin_out = cuda.cuFloatComplex()
    rc = _libcublas.cublasCrotg_v2(handle,
                                   ctypes.byref(a_io),
                                   ctypes.byref(b_io),
                                   ctypes.byref(cos_out),
                                   ctypes.byref(sin_out))
    cublasCheckStatus(rc)
    # The value left in the first slot is what gets returned as r.
    return (np.complex64(a_io.value),
            np.float32(cos_out.value),
            np.complex64(sin_out.value))


cublasCrotg.__doc__ = _ROTG_doc.substitute(
    func='cublasCrotg',
    precision='single precision',
    real='complex',
    type='numpy.complex64',
    c_type='numpy.float32',
    s_type='numpy.complex64',
    a_val='np.complex64(np.random.rand()+1j*np.random.rand())',
    b_val='np.complex64(np.random.rand()+1j*np.random.rand())',
)
# ctypes prototype for cublasZrotg_v2; the call returns a status code.
_libcublas.cublasZrotg_v2.restype = int
_libcublas.cublasZrotg_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_void_p,  # a
    ctypes.c_void_p,  # b
    ctypes.c_void_p,  # c
    ctypes.c_void_p,  # s
]


def cublasZrotg(handle, a, b):
    # a, b and s travel as double precision complex structs; c is a real double.
    a_io = cuda.cuDoubleComplex(a.real, a.imag)
    b_io = cuda.cuDoubleComplex(b.real, b.imag)
    cos_out = ctypes.c_double()
    sin_out = cuda.cuDoubleComplex()
    rc = _libcublas.cublasZrotg_v2(handle,
                                   ctypes.byref(a_io),
                                   ctypes.byref(b_io),
                                   ctypes.byref(cos_out),
                                   ctypes.byref(sin_out))
    cublasCheckStatus(rc)
    # The value left in the first slot is what gets returned as r.
    return (np.complex128(a_io.value),
            np.float64(cos_out.value),
            np.complex128(sin_out.value))


cublasZrotg.__doc__ = _ROTG_doc.substitute(
    func='cublasZrotg',
    precision='double precision',
    real='complex',
    type='numpy.complex128',
    c_type='numpy.float64',
    s_type='numpy.complex128',
    a_val='np.complex128(np.random.rand()+1j*np.random.rand())',
    b_val='np.complex128(np.random.rand()+1j*np.random.rand())',
)
# SROTM, DROTM (need to add example)
# Shared docstring template for the cublas<t>rotm wrappers.
# The only placeholder filled in through substitute() is precision.
_ROTM_doc = Template(
"""
Apply a ${precision} real modified Givens rotation.
Applies the ${precision} real modified Givens rotation matrix `h`
to the 2 x `n` matrix `[[x.T], [y.T]]`.
Parameters
----------
handle : int
CUBLAS context.
n : int
Number of elements in input vectors.
x : ctypes.c_void_p
Pointer to ${precision} real input/output vector.
incx : int
Storage spacing between elements of `x`.
y : ctypes.c_void_p
Pointer to ${precision} real input/output vector.
incy : int
Storage spacing between elements of `y`.
sparam : numpy.ndarray
sparam[0] contains the `flag` described below;
sparam[1:5] contains the values `[h00, h10, h01, h11]`
that determine the rotation matrix `h`.
Notes
-----
The rotation matrix may assume the following values:
for `flag` == -1.0, `h` == `[[h00, h01], [h10, h11]]`
for `flag` == 0.0, `h` == `[[1.0, h01], [h10, 1.0]]`
for `flag` == 1.0, `h` == `[[h00, 1.0], [-1.0, h11]]`
for `flag` == -2.0, `h` == `[[1.0, 0.0], [0.0, 1.0]]`
Both `x` and `y` must contain `n` elements.
References
----------
`cublas<t>rotm <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-rotm>`_
""")
# ctypes prototype for cublasSrotm_v2; the call returns a status code.
_libcublas.cublasSrotm_v2.restype = int
_libcublas.cublasSrotm_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
    ctypes.c_void_p,  # sparam
]


def cublasSrotm(handle, n, x, incx, y, incy, sparam):
    x_ptr, y_ptr = int(x), int(y)
    # sparam is handed over as the address of the array's own buffer.
    param_ptr = int(sparam.ctypes.data)
    rc = _libcublas.cublasSrotm_v2(handle, n, x_ptr, incx, y_ptr, incy,
                                   param_ptr)
    cublasCheckStatus(rc)


cublasSrotm.__doc__ = _ROTM_doc.substitute(precision='single precision')
# ctypes prototype for cublasDrotm_v2; the call returns a status code.
_libcublas.cublasDrotm_v2.restype = int
_libcublas.cublasDrotm_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
    ctypes.c_void_p,  # sparam
]


def cublasDrotm(handle, n, x, incx, y, incy, sparam):
    x_ptr, y_ptr = int(x), int(y)
    # sparam is handed over as the address of the array's own buffer.
    param_ptr = int(sparam.ctypes.data)
    rc = _libcublas.cublasDrotm_v2(handle, n, x_ptr, incx, y_ptr, incy,
                                   param_ptr)
    cublasCheckStatus(rc)


cublasDrotm.__doc__ = _ROTM_doc.substitute(precision='double precision')
# SROTMG, DROTMG (need to add example)
# Shared docstring template for the cublas<t>rotmg wrappers.
# Placeholders filled in through substitute(): precision, type.
# Parameter names follow the wrapper signature (handle, d1, d2, x1, y1) and
# the matrix entries use the same h00..h11 names as the Returns/Notes text.
_ROTMG_doc = Template(
"""
Construct a ${precision} real modified Givens rotation matrix.
Constructs the ${precision} real modified Givens rotation matrix
`h = [[h00, h01], [h10, h11]]` that zeros out the second entry of
the vector `[[sqrt(d1)*x1], [sqrt(d2)*y1]]`.
Parameters
----------
handle : int
CUBLAS context.
d1 : ${type}
${precision} real value.
d2 : ${type}
${precision} real value.
x1 : ${type}
${precision} real value.
y1 : ${type}
${precision} real value.
Returns
-------
sparam : numpy.ndarray
sparam[0] contains the `flag` described below;
sparam[1:5] contains the values `[h00, h10, h01, h11]`
that determine the rotation matrix `h`.
Notes
-----
The rotation matrix may assume the following values:
for `flag` == -1.0, `h` == `[[h00, h01], [h10, h11]]`
for `flag` == 0.0, `h` == `[[1.0, h01], [h10, 1.0]]`
for `flag` == 1.0, `h` == `[[h00, 1.0], [-1.0, h11]]`
for `flag` == -2.0, `h` == `[[1.0, 0.0], [0.0, 1.0]]`
References
----------
`cublas<t>rotmg <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-rotmg>`_
""")
# ctypes prototype for cublasSrotmg_v2; the call returns a status code.
_libcublas.cublasSrotmg_v2.restype = int
_libcublas.cublasSrotmg_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_void_p,  # d1
    ctypes.c_void_p,  # d2
    ctypes.c_void_p,  # x1
    ctypes.c_void_p,  # y1
    ctypes.c_void_p,  # sparam
]


def cublasSrotmg(handle, d1, d2, x1, y1):
    # The four scalars are copied into ctypes floats and passed by reference.
    d1_c, d2_c = ctypes.c_float(d1), ctypes.c_float(d2)
    x1_c, y1_c = ctypes.c_float(x1), ctypes.c_float(y1)
    # Five-element float32 buffer whose address is passed last; it is the
    # only thing returned to the caller.
    sparam = np.empty(5, np.float32)
    rc = _libcublas.cublasSrotmg_v2(handle,
                                    ctypes.byref(d1_c),
                                    ctypes.byref(d2_c),
                                    ctypes.byref(x1_c),
                                    ctypes.byref(y1_c),
                                    int(sparam.ctypes.data))
    cublasCheckStatus(rc)
    return sparam


cublasSrotmg.__doc__ = _ROTMG_doc.substitute(
    type='numpy.float32',
    precision='single precision',
)
# ctypes prototype for cublasDrotmg_v2; the call returns a status code.
_libcublas.cublasDrotmg_v2.restype = int
_libcublas.cublasDrotmg_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_void_p,  # d1
    ctypes.c_void_p,  # d2
    ctypes.c_void_p,  # x1
    ctypes.c_void_p,  # y1
    ctypes.c_void_p,  # sparam
]


def cublasDrotmg(handle, d1, d2, x1, y1):
    # The four scalars are copied into ctypes doubles and passed by reference.
    d1_c, d2_c = ctypes.c_double(d1), ctypes.c_double(d2)
    x1_c, y1_c = ctypes.c_double(x1), ctypes.c_double(y1)
    # Five-element float64 buffer whose address is passed last; it is the
    # only thing returned to the caller.
    sparam = np.empty(5, np.float64)
    rc = _libcublas.cublasDrotmg_v2(handle,
                                    ctypes.byref(d1_c),
                                    ctypes.byref(d2_c),
                                    ctypes.byref(x1_c),
                                    ctypes.byref(y1_c),
                                    int(sparam.ctypes.data))
    cublasCheckStatus(rc)
    return sparam


cublasDrotmg.__doc__ = _ROTMG_doc.substitute(
    type='numpy.float64',
    precision='double precision',
)
# SSCAL, DSCAL, CSCAL, CSSCAL, ZSCAL, ZDSCAL
# Shared docstring template for the cublas<t>scal wrappers.
# Placeholders filled in through substitute(): precision, real, a_real,
# a_type, data, alpha, func.
_SCAL_doc = Template(
"""
Scale a ${precision} ${real} vector by a ${precision} ${a_real} scalar.
Replaces a ${precision} ${real} vector `x` with
`alpha * x`.
Parameters
----------
handle : int
CUBLAS context.
n : int
Number of elements in input vectors.
alpha : ${a_type}
Scalar multiplier.
x : ctypes.c_void_p
Pointer to ${precision} ${real} input/output vector.
incx : int
Storage spacing between elements of `x`.
Examples
--------
>>> import pycuda.autoinit
>>> import pycuda.gpuarray as gpuarray
>>> import numpy as np
>>> x = ${data}
>>> x_gpu = gpuarray.to_gpu(x)
>>> alpha = ${alpha}
>>> h = cublasCreate()
>>> ${func}(h, x.size, alpha, x_gpu.gpudata, 1)
>>> cublasDestroy(h)
>>> np.allclose(x_gpu.get(), alpha*x)
True
References
----------
`cublas<t>scal <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-scal>`_
""")
# ctypes prototype for cublasSscal_v2; the call returns a status code.
_libcublas.cublasSscal_v2.restype = int
_libcublas.cublasSscal_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # alpha
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
]


def cublasSscal(handle, n, alpha, x, incx):
    # alpha is passed by reference as a C float.
    alpha_c = ctypes.c_float(alpha)
    rc = _libcublas.cublasSscal_v2(handle, n, ctypes.byref(alpha_c),
                                   int(x), incx)
    cublasCheckStatus(rc)


cublasSscal.__doc__ = _SCAL_doc.substitute(
    func='cublasSscal',
    precision='single precision',
    real='real',
    a_real='real',
    a_type='numpy.float32',
    alpha='np.float32(np.random.rand())',
    data='np.random.rand(5).astype(np.float32)',
)
# ctypes prototype for cublasDscal_v2; the call returns a status code.
_libcublas.cublasDscal_v2.restype = int
_libcublas.cublasDscal_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # alpha
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
]


def cublasDscal(handle, n, alpha, x, incx):
    # alpha is passed by reference as a C double.
    alpha_c = ctypes.c_double(alpha)
    rc = _libcublas.cublasDscal_v2(handle, n, ctypes.byref(alpha_c),
                                   int(x), incx)
    cublasCheckStatus(rc)


cublasDscal.__doc__ = _SCAL_doc.substitute(
    func='cublasDscal',
    precision='double precision',
    real='real',
    a_real='real',
    a_type='numpy.float64',
    alpha='np.float64(np.random.rand())',
    data='np.random.rand(5).astype(np.float64)',
)
# ctypes prototype for cublasCscal_v2; the call returns a status code.
_libcublas.cublasCscal_v2.restype = int
_libcublas.cublasCscal_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # alpha
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
]


def cublasCscal(handle, n, alpha, x, incx):
    # alpha is passed by reference as a single precision complex struct.
    alpha_c = cuda.cuFloatComplex(alpha.real, alpha.imag)
    rc = _libcublas.cublasCscal_v2(handle, n, ctypes.byref(alpha_c),
                                   int(x), incx)
    cublasCheckStatus(rc)


cublasCscal.__doc__ = _SCAL_doc.substitute(
    func='cublasCscal',
    precision='single precision',
    real='complex',
    a_real='complex',
    a_type='numpy.complex64',
    alpha='np.complex64(np.random.rand()+1j*np.random.rand())',
    data='(np.random.rand(5)+1j*np.random.rand(5)).astype(np.complex64)',
)
# ctypes prototype for cublasCsscal_v2; the call returns a status code.
_libcublas.cublasCsscal_v2.restype = int
_libcublas.cublasCsscal_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # alpha
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
]


def cublasCsscal(handle, n, alpha, x, incx):
    # alpha is passed by reference as a real C float.
    alpha_c = ctypes.c_float(alpha)
    rc = _libcublas.cublasCsscal_v2(handle, n, ctypes.byref(alpha_c),
                                    int(x), incx)
    cublasCheckStatus(rc)


cublasCsscal.__doc__ = _SCAL_doc.substitute(
    func='cublasCsscal',
    precision='single precision',
    real='complex',
    a_real='real',
    a_type='numpy.float32',
    alpha='np.float32(np.random.rand())',
    data='(np.random.rand(5)+1j*np.random.rand(5)).astype(np.complex64)',
)
# ctypes prototype for cublasZscal_v2; the call returns a status code.
_libcublas.cublasZscal_v2.restype = int
_libcublas.cublasZscal_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # alpha
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
]


def cublasZscal(handle, n, alpha, x, incx):
    # alpha is passed by reference as a double precision complex struct.
    alpha_c = cuda.cuDoubleComplex(alpha.real, alpha.imag)
    rc = _libcublas.cublasZscal_v2(handle, n, ctypes.byref(alpha_c),
                                   int(x), incx)
    cublasCheckStatus(rc)


cublasZscal.__doc__ = _SCAL_doc.substitute(
    func='cublasZscal',
    precision='double precision',
    real='complex',
    a_real='complex',
    a_type='numpy.complex128',
    alpha='np.complex128(np.random.rand()+1j*np.random.rand())',
    data='(np.random.rand(5)+1j*np.random.rand(5)).astype(np.complex128)',
)
# ctypes prototype for cublasZdscal_v2; the call returns a status code.
_libcublas.cublasZdscal_v2.restype = int
_libcublas.cublasZdscal_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # alpha
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
]


def cublasZdscal(handle, n, alpha, x, incx):
    # alpha is passed by reference as a real C double.
    alpha_c = ctypes.c_double(alpha)
    rc = _libcublas.cublasZdscal_v2(handle, n, ctypes.byref(alpha_c),
                                    int(x), incx)
    cublasCheckStatus(rc)


cublasZdscal.__doc__ = _SCAL_doc.substitute(
    func='cublasZdscal',
    precision='double precision',
    real='complex',
    a_real='real',
    a_type='numpy.float64',
    alpha='np.float64(np.random.rand())',
    data='(np.random.rand(5)+1j*np.random.rand(5)).astype(np.complex128)',
)
# SSWAP, DSWAP, CSWAP, ZSWAP
# Shared docstring template for the cublas<t>swap wrappers.
# Placeholders filled in through substitute(): precision, real, data, func.
_SWAP_doc = Template(
"""
Swap ${precision} ${real} vectors.
Swaps the contents of one ${precision} ${real} vector with those
of another ${precision} ${real} vector.
Parameters
----------
handle : int
CUBLAS context.
n : int
Number of elements in input vectors.
x : ctypes.c_void_p
Pointer to ${precision} ${real} input/output vector.
incx : int
Storage spacing between elements of `x`.
y : ctypes.c_void_p
Pointer to ${precision} ${real} input/output vector.
incy : int
Storage spacing between elements of `y`.
Examples
--------
>>> import pycuda.autoinit
>>> import pycuda.gpuarray as gpuarray
>>> import numpy as np
>>> x = ${data}
>>> y = ${data}
>>> x_gpu = gpuarray.to_gpu(x)
>>> y_gpu = gpuarray.to_gpu(y)
>>> h = cublasCreate()
>>> ${func}(h, x.size, x_gpu.gpudata, 1, y_gpu.gpudata, 1)
>>> cublasDestroy(h)
>>> np.allclose(x_gpu.get(), y)
True
>>> np.allclose(y_gpu.get(), x)
True
Notes
-----
Both `x` and `y` must contain `n` elements.
References
----------
`cublas<t>swap <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-swap>`_
""")
# ctypes prototype for cublasSswap_v2; the call returns a status code.
_libcublas.cublasSswap_v2.restype = int
_libcublas.cublasSswap_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
]


def cublasSswap(handle, n, x, incx, y, incy):
    # Both vectors are passed as integer addresses.
    x_ptr, y_ptr = int(x), int(y)
    rc = _libcublas.cublasSswap_v2(handle, n, x_ptr, incx, y_ptr, incy)
    cublasCheckStatus(rc)


cublasSswap.__doc__ = _SWAP_doc.substitute(
    func='cublasSswap',
    precision='single precision',
    real='real',
    data='np.random.rand(5).astype(np.float32)',
)
# ctypes prototype for cublasDswap_v2; the call returns a status code.
_libcublas.cublasDswap_v2.restype = int
_libcublas.cublasDswap_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
]


def cublasDswap(handle, n, x, incx, y, incy):
    # Both vectors are passed as integer addresses.
    x_ptr, y_ptr = int(x), int(y)
    rc = _libcublas.cublasDswap_v2(handle, n, x_ptr, incx, y_ptr, incy)
    cublasCheckStatus(rc)


cublasDswap.__doc__ = _SWAP_doc.substitute(
    func='cublasDswap',
    precision='double precision',
    real='real',
    data='np.random.rand(5).astype(np.float64)',
)
# ctypes prototype for cublasCswap_v2; the call returns a status code.
_libcublas.cublasCswap_v2.restype = int
_libcublas.cublasCswap_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
]


def cublasCswap(handle, n, x, incx, y, incy):
    # Both vectors are passed as integer addresses.
    x_ptr, y_ptr = int(x), int(y)
    rc = _libcublas.cublasCswap_v2(handle, n, x_ptr, incx, y_ptr, incy)
    cublasCheckStatus(rc)


cublasCswap.__doc__ = _SWAP_doc.substitute(
    func='cublasCswap',
    precision='single precision',
    real='complex',
    data='(np.random.rand(5)+1j*np.random.rand(5)).astype(np.complex64)',
)
# ctypes prototype for cublasZswap_v2; the call returns a status code.
_libcublas.cublasZswap_v2.restype = int
_libcublas.cublasZswap_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # n
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
]


def cublasZswap(handle, n, x, incx, y, incy):
    # Both vectors are passed as integer addresses.
    x_ptr, y_ptr = int(x), int(y)
    rc = _libcublas.cublasZswap_v2(handle, n, x_ptr, incx, y_ptr, incy)
    cublasCheckStatus(rc)


cublasZswap.__doc__ = _SWAP_doc.substitute(
    func='cublasZswap',
    precision='double precision',
    real='complex',
    data='(np.random.rand(5)+1j*np.random.rand(5)).astype(np.complex128)',
)
### BLAS Level 2 Functions ###
# SGBMV, DGBMV, CGBMV, ZGBMV
# ctypes prototype for cublasSgbmv_v2; the call returns a status code.
# trans is declared as an int because the v2 API takes an operation enum,
# matching how the gemv wrappers in this module declare it.
_libcublas.cublasSgbmv_v2.restype = int
_libcublas.cublasSgbmv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # trans
    ctypes.c_int,     # m
    ctypes.c_int,     # n
    ctypes.c_int,     # kl
    ctypes.c_int,     # ku
    ctypes.c_void_p,  # alpha
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # beta
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
]


def cublasSgbmv(handle, trans, m, n, kl, ku, alpha, A, lda,
                x, incx, beta, y, incy):
    """
    Matrix-vector product for real single precision general banded matrix.

    Thin wrapper around `cublasSgbmv_v2`. `trans` is looked up in
    `_CUBLAS_OP` (same convention as `cublasSgemv`); `alpha` and `beta`
    are passed by reference as C floats; `A`, `x` and `y` are converted
    with `int()` and passed as pointers. The returned status is passed to
    `cublasCheckStatus`.

    References
    ----------
    `cublas<t>gbmv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gbmv>`_
    """
    op = _CUBLAS_OP[trans]
    alpha_c = ctypes.c_float(alpha)
    beta_c = ctypes.c_float(beta)
    status = _libcublas.cublasSgbmv_v2(handle,
                                       op, m, n, kl, ku,
                                       ctypes.byref(alpha_c),
                                       int(A), lda,
                                       int(x), incx,
                                       ctypes.byref(beta_c),
                                       int(y), incy)
    cublasCheckStatus(status)
# ctypes prototype for cublasDgbmv_v2; the call returns a status code.
# trans is declared as an int because the v2 API takes an operation enum,
# matching how the gemv wrappers in this module declare it.
_libcublas.cublasDgbmv_v2.restype = int
_libcublas.cublasDgbmv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # trans
    ctypes.c_int,     # m
    ctypes.c_int,     # n
    ctypes.c_int,     # kl
    ctypes.c_int,     # ku
    ctypes.c_void_p,  # alpha
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # beta
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
]


def cublasDgbmv(handle, trans, m, n, kl, ku, alpha, A, lda,
                x, incx, beta, y, incy):
    """
    Matrix-vector product for real double precision general banded matrix.

    Thin wrapper around `cublasDgbmv_v2`. `trans` is looked up in
    `_CUBLAS_OP` (same convention as `cublasDgemv`); `alpha` and `beta`
    are passed by reference as C doubles; `A`, `x` and `y` are converted
    with `int()` and passed as pointers. The returned status is passed to
    `cublasCheckStatus`.

    References
    ----------
    `cublas<t>gbmv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gbmv>`_
    """
    op = _CUBLAS_OP[trans]
    # Double precision routine: the scalars must be 8-byte doubles, not
    # 4-byte floats, or the library reads past the end of the buffer.
    alpha_c = ctypes.c_double(alpha)
    beta_c = ctypes.c_double(beta)
    status = _libcublas.cublasDgbmv_v2(handle,
                                       op, m, n, kl, ku,
                                       ctypes.byref(alpha_c),
                                       int(A), lda,
                                       int(x), incx,
                                       ctypes.byref(beta_c),
                                       int(y), incy)
    cublasCheckStatus(status)
# ctypes prototype for cublasCgbmv_v2; the call returns a status code.
# trans is declared as an int because the v2 API takes an operation enum,
# matching how the gemv wrappers in this module declare it.
_libcublas.cublasCgbmv_v2.restype = int
_libcublas.cublasCgbmv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # trans
    ctypes.c_int,     # m
    ctypes.c_int,     # n
    ctypes.c_int,     # kl
    ctypes.c_int,     # ku
    ctypes.c_void_p,  # alpha
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # beta
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
]


def cublasCgbmv(handle, trans, m, n, kl, ku, alpha, A, lda,
                x, incx, beta, y, incy):
    """
    Matrix-vector product for complex single precision general banded matrix.

    Thin wrapper around `cublasCgbmv_v2`. `trans` is looked up in
    `_CUBLAS_OP` (same convention as `cublasCgemv`); `alpha` and `beta`
    are passed by reference as single precision complex structs; `A`, `x`
    and `y` are converted with `int()` and passed as pointers. The
    returned status is passed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>gbmv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gbmv>`_
    """
    op = _CUBLAS_OP[trans]
    alpha_c = cuda.cuFloatComplex(alpha.real, alpha.imag)
    beta_c = cuda.cuFloatComplex(beta.real, beta.imag)
    status = _libcublas.cublasCgbmv_v2(handle,
                                       op, m, n, kl, ku,
                                       ctypes.byref(alpha_c),
                                       int(A), lda,
                                       int(x), incx,
                                       ctypes.byref(beta_c),
                                       int(y), incy)
    cublasCheckStatus(status)
# ctypes prototype for cublasZgbmv_v2; the call returns a status code.
# The list starts with the handle (the wrapper passes it first) and trans is
# declared as an int because the v2 API takes an operation enum, matching
# how the gemv wrappers in this module declare it.
_libcublas.cublasZgbmv_v2.restype = int
_libcublas.cublasZgbmv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # trans
    ctypes.c_int,     # m
    ctypes.c_int,     # n
    ctypes.c_int,     # kl
    ctypes.c_int,     # ku
    ctypes.c_void_p,  # alpha
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # beta
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
]


def cublasZgbmv(handle, trans, m, n, kl, ku, alpha, A, lda,
                x, incx, beta, y, incy):
    """
    Matrix-vector product for complex double precision general banded matrix.

    Thin wrapper around `cublasZgbmv_v2`. `trans` is looked up in
    `_CUBLAS_OP` (same convention as `cublasZgemv`); `alpha` and `beta`
    are passed by reference as double precision complex structs; `A`, `x`
    and `y` are converted with `int()` and passed as pointers. The
    returned status is passed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>gbmv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gbmv>`_
    """
    op = _CUBLAS_OP[trans]
    alpha_c = cuda.cuDoubleComplex(alpha.real, alpha.imag)
    beta_c = cuda.cuDoubleComplex(beta.real, beta.imag)
    status = _libcublas.cublasZgbmv_v2(handle,
                                       op, m, n, kl, ku,
                                       ctypes.byref(alpha_c),
                                       int(A), lda,
                                       int(x), incx,
                                       ctypes.byref(beta_c),
                                       int(y), incy)
    cublasCheckStatus(status)
# SGEMV, DGEMV, CGEMV, ZGEMV # XXX need to adjust
# _GEMV_doc = Template(
# """
# Matrix-vector product for ${precision} ${real} general matrix.
# Computes the product `alpha*op(A)*x+beta*y`, where `op(A)` == `A`
# or `op(A)` == `A.T`, and stores it in `y`.
# Parameters
# ----------
# trans : char
# If `upper(trans)` in `['T', 'C']`, assume that `A` is
# transposed.
# m : int
# Number of rows in `A`.
# n : int
# Number of columns in `A`.
# alpha : ${a_type}
# `A` is multiplied by this quantity.
# A : ctypes.c_void_p
# Pointer to ${precision} matrix. The matrix has
# shape `(lda, n)` if `upper(trans)` == 'N', `(lda, m)`
# otherwise.
# lda : int
# Leading dimension of `A`.
# x : ctypes.c_void_p
# Pointer to ${precision} array of length at least
# `(1+(n-1)*abs(incx))` if `upper(trans)` == 'N',
# `(1+(m-1)*abs(incx))` otherwise.
# incx : int
# Spacing between elements of `x`. Must be nonzero.
# beta : ${a_type}
# `y` is multiplied by this quantity. If zero, `y` is ignored.
# y : ctypes.c_void_p
# Pointer to ${precision} array of length at least
# `(1+(m-1)*abs(incy))` if `upper(trans)` == `N`,
# `(1+(n-1)*abs(incy))` otherwise.
# incy : int
# Spacing between elements of `y`. Must be nonzero.
# Examples
# --------
# >>> import pycuda.autoinit
# >>> import pycuda.gpuarray as gpuarray
# >>> import numpy as np
# >>> a = np.random.rand(2, 3).astype(np.float32)
# >>> x = np.random.rand(3, 1).astype(np.float32)
# >>> a_gpu = gpuarray.to_gpu(a.T.copy())
# >>> x_gpu = gpuarray.to_gpu(x)
# >>> y_gpu = gpuarray.empty((2, 1), np.float32)
# >>> alpha = np.float32(1.0)
# >>> beta = np.float32(0)
# >>> h = cublasCreate()
# >>> ${func}(h, 'n', 2, 3, alpha, a_gpu.gpudata, 2, x_gpu.gpudata, 1, beta, y_gpu.gpudata, 1)
# >>> cublasDestroy(h)
# >>> np.allclose(y_gpu.get(), np.dot(a, x))
# True
# """
# ctypes prototype for cublasSgemv_v2; the call returns a status code.
_libcublas.cublasSgemv_v2.restype = int
_libcublas.cublasSgemv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # trans
    ctypes.c_int,     # m
    ctypes.c_int,     # n
    ctypes.c_void_p,  # alpha
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # beta
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
]


def cublasSgemv(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy):
    """
    Matrix-vector product for real single precision general matrix.

    Thin wrapper around `cublasSgemv_v2`. `trans` is looked up in
    `_CUBLAS_OP`; `alpha` and `beta` are passed by reference as C floats;
    `A`, `x` and `y` are converted with `int()` and passed as pointers.
    The returned status is passed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>gemv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemv>`_
    """
    op = _CUBLAS_OP[trans]
    alpha_c = ctypes.c_float(alpha)
    a_ptr, x_ptr = int(A), int(x)
    beta_c = ctypes.c_float(beta)
    y_ptr = int(y)
    rc = _libcublas.cublasSgemv_v2(handle, op, m, n,
                                   ctypes.byref(alpha_c), a_ptr, lda,
                                   x_ptr, incx,
                                   ctypes.byref(beta_c), y_ptr, incy)
    cublasCheckStatus(rc)
# ctypes prototype for cublasDgemv_v2; the call returns a status code.
_libcublas.cublasDgemv_v2.restype = int
_libcublas.cublasDgemv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # trans
    ctypes.c_int,     # m
    ctypes.c_int,     # n
    ctypes.c_void_p,  # alpha
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # beta
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
]


def cublasDgemv(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy):
    """
    Matrix-vector product for real double precision general matrix.

    Thin wrapper around `cublasDgemv_v2`. `trans` is looked up in
    `_CUBLAS_OP`; `alpha` and `beta` are passed by reference as C doubles;
    `A`, `x` and `y` are converted with `int()` and passed as pointers.
    The returned status is passed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>gemv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemv>`_
    """
    op = _CUBLAS_OP[trans]
    alpha_c = ctypes.c_double(alpha)
    a_ptr, x_ptr = int(A), int(x)
    beta_c = ctypes.c_double(beta)
    y_ptr = int(y)
    rc = _libcublas.cublasDgemv_v2(handle, op, m, n,
                                   ctypes.byref(alpha_c), a_ptr, lda,
                                   x_ptr, incx,
                                   ctypes.byref(beta_c), y_ptr, incy)
    cublasCheckStatus(rc)
# ctypes prototype for cublasCgemv_v2; the call returns a status code.
_libcublas.cublasCgemv_v2.restype = int
_libcublas.cublasCgemv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # trans
    ctypes.c_int,     # m
    ctypes.c_int,     # n
    ctypes.c_void_p,  # alpha
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # beta
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
]


def cublasCgemv(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy):
    """
    Matrix-vector product for complex single precision general matrix.

    Thin wrapper around `cublasCgemv_v2`. `trans` is looked up in
    `_CUBLAS_OP`; `alpha` and `beta` are passed by reference as single
    precision complex structs; `A`, `x` and `y` are converted with `int()`
    and passed as pointers. The returned status is passed to
    `cublasCheckStatus`.

    References
    ----------
    `cublas<t>gemv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemv>`_
    """
    op = _CUBLAS_OP[trans]
    alpha_c = cuda.cuFloatComplex(alpha.real, alpha.imag)
    a_ptr, x_ptr = int(A), int(x)
    beta_c = cuda.cuFloatComplex(beta.real, beta.imag)
    y_ptr = int(y)
    rc = _libcublas.cublasCgemv_v2(handle, op, m, n,
                                   ctypes.byref(alpha_c), a_ptr, lda,
                                   x_ptr, incx,
                                   ctypes.byref(beta_c), y_ptr, incy)
    cublasCheckStatus(rc)
# ctypes prototype for cublasZgemv_v2; the call returns a status code.
_libcublas.cublasZgemv_v2.restype = int
_libcublas.cublasZgemv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # trans
    ctypes.c_int,     # m
    ctypes.c_int,     # n
    ctypes.c_void_p,  # alpha
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # beta
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
]


def cublasZgemv(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy):
    """
    Matrix-vector product for complex double precision general matrix.

    Thin wrapper around `cublasZgemv_v2`. `trans` is looked up in
    `_CUBLAS_OP`; `alpha` and `beta` are passed by reference as double
    precision complex structs; `A`, `x` and `y` are converted with `int()`
    and passed as pointers. The returned status is passed to
    `cublasCheckStatus`.

    References
    ----------
    `cublas<t>gemv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemv>`_
    """
    op = _CUBLAS_OP[trans]
    alpha_c = cuda.cuDoubleComplex(alpha.real, alpha.imag)
    a_ptr, x_ptr = int(A), int(x)
    beta_c = cuda.cuDoubleComplex(beta.real, beta.imag)
    y_ptr = int(y)
    rc = _libcublas.cublasZgemv_v2(handle, op, m, n,
                                   ctypes.byref(alpha_c), a_ptr, lda,
                                   x_ptr, incx,
                                   ctypes.byref(beta_c), y_ptr, incy)
    cublasCheckStatus(rc)
# SGER, DGER, CGERU, CGERC, ZGERU, ZGERC
# ctypes prototype for cublasSger_v2; the call returns a status code.
_libcublas.cublasSger_v2.restype = int
_libcublas.cublasSger_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # m
    ctypes.c_int,     # n
    ctypes.c_void_p,  # alpha
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
]


def cublasSger(handle, m, n, alpha, x, incx, y, incy, A, lda):
    """
    Rank-1 operation on real single precision general matrix.

    Thin wrapper around `cublasSger_v2`. `alpha` is passed by reference
    as a C float; `x`, `y` and `A` are converted with `int()` and passed
    as pointers. The returned status is passed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>ger <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-ger>`_
    """
    alpha_c = ctypes.c_float(alpha)
    rc = _libcublas.cublasSger_v2(handle, m, n, ctypes.byref(alpha_c),
                                  int(x), incx, int(y), incy, int(A), lda)
    cublasCheckStatus(rc)
# ctypes prototype for cublasDger_v2; the call returns a status code.
_libcublas.cublasDger_v2.restype = int
_libcublas.cublasDger_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # m
    ctypes.c_int,     # n
    ctypes.c_void_p,  # alpha
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
]


def cublasDger(handle, m, n, alpha, x, incx, y, incy, A, lda):
    """
    Rank-1 operation on real double precision general matrix.

    Thin wrapper around `cublasDger_v2`. `alpha` is passed by reference
    as a C double; `x`, `y` and `A` are converted with `int()` and passed
    as pointers. The returned status is passed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>ger <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-ger>`_
    """
    alpha_c = ctypes.c_double(alpha)
    rc = _libcublas.cublasDger_v2(handle, m, n, ctypes.byref(alpha_c),
                                  int(x), incx, int(y), incy, int(A), lda)
    cublasCheckStatus(rc)
# ctypes prototype for cublasCgerc_v2; the call returns a status code.
_libcublas.cublasCgerc_v2.restype = int
_libcublas.cublasCgerc_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # m
    ctypes.c_int,     # n
    ctypes.c_void_p,  # alpha
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
]


def cublasCgerc(handle, m, n, alpha, x, incx, y, incy, A, lda):
    """
    Rank-1 operation on complex single precision general matrix.

    Thin wrapper around `cublasCgerc_v2`. `alpha` is passed by reference
    as a single precision complex struct; `x`, `y` and `A` are converted
    with `int()` and passed as pointers. The returned status is passed to
    `cublasCheckStatus`.

    References
    ----------
    `cublas<t>ger <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-ger>`_
    """
    alpha_c = cuda.cuFloatComplex(alpha.real, alpha.imag)
    rc = _libcublas.cublasCgerc_v2(handle, m, n, ctypes.byref(alpha_c),
                                   int(x), incx, int(y), incy, int(A), lda)
    cublasCheckStatus(rc)
_libcublas.cublasCgeru_v2.restype = int
_libcublas.cublasCgeru_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # m
    ctypes.c_int,     # n
    ctypes.c_void_p,  # alpha (by reference)
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
]


def cublasCgeru(handle, m, n, alpha, x, incx, y, incy, A, lda):
    """
    Rank-1 operation on a complex single precision general matrix.

    Calls ``cublasCgeru_v2``. `alpha` is packed into a
    ``cuda.cuFloatComplex`` from its ``real`` and ``imag`` parts and
    passed by reference; `x`, `y` and `A` are converted with ``int()``
    and passed as pointers (presumably device memory addresses). The
    returned status is handed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>ger <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-ger>`_
    """
    alpha_ref = ctypes.byref(cuda.cuFloatComplex(alpha.real, alpha.imag))
    x_ptr, y_ptr, A_ptr = int(x), int(y), int(A)
    status = _libcublas.cublasCgeru_v2(handle, m, n, alpha_ref,
                                       x_ptr, incx, y_ptr, incy,
                                       A_ptr, lda)
    cublasCheckStatus(status)
_libcublas.cublasZgerc_v2.restype = int
_libcublas.cublasZgerc_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # m
    ctypes.c_int,     # n
    ctypes.c_void_p,  # alpha (by reference)
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
]


def cublasZgerc(handle, m, n, alpha, x, incx, y, incy, A, lda):
    """
    Rank-1 operation on a complex double precision general matrix.

    Calls ``cublasZgerc_v2``. `alpha` is packed into a
    ``cuda.cuDoubleComplex`` from its ``real`` and ``imag`` parts and
    passed by reference; `x`, `y` and `A` are converted with ``int()``
    and passed as pointers (presumably device memory addresses). The
    returned status is handed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>ger <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-ger>`_
    """
    alpha_ref = ctypes.byref(cuda.cuDoubleComplex(alpha.real, alpha.imag))
    x_ptr, y_ptr, A_ptr = int(x), int(y), int(A)
    status = _libcublas.cublasZgerc_v2(handle, m, n, alpha_ref,
                                       x_ptr, incx, y_ptr, incy,
                                       A_ptr, lda)
    cublasCheckStatus(status)
_libcublas.cublasZgeru_v2.restype = int
_libcublas.cublasZgeru_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # m
    ctypes.c_int,     # n
    ctypes.c_void_p,  # alpha (by reference)
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
]


def cublasZgeru(handle, m, n, alpha, x, incx, y, incy, A, lda):
    """
    Rank-1 operation on a complex double precision general matrix.

    Calls ``cublasZgeru_v2``. `alpha` is packed into a
    ``cuda.cuDoubleComplex`` from its ``real`` and ``imag`` parts and
    passed by reference; `x`, `y` and `A` are converted with ``int()``
    and passed as pointers (presumably device memory addresses). The
    returned status is handed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>ger <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-ger>`_
    """
    alpha_ref = ctypes.byref(cuda.cuDoubleComplex(alpha.real, alpha.imag))
    x_ptr, y_ptr, A_ptr = int(x), int(y), int(A)
    status = _libcublas.cublasZgeru_v2(handle, m, n, alpha_ref,
                                       x_ptr, incx, y_ptr, incy,
                                       A_ptr, lda)
    cublasCheckStatus(status)
# SSBMV, DSBMV
_libcublas.cublasSsbmv_v2.restype = int
_libcublas.cublasSsbmv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # n
    ctypes.c_int,     # k
    ctypes.c_void_p,  # alpha (by reference)
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # beta (by reference)
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
]


def cublasSsbmv(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy):
    """
    Matrix-vector product for a real single precision symmetric-banded
    matrix.

    Calls ``cublasSsbmv_v2``. `uplo` is translated through
    ``_CUBLAS_FILL_MODE``; `alpha` and `beta` are wrapped in
    ``ctypes.c_float`` and passed by reference; `A`, `x` and `y` are
    converted with ``int()`` and passed as pointers (presumably device
    memory addresses). The returned status is handed to
    `cublasCheckStatus`.

    References
    ----------
    `cublas<t>sbmv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-sbmv>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    alpha_ref = ctypes.byref(ctypes.c_float(alpha))
    A_ptr, x_ptr = int(A), int(x)
    beta_ref = ctypes.byref(ctypes.c_float(beta))
    status = _libcublas.cublasSsbmv_v2(handle, fill_mode, n, k, alpha_ref,
                                       A_ptr, lda, x_ptr, incx,
                                       beta_ref, int(y), incy)
    cublasCheckStatus(status)
_libcublas.cublasDsbmv_v2.restype = int
_libcublas.cublasDsbmv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # n
    ctypes.c_int,     # k
    ctypes.c_void_p,  # alpha (by reference)
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # beta (by reference)
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
]


def cublasDsbmv(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy):
    """
    Matrix-vector product for a real double precision symmetric-banded
    matrix.

    Calls ``cublasDsbmv_v2``. `uplo` is translated through
    ``_CUBLAS_FILL_MODE``; `alpha` and `beta` are wrapped in
    ``ctypes.c_double`` and passed by reference; `A`, `x` and `y` are
    converted with ``int()`` and passed as pointers (presumably device
    memory addresses). The returned status is handed to
    `cublasCheckStatus`.

    References
    ----------
    `cublas<t>sbmv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-sbmv>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    alpha_ref = ctypes.byref(ctypes.c_double(alpha))
    A_ptr, x_ptr = int(A), int(x)
    beta_ref = ctypes.byref(ctypes.c_double(beta))
    status = _libcublas.cublasDsbmv_v2(handle, fill_mode, n, k, alpha_ref,
                                       A_ptr, lda, x_ptr, incx,
                                       beta_ref, int(y), incy)
    cublasCheckStatus(status)
# SSPMV, DSPMV
_libcublas.cublasSspmv_v2.restype = int
_libcublas.cublasSspmv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # n
    ctypes.c_void_p,  # alpha (by reference)
    ctypes.c_void_p,  # AP
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # beta (by reference)
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
]


def cublasSspmv(handle, uplo, n, alpha, AP, x, incx, beta, y, incy):
    """
    Matrix-vector product for a real single precision symmetric packed
    matrix.

    Calls ``cublasSspmv_v2``. `uplo` is translated through
    ``_CUBLAS_FILL_MODE``; `alpha` and `beta` are wrapped in
    ``ctypes.c_float`` and passed by reference; `AP`, `x` and `y` are
    converted with ``int()`` and passed as pointers (presumably device
    memory addresses). The returned status is handed to
    `cublasCheckStatus`.

    References
    ----------
    `cublas<t>spmv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-spmv>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    alpha_ref = ctypes.byref(ctypes.c_float(alpha))
    # AP is the packed matrix buffer, not a scalar: pass its address the
    # same way as x and y (and as the spr/spr2 wrappers do) instead of
    # wrapping it in a host-side c_float.
    AP_ptr, x_ptr = int(AP), int(x)
    beta_ref = ctypes.byref(ctypes.c_float(beta))
    status = _libcublas.cublasSspmv_v2(handle, fill_mode, n, alpha_ref,
                                       AP_ptr, x_ptr, incx,
                                       beta_ref, int(y), incy)
    cublasCheckStatus(status)
_libcublas.cublasDspmv_v2.restype = int
_libcublas.cublasDspmv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # n
    ctypes.c_void_p,  # alpha (by reference)
    ctypes.c_void_p,  # AP
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # beta (by reference)
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
]


def cublasDspmv(handle, uplo, n, alpha, AP, x, incx, beta, y, incy):
    """
    Matrix-vector product for a real double precision symmetric packed
    matrix.

    Calls ``cublasDspmv_v2``. `uplo` is translated through
    ``_CUBLAS_FILL_MODE``; `alpha` and `beta` are wrapped in
    ``ctypes.c_double`` and passed by reference; `AP`, `x` and `y` are
    converted with ``int()`` and passed as pointers (presumably device
    memory addresses). The returned status is handed to
    `cublasCheckStatus`.

    References
    ----------
    `cublas<t>spmv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-spmv>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    alpha_ref = ctypes.byref(ctypes.c_double(alpha))
    # AP is the packed matrix buffer, not a scalar: pass its address the
    # same way as x and y (and as the spr/spr2 wrappers do) instead of
    # wrapping it in a host-side c_double.
    AP_ptr, x_ptr = int(AP), int(x)
    beta_ref = ctypes.byref(ctypes.c_double(beta))
    status = _libcublas.cublasDspmv_v2(handle, fill_mode, n, alpha_ref,
                                       AP_ptr, x_ptr, incx,
                                       beta_ref, int(y), incy)
    cublasCheckStatus(status)
# SSPR, DSPR
_libcublas.cublasSspr_v2.restype = int
_libcublas.cublasSspr_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # n
    ctypes.c_void_p,  # alpha (by reference)
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # AP
]


def cublasSspr(handle, uplo, n, alpha, x, incx, AP):
    """
    Rank-1 operation on a real single precision symmetric packed matrix.

    Calls ``cublasSspr_v2``. `uplo` is translated through
    ``_CUBLAS_FILL_MODE``; `alpha` is wrapped in a ``ctypes.c_float``
    and passed by reference; `x` and `AP` are converted with ``int()``
    and passed as pointers (presumably device memory addresses). The
    returned status is handed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>spr <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-spr>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    alpha_ref = ctypes.byref(ctypes.c_float(alpha))
    x_ptr, AP_ptr = int(x), int(AP)
    status = _libcublas.cublasSspr_v2(handle, fill_mode, n, alpha_ref,
                                      x_ptr, incx, AP_ptr)
    cublasCheckStatus(status)
_libcublas.cublasDspr_v2.restype = int
_libcublas.cublasDspr_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # n
    ctypes.c_void_p,  # alpha (by reference)
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # AP
]


def cublasDspr(handle, uplo, n, alpha, x, incx, AP):
    """
    Rank-1 operation on a real double precision symmetric packed matrix.

    Calls ``cublasDspr_v2``. `uplo` is translated through
    ``_CUBLAS_FILL_MODE``; `alpha` is wrapped in a ``ctypes.c_double``
    and passed by reference; `x` and `AP` are converted with ``int()``
    and passed as pointers (presumably device memory addresses). The
    returned status is handed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>spr <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-spr>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    alpha_ref = ctypes.byref(ctypes.c_double(alpha))
    x_ptr, AP_ptr = int(x), int(AP)
    status = _libcublas.cublasDspr_v2(handle, fill_mode, n, alpha_ref,
                                      x_ptr, incx, AP_ptr)
    cublasCheckStatus(status)
# SSPR2, DSPR2
_libcublas.cublasSspr2_v2.restype = int
_libcublas.cublasSspr2_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # n
    ctypes.c_void_p,  # alpha (by reference)
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
    ctypes.c_void_p,  # AP
]


def cublasSspr2(handle, uplo, n, alpha, x, incx, y, incy, AP):
    """
    Rank-2 operation on a real single precision symmetric packed matrix.

    Calls ``cublasSspr2_v2``. `uplo` is translated through
    ``_CUBLAS_FILL_MODE``; `alpha` is wrapped in a ``ctypes.c_float``
    and passed by reference; `x`, `y` and `AP` are converted with
    ``int()`` and passed as pointers (presumably device memory
    addresses). The returned status is handed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>spr2 <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-spr2>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    alpha_ref = ctypes.byref(ctypes.c_float(alpha))
    x_ptr, y_ptr, AP_ptr = int(x), int(y), int(AP)
    status = _libcublas.cublasSspr2_v2(handle, fill_mode, n, alpha_ref,
                                       x_ptr, incx, y_ptr, incy, AP_ptr)
    cublasCheckStatus(status)
_libcublas.cublasDspr2_v2.restype = int
_libcublas.cublasDspr2_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # n
    ctypes.c_void_p,  # alpha (by reference)
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
    ctypes.c_void_p,  # AP
]


def cublasDspr2(handle, uplo, n, alpha, x, incx, y, incy, AP):
    """
    Rank-2 operation on a real double precision symmetric packed matrix.

    Calls ``cublasDspr2_v2``. `uplo` is translated through
    ``_CUBLAS_FILL_MODE``; `alpha` is wrapped in a ``ctypes.c_double``
    and passed by reference; `x`, `y` and `AP` are converted with
    ``int()`` and passed as pointers (presumably device memory
    addresses). The returned status is handed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>spr2 <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-spr2>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    alpha_ref = ctypes.byref(ctypes.c_double(alpha))
    x_ptr, y_ptr, AP_ptr = int(x), int(y), int(AP)
    status = _libcublas.cublasDspr2_v2(handle, fill_mode, n, alpha_ref,
                                       x_ptr, incx, y_ptr, incy, AP_ptr)
    cublasCheckStatus(status)
# SSYMV, DSYMV, CSYMV, ZSYMV
_libcublas.cublasSsymv_v2.restype = int
_libcublas.cublasSsymv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # n
    ctypes.c_void_p,  # alpha (by reference)
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # beta (by reference)
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
]


def cublasSsymv(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy):
    """
    Matrix-vector product for a real single precision symmetric matrix.

    Calls ``cublasSsymv_v2``. `uplo` is translated through
    ``_CUBLAS_FILL_MODE``; `alpha` and `beta` are wrapped in
    ``ctypes.c_float`` and passed by reference; `A`, `x` and `y` are
    converted with ``int()`` and passed as pointers (presumably device
    memory addresses). The returned status is handed to
    `cublasCheckStatus`.

    References
    ----------
    `cublas<t>symv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-symv>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    alpha_ref = ctypes.byref(ctypes.c_float(alpha))
    A_ptr, x_ptr = int(A), int(x)
    beta_ref = ctypes.byref(ctypes.c_float(beta))
    status = _libcublas.cublasSsymv_v2(handle, fill_mode, n, alpha_ref,
                                       A_ptr, lda, x_ptr, incx,
                                       beta_ref, int(y), incy)
    cublasCheckStatus(status)
_libcublas.cublasDsymv_v2.restype = int
_libcublas.cublasDsymv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # n
    ctypes.c_void_p,  # alpha (by reference)
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # beta (by reference)
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
]


def cublasDsymv(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy):
    """
    Matrix-vector product for a real double precision symmetric matrix.

    Calls ``cublasDsymv_v2``. `uplo` is translated through
    ``_CUBLAS_FILL_MODE``; `alpha` and `beta` are wrapped in
    ``ctypes.c_double`` and passed by reference; `A`, `x` and `y` are
    converted with ``int()`` and passed as pointers (presumably device
    memory addresses). The returned status is handed to
    `cublasCheckStatus`.

    References
    ----------
    `cublas<t>symv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-symv>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    alpha_ref = ctypes.byref(ctypes.c_double(alpha))
    A_ptr, x_ptr = int(A), int(x)
    beta_ref = ctypes.byref(ctypes.c_double(beta))
    status = _libcublas.cublasDsymv_v2(handle, fill_mode, n, alpha_ref,
                                       A_ptr, lda, x_ptr, incx,
                                       beta_ref, int(y), incy)
    cublasCheckStatus(status)
if _cublas_version >= 5000:
    _libcublas.cublasCsymv_v2.restype = int
    _libcublas.cublasCsymv_v2.argtypes = [
        _types.handle,    # handle
        ctypes.c_int,     # uplo
        ctypes.c_int,     # n
        ctypes.c_void_p,  # alpha (by reference)
        ctypes.c_void_p,  # A
        ctypes.c_int,     # lda
        ctypes.c_void_p,  # x
        ctypes.c_int,     # incx
        ctypes.c_void_p,  # beta (by reference)
        ctypes.c_void_p,  # y
        ctypes.c_int,     # incy
    ]


@_cublas_version_req(5.0)
def cublasCsymv(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy):
    """
    Matrix-vector product for a complex single precision symmetric matrix.

    Calls ``cublasCsymv_v2``, whose ctypes prototype is only declared
    when ``_cublas_version >= 5000``. `uplo` is translated through
    ``_CUBLAS_FILL_MODE``; `alpha` and `beta` are packed into
    ``cuda.cuFloatComplex`` and passed by reference; `A`, `x` and `y`
    are converted with ``int()`` and passed as pointers (presumably
    device memory addresses). The returned status is handed to
    `cublasCheckStatus`.

    References
    ----------
    `cublas<t>symv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-symv>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    alpha_ref = ctypes.byref(cuda.cuFloatComplex(alpha.real, alpha.imag))
    A_ptr, x_ptr = int(A), int(x)
    beta_ref = ctypes.byref(cuda.cuFloatComplex(beta.real, beta.imag))
    status = _libcublas.cublasCsymv_v2(handle, fill_mode, n, alpha_ref,
                                       A_ptr, lda, x_ptr, incx,
                                       beta_ref, int(y), incy)
    cublasCheckStatus(status)
if _cublas_version >= 5000:
    _libcublas.cublasZsymv_v2.restype = int
    _libcublas.cublasZsymv_v2.argtypes = [
        _types.handle,    # handle
        ctypes.c_int,     # uplo
        ctypes.c_int,     # n
        ctypes.c_void_p,  # alpha (by reference)
        ctypes.c_void_p,  # A
        ctypes.c_int,     # lda
        ctypes.c_void_p,  # x
        ctypes.c_int,     # incx
        ctypes.c_void_p,  # beta (by reference)
        ctypes.c_void_p,  # y
        ctypes.c_int,     # incy
    ]


@_cublas_version_req(5.0)
def cublasZsymv(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy):
    """
    Matrix-vector product for a complex double precision symmetric matrix.

    Calls ``cublasZsymv_v2``, whose ctypes prototype is only declared
    when ``_cublas_version >= 5000``. `uplo` is translated through
    ``_CUBLAS_FILL_MODE``; `alpha` and `beta` are packed into
    ``cuda.cuDoubleComplex`` and passed by reference; `A`, `x` and `y`
    are converted with ``int()`` and passed as pointers (presumably
    device memory addresses). The returned status is handed to
    `cublasCheckStatus`.

    References
    ----------
    `cublas<t>symv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-symv>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    alpha_ref = ctypes.byref(cuda.cuDoubleComplex(alpha.real, alpha.imag))
    A_ptr, x_ptr = int(A), int(x)
    beta_ref = ctypes.byref(cuda.cuDoubleComplex(beta.real, beta.imag))
    status = _libcublas.cublasZsymv_v2(handle, fill_mode, n, alpha_ref,
                                       A_ptr, lda, x_ptr, incx,
                                       beta_ref, int(y), incy)
    cublasCheckStatus(status)
# SSYR, DSYR, CSYR, ZSYR
_libcublas.cublasSsyr_v2.restype = int
_libcublas.cublasSsyr_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # n
    ctypes.c_void_p,  # alpha (by reference)
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
]


def cublasSsyr(handle, uplo, n, alpha, x, incx, A, lda):
    """
    Rank-1 operation on a real single precision symmetric matrix.

    Calls ``cublasSsyr_v2``. `uplo` is translated through
    ``_CUBLAS_FILL_MODE``; `alpha` is wrapped in a ``ctypes.c_float``
    and passed by reference; `x` and `A` are converted with ``int()``
    and passed as pointers (presumably device memory addresses). The
    returned status is handed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>syr <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-syr>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    alpha_ref = ctypes.byref(ctypes.c_float(alpha))
    x_ptr, A_ptr = int(x), int(A)
    status = _libcublas.cublasSsyr_v2(handle, fill_mode, n, alpha_ref,
                                      x_ptr, incx, A_ptr, lda)
    cublasCheckStatus(status)
_libcublas.cublasDsyr_v2.restype = int
_libcublas.cublasDsyr_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # n
    ctypes.c_void_p,  # alpha (by reference)
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
]


def cublasDsyr(handle, uplo, n, alpha, x, incx, A, lda):
    """
    Rank-1 operation on a real double precision symmetric matrix.

    Calls ``cublasDsyr_v2``. `uplo` is translated through
    ``_CUBLAS_FILL_MODE``; `alpha` is wrapped in a ``ctypes.c_double``
    and passed by reference; `x` and `A` are converted with ``int()``
    and passed as pointers (presumably device memory addresses). The
    returned status is handed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>syr <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-syr>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    alpha_ref = ctypes.byref(ctypes.c_double(alpha))
    x_ptr, A_ptr = int(x), int(A)
    status = _libcublas.cublasDsyr_v2(handle, fill_mode, n, alpha_ref,
                                      x_ptr, incx, A_ptr, lda)
    cublasCheckStatus(status)
if _cublas_version >= 5000:
    _libcublas.cublasCsyr_v2.restype = int
    _libcublas.cublasCsyr_v2.argtypes = [
        _types.handle,    # handle
        ctypes.c_int,     # uplo
        ctypes.c_int,     # n
        ctypes.c_void_p,  # alpha (by reference)
        ctypes.c_void_p,  # x
        ctypes.c_int,     # incx
        ctypes.c_void_p,  # A
        ctypes.c_int,     # lda
    ]


@_cublas_version_req(5.0)
def cublasCsyr(handle, uplo, n, alpha, x, incx, A, lda):
    """
    Rank-1 operation on a complex single precision symmetric matrix.

    Calls ``cublasCsyr_v2``, whose ctypes prototype is only declared
    when ``_cublas_version >= 5000``. `uplo` is translated through
    ``_CUBLAS_FILL_MODE``; `alpha` is packed into a
    ``cuda.cuFloatComplex`` and passed by reference; `x` and `A` are
    converted with ``int()`` and passed as pointers (presumably device
    memory addresses). The returned status is handed to
    `cublasCheckStatus`.

    References
    ----------
    `cublas<t>syr <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-syr>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    alpha_ref = ctypes.byref(cuda.cuFloatComplex(alpha.real, alpha.imag))
    x_ptr, A_ptr = int(x), int(A)
    status = _libcublas.cublasCsyr_v2(handle, fill_mode, n, alpha_ref,
                                      x_ptr, incx, A_ptr, lda)
    cublasCheckStatus(status)
if _cublas_version >= 5000:
    _libcublas.cublasZsyr_v2.restype = int
    _libcublas.cublasZsyr_v2.argtypes = [
        _types.handle,    # handle
        ctypes.c_int,     # uplo
        ctypes.c_int,     # n
        ctypes.c_void_p,  # alpha (by reference)
        ctypes.c_void_p,  # x
        ctypes.c_int,     # incx
        ctypes.c_void_p,  # A
        ctypes.c_int,     # lda
    ]


@_cublas_version_req(5.0)
def cublasZsyr(handle, uplo, n, alpha, x, incx, A, lda):
    """
    Rank-1 operation on a complex double precision symmetric matrix.

    Calls ``cublasZsyr_v2``, whose ctypes prototype is only declared
    when ``_cublas_version >= 5000``. `uplo` is translated through
    ``_CUBLAS_FILL_MODE``; `alpha` is packed into a
    ``cuda.cuDoubleComplex`` and passed by reference; `x` and `A` are
    converted with ``int()`` and passed as pointers (presumably device
    memory addresses). The returned status is handed to
    `cublasCheckStatus`.

    References
    ----------
    `cublas<t>syr <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-syr>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    alpha_ref = ctypes.byref(cuda.cuDoubleComplex(alpha.real, alpha.imag))
    x_ptr, A_ptr = int(x), int(A)
    status = _libcublas.cublasZsyr_v2(handle, fill_mode, n, alpha_ref,
                                      x_ptr, incx, A_ptr, lda)
    cublasCheckStatus(status)
# SSYR2, DSYR2, CSYR2, ZSYR2
_libcublas.cublasSsyr2_v2.restype = int
_libcublas.cublasSsyr2_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # n
    ctypes.c_void_p,  # alpha (by reference)
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
]


def cublasSsyr2(handle, uplo, n, alpha, x, incx, y, incy, A, lda):
    """
    Rank-2 operation on a real single precision symmetric matrix.

    Calls ``cublasSsyr2_v2``. `uplo` is translated through
    ``_CUBLAS_FILL_MODE``; `alpha` is wrapped in a ``ctypes.c_float``
    and passed by reference; `x`, `y` and `A` are converted with
    ``int()`` and passed as pointers (presumably device memory
    addresses). The returned status is handed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>syr2 <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-syr2>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    alpha_ref = ctypes.byref(ctypes.c_float(alpha))
    x_ptr, y_ptr, A_ptr = int(x), int(y), int(A)
    status = _libcublas.cublasSsyr2_v2(handle, fill_mode, n, alpha_ref,
                                       x_ptr, incx, y_ptr, incy,
                                       A_ptr, lda)
    cublasCheckStatus(status)
_libcublas.cublasDsyr2_v2.restype = int
_libcublas.cublasDsyr2_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # n
    ctypes.c_void_p,  # alpha (by reference)
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
]


def cublasDsyr2(handle, uplo, n, alpha, x, incx, y, incy, A, lda):
    """
    Rank-2 operation on a real double precision symmetric matrix.

    Calls ``cublasDsyr2_v2``. `uplo` is translated through
    ``_CUBLAS_FILL_MODE``; `alpha` is wrapped in a ``ctypes.c_double``
    and passed by reference; `x`, `y` and `A` are converted with
    ``int()`` and passed as pointers (presumably device memory
    addresses). The returned status is handed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>syr2 <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-syr2>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    alpha_ref = ctypes.byref(ctypes.c_double(alpha))
    x_ptr, y_ptr, A_ptr = int(x), int(y), int(A)
    status = _libcublas.cublasDsyr2_v2(handle, fill_mode, n, alpha_ref,
                                       x_ptr, incx, y_ptr, incy,
                                       A_ptr, lda)
    cublasCheckStatus(status)
if _cublas_version >= 5000:
    _libcublas.cublasCsyr2_v2.restype = int
    _libcublas.cublasCsyr2_v2.argtypes = [
        _types.handle,    # handle
        ctypes.c_int,     # uplo
        ctypes.c_int,     # n
        ctypes.c_void_p,  # alpha (by reference)
        ctypes.c_void_p,  # x
        ctypes.c_int,     # incx
        ctypes.c_void_p,  # y
        ctypes.c_int,     # incy
        ctypes.c_void_p,  # A
        ctypes.c_int,     # lda
    ]


@_cublas_version_req(5.0)
def cublasCsyr2(handle, uplo, n, alpha, x, incx, y, incy, A, lda):
    """
    Rank-2 operation on a complex single precision symmetric matrix.

    Calls ``cublasCsyr2_v2``, whose ctypes prototype is only declared
    when ``_cublas_version >= 5000``. `uplo` is translated through
    ``_CUBLAS_FILL_MODE``; `alpha` is packed into a
    ``cuda.cuFloatComplex`` and passed by reference; `x`, `y` and `A`
    are converted with ``int()`` and passed as pointers (presumably
    device memory addresses). The returned status is handed to
    `cublasCheckStatus`.

    References
    ----------
    `cublas<t>syr2 <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-syr2>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    alpha_ref = ctypes.byref(cuda.cuFloatComplex(alpha.real, alpha.imag))
    x_ptr, y_ptr, A_ptr = int(x), int(y), int(A)
    status = _libcublas.cublasCsyr2_v2(handle, fill_mode, n, alpha_ref,
                                       x_ptr, incx, y_ptr, incy,
                                       A_ptr, lda)
    cublasCheckStatus(status)
if _cublas_version >= 5000:
    _libcublas.cublasZsyr2_v2.restype = int
    _libcublas.cublasZsyr2_v2.argtypes = [
        _types.handle,    # handle
        ctypes.c_int,     # uplo
        ctypes.c_int,     # n
        ctypes.c_void_p,  # alpha (by reference)
        ctypes.c_void_p,  # x
        ctypes.c_int,     # incx
        ctypes.c_void_p,  # y
        ctypes.c_int,     # incy
        ctypes.c_void_p,  # A
        ctypes.c_int,     # lda
    ]


@_cublas_version_req(5.0)
def cublasZsyr2(handle, uplo, n, alpha, x, incx, y, incy, A, lda):
    """
    Rank-2 operation on a complex double precision symmetric matrix.

    Calls ``cublasZsyr2_v2``, whose ctypes prototype is only declared
    when ``_cublas_version >= 5000``. `uplo` is translated through
    ``_CUBLAS_FILL_MODE``; `alpha` is packed into a
    ``cuda.cuDoubleComplex`` and passed by reference; `x`, `y` and `A`
    are converted with ``int()`` and passed as pointers (presumably
    device memory addresses). The returned status is handed to
    `cublasCheckStatus`.

    References
    ----------
    `cublas<t>syr2 <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-syr2>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    alpha_ref = ctypes.byref(cuda.cuDoubleComplex(alpha.real, alpha.imag))
    x_ptr, y_ptr, A_ptr = int(x), int(y), int(A)
    status = _libcublas.cublasZsyr2_v2(handle, fill_mode, n, alpha_ref,
                                       x_ptr, incx, y_ptr, incy,
                                       A_ptr, lda)
    cublasCheckStatus(status)
# STBMV, DTBMV, CTBMV, ZTBMV
_libcublas.cublasStbmv_v2.restype = int
_libcublas.cublasStbmv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # trans
    ctypes.c_int,     # diag
    ctypes.c_int,     # n
    ctypes.c_int,     # k
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
]


def cublasStbmv(handle, uplo, trans, diag, n, k, A, lda, x, incx):
    """
    Matrix-vector product for a real single precision triangular banded
    matrix.

    Calls ``cublasStbmv_v2``. `uplo`, `trans` and `diag` are translated
    through ``_CUBLAS_FILL_MODE``, ``_CUBLAS_OP`` and ``_CUBLAS_DIAG``;
    `A` and `x` are converted with ``int()`` and passed as pointers
    (presumably device memory addresses). The returned status is handed
    to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>tbmv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-tbmv>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    op = _CUBLAS_OP[trans]
    diag_type = _CUBLAS_DIAG[diag]
    status = _libcublas.cublasStbmv_v2(handle, fill_mode, op, diag_type,
                                       n, k, int(A), lda, int(x), incx)
    cublasCheckStatus(status)
_libcublas.cublasDtbmv_v2.restype = int
_libcublas.cublasDtbmv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # trans
    ctypes.c_int,     # diag
    ctypes.c_int,     # n
    ctypes.c_int,     # k
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
]


def cublasDtbmv(handle, uplo, trans, diag, n, k, A, lda, x, incx):
    """
    Matrix-vector product for a real double precision triangular banded
    matrix.

    Calls ``cublasDtbmv_v2``. `uplo`, `trans` and `diag` are translated
    through ``_CUBLAS_FILL_MODE``, ``_CUBLAS_OP`` and ``_CUBLAS_DIAG``;
    `A` and `x` are converted with ``int()`` and passed as pointers
    (presumably device memory addresses). The returned status is handed
    to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>tbmv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-tbmv>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    op = _CUBLAS_OP[trans]
    diag_type = _CUBLAS_DIAG[diag]
    status = _libcublas.cublasDtbmv_v2(handle, fill_mode, op, diag_type,
                                       n, k, int(A), lda, int(x), incx)
    cublasCheckStatus(status)
_libcublas.cublasCtbmv_v2.restype = int
_libcublas.cublasCtbmv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # trans
    ctypes.c_int,     # diag
    ctypes.c_int,     # n
    ctypes.c_int,     # k
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
]


def cublasCtbmv(handle, uplo, trans, diag, n, k, A, lda, x, incx):
    """
    Matrix-vector product for a complex single precision triangular
    banded matrix.

    Calls ``cublasCtbmv_v2``. `uplo`, `trans` and `diag` are translated
    through ``_CUBLAS_FILL_MODE``, ``_CUBLAS_OP`` and ``_CUBLAS_DIAG``;
    `A` and `x` are converted with ``int()`` and passed as pointers
    (presumably device memory addresses). The returned status is handed
    to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>tbmv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-tbmv>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    op = _CUBLAS_OP[trans]
    diag_type = _CUBLAS_DIAG[diag]
    status = _libcublas.cublasCtbmv_v2(handle, fill_mode, op, diag_type,
                                       n, k, int(A), lda, int(x), incx)
    cublasCheckStatus(status)
_libcublas.cublasZtbmv_v2.restype = int
_libcublas.cublasZtbmv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # trans
    ctypes.c_int,     # diag
    ctypes.c_int,     # n
    ctypes.c_int,     # k
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
]


def cublasZtbmv(handle, uplo, trans, diag, n, k, A, lda, x, incx):
    """
    Matrix-vector product for a complex double precision triangular
    banded matrix.

    Calls ``cublasZtbmv_v2``. `uplo`, `trans` and `diag` are translated
    through ``_CUBLAS_FILL_MODE``, ``_CUBLAS_OP`` and ``_CUBLAS_DIAG``;
    `A` and `x` are converted with ``int()`` and passed as pointers
    (presumably device memory addresses). The returned status is handed
    to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>tbmv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-tbmv>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    op = _CUBLAS_OP[trans]
    diag_type = _CUBLAS_DIAG[diag]
    status = _libcublas.cublasZtbmv_v2(handle, fill_mode, op, diag_type,
                                       n, k, int(A), lda, int(x), incx)
    cublasCheckStatus(status)
# STBSV, DTBSV, CTBSV, ZTBSV
_libcublas.cublasStbsv_v2.restype = int
_libcublas.cublasStbsv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # trans
    ctypes.c_int,     # diag
    ctypes.c_int,     # n
    ctypes.c_int,     # k
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
]


def cublasStbsv(handle, uplo, trans, diag, n, k, A, lda, x, incx):
    """
    Solve a real single precision triangular banded system with one
    right-hand side.

    Calls ``cublasStbsv_v2``. `uplo`, `trans` and `diag` are translated
    through ``_CUBLAS_FILL_MODE``, ``_CUBLAS_OP`` and ``_CUBLAS_DIAG``;
    `A` and `x` are converted with ``int()`` and passed as pointers
    (presumably device memory addresses). The returned status is handed
    to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>tbsv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-tbsv>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    op = _CUBLAS_OP[trans]
    diag_type = _CUBLAS_DIAG[diag]
    status = _libcublas.cublasStbsv_v2(handle, fill_mode, op, diag_type,
                                       n, k, int(A), lda, int(x), incx)
    cublasCheckStatus(status)
_libcublas.cublasDtbsv_v2.restype = int
_libcublas.cublasDtbsv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # trans
    ctypes.c_int,     # diag
    ctypes.c_int,     # n
    ctypes.c_int,     # k
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
]


def cublasDtbsv(handle, uplo, trans, diag, n, k, A, lda, x, incx):
    """
    Solve a real double precision triangular banded system with one
    right-hand side.

    Calls ``cublasDtbsv_v2``. `uplo`, `trans` and `diag` are translated
    through ``_CUBLAS_FILL_MODE``, ``_CUBLAS_OP`` and ``_CUBLAS_DIAG``;
    `A` and `x` are converted with ``int()`` and passed as pointers
    (presumably device memory addresses). The returned status is handed
    to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>tbsv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-tbsv>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    op = _CUBLAS_OP[trans]
    diag_type = _CUBLAS_DIAG[diag]
    status = _libcublas.cublasDtbsv_v2(handle, fill_mode, op, diag_type,
                                       n, k, int(A), lda, int(x), incx)
    cublasCheckStatus(status)
_libcublas.cublasCtbsv_v2.restype = int
_libcublas.cublasCtbsv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # trans
    ctypes.c_int,     # diag
    ctypes.c_int,     # n
    ctypes.c_int,     # k
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
]


def cublasCtbsv(handle, uplo, trans, diag, n, k, A, lda, x, incx):
    """
    Solve a complex single precision triangular banded system with one
    right-hand side.

    Calls ``cublasCtbsv_v2``. `uplo`, `trans` and `diag` are translated
    through ``_CUBLAS_FILL_MODE``, ``_CUBLAS_OP`` and ``_CUBLAS_DIAG``;
    `A` and `x` are converted with ``int()`` and passed as pointers
    (presumably device memory addresses). The returned status is handed
    to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>tbsv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-tbsv>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    op = _CUBLAS_OP[trans]
    diag_type = _CUBLAS_DIAG[diag]
    status = _libcublas.cublasCtbsv_v2(handle, fill_mode, op, diag_type,
                                       n, k, int(A), lda, int(x), incx)
    cublasCheckStatus(status)
_libcublas.cublasZtbsv_v2.restype = int
_libcublas.cublasZtbsv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # trans
    ctypes.c_int,     # diag
    ctypes.c_int,     # n
    ctypes.c_int,     # k
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
]


def cublasZtbsv(handle, uplo, trans, diag, n, k, A, lda, x, incx):
    """
    Solve a complex double precision triangular banded system with one
    right-hand side.

    Calls ``cublasZtbsv_v2``. `uplo`, `trans` and `diag` are translated
    through ``_CUBLAS_FILL_MODE``, ``_CUBLAS_OP`` and ``_CUBLAS_DIAG``;
    `A` and `x` are converted with ``int()`` and passed as pointers
    (presumably device memory addresses). The returned status is handed
    to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>tbsv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-tbsv>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    op = _CUBLAS_OP[trans]
    diag_type = _CUBLAS_DIAG[diag]
    status = _libcublas.cublasZtbsv_v2(handle, fill_mode, op, diag_type,
                                       n, k, int(A), lda, int(x), incx)
    cublasCheckStatus(status)
# STPMV, DTPMV, CTPMV, ZTPMV
_libcublas.cublasStpmv_v2.restype = int
_libcublas.cublasStpmv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # trans
    ctypes.c_int,     # diag
    ctypes.c_int,     # n
    ctypes.c_void_p,  # AP
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
]


def cublasStpmv(handle, uplo, trans, diag, n, AP, x, incx):
    """
    Matrix-vector product for a real single precision triangular packed
    matrix.

    Calls ``cublasStpmv_v2``. `uplo`, `trans` and `diag` are translated
    through ``_CUBLAS_FILL_MODE``, ``_CUBLAS_OP`` and ``_CUBLAS_DIAG``;
    `AP` and `x` are converted with ``int()`` and passed as pointers
    (presumably device memory addresses). The returned status is handed
    to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>tpmv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-tpmv>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    op = _CUBLAS_OP[trans]
    diag_type = _CUBLAS_DIAG[diag]
    status = _libcublas.cublasStpmv_v2(handle, fill_mode, op, diag_type,
                                       n, int(AP), int(x), incx)
    cublasCheckStatus(status)
_libcublas.cublasCtpmv_v2.restype = int
_libcublas.cublasCtpmv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # trans
    ctypes.c_int,     # diag
    ctypes.c_int,     # n
    ctypes.c_void_p,  # AP
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
]


def cublasCtpmv(handle, uplo, trans, diag, n, AP, x, incx):
    """
    Matrix-vector product for a complex single precision triangular
    packed matrix.

    Calls ``cublasCtpmv_v2``. `uplo`, `trans` and `diag` are translated
    through ``_CUBLAS_FILL_MODE``, ``_CUBLAS_OP`` and ``_CUBLAS_DIAG``;
    `AP` and `x` are converted with ``int()`` and passed as pointers
    (presumably device memory addresses). The returned status is handed
    to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>tpmv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-tpmv>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    op = _CUBLAS_OP[trans]
    diag_type = _CUBLAS_DIAG[diag]
    status = _libcublas.cublasCtpmv_v2(handle, fill_mode, op, diag_type,
                                       n, int(AP), int(x), incx)
    cublasCheckStatus(status)
_libcublas.cublasDtpmv_v2.restype = int
_libcublas.cublasDtpmv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # trans
    ctypes.c_int,     # diag
    ctypes.c_int,     # n
    ctypes.c_void_p,  # AP
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
]


def cublasDtpmv(handle, uplo, trans, diag, n, AP, x, incx):
    """
    Matrix-vector product for a real double precision triangular packed
    matrix.

    Calls ``cublasDtpmv_v2``. `uplo`, `trans` and `diag` are translated
    through ``_CUBLAS_FILL_MODE``, ``_CUBLAS_OP`` and ``_CUBLAS_DIAG``;
    `AP` and `x` are converted with ``int()`` and passed as pointers
    (presumably device memory addresses). The returned status is handed
    to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>tpmv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-tpmv>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    op = _CUBLAS_OP[trans]
    diag_type = _CUBLAS_DIAG[diag]
    status = _libcublas.cublasDtpmv_v2(handle, fill_mode, op, diag_type,
                                       n, int(AP), int(x), incx)
    cublasCheckStatus(status)
_libcublas.cublasZtpmv_v2.restype = int
_libcublas.cublasZtpmv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # trans
    ctypes.c_int,     # diag
    ctypes.c_int,     # n
    ctypes.c_void_p,  # AP
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
]


def cublasZtpmv(handle, uplo, trans, diag, n, AP, x, incx):
    """
    Matrix-vector product for a complex double precision triangular
    packed matrix.

    Calls ``cublasZtpmv_v2``. `uplo`, `trans` and `diag` are translated
    through ``_CUBLAS_FILL_MODE``, ``_CUBLAS_OP`` and ``_CUBLAS_DIAG``;
    `AP` and `x` are converted with ``int()`` and passed as pointers
    (presumably device memory addresses). The returned status is handed
    to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>tpmv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-tpmv>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    op = _CUBLAS_OP[trans]
    diag_type = _CUBLAS_DIAG[diag]
    status = _libcublas.cublasZtpmv_v2(handle, fill_mode, op, diag_type,
                                       n, int(AP), int(x), incx)
    cublasCheckStatus(status)
# STPSV, DTPSV, CTPSV, ZTPSV
_libcublas.cublasStpsv_v2.restype = int
_libcublas.cublasStpsv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # trans
    ctypes.c_int,     # diag
    ctypes.c_int,     # n
    ctypes.c_void_p,  # AP
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
]


def cublasStpsv(handle, uplo, trans, diag, n, AP, x, incx):
    """
    Solve a real single precision triangular packed system with one
    right-hand side.

    Calls ``cublasStpsv_v2``. `uplo`, `trans` and `diag` are translated
    through ``_CUBLAS_FILL_MODE``, ``_CUBLAS_OP`` and ``_CUBLAS_DIAG``;
    `AP` and `x` are converted with ``int()`` and passed as pointers
    (presumably device memory addresses). The returned status is handed
    to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>tpsv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-tpsv>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    op = _CUBLAS_OP[trans]
    diag_type = _CUBLAS_DIAG[diag]
    status = _libcublas.cublasStpsv_v2(handle, fill_mode, op, diag_type,
                                       n, int(AP), int(x), incx)
    cublasCheckStatus(status)
_libcublas.cublasDtpsv_v2.restype = int
_libcublas.cublasDtpsv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # trans
    ctypes.c_int,     # diag
    ctypes.c_int,     # n
    ctypes.c_void_p,  # AP
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
]


def cublasDtpsv(handle, uplo, trans, diag, n, AP, x, incx):
    """
    Solve a real double precision triangular packed system with one
    right-hand side.

    Calls ``cublasDtpsv_v2``. `uplo`, `trans` and `diag` are translated
    through ``_CUBLAS_FILL_MODE``, ``_CUBLAS_OP`` and ``_CUBLAS_DIAG``;
    `AP` and `x` are converted with ``int()`` and passed as pointers
    (presumably device memory addresses). The returned status is handed
    to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>tpsv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-tpsv>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    op = _CUBLAS_OP[trans]
    diag_type = _CUBLAS_DIAG[diag]
    status = _libcublas.cublasDtpsv_v2(handle, fill_mode, op, diag_type,
                                       n, int(AP), int(x), incx)
    cublasCheckStatus(status)
_libcublas.cublasCtpsv_v2.restype = int
_libcublas.cublasCtpsv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # trans
    ctypes.c_int,     # diag
    ctypes.c_int,     # n
    ctypes.c_void_p,  # AP
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
]


def cublasCtpsv(handle, uplo, trans, diag, n, AP, x, incx):
    """
    Solve a complex single precision triangular packed system with one
    right-hand side.

    Calls ``cublasCtpsv_v2``. `uplo`, `trans` and `diag` are translated
    through ``_CUBLAS_FILL_MODE``, ``_CUBLAS_OP`` and ``_CUBLAS_DIAG``;
    `AP` and `x` are converted with ``int()`` and passed as pointers
    (presumably device memory addresses). The returned status is handed
    to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>tpsv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-tpsv>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    op = _CUBLAS_OP[trans]
    diag_type = _CUBLAS_DIAG[diag]
    status = _libcublas.cublasCtpsv_v2(handle, fill_mode, op, diag_type,
                                       n, int(AP), int(x), incx)
    cublasCheckStatus(status)
_libcublas.cublasZtpsv_v2.restype = int
_libcublas.cublasZtpsv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # trans
    ctypes.c_int,     # diag
    ctypes.c_int,     # n
    ctypes.c_void_p,  # AP
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
]


def cublasZtpsv(handle, uplo, trans, diag, n, AP, x, incx):
    """
    Solve a complex double precision triangular packed system with one
    right-hand side.

    Calls ``cublasZtpsv_v2``. `uplo`, `trans` and `diag` are translated
    through ``_CUBLAS_FILL_MODE``, ``_CUBLAS_OP`` and ``_CUBLAS_DIAG``;
    `AP` and `x` are converted with ``int()`` and passed as pointers
    (presumably device memory addresses). The returned status is handed
    to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>tpsv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-tpsv>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    op = _CUBLAS_OP[trans]
    diag_type = _CUBLAS_DIAG[diag]
    status = _libcublas.cublasZtpsv_v2(handle, fill_mode, op, diag_type,
                                       n, int(AP), int(x), incx)
    cublasCheckStatus(status)
# STRMV, DTRMV, CTRMV, ZTRMV
_libcublas.cublasStrmv_v2.restype = int
_libcublas.cublasStrmv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # trans
    ctypes.c_int,     # diag
    ctypes.c_int,     # n
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
    ctypes.c_void_p,  # x
    ctypes.c_int,     # inx
]


def cublasStrmv(handle, uplo, trans, diag, n, A, lda, x, inx):
    """
    Matrix-vector product for a real single precision triangular matrix.

    Calls ``cublasStrmv_v2``. `uplo`, `trans` and `diag` are translated
    through ``_CUBLAS_FILL_MODE``, ``_CUBLAS_OP`` and ``_CUBLAS_DIAG``;
    `A` and `x` are converted with ``int()`` and passed as pointers
    (presumably device memory addresses). `inx` is forwarded as the
    final integer argument; the complex ``trmv`` wrappers name the same
    position `incx`. The returned status is handed to
    `cublasCheckStatus`.

    References
    ----------
    `cublas<t>trmv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-trmv>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    op = _CUBLAS_OP[trans]
    diag_type = _CUBLAS_DIAG[diag]
    status = _libcublas.cublasStrmv_v2(handle, fill_mode, op, diag_type,
                                       n, int(A), lda, int(x), inx)
    cublasCheckStatus(status)
_libcublas.cublasCtrmv_v2.restype = int
_libcublas.cublasCtrmv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # trans
    ctypes.c_int,     # diag
    ctypes.c_int,     # n
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
]


def cublasCtrmv(handle, uplo, trans, diag, n, A, lda, x, incx):
    """
    Matrix-vector product for a complex single precision triangular
    matrix.

    Calls ``cublasCtrmv_v2``. `uplo`, `trans` and `diag` are translated
    through ``_CUBLAS_FILL_MODE``, ``_CUBLAS_OP`` and ``_CUBLAS_DIAG``;
    `A` and `x` are converted with ``int()`` and passed as pointers
    (presumably device memory addresses). The returned status is handed
    to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>trmv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-trmv>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    op = _CUBLAS_OP[trans]
    diag_type = _CUBLAS_DIAG[diag]
    status = _libcublas.cublasCtrmv_v2(handle, fill_mode, op, diag_type,
                                       n, int(A), lda, int(x), incx)
    cublasCheckStatus(status)
_libcublas.cublasDtrmv_v2.restype = int
_libcublas.cublasDtrmv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # trans
    ctypes.c_int,     # diag
    ctypes.c_int,     # n
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
    ctypes.c_void_p,  # x
    ctypes.c_int,     # inx
]


def cublasDtrmv(handle, uplo, trans, diag, n, A, lda, x, inx):
    """
    Matrix-vector product for a real double precision triangular matrix.

    Calls ``cublasDtrmv_v2``. `uplo`, `trans` and `diag` are translated
    through ``_CUBLAS_FILL_MODE``, ``_CUBLAS_OP`` and ``_CUBLAS_DIAG``;
    `A` and `x` are converted with ``int()`` and passed as pointers
    (presumably device memory addresses). `inx` is forwarded as the
    final integer argument; the complex ``trmv`` wrappers name the same
    position `incx`. The returned status is handed to
    `cublasCheckStatus`.

    References
    ----------
    `cublas<t>trmv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-trmv>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    op = _CUBLAS_OP[trans]
    diag_type = _CUBLAS_DIAG[diag]
    status = _libcublas.cublasDtrmv_v2(handle, fill_mode, op, diag_type,
                                       n, int(A), lda, int(x), inx)
    cublasCheckStatus(status)
_libcublas.cublasZtrmv_v2.restype = int
_libcublas.cublasZtrmv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # trans
    ctypes.c_int,     # diag
    ctypes.c_int,     # n
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
]


def cublasZtrmv(handle, uplo, trans, diag, n, A, lda, x, incx):
    """
    Matrix-vector product for a complex double precision triangular
    matrix.

    Calls ``cublasZtrmv_v2``. `uplo`, `trans` and `diag` are translated
    through ``_CUBLAS_FILL_MODE``, ``_CUBLAS_OP`` and ``_CUBLAS_DIAG``;
    `A` and `x` are converted with ``int()`` and passed as pointers
    (presumably device memory addresses). The returned status is handed
    to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>trmv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-trmv>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    op = _CUBLAS_OP[trans]
    diag_type = _CUBLAS_DIAG[diag]
    status = _libcublas.cublasZtrmv_v2(handle, fill_mode, op, diag_type,
                                       n, int(A), lda, int(x), incx)
    cublasCheckStatus(status)
# STRSV, DTRSV, CTRSV, ZTRSV
_libcublas.cublasStrsv_v2.restype = int
_libcublas.cublasStrsv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # trans
    ctypes.c_int,     # diag
    ctypes.c_int,     # n
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
]


def cublasStrsv(handle, uplo, trans, diag, n, A, lda, x, incx):
    """
    Solve a real single precision triangular system with one right-hand
    side.

    Calls ``cublasStrsv_v2``. `uplo`, `trans` and `diag` are translated
    through ``_CUBLAS_FILL_MODE``, ``_CUBLAS_OP`` and ``_CUBLAS_DIAG``;
    `A` and `x` are converted with ``int()`` and passed as pointers
    (presumably device memory addresses). The returned status is handed
    to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>trsv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-trsv>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    op = _CUBLAS_OP[trans]
    diag_type = _CUBLAS_DIAG[diag]
    status = _libcublas.cublasStrsv_v2(handle, fill_mode, op, diag_type,
                                       n, int(A), lda, int(x), incx)
    cublasCheckStatus(status)
_libcublas.cublasDtrsv_v2.restype = int
_libcublas.cublasDtrsv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # trans
    ctypes.c_int,     # diag
    ctypes.c_int,     # n
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
]


def cublasDtrsv(handle, uplo, trans, diag, n, A, lda, x, incx):
    """
    Solve a real double precision triangular system with one right-hand
    side.

    Calls ``cublasDtrsv_v2``. `uplo`, `trans` and `diag` are translated
    through ``_CUBLAS_FILL_MODE``, ``_CUBLAS_OP`` and ``_CUBLAS_DIAG``;
    `A` and `x` are converted with ``int()`` and passed as pointers
    (presumably device memory addresses). The returned status is handed
    to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>trsv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-trsv>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    op = _CUBLAS_OP[trans]
    diag_type = _CUBLAS_DIAG[diag]
    status = _libcublas.cublasDtrsv_v2(handle, fill_mode, op, diag_type,
                                       n, int(A), lda, int(x), incx)
    cublasCheckStatus(status)
_libcublas.cublasCtrsv_v2.restype = int
_libcublas.cublasCtrsv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # trans
    ctypes.c_int,     # diag
    ctypes.c_int,     # n
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
]


def cublasCtrsv(handle, uplo, trans, diag, n, A, lda, x, incx):
    """
    Solve a complex single precision triangular system with one
    right-hand side.

    Calls ``cublasCtrsv_v2``. `uplo`, `trans` and `diag` are translated
    through ``_CUBLAS_FILL_MODE``, ``_CUBLAS_OP`` and ``_CUBLAS_DIAG``;
    `A` and `x` are converted with ``int()`` and passed as pointers
    (presumably device memory addresses). The returned status is handed
    to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>trsv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-trsv>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    op = _CUBLAS_OP[trans]
    diag_type = _CUBLAS_DIAG[diag]
    status = _libcublas.cublasCtrsv_v2(handle, fill_mode, op, diag_type,
                                       n, int(A), lda, int(x), incx)
    cublasCheckStatus(status)
_libcublas.cublasZtrsv_v2.restype = int
_libcublas.cublasZtrsv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # trans
    ctypes.c_int,     # diag
    ctypes.c_int,     # n
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
]


def cublasZtrsv(handle, uplo, trans, diag, n, A, lda, x, incx):
    """
    Solve a complex double precision triangular system with one
    right-hand side.

    Calls ``cublasZtrsv_v2``. `uplo`, `trans` and `diag` are translated
    through ``_CUBLAS_FILL_MODE``, ``_CUBLAS_OP`` and ``_CUBLAS_DIAG``;
    `A` and `x` are converted with ``int()`` and passed as pointers
    (presumably device memory addresses). The returned status is handed
    to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>trsv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-trsv>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    op = _CUBLAS_OP[trans]
    diag_type = _CUBLAS_DIAG[diag]
    status = _libcublas.cublasZtrsv_v2(handle, fill_mode, op, diag_type,
                                       n, int(A), lda, int(x), incx)
    cublasCheckStatus(status)
# CHEMV, ZHEMV
_libcublas.cublasChemv_v2.restype = int
_libcublas.cublasChemv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # n
    ctypes.c_void_p,  # alpha (by reference)
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # beta (by reference)
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
]


def cublasChemv(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy):
    """
    Matrix-vector product for a single precision Hermitian matrix.

    Calls ``cublasChemv_v2``. `uplo` is translated through
    ``_CUBLAS_FILL_MODE``; `alpha` and `beta` are packed into
    ``cuda.cuFloatComplex`` and passed by reference; `A`, `x` and `y`
    are converted with ``int()`` and passed as pointers (presumably
    device memory addresses). The returned status is handed to
    `cublasCheckStatus`.

    References
    ----------
    `cublas<t>hemv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-hemv>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    alpha_ref = ctypes.byref(cuda.cuFloatComplex(alpha.real, alpha.imag))
    A_ptr, x_ptr = int(A), int(x)
    beta_ref = ctypes.byref(cuda.cuFloatComplex(beta.real, beta.imag))
    status = _libcublas.cublasChemv_v2(handle, fill_mode, n, alpha_ref,
                                       A_ptr, lda, x_ptr, incx,
                                       beta_ref, int(y), incy)
    cublasCheckStatus(status)
_libcublas.cublasZhemv_v2.restype = int
_libcublas.cublasZhemv_v2.argtypes = [
    _types.handle,    # handle
    ctypes.c_int,     # uplo
    ctypes.c_int,     # n
    ctypes.c_void_p,  # alpha (by reference)
    ctypes.c_void_p,  # A
    ctypes.c_int,     # lda
    ctypes.c_void_p,  # x
    ctypes.c_int,     # incx
    ctypes.c_void_p,  # beta (by reference)
    ctypes.c_void_p,  # y
    ctypes.c_int,     # incy
]


def cublasZhemv(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy):
    """
    Matrix-vector product for a double precision Hermitian matrix.

    Calls ``cublasZhemv_v2``. `uplo` is translated through
    ``_CUBLAS_FILL_MODE``; `alpha` and `beta` are packed into
    ``cuda.cuDoubleComplex`` and passed by reference; `A`, `x` and `y`
    are converted with ``int()`` and passed as pointers (presumably
    device memory addresses). The returned status is handed to
    `cublasCheckStatus`.

    References
    ----------
    `cublas<t>hemv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-hemv>`_
    """
    fill_mode = _CUBLAS_FILL_MODE[uplo]
    alpha_ref = ctypes.byref(cuda.cuDoubleComplex(alpha.real, alpha.imag))
    A_ptr, x_ptr = int(A), int(x)
    beta_ref = ctypes.byref(cuda.cuDoubleComplex(beta.real, beta.imag))
    status = _libcublas.cublasZhemv_v2(handle, fill_mode, n, alpha_ref,
                                       A_ptr, lda, x_ptr, incx,
                                       beta_ref, int(y), incy)
    cublasCheckStatus(status)
# CHBMV, ZHBMV
_libcublas.cublasChbmv_v2.restype = int
_libcublas.cublasChbmv_v2.argtypes = [
    _types.handle,                             # handle
    ctypes.c_int, ctypes.c_int, ctypes.c_int,  # uplo, n, k
    ctypes.c_void_p,                           # alpha
    ctypes.c_void_p, ctypes.c_int,             # A, lda
    ctypes.c_void_p, ctypes.c_int,             # x, incx
    ctypes.c_void_p,                           # beta
    ctypes.c_void_p, ctypes.c_int,             # y, incy
]


def cublasChbmv(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy):
    """
    Hermitian banded matrix-vector product (single precision complex).

    Forwards to ``cublasChbmv_v2``.  `alpha` and `beta` are repacked into
    `cuda.cuFloatComplex` and passed by reference; `A`, `x` and `y` are
    converted with ``int()`` and passed as untyped pointers.  The returned
    status goes to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>hbmv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-hbmv>`_
    """
    call_args = (
        handle, _CUBLAS_FILL_MODE[uplo], n, k,
        ctypes.byref(cuda.cuFloatComplex(alpha.real, alpha.imag)),
        int(A), lda, int(x), incx,
        ctypes.byref(cuda.cuFloatComplex(beta.real, beta.imag)),
        int(y), incy,
    )
    cublasCheckStatus(_libcublas.cublasChbmv_v2(*call_args))
_libcublas.cublasZhbmv_v2.restype = int
_libcublas.cublasZhbmv_v2.argtypes = [
    _types.handle,                             # handle
    ctypes.c_int, ctypes.c_int, ctypes.c_int,  # uplo, n, k
    ctypes.c_void_p,                           # alpha
    ctypes.c_void_p, ctypes.c_int,             # A, lda
    ctypes.c_void_p, ctypes.c_int,             # x, incx
    ctypes.c_void_p,                           # beta
    ctypes.c_void_p, ctypes.c_int,             # y, incy
]


def cublasZhbmv(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy):
    """
    Hermitian banded matrix-vector product (double precision complex).

    Forwards to ``cublasZhbmv_v2``.  `alpha` and `beta` are repacked into
    `cuda.cuDoubleComplex` and passed by reference; `A`, `x` and `y` are
    converted with ``int()`` and passed as untyped pointers.  The returned
    status goes to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>hbmv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-hbmv>`_
    """
    call_args = (
        handle, _CUBLAS_FILL_MODE[uplo], n, k,
        ctypes.byref(cuda.cuDoubleComplex(alpha.real, alpha.imag)),
        int(A), lda, int(x), incx,
        ctypes.byref(cuda.cuDoubleComplex(beta.real, beta.imag)),
        int(y), incy,
    )
    cublasCheckStatus(_libcublas.cublasZhbmv_v2(*call_args))
# CHPMV, ZHPMV
_libcublas.cublasChpmv_v2.restype = int
_libcublas.cublasChpmv_v2.argtypes = [
    _types.handle,                   # handle
    ctypes.c_int, ctypes.c_int,      # uplo, n
    ctypes.c_void_p,                 # alpha
    ctypes.c_void_p,                 # AP
    ctypes.c_void_p, ctypes.c_int,   # x, incx
    ctypes.c_void_p,                 # beta
    ctypes.c_void_p, ctypes.c_int,   # y, incy
]


def cublasChpmv(handle, uplo, n, alpha, AP, x, incx, beta, y, incy):
    """
    Matrix-vector product for single precision Hermitian packed matrix.

    Forwards to ``cublasChpmv_v2`` and passes the returned status to
    `cublasCheckStatus`.

    Parameters
    ----------
    handle
        CUBLAS context, passed through unchanged.
    uplo
        Key into `_CUBLAS_FILL_MODE`.
    n : int
        Passed through as the ``n`` argument.
    alpha, beta : complex
        Scalars; repacked from ``.real``/``.imag`` into
        `cuda.cuFloatComplex` and passed by reference.
    AP, x, y
        Pointers; converted with ``int()`` and passed as ``void *``.
    incx, incy : int
        Passed through as the ``incx``/``incy`` arguments.

    References
    ----------
    `cublas<t>hpmv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-hpmv>`_
    """
    call_args = (
        handle, _CUBLAS_FILL_MODE[uplo], n,
        ctypes.byref(cuda.cuFloatComplex(alpha.real, alpha.imag)),
        int(AP), int(x), incx,
        ctypes.byref(cuda.cuFloatComplex(beta.real, beta.imag)),
        int(y), incy,
    )
    cublasCheckStatus(_libcublas.cublasChpmv_v2(*call_args))
_libcublas.cublasZhpmv_v2.restype = int
_libcublas.cublasZhpmv_v2.argtypes = [
    _types.handle,                   # handle
    ctypes.c_int, ctypes.c_int,      # uplo, n
    ctypes.c_void_p,                 # alpha
    ctypes.c_void_p,                 # AP
    ctypes.c_void_p, ctypes.c_int,   # x, incx
    ctypes.c_void_p,                 # beta
    ctypes.c_void_p, ctypes.c_int,   # y, incy
]


def cublasZhpmv(handle, uplo, n, alpha, AP, x, incx, beta, y, incy):
    """
    Matrix-vector product for double precision Hermitian packed matrix.

    Forwards to ``cublasZhpmv_v2`` and passes the returned status to
    `cublasCheckStatus`.

    Parameters
    ----------
    handle
        CUBLAS context, passed through unchanged.
    uplo
        Key into `_CUBLAS_FILL_MODE`.
    n : int
        Passed through as the ``n`` argument.
    alpha, beta : complex
        Scalars; repacked from ``.real``/``.imag`` into
        `cuda.cuDoubleComplex` and passed by reference.
    AP, x, y
        Pointers; converted with ``int()`` and passed as ``void *``.
    incx, incy : int
        Passed through as the ``incx``/``incy`` arguments.

    References
    ----------
    `cublas<t>hpmv <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-hpmv>`_
    """
    call_args = (
        handle, _CUBLAS_FILL_MODE[uplo], n,
        ctypes.byref(cuda.cuDoubleComplex(alpha.real, alpha.imag)),
        int(AP), int(x), incx,
        ctypes.byref(cuda.cuDoubleComplex(beta.real, beta.imag)),
        int(y), incy,
    )
    cublasCheckStatus(_libcublas.cublasZhpmv_v2(*call_args))
# CHER, ZHER
_libcublas.cublasCher_v2.restype = int
_libcublas.cublasCher_v2.argtypes = [
    _types.handle,                   # handle
    ctypes.c_int, ctypes.c_int,      # uplo, n
    ctypes.c_void_p,                 # alpha (pointer to float)
    ctypes.c_void_p, ctypes.c_int,   # x, incx
    ctypes.c_void_p, ctypes.c_int,   # A, lda
]


def cublasCher(handle, uplo, n, alpha, x, incx, A, lda):
    """
    Rank-1 operation on single precision Hermitian matrix.

    Forwards to ``cublasCher_v2`` and passes the returned status to
    `cublasCheckStatus`.

    Parameters
    ----------
    handle
        CUBLAS context, passed through unchanged.
    uplo
        Key into `_CUBLAS_FILL_MODE`.
    n : int
        Passed through as the ``n`` argument.
    alpha : float
        Real scalar; wrapped in ``ctypes.c_float`` and passed by reference.
    x, A
        Pointers; converted with ``int()`` and passed as ``void *``.
    incx, lda : int
        Passed through as the ``incx``/``lda`` arguments.

    References
    ----------
    `cublas<t>her <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-her>`_
    """
    # The library takes alpha through a pointer (the prototype declares it
    # as void *), so the Python scalar must be boxed and passed by
    # reference rather than handed over as a bare number.
    call_args = (
        handle, _CUBLAS_FILL_MODE[uplo], n,
        ctypes.byref(ctypes.c_float(alpha)),
        int(x), incx, int(A), lda,
    )
    cublasCheckStatus(_libcublas.cublasCher_v2(*call_args))
_libcublas.cublasZher_v2.restype = int
_libcublas.cublasZher_v2.argtypes = [
    _types.handle,                   # handle
    ctypes.c_int, ctypes.c_int,      # uplo, n
    ctypes.c_void_p,                 # alpha (pointer to double)
    ctypes.c_void_p, ctypes.c_int,   # x, incx
    ctypes.c_void_p, ctypes.c_int,   # A, lda
]


def cublasZher(handle, uplo, n, alpha, x, incx, A, lda):
    """
    Rank-1 operation on double precision Hermitian matrix.

    Forwards to ``cublasZher_v2`` and passes the returned status to
    `cublasCheckStatus`.

    Parameters
    ----------
    handle
        CUBLAS context, passed through unchanged.
    uplo
        Key into `_CUBLAS_FILL_MODE`.
    n : int
        Passed through as the ``n`` argument.
    alpha : float
        Real scalar; wrapped in ``ctypes.c_double`` and passed by reference.
    x, A
        Pointers; converted with ``int()`` and passed as ``void *``.
    incx, lda : int
        Passed through as the ``incx``/``lda`` arguments.

    References
    ----------
    `cublas<t>her <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-her>`_
    """
    # The library takes alpha through a pointer (the prototype declares it
    # as void *), so the Python scalar must be boxed and passed by
    # reference rather than handed over as a bare number.
    call_args = (
        handle, _CUBLAS_FILL_MODE[uplo], n,
        ctypes.byref(ctypes.c_double(alpha)),
        int(x), incx, int(A), lda,
    )
    cublasCheckStatus(_libcublas.cublasZher_v2(*call_args))
# CHER2, ZHER2
_libcublas.cublasCher2_v2.restype = int
_libcublas.cublasCher2_v2.argtypes = [
    _types.handle,                   # handle
    ctypes.c_int, ctypes.c_int,      # uplo, n
    ctypes.c_void_p,                 # alpha
    ctypes.c_void_p, ctypes.c_int,   # x, incx
    ctypes.c_void_p, ctypes.c_int,   # y, incy
    ctypes.c_void_p, ctypes.c_int,   # A, lda
]


def cublasCher2(handle, uplo, n, alpha, x, incx, y, incy, A, lda):
    """
    Hermitian rank-2 update (single precision complex).

    Forwards to ``cublasCher2_v2``.  `alpha` is repacked into
    `cuda.cuFloatComplex` and passed by reference; `x`, `y` and `A` are
    converted with ``int()`` and passed as untyped pointers.  The returned
    status goes to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>her2 <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-her2>`_
    """
    call_args = (
        handle, _CUBLAS_FILL_MODE[uplo], n,
        ctypes.byref(cuda.cuFloatComplex(alpha.real, alpha.imag)),
        int(x), incx, int(y), incy, int(A), lda,
    )
    cublasCheckStatus(_libcublas.cublasCher2_v2(*call_args))
_libcublas.cublasZher2_v2.restype = int
_libcublas.cublasZher2_v2.argtypes = [
    _types.handle,                   # handle
    ctypes.c_int, ctypes.c_int,      # uplo, n
    ctypes.c_void_p,                 # alpha
    ctypes.c_void_p, ctypes.c_int,   # x, incx
    ctypes.c_void_p, ctypes.c_int,   # y, incy
    ctypes.c_void_p, ctypes.c_int,   # A, lda
]


def cublasZher2(handle, uplo, n, alpha, x, incx, y, incy, A, lda):
    """
    Hermitian rank-2 update (double precision complex).

    Forwards to ``cublasZher2_v2``.  `alpha` is repacked into
    `cuda.cuDoubleComplex` and passed by reference; `x`, `y` and `A` are
    converted with ``int()`` and passed as untyped pointers.  The returned
    status goes to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>her2 <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-her2>`_
    """
    call_args = (
        handle, _CUBLAS_FILL_MODE[uplo], n,
        ctypes.byref(cuda.cuDoubleComplex(alpha.real, alpha.imag)),
        int(x), incx, int(y), incy, int(A), lda,
    )
    cublasCheckStatus(_libcublas.cublasZher2_v2(*call_args))
# CHPR, ZHPR
_libcublas.cublasChpr_v2.restype = int
_libcublas.cublasChpr_v2.argtypes = [
    _types.handle,                   # handle
    ctypes.c_int, ctypes.c_int,      # uplo, n
    ctypes.c_void_p,                 # alpha
    ctypes.c_void_p, ctypes.c_int,   # x, incx
    ctypes.c_void_p,                 # AP
]


def cublasChpr(handle, uplo, n, alpha, x, incx, AP):
    """
    Hermitian packed rank-1 update (single precision complex).

    Forwards to ``cublasChpr_v2``.  `alpha` is wrapped in ``ctypes.c_float``
    and passed by reference; `x` and `AP` are converted with ``int()`` and
    passed as untyped pointers.  The returned status goes to
    `cublasCheckStatus`.

    References
    ----------
    `cublas<t>hpr <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-hpr>`_
    """
    call_args = (
        handle, _CUBLAS_FILL_MODE[uplo], n,
        ctypes.byref(ctypes.c_float(alpha)),
        int(x), incx, int(AP),
    )
    cublasCheckStatus(_libcublas.cublasChpr_v2(*call_args))
_libcublas.cublasZhpr_v2.restype = int
_libcublas.cublasZhpr_v2.argtypes = [
    _types.handle,                   # handle
    ctypes.c_int, ctypes.c_int,      # uplo, n
    ctypes.c_void_p,                 # alpha
    ctypes.c_void_p, ctypes.c_int,   # x, incx
    ctypes.c_void_p,                 # AP
]


def cublasZhpr(handle, uplo, n, alpha, x, incx, AP):
    """
    Hermitian packed rank-1 update (double precision complex).

    Forwards to ``cublasZhpr_v2``.  `alpha` is wrapped in ``ctypes.c_double``
    and passed by reference; `x` and `AP` are converted with ``int()`` and
    passed as untyped pointers.  The returned status goes to
    `cublasCheckStatus`.

    References
    ----------
    `cublas<t>hpr <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-hpr>`_
    """
    call_args = (
        handle, _CUBLAS_FILL_MODE[uplo], n,
        ctypes.byref(ctypes.c_double(alpha)),
        int(x), incx, int(AP),
    )
    cublasCheckStatus(_libcublas.cublasZhpr_v2(*call_args))
# CHPR2, ZHPR2
# The prototype must be attached to the same symbol the wrapper calls
# (cublasChpr2_v2), not to the legacy cublasChpr2 entry point.
_libcublas.cublasChpr2_v2.restype = int
_libcublas.cublasChpr2_v2.argtypes = [
    _types.handle,                   # handle
    ctypes.c_int, ctypes.c_int,      # uplo, n
    ctypes.c_void_p,                 # alpha
    ctypes.c_void_p, ctypes.c_int,   # x, incx
    ctypes.c_void_p, ctypes.c_int,   # y, incy
    ctypes.c_void_p,                 # AP
]


def cublasChpr2(handle, uplo, n, alpha, x, inx, y, incy, AP):
    """
    Rank-2 operation on single precision Hermitian packed matrix.

    Forwards to ``cublasChpr2_v2`` and passes the returned status to
    `cublasCheckStatus`.

    Parameters
    ----------
    handle
        CUBLAS context, passed through unchanged.
    uplo
        Key into `_CUBLAS_FILL_MODE`.
    n : int
        Passed through as the ``n`` argument.
    alpha : complex
        Scalar; repacked from ``.real``/``.imag`` into
        `cuda.cuFloatComplex` and passed by reference.
    x, y, AP
        Pointers; converted with ``int()`` and passed as ``void *``.
    inx : int
        Forwarded as the library's ``incx`` argument.  The parameter keeps
        its historical spelling so existing callers are unaffected.
    incy : int
        Passed through as the ``incy`` argument.

    References
    ----------
    `cublas<t>hpr2 <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-hpr2>`_
    """
    call_args = (
        handle, _CUBLAS_FILL_MODE[uplo], n,
        ctypes.byref(cuda.cuFloatComplex(alpha.real, alpha.imag)),
        int(x), inx, int(y), incy, int(AP),
    )
    cublasCheckStatus(_libcublas.cublasChpr2_v2(*call_args))
_libcublas.cublasZhpr2_v2.restype = int
_libcublas.cublasZhpr2_v2.argtypes = [
    _types.handle,                   # handle
    ctypes.c_int, ctypes.c_int,      # uplo, n
    ctypes.c_void_p,                 # alpha
    ctypes.c_void_p, ctypes.c_int,   # x, incx
    ctypes.c_void_p, ctypes.c_int,   # y, incy
    ctypes.c_void_p,                 # AP
]


def cublasZhpr2(handle, uplo, n, alpha, x, inx, y, incy, AP):
    """
    Rank-2 operation on double precision Hermitian packed matrix.

    Forwards to ``cublasZhpr2_v2`` and passes the returned status to
    `cublasCheckStatus`.

    Parameters
    ----------
    handle
        CUBLAS context, passed through unchanged.
    uplo
        Key into `_CUBLAS_FILL_MODE`.
    n : int
        Passed through as the ``n`` argument.
    alpha : complex
        Scalar; repacked from ``.real``/``.imag`` into
        `cuda.cuDoubleComplex` and passed by reference.
    x, y, AP
        Pointers; converted with ``int()`` and passed as ``void *``.
    inx : int
        Forwarded as the library's ``incx`` argument.  The parameter keeps
        its historical spelling so existing callers are unaffected.
    incy : int
        Passed through as the ``incy`` argument.

    References
    ----------
    `cublas<t>hpr2 <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-hpr2>`_
    """
    call_args = (
        handle, _CUBLAS_FILL_MODE[uplo], n,
        ctypes.byref(cuda.cuDoubleComplex(alpha.real, alpha.imag)),
        int(x), inx, int(y), incy, int(AP),
    )
    cublasCheckStatus(_libcublas.cublasZhpr2_v2(*call_args))
# SGEMM, CGEMM, DGEMM, ZGEMM
_libcublas.cublasSgemm_v2.restype = int
_libcublas.cublasSgemm_v2.argtypes = [
    _types.handle,                             # handle
    ctypes.c_int, ctypes.c_int,                # transa, transb
    ctypes.c_int, ctypes.c_int, ctypes.c_int,  # m, n, k
    ctypes.c_void_p,                           # alpha
    ctypes.c_void_p, ctypes.c_int,             # A, lda
    ctypes.c_void_p, ctypes.c_int,             # B, ldb
    ctypes.c_void_p,                           # beta
    ctypes.c_void_p, ctypes.c_int,             # C, ldc
]


def cublasSgemm(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc):
    """
    General matrix-matrix product (single precision real).

    Forwards to ``cublasSgemm_v2``.  `transa`/`transb` are looked up in
    `_CUBLAS_OP`; `alpha` and `beta` are wrapped in ``ctypes.c_float`` and
    passed by reference; `A`, `B` and `C` are converted with ``int()`` and
    passed as untyped pointers.  The returned status goes to
    `cublasCheckStatus`.

    References
    ----------
    `cublas<t>gemm <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm>`_
    """
    call_args = (
        handle, _CUBLAS_OP[transa], _CUBLAS_OP[transb], m, n, k,
        ctypes.byref(ctypes.c_float(alpha)),
        int(A), lda, int(B), ldb,
        ctypes.byref(ctypes.c_float(beta)),
        int(C), ldc,
    )
    cublasCheckStatus(_libcublas.cublasSgemm_v2(*call_args))
_libcublas.cublasCgemm_v2.restype = int
_libcublas.cublasCgemm_v2.argtypes = [
    _types.handle,                             # handle
    ctypes.c_int, ctypes.c_int,                # transa, transb
    ctypes.c_int, ctypes.c_int, ctypes.c_int,  # m, n, k
    ctypes.c_void_p,                           # alpha
    ctypes.c_void_p, ctypes.c_int,             # A, lda
    ctypes.c_void_p, ctypes.c_int,             # B, ldb
    ctypes.c_void_p,                           # beta
    ctypes.c_void_p, ctypes.c_int,             # C, ldc
]


def cublasCgemm(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc):
    """
    General matrix-matrix product (single precision complex).

    Forwards to ``cublasCgemm_v2``.  `transa`/`transb` are looked up in
    `_CUBLAS_OP`; `alpha` and `beta` are repacked into
    `cuda.cuFloatComplex` and passed by reference; `A`, `B` and `C` are
    converted with ``int()`` and passed as untyped pointers.  The returned
    status goes to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>gemm <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm>`_
    """
    call_args = (
        handle, _CUBLAS_OP[transa], _CUBLAS_OP[transb], m, n, k,
        ctypes.byref(cuda.cuFloatComplex(alpha.real, alpha.imag)),
        int(A), lda, int(B), ldb,
        ctypes.byref(cuda.cuFloatComplex(beta.real, beta.imag)),
        int(C), ldc,
    )
    cublasCheckStatus(_libcublas.cublasCgemm_v2(*call_args))
_libcublas.cublasDgemm_v2.restype = int
_libcublas.cublasDgemm_v2.argtypes = [
    _types.handle,                             # handle
    ctypes.c_int, ctypes.c_int,                # transa, transb
    ctypes.c_int, ctypes.c_int, ctypes.c_int,  # m, n, k
    ctypes.c_void_p,                           # alpha
    ctypes.c_void_p, ctypes.c_int,             # A, lda
    ctypes.c_void_p, ctypes.c_int,             # B, ldb
    ctypes.c_void_p,                           # beta
    ctypes.c_void_p, ctypes.c_int,             # C, ldc
]


def cublasDgemm(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc):
    """
    General matrix-matrix product (double precision real).

    Forwards to ``cublasDgemm_v2``.  `transa`/`transb` are looked up in
    `_CUBLAS_OP`; `alpha` and `beta` are wrapped in ``ctypes.c_double`` and
    passed by reference; `A`, `B` and `C` are converted with ``int()`` and
    passed as untyped pointers.  The returned status goes to
    `cublasCheckStatus`.

    References
    ----------
    `cublas<t>gemm <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm>`_
    """
    call_args = (
        handle, _CUBLAS_OP[transa], _CUBLAS_OP[transb], m, n, k,
        ctypes.byref(ctypes.c_double(alpha)),
        int(A), lda, int(B), ldb,
        ctypes.byref(ctypes.c_double(beta)),
        int(C), ldc,
    )
    cublasCheckStatus(_libcublas.cublasDgemm_v2(*call_args))
_libcublas.cublasZgemm_v2.restype = int
_libcublas.cublasZgemm_v2.argtypes = [
    _types.handle,                             # handle
    ctypes.c_int, ctypes.c_int,                # transa, transb
    ctypes.c_int, ctypes.c_int, ctypes.c_int,  # m, n, k
    ctypes.c_void_p,                           # alpha
    ctypes.c_void_p, ctypes.c_int,             # A, lda
    ctypes.c_void_p, ctypes.c_int,             # B, ldb
    ctypes.c_void_p,                           # beta
    ctypes.c_void_p, ctypes.c_int,             # C, ldc
]


def cublasZgemm(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc):
    """
    General matrix-matrix product (double precision complex).

    Forwards to ``cublasZgemm_v2``.  `transa`/`transb` are looked up in
    `_CUBLAS_OP`; `alpha` and `beta` are repacked into
    `cuda.cuDoubleComplex` and passed by reference; `A`, `B` and `C` are
    converted with ``int()`` and passed as untyped pointers.  The returned
    status goes to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>gemm <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm>`_
    """
    call_args = (
        handle, _CUBLAS_OP[transa], _CUBLAS_OP[transb], m, n, k,
        ctypes.byref(cuda.cuDoubleComplex(alpha.real, alpha.imag)),
        int(A), lda, int(B), ldb,
        ctypes.byref(cuda.cuDoubleComplex(beta.real, beta.imag)),
        int(C), ldc,
    )
    cublasCheckStatus(_libcublas.cublasZgemm_v2(*call_args))
# SSYMM, DSYMM, CSYMM, ZSYMM
_libcublas.cublasSsymm_v2.restype = int
_libcublas.cublasSsymm_v2.argtypes = [
    _types.handle,                   # handle
    ctypes.c_int, ctypes.c_int,      # side, uplo
    ctypes.c_int, ctypes.c_int,      # m, n
    ctypes.c_void_p,                 # alpha
    ctypes.c_void_p, ctypes.c_int,   # A, lda
    ctypes.c_void_p, ctypes.c_int,   # B, ldb
    ctypes.c_void_p,                 # beta
    ctypes.c_void_p, ctypes.c_int,   # C, ldc
]


def cublasSsymm(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc):
    """
    Symmetric matrix-matrix product (single precision real).

    Forwards to ``cublasSsymm_v2``.  `side` and `uplo` are looked up in
    `_CUBLAS_SIDE_MODE` and `_CUBLAS_FILL_MODE`; `alpha` and `beta` are
    wrapped in ``ctypes.c_float`` and passed by reference; `A`, `B` and `C`
    are converted with ``int()`` and passed as untyped pointers.  The
    returned status goes to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>symm <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-symm>`_
    """
    call_args = (
        handle, _CUBLAS_SIDE_MODE[side], _CUBLAS_FILL_MODE[uplo], m, n,
        ctypes.byref(ctypes.c_float(alpha)),
        int(A), lda, int(B), ldb,
        ctypes.byref(ctypes.c_float(beta)),
        int(C), ldc,
    )
    cublasCheckStatus(_libcublas.cublasSsymm_v2(*call_args))
_libcublas.cublasDsymm_v2.restype = int
_libcublas.cublasDsymm_v2.argtypes = [
    _types.handle,                   # handle
    ctypes.c_int, ctypes.c_int,      # side, uplo
    ctypes.c_int, ctypes.c_int,      # m, n
    ctypes.c_void_p,                 # alpha
    ctypes.c_void_p, ctypes.c_int,   # A, lda
    ctypes.c_void_p, ctypes.c_int,   # B, ldb
    ctypes.c_void_p,                 # beta
    ctypes.c_void_p, ctypes.c_int,   # C, ldc
]


def cublasDsymm(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc):
    """
    Symmetric matrix-matrix product (double precision real).

    Forwards to ``cublasDsymm_v2``.  `side` and `uplo` are looked up in
    `_CUBLAS_SIDE_MODE` and `_CUBLAS_FILL_MODE`; `alpha` and `beta` are
    wrapped in ``ctypes.c_double`` and passed by reference; `A`, `B` and `C`
    are converted with ``int()`` and passed as untyped pointers.  The
    returned status goes to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>symm <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-symm>`_
    """
    call_args = (
        handle, _CUBLAS_SIDE_MODE[side], _CUBLAS_FILL_MODE[uplo], m, n,
        ctypes.byref(ctypes.c_double(alpha)),
        int(A), lda, int(B), ldb,
        ctypes.byref(ctypes.c_double(beta)),
        int(C), ldc,
    )
    cublasCheckStatus(_libcublas.cublasDsymm_v2(*call_args))
_libcublas.cublasCsymm_v2.restype = int
_libcublas.cublasCsymm_v2.argtypes = [
    _types.handle,                   # handle
    ctypes.c_int, ctypes.c_int,      # side, uplo
    ctypes.c_int, ctypes.c_int,      # m, n
    ctypes.c_void_p,                 # alpha
    ctypes.c_void_p, ctypes.c_int,   # A, lda
    ctypes.c_void_p, ctypes.c_int,   # B, ldb
    ctypes.c_void_p,                 # beta
    ctypes.c_void_p, ctypes.c_int,   # C, ldc
]


def cublasCsymm(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc):
    """
    Symmetric matrix-matrix product (single precision complex).

    Forwards to ``cublasCsymm_v2``.  `side` and `uplo` are looked up in
    `_CUBLAS_SIDE_MODE` and `_CUBLAS_FILL_MODE`; `alpha` and `beta` are
    repacked into `cuda.cuFloatComplex` and passed by reference; `A`, `B`
    and `C` are converted with ``int()`` and passed as untyped pointers.
    The returned status goes to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>symm <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-symm>`_
    """
    call_args = (
        handle, _CUBLAS_SIDE_MODE[side], _CUBLAS_FILL_MODE[uplo], m, n,
        ctypes.byref(cuda.cuFloatComplex(alpha.real, alpha.imag)),
        int(A), lda, int(B), ldb,
        ctypes.byref(cuda.cuFloatComplex(beta.real, beta.imag)),
        int(C), ldc,
    )
    cublasCheckStatus(_libcublas.cublasCsymm_v2(*call_args))
_libcublas.cublasZsymm_v2.restype = int
_libcublas.cublasZsymm_v2.argtypes = [
    _types.handle,                   # handle
    ctypes.c_int, ctypes.c_int,      # side, uplo
    ctypes.c_int, ctypes.c_int,      # m, n
    ctypes.c_void_p,                 # alpha
    ctypes.c_void_p, ctypes.c_int,   # A, lda
    ctypes.c_void_p, ctypes.c_int,   # B, ldb
    ctypes.c_void_p,                 # beta
    ctypes.c_void_p, ctypes.c_int,   # C, ldc
]


def cublasZsymm(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc):
    """
    Symmetric matrix-matrix product (double precision complex).

    Forwards to ``cublasZsymm_v2``.  `side` and `uplo` are looked up in
    `_CUBLAS_SIDE_MODE` and `_CUBLAS_FILL_MODE`; `alpha` and `beta` are
    repacked into `cuda.cuDoubleComplex` and passed by reference; `A`, `B`
    and `C` are converted with ``int()`` and passed as untyped pointers.
    The returned status goes to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>symm <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-symm>`_
    """
    call_args = (
        handle, _CUBLAS_SIDE_MODE[side], _CUBLAS_FILL_MODE[uplo], m, n,
        ctypes.byref(cuda.cuDoubleComplex(alpha.real, alpha.imag)),
        int(A), lda, int(B), ldb,
        ctypes.byref(cuda.cuDoubleComplex(beta.real, beta.imag)),
        int(C), ldc,
    )
    cublasCheckStatus(_libcublas.cublasZsymm_v2(*call_args))
# SSYRK, DSYRK, CSYRK, ZSYRK
_libcublas.cublasSsyrk_v2.restype = int
_libcublas.cublasSsyrk_v2.argtypes = [
    _types.handle,                   # handle
    ctypes.c_int, ctypes.c_int,      # uplo, trans
    ctypes.c_int, ctypes.c_int,      # n, k
    ctypes.c_void_p,                 # alpha
    ctypes.c_void_p, ctypes.c_int,   # A, lda
    ctypes.c_void_p,                 # beta
    ctypes.c_void_p, ctypes.c_int,   # C, ldc
]


def cublasSsyrk(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc):
    """
    Symmetric rank-k update (single precision real).

    Forwards to ``cublasSsyrk_v2``.  `uplo` and `trans` are looked up in
    `_CUBLAS_FILL_MODE` and `_CUBLAS_OP`; `alpha` and `beta` are wrapped in
    ``ctypes.c_float`` and passed by reference; `A` and `C` are converted
    with ``int()`` and passed as untyped pointers.  The returned status goes
    to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>syrk <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-syrk>`_
    """
    call_args = (
        handle, _CUBLAS_FILL_MODE[uplo], _CUBLAS_OP[trans], n, k,
        ctypes.byref(ctypes.c_float(alpha)),
        int(A), lda,
        ctypes.byref(ctypes.c_float(beta)),
        int(C), ldc,
    )
    cublasCheckStatus(_libcublas.cublasSsyrk_v2(*call_args))
_libcublas.cublasDsyrk_v2.restype = int
_libcublas.cublasDsyrk_v2.argtypes = [
    _types.handle,                   # handle
    ctypes.c_int, ctypes.c_int,      # uplo, trans
    ctypes.c_int, ctypes.c_int,      # n, k
    ctypes.c_void_p,                 # alpha (pointer to double)
    ctypes.c_void_p, ctypes.c_int,   # A, lda
    ctypes.c_void_p,                 # beta (pointer to double)
    ctypes.c_void_p, ctypes.c_int,   # C, ldc
]


def cublasDsyrk(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc):
    """
    Rank-k operation on real double precision symmetric matrix.

    Forwards to ``cublasDsyrk_v2`` and passes the returned status to
    `cublasCheckStatus`.

    Parameters
    ----------
    handle
        CUBLAS context, passed through unchanged.
    uplo
        Key into `_CUBLAS_FILL_MODE`.
    trans
        Key into `_CUBLAS_OP`.
    n, k : int
        Passed through as the ``n``/``k`` arguments.
    alpha, beta : float
        Real scalars; wrapped in ``ctypes.c_double`` and passed by
        reference.
    A, C
        Pointers; converted with ``int()`` and passed as ``void *``.
    lda, ldc : int
        Passed through as the ``lda``/``ldc`` arguments.

    References
    ----------
    `cublas<t>syrk <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-syrk>`_
    """
    # This is the real double precision routine, so the scalars must be C
    # doubles (as in cublasDgemm/cublasDsymm), not single precision complex
    # structs.
    call_args = (
        handle, _CUBLAS_FILL_MODE[uplo], _CUBLAS_OP[trans], n, k,
        ctypes.byref(ctypes.c_double(alpha)),
        int(A), lda,
        ctypes.byref(ctypes.c_double(beta)),
        int(C), ldc,
    )
    cublasCheckStatus(_libcublas.cublasDsyrk_v2(*call_args))
_libcublas.cublasCsyrk_v2.restype = int
_libcublas.cublasCsyrk_v2.argtypes = [
    _types.handle,                   # handle
    ctypes.c_int, ctypes.c_int,      # uplo, trans
    ctypes.c_int, ctypes.c_int,      # n, k
    ctypes.c_void_p,                 # alpha
    ctypes.c_void_p, ctypes.c_int,   # A, lda
    ctypes.c_void_p,                 # beta
    ctypes.c_void_p, ctypes.c_int,   # C, ldc
]


def cublasCsyrk(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc):
    """
    Symmetric rank-k update (single precision complex).

    Forwards to ``cublasCsyrk_v2``.  `uplo` and `trans` are looked up in
    `_CUBLAS_FILL_MODE` and `_CUBLAS_OP`; `alpha` and `beta` are repacked
    into `cuda.cuFloatComplex` and passed by reference; `A` and `C` are
    converted with ``int()`` and passed as untyped pointers.  The returned
    status goes to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>syrk <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-syrk>`_
    """
    call_args = (
        handle, _CUBLAS_FILL_MODE[uplo], _CUBLAS_OP[trans], n, k,
        ctypes.byref(cuda.cuFloatComplex(alpha.real, alpha.imag)),
        int(A), lda,
        ctypes.byref(cuda.cuFloatComplex(beta.real, beta.imag)),
        int(C), ldc,
    )
    cublasCheckStatus(_libcublas.cublasCsyrk_v2(*call_args))
_libcublas.cublasZsyrk_v2.restype = int
_libcublas.cublasZsyrk_v2.argtypes = [
    _types.handle,                   # handle
    ctypes.c_int, ctypes.c_int,      # uplo, trans
    ctypes.c_int, ctypes.c_int,      # n, k
    ctypes.c_void_p,                 # alpha
    ctypes.c_void_p, ctypes.c_int,   # A, lda
    ctypes.c_void_p,                 # beta
    ctypes.c_void_p, ctypes.c_int,   # C, ldc
]


def cublasZsyrk(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc):
    """
    Symmetric rank-k update (double precision complex).

    Forwards to ``cublasZsyrk_v2``.  `uplo` and `trans` are looked up in
    `_CUBLAS_FILL_MODE` and `_CUBLAS_OP`; `alpha` and `beta` are repacked
    into `cuda.cuDoubleComplex` and passed by reference; `A` and `C` are
    converted with ``int()`` and passed as untyped pointers.  The returned
    status goes to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>syrk <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-syrk>`_
    """
    call_args = (
        handle, _CUBLAS_FILL_MODE[uplo], _CUBLAS_OP[trans], n, k,
        ctypes.byref(cuda.cuDoubleComplex(alpha.real, alpha.imag)),
        int(A), lda,
        ctypes.byref(cuda.cuDoubleComplex(beta.real, beta.imag)),
        int(C), ldc,
    )
    cublasCheckStatus(_libcublas.cublasZsyrk_v2(*call_args))
# SSYR2K, DSYR2K, CSYR2K, ZSYR2K
_libcublas.cublasSsyr2k_v2.restype = int
_libcublas.cublasSsyr2k_v2.argtypes = [
    _types.handle,                   # handle
    ctypes.c_int, ctypes.c_int,      # uplo, trans
    ctypes.c_int, ctypes.c_int,      # n, k
    ctypes.c_void_p,                 # alpha
    ctypes.c_void_p, ctypes.c_int,   # A, lda
    ctypes.c_void_p, ctypes.c_int,   # B, ldb
    ctypes.c_void_p,                 # beta
    ctypes.c_void_p, ctypes.c_int,   # C, ldc
]


def cublasSsyr2k(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc):
    """
    Symmetric rank-2k update (single precision real).

    Forwards to ``cublasSsyr2k_v2``.  `uplo` and `trans` are looked up in
    `_CUBLAS_FILL_MODE` and `_CUBLAS_OP`; `alpha` and `beta` are wrapped in
    ``ctypes.c_float`` and passed by reference; `A`, `B` and `C` are
    converted with ``int()`` and passed as untyped pointers.  The returned
    status goes to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>syr2k <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-syr2k>`_
    """
    call_args = (
        handle, _CUBLAS_FILL_MODE[uplo], _CUBLAS_OP[trans], n, k,
        ctypes.byref(ctypes.c_float(alpha)),
        int(A), lda, int(B), ldb,
        ctypes.byref(ctypes.c_float(beta)),
        int(C), ldc,
    )
    cublasCheckStatus(_libcublas.cublasSsyr2k_v2(*call_args))
_libcublas.cublasDsyr2k_v2.restype = int
_libcublas.cublasDsyr2k_v2.argtypes = [
    _types.handle,                   # handle
    ctypes.c_int, ctypes.c_int,      # uplo, trans
    ctypes.c_int, ctypes.c_int,      # n, k
    ctypes.c_void_p,                 # alpha
    ctypes.c_void_p, ctypes.c_int,   # A, lda
    ctypes.c_void_p, ctypes.c_int,   # B, ldb
    ctypes.c_void_p,                 # beta
    ctypes.c_void_p, ctypes.c_int,   # C, ldc
]


def cublasDsyr2k(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc):
    """
    Symmetric rank-2k update (double precision real).

    Forwards to ``cublasDsyr2k_v2``.  `uplo` and `trans` are looked up in
    `_CUBLAS_FILL_MODE` and `_CUBLAS_OP`; `alpha` and `beta` are wrapped in
    ``ctypes.c_double`` and passed by reference; `A`, `B` and `C` are
    converted with ``int()`` and passed as untyped pointers.  The returned
    status goes to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>syr2k <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-syr2k>`_
    """
    call_args = (
        handle, _CUBLAS_FILL_MODE[uplo], _CUBLAS_OP[trans], n, k,
        ctypes.byref(ctypes.c_double(alpha)),
        int(A), lda, int(B), ldb,
        ctypes.byref(ctypes.c_double(beta)),
        int(C), ldc,
    )
    cublasCheckStatus(_libcublas.cublasDsyr2k_v2(*call_args))
_libcublas.cublasCsyr2k_v2.restype = int
_libcublas.cublasCsyr2k_v2.argtypes = [
    _types.handle,                   # handle
    ctypes.c_int, ctypes.c_int,      # uplo, trans
    ctypes.c_int, ctypes.c_int,      # n, k
    ctypes.c_void_p,                 # alpha
    ctypes.c_void_p, ctypes.c_int,   # A, lda
    ctypes.c_void_p, ctypes.c_int,   # B, ldb
    ctypes.c_void_p,                 # beta
    ctypes.c_void_p, ctypes.c_int,   # C, ldc
]


def cublasCsyr2k(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc):
    """
    Symmetric rank-2k update (single precision complex).

    Forwards to ``cublasCsyr2k_v2``.  `uplo` and `trans` are looked up in
    `_CUBLAS_FILL_MODE` and `_CUBLAS_OP`; `alpha` and `beta` are repacked
    into `cuda.cuFloatComplex` and passed by reference; `A`, `B` and `C` are
    converted with ``int()`` and passed as untyped pointers.  The returned
    status goes to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>syr2k <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-syr2k>`_
    """
    call_args = (
        handle, _CUBLAS_FILL_MODE[uplo], _CUBLAS_OP[trans], n, k,
        ctypes.byref(cuda.cuFloatComplex(alpha.real, alpha.imag)),
        int(A), lda, int(B), ldb,
        ctypes.byref(cuda.cuFloatComplex(beta.real, beta.imag)),
        int(C), ldc,
    )
    cublasCheckStatus(_libcublas.cublasCsyr2k_v2(*call_args))
_libcublas.cublasZsyr2k_v2.restype = int
_libcublas.cublasZsyr2k_v2.argtypes = [
    _types.handle,                   # handle
    ctypes.c_int, ctypes.c_int,      # uplo, trans
    ctypes.c_int, ctypes.c_int,      # n, k
    ctypes.c_void_p,                 # alpha
    ctypes.c_void_p, ctypes.c_int,   # A, lda
    ctypes.c_void_p, ctypes.c_int,   # B, ldb
    ctypes.c_void_p,                 # beta
    ctypes.c_void_p, ctypes.c_int,   # C, ldc
]


def cublasZsyr2k(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc):
    """
    Symmetric rank-2k update (double precision complex).

    Forwards to ``cublasZsyr2k_v2``.  `uplo` and `trans` are looked up in
    `_CUBLAS_FILL_MODE` and `_CUBLAS_OP`; `alpha` and `beta` are repacked
    into `cuda.cuDoubleComplex` and passed by reference; `A`, `B` and `C`
    are converted with ``int()`` and passed as untyped pointers.  The
    returned status goes to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>syr2k <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-syr2k>`_
    """
    call_args = (
        handle, _CUBLAS_FILL_MODE[uplo], _CUBLAS_OP[trans], n, k,
        ctypes.byref(cuda.cuDoubleComplex(alpha.real, alpha.imag)),
        int(A), lda, int(B), ldb,
        ctypes.byref(cuda.cuDoubleComplex(beta.real, beta.imag)),
        int(C), ldc,
    )
    cublasCheckStatus(_libcublas.cublasZsyr2k_v2(*call_args))
# STRMM, DTRMM, CTRMM, ZTRMM
_libcublas.cublasStrmm_v2.restype = int
_libcublas.cublasStrmm_v2.argtypes = [
    _types.handle,                                           # handle
    ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_int,  # side, uplo, trans, diag
    ctypes.c_int, ctypes.c_int,                              # m, n
    ctypes.c_void_p,                                         # alpha
    ctypes.c_void_p, ctypes.c_int,                           # A, lda
    ctypes.c_void_p, ctypes.c_int,                           # B, ldb
    ctypes.c_void_p, ctypes.c_int,                           # C, ldc
]


def cublasStrmm(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, C, ldc):
    """
    Triangular matrix-matrix product (single precision real).

    Forwards to ``cublasStrmm_v2``.  `side`, `uplo`, `trans` and `diag` are
    looked up in `_CUBLAS_SIDE_MODE`, `_CUBLAS_FILL_MODE`, `_CUBLAS_OP` and
    `_CUBLAS_DIAG`; `alpha` is wrapped in ``ctypes.c_float`` and passed by
    reference; `A`, `B` and `C` are converted with ``int()`` and passed as
    untyped pointers.  The returned status goes to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>trmm <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-trmm>`_
    """
    call_args = (
        handle,
        _CUBLAS_SIDE_MODE[side], _CUBLAS_FILL_MODE[uplo],
        _CUBLAS_OP[trans], _CUBLAS_DIAG[diag],
        m, n, ctypes.byref(ctypes.c_float(alpha)),
        int(A), lda, int(B), ldb, int(C), ldc,
    )
    cublasCheckStatus(_libcublas.cublasStrmm_v2(*call_args))
_libcublas.cublasDtrmm_v2.restype = int
_libcublas.cublasDtrmm_v2.argtypes = [
    _types.handle,                                           # handle
    ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_int,  # side, uplo, trans, diag
    ctypes.c_int, ctypes.c_int,                              # m, n
    ctypes.c_void_p,                                         # alpha
    ctypes.c_void_p, ctypes.c_int,                           # A, lda
    ctypes.c_void_p, ctypes.c_int,                           # B, ldb
    ctypes.c_void_p, ctypes.c_int,                           # C, ldc
]


def cublasDtrmm(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, C, ldc):
    """
    Triangular matrix-matrix product (double precision real).

    Forwards to ``cublasDtrmm_v2``.  `side`, `uplo`, `trans` and `diag` are
    looked up in `_CUBLAS_SIDE_MODE`, `_CUBLAS_FILL_MODE`, `_CUBLAS_OP` and
    `_CUBLAS_DIAG`; `alpha` is wrapped in ``ctypes.c_double`` and passed by
    reference; `A`, `B` and `C` are converted with ``int()`` and passed as
    untyped pointers.  The returned status goes to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>trmm <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-trmm>`_
    """
    call_args = (
        handle,
        _CUBLAS_SIDE_MODE[side], _CUBLAS_FILL_MODE[uplo],
        _CUBLAS_OP[trans], _CUBLAS_DIAG[diag],
        m, n, ctypes.byref(ctypes.c_double(alpha)),
        int(A), lda, int(B), ldb, int(C), ldc,
    )
    cublasCheckStatus(_libcublas.cublasDtrmm_v2(*call_args))
_libcublas.cublasCtrmm_v2.restype = int
_libcublas.cublasCtrmm_v2.argtypes = [
    _types.handle,                                           # handle
    ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_int,  # side, uplo, trans, diag
    ctypes.c_int, ctypes.c_int,                              # m, n
    ctypes.c_void_p,                                         # alpha
    ctypes.c_void_p, ctypes.c_int,                           # A, lda
    ctypes.c_void_p, ctypes.c_int,                           # B, ldb
    ctypes.c_void_p, ctypes.c_int,                           # C, ldc
]


def cublasCtrmm(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, C, ldc):
    """
    Matrix-matrix product for complex single precision triangular matrix.

    Forwards to ``cublasCtrmm_v2`` and passes the returned status to
    `cublasCheckStatus`.

    Parameters
    ----------
    handle
        CUBLAS context, passed through unchanged.
    side, uplo, trans, diag
        Keys into `_CUBLAS_SIDE_MODE`, `_CUBLAS_FILL_MODE`, `_CUBLAS_OP`
        and `_CUBLAS_DIAG` respectively.
    m, n : int
        Passed through as the ``m``/``n`` arguments.
    alpha : complex
        Scalar; repacked from ``.real``/``.imag`` into
        `cuda.cuFloatComplex` and passed by reference.
    A, B, C
        Pointers; converted with ``int()`` and passed as ``void *``.
    lda, ldb, ldc : int
        Passed through as the leading-dimension arguments.

    References
    ----------
    `cublas<t>trmm <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-trmm>`_
    """
    # The prototype above declares 14 arguments; C and ldc must be forwarded
    # like in the S/D/Z variants, otherwise ctypes rejects the call.
    call_args = (
        handle,
        _CUBLAS_SIDE_MODE[side], _CUBLAS_FILL_MODE[uplo],
        _CUBLAS_OP[trans], _CUBLAS_DIAG[diag],
        m, n, ctypes.byref(cuda.cuFloatComplex(alpha.real, alpha.imag)),
        int(A), lda, int(B), ldb, int(C), ldc,
    )
    cublasCheckStatus(_libcublas.cublasCtrmm_v2(*call_args))
_libcublas.cublasZtrmm_v2.restype = int
_libcublas.cublasZtrmm_v2.argtypes = [
    _types.handle,                                           # handle
    ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_int,  # side, uplo, trans, diag
    ctypes.c_int, ctypes.c_int,                              # m, n
    ctypes.c_void_p,                                         # alpha
    ctypes.c_void_p, ctypes.c_int,                           # A, lda
    ctypes.c_void_p, ctypes.c_int,                           # B, ldb
    ctypes.c_void_p, ctypes.c_int,                           # C, ldc
]


def cublasZtrmm(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, C, ldc):
    """
    Triangular matrix-matrix product (double precision complex).

    Forwards to ``cublasZtrmm_v2``.  `side`, `uplo`, `trans` and `diag` are
    looked up in `_CUBLAS_SIDE_MODE`, `_CUBLAS_FILL_MODE`, `_CUBLAS_OP` and
    `_CUBLAS_DIAG`; `alpha` is repacked into `cuda.cuDoubleComplex` and
    passed by reference; `A`, `B` and `C` are converted with ``int()`` and
    passed as untyped pointers.  The returned status goes to
    `cublasCheckStatus`.

    References
    ----------
    `cublas<t>trmm <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-trmm>`_
    """
    call_args = (
        handle,
        _CUBLAS_SIDE_MODE[side], _CUBLAS_FILL_MODE[uplo],
        _CUBLAS_OP[trans], _CUBLAS_DIAG[diag],
        m, n, ctypes.byref(cuda.cuDoubleComplex(alpha.real, alpha.imag)),
        int(A), lda, int(B), ldb, int(C), ldc,
    )
    cublasCheckStatus(_libcublas.cublasZtrmm_v2(*call_args))
# STRSM, DTRSM, CTRSM, ZTRSM
_libcublas.cublasStrsm_v2.restype = int
_libcublas.cublasStrsm_v2.argtypes = [
    _types.handle,                                           # handle
    ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_int,  # side, uplo, trans, diag
    ctypes.c_int, ctypes.c_int,                              # m, n
    ctypes.c_void_p,                                         # alpha
    ctypes.c_void_p, ctypes.c_int,                           # A, lda
    ctypes.c_void_p, ctypes.c_int,                           # B, ldb
]


def cublasStrsm(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb):
    """
    Triangular solve with multiple right-hand sides (single precision real).

    Forwards to ``cublasStrsm_v2``.  `side`, `uplo`, `trans` and `diag` are
    looked up in `_CUBLAS_SIDE_MODE`, `_CUBLAS_FILL_MODE`, `_CUBLAS_OP` and
    `_CUBLAS_DIAG`; `alpha` is wrapped in ``ctypes.c_float`` and passed by
    reference; `A` and `B` are converted with ``int()`` and passed as
    untyped pointers.  The returned status goes to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>trsm <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-trsm>`_
    """
    call_args = (
        handle,
        _CUBLAS_SIDE_MODE[side], _CUBLAS_FILL_MODE[uplo],
        _CUBLAS_OP[trans], _CUBLAS_DIAG[diag],
        m, n, ctypes.byref(ctypes.c_float(alpha)),
        int(A), lda, int(B), ldb,
    )
    cublasCheckStatus(_libcublas.cublasStrsm_v2(*call_args))
_libcublas.cublasDtrsm_v2.restype = int
_libcublas.cublasDtrsm_v2.argtypes = [
    _types.handle,                                           # handle
    ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_int,  # side, uplo, trans, diag
    ctypes.c_int, ctypes.c_int,                              # m, n
    ctypes.c_void_p,                                         # alpha
    ctypes.c_void_p, ctypes.c_int,                           # A, lda
    ctypes.c_void_p, ctypes.c_int,                           # B, ldb
]


def cublasDtrsm(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb):
    """
    Triangular solve with multiple right-hand sides (double precision real).

    Forwards to ``cublasDtrsm_v2``.  `side`, `uplo`, `trans` and `diag` are
    looked up in `_CUBLAS_SIDE_MODE`, `_CUBLAS_FILL_MODE`, `_CUBLAS_OP` and
    `_CUBLAS_DIAG`; `alpha` is wrapped in ``ctypes.c_double`` and passed by
    reference; `A` and `B` are converted with ``int()`` and passed as
    untyped pointers.  The returned status goes to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>trsm <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-trsm>`_
    """
    call_args = (
        handle,
        _CUBLAS_SIDE_MODE[side], _CUBLAS_FILL_MODE[uplo],
        _CUBLAS_OP[trans], _CUBLAS_DIAG[diag],
        m, n, ctypes.byref(ctypes.c_double(alpha)),
        int(A), lda, int(B), ldb,
    )
    cublasCheckStatus(_libcublas.cublasDtrsm_v2(*call_args))
_libcublas.cublasCtrsm_v2.restype = int
_libcublas.cublasCtrsm_v2.argtypes = [
    _types.handle,                                           # handle
    ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_int,  # side, uplo, trans, diag
    ctypes.c_int, ctypes.c_int,                              # m, n
    ctypes.c_void_p,                                         # alpha
    ctypes.c_void_p, ctypes.c_int,                           # A, lda
    ctypes.c_void_p, ctypes.c_int,                           # B, ldb
]


def cublasCtrsm(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb):
    """
    Triangular solve with multiple right-hand sides (single precision complex).

    Forwards to ``cublasCtrsm_v2``.  `side`, `uplo`, `trans` and `diag` are
    looked up in `_CUBLAS_SIDE_MODE`, `_CUBLAS_FILL_MODE`, `_CUBLAS_OP` and
    `_CUBLAS_DIAG`; `alpha` is repacked into `cuda.cuFloatComplex` and
    passed by reference; `A` and `B` are converted with ``int()`` and passed
    as untyped pointers.  The returned status goes to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>trsm <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-trsm>`_
    """
    call_args = (
        handle,
        _CUBLAS_SIDE_MODE[side], _CUBLAS_FILL_MODE[uplo],
        _CUBLAS_OP[trans], _CUBLAS_DIAG[diag],
        m, n, ctypes.byref(cuda.cuFloatComplex(alpha.real, alpha.imag)),
        int(A), lda, int(B), ldb,
    )
    cublasCheckStatus(_libcublas.cublasCtrsm_v2(*call_args))
_libcublas.cublasZtrsm_v2.restype = int
_libcublas.cublasZtrsm_v2.argtypes = [
    _types.handle,                                           # handle
    ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_int,  # side, uplo, transa, diag
    ctypes.c_int, ctypes.c_int,                              # m, n
    ctypes.c_void_p,                                         # alpha
    ctypes.c_void_p, ctypes.c_int,                           # A, lda
    ctypes.c_void_p, ctypes.c_int,                           # B, ldb
]


def cublasZtrsm(handle, side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb):
    """
    Solve complex double precision triangular system with multiple right-hand sides.

    Forwards to ``cublasZtrsm_v2`` and passes the returned status to
    `cublasCheckStatus`.

    Parameters
    ----------
    handle
        CUBLAS context, passed through unchanged.
    side, uplo, transa, diag
        Keys into `_CUBLAS_SIDE_MODE`, `_CUBLAS_FILL_MODE`, `_CUBLAS_OP`
        and `_CUBLAS_DIAG` respectively.
    m, n : int
        Passed through as the ``m``/``n`` arguments.
    alpha : complex
        Scalar; repacked from ``.real``/``.imag`` into
        `cuda.cuDoubleComplex` and passed by reference.
    A, B
        Pointers; converted with ``int()`` and passed as ``void *``.
    lda, ldb : int
        Passed through as the leading-dimension arguments.

    References
    ----------
    `cublas<t>trsm <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-trsm>`_
    """
    # The operation selector is the `transa` parameter; the signature is
    # kept as-is for callers, so the lookup must use that name.
    call_args = (
        handle,
        _CUBLAS_SIDE_MODE[side], _CUBLAS_FILL_MODE[uplo],
        _CUBLAS_OP[transa], _CUBLAS_DIAG[diag],
        m, n, ctypes.byref(cuda.cuDoubleComplex(alpha.real, alpha.imag)),
        int(A), lda, int(B), ldb,
    )
    cublasCheckStatus(_libcublas.cublasZtrsm_v2(*call_args))
# CHEMM, ZHEMM
_libcublas.cublasChemm_v2.restype = int
_libcublas.cublasChemm_v2.argtypes = [
    _types.handle,                   # handle
    ctypes.c_int, ctypes.c_int,      # side, uplo
    ctypes.c_int, ctypes.c_int,      # m, n
    ctypes.c_void_p,                 # alpha
    ctypes.c_void_p, ctypes.c_int,   # A, lda
    ctypes.c_void_p, ctypes.c_int,   # B, ldb
    ctypes.c_void_p,                 # beta
    ctypes.c_void_p, ctypes.c_int,   # C, ldc
]


def cublasChemm(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc):
    """
    Hermitian matrix-matrix product (single precision complex).

    Forwards to ``cublasChemm_v2``.  `side` and `uplo` are looked up in
    `_CUBLAS_SIDE_MODE` and `_CUBLAS_FILL_MODE`; `alpha` and `beta` are
    repacked into `cuda.cuFloatComplex` and passed by reference; `A`, `B`
    and `C` are converted with ``int()`` and passed as untyped pointers.
    The returned status goes to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>hemm <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-hemm>`_
    """
    call_args = (
        handle, _CUBLAS_SIDE_MODE[side], _CUBLAS_FILL_MODE[uplo], m, n,
        ctypes.byref(cuda.cuFloatComplex(alpha.real, alpha.imag)),
        int(A), lda, int(B), ldb,
        ctypes.byref(cuda.cuFloatComplex(beta.real, beta.imag)),
        int(C), ldc,
    )
    cublasCheckStatus(_libcublas.cublasChemm_v2(*call_args))
_libcublas.cublasZhemm_v2.restype = int
_libcublas.cublasZhemm_v2.argtypes = [
    _types.handle,                   # handle
    ctypes.c_int, ctypes.c_int,      # side, uplo
    ctypes.c_int, ctypes.c_int,      # m, n
    ctypes.c_void_p,                 # alpha
    ctypes.c_void_p, ctypes.c_int,   # A, lda
    ctypes.c_void_p, ctypes.c_int,   # B, ldb
    ctypes.c_void_p,                 # beta
    ctypes.c_void_p, ctypes.c_int,   # C, ldc
]


def cublasZhemm(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc):
    """
    Hermitian matrix-matrix product (double precision complex).

    Forwards to ``cublasZhemm_v2``.  `side` and `uplo` are looked up in
    `_CUBLAS_SIDE_MODE` and `_CUBLAS_FILL_MODE`; `alpha` and `beta` are
    repacked into `cuda.cuDoubleComplex` and passed by reference; `A`, `B`
    and `C` are converted with ``int()`` and passed as untyped pointers.
    The returned status goes to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>hemm <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-hemm>`_
    """
    call_args = (
        handle, _CUBLAS_SIDE_MODE[side], _CUBLAS_FILL_MODE[uplo], m, n,
        ctypes.byref(cuda.cuDoubleComplex(alpha.real, alpha.imag)),
        int(A), lda, int(B), ldb,
        ctypes.byref(cuda.cuDoubleComplex(beta.real, beta.imag)),
        int(C), ldc,
    )
    cublasCheckStatus(_libcublas.cublasZhemm_v2(*call_args))
# CHERK, ZHERK
_libcublas.cublasCherk_v2.restype = int
_libcublas.cublasCherk_v2.argtypes = [
    _types.handle,                   # handle
    ctypes.c_int, ctypes.c_int,      # uplo, trans
    ctypes.c_int, ctypes.c_int,      # n, k
    ctypes.c_void_p,                 # alpha
    ctypes.c_void_p, ctypes.c_int,   # A, lda
    ctypes.c_void_p,                 # beta
    ctypes.c_void_p, ctypes.c_int,   # C, ldc
]


def cublasCherk(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc):
    """
    Hermitian rank-k update (single precision complex).

    Forwards to ``cublasCherk_v2``.  `uplo` and `trans` are looked up in
    `_CUBLAS_FILL_MODE` and `_CUBLAS_OP`; `alpha` and `beta` are wrapped in
    ``ctypes.c_float`` and passed by reference; `A` and `C` are converted
    with ``int()`` and passed as untyped pointers.  The returned status goes
    to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>herk <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-herk>`_
    """
    call_args = (
        handle, _CUBLAS_FILL_MODE[uplo], _CUBLAS_OP[trans], n, k,
        ctypes.byref(ctypes.c_float(alpha)),
        int(A), lda,
        ctypes.byref(ctypes.c_float(beta)),
        int(C), ldc,
    )
    cublasCheckStatus(_libcublas.cublasCherk_v2(*call_args))
_libcublas.cublasZherk_v2.restype = int
_libcublas.cublasZherk_v2.argtypes = [
    _types.handle,                   # handle
    ctypes.c_int, ctypes.c_int,      # uplo, trans
    ctypes.c_int, ctypes.c_int,      # n, k
    ctypes.c_void_p,                 # alpha
    ctypes.c_void_p, ctypes.c_int,   # A, lda
    ctypes.c_void_p,                 # beta
    ctypes.c_void_p, ctypes.c_int,   # C, ldc
]


def cublasZherk(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc):
    """
    Hermitian rank-k update (double precision complex).

    Forwards to ``cublasZherk_v2``.  `uplo` and `trans` are looked up in
    `_CUBLAS_FILL_MODE` and `_CUBLAS_OP`; `alpha` and `beta` are wrapped in
    ``ctypes.c_double`` and passed by reference; `A` and `C` are converted
    with ``int()`` and passed as untyped pointers.  The returned status goes
    to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>herk <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-herk>`_
    """
    call_args = (
        handle, _CUBLAS_FILL_MODE[uplo], _CUBLAS_OP[trans], n, k,
        ctypes.byref(ctypes.c_double(alpha)),
        int(A), lda,
        ctypes.byref(ctypes.c_double(beta)),
        int(C), ldc,
    )
    cublasCheckStatus(_libcublas.cublasZherk_v2(*call_args))
# CHER2K, ZHER2K
_libcublas.cublasCher2k_v2.restype = int
_libcublas.cublasCher2k_v2.argtypes = [
    _types.handle,                   # handle
    ctypes.c_int, ctypes.c_int,      # uplo, trans
    ctypes.c_int, ctypes.c_int,      # n, k
    ctypes.c_void_p,                 # alpha (pointer to complex)
    ctypes.c_void_p, ctypes.c_int,   # A, lda
    ctypes.c_void_p, ctypes.c_int,   # B, ldb
    ctypes.c_void_p,                 # beta (pointer to float, not a value)
    ctypes.c_void_p, ctypes.c_int,   # C, ldc
]


def cublasCher2k(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc):
    """
    Rank-2k operation on single precision Hermitian matrix.

    Forwards to ``cublasCher2k_v2`` and passes the returned status to
    `cublasCheckStatus`.

    Parameters
    ----------
    handle
        CUBLAS context, passed through unchanged.
    uplo
        Key into `_CUBLAS_FILL_MODE`.
    trans
        Key into `_CUBLAS_OP`.
    n, k : int
        Passed through as the ``n``/``k`` arguments.
    alpha : complex
        Scalar; repacked from ``.real``/``.imag`` into
        `cuda.cuFloatComplex` and passed by reference.
    A, B, C
        Pointers; converted with ``int()`` and passed as ``void *``.
    lda, ldb, ldc : int
        Passed through as the leading-dimension arguments.
    beta : float
        Real scalar.  Only ``beta.real`` is used; it is wrapped in
        ``ctypes.c_float`` and passed by reference.

    References
    ----------
    `cublas<t>her2k <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-her2k>`_
    """
    # cuBLAS declares beta as `const float *` for her2k, so it is passed as
    # a pointer to a real float rather than as a complex struct or a
    # by-value float.
    call_args = (
        handle, _CUBLAS_FILL_MODE[uplo], _CUBLAS_OP[trans], n, k,
        ctypes.byref(cuda.cuFloatComplex(alpha.real, alpha.imag)),
        int(A), lda, int(B), ldb,
        ctypes.byref(ctypes.c_float(beta.real)),
        int(C), ldc,
    )
    cublasCheckStatus(_libcublas.cublasCher2k_v2(*call_args))
_libcublas.cublasZher2k_v2.restype = int
_libcublas.cublasZher2k_v2.argtypes = [
    _types.handle,                   # handle
    ctypes.c_int, ctypes.c_int,      # uplo, trans
    ctypes.c_int, ctypes.c_int,      # n, k
    ctypes.c_void_p,                 # alpha (pointer to complex)
    ctypes.c_void_p, ctypes.c_int,   # A, lda
    ctypes.c_void_p, ctypes.c_int,   # B, ldb
    ctypes.c_void_p,                 # beta (pointer to double)
    ctypes.c_void_p, ctypes.c_int,   # C, ldc
]


def cublasZher2k(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc):
    """
    Rank-2k operation on double precision Hermitian matrix.

    Forwards to ``cublasZher2k_v2`` and passes the returned status to
    `cublasCheckStatus`.

    Parameters
    ----------
    handle
        CUBLAS context, passed through unchanged.
    uplo
        Key into `_CUBLAS_FILL_MODE`.
    trans
        Key into `_CUBLAS_OP`.
    n, k : int
        Passed through as the ``n``/``k`` arguments.
    alpha : complex
        Scalar; repacked from ``.real``/``.imag`` into
        `cuda.cuDoubleComplex` and passed by reference.
    A, B, C
        Pointers; converted with ``int()`` and passed as ``void *``.
    lda, ldb, ldc : int
        Passed through as the leading-dimension arguments.
    beta : float
        Real scalar.  Only ``beta.real`` is used; it is wrapped in
        ``ctypes.c_double`` and passed by reference.

    References
    ----------
    `cublas<t>her2k <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-her2k>`_
    """
    # cuBLAS declares beta as `const double *` for her2k.  Passing a pointer
    # to a complex struct only worked if the real part happened to be laid
    # out first; pass an explicit C double instead.
    call_args = (
        handle, _CUBLAS_FILL_MODE[uplo], _CUBLAS_OP[trans], n, k,
        ctypes.byref(cuda.cuDoubleComplex(alpha.real, alpha.imag)),
        int(A), lda, int(B), ldb,
        ctypes.byref(ctypes.c_double(beta.real)),
        int(C), ldc,
    )
    cublasCheckStatus(_libcublas.cublasZher2k_v2(*call_args))
### BLAS-like extension routines ###
# SGEAM, DGEAM, CGEAM, ZGEAM
# Shared docstring template for the cublas<t>geam wrappers.  Each wrapper
# fills in the ${...} placeholders via `substitute` and assigns the result to
# its own `__doc__`.
_GEAM_doc = Template(
"""
    Matrix-matrix addition/transposition (${precision} ${real}).

    Computes the sum of two ${precision} ${real} scaled and possibly (conjugate)
    transposed matrices.

    Parameters
    ----------
    handle : int
        CUBLAS context
    transa, transb : char
        't' if they are transposed, 'c' if they are conjugate transposed,
        'n' if otherwise.
    m : int
        Number of rows in `A` and `C`.
    n : int
        Number of columns in `B` and `C`.
    alpha : ${num_type}
        Constant by which to scale `A`.
    A : ctypes.c_void_p
        Pointer to first matrix operand (`A`).
    lda : int
        Leading dimension of `A`.
    beta : ${num_type}
        Constant by which to scale `B`.
    B : ctypes.c_void_p
        Pointer to second matrix operand (`B`).
    ldb : int
        Leading dimension of `B`.
    C : ctypes.c_void_p
        Pointer to result matrix (`C`).
    ldc : int
        Leading dimension of `C`.

    Examples
    --------
    >>> import pycuda.autoinit
    >>> import pycuda.gpuarray as gpuarray
    >>> import numpy as np
    >>> alpha = ${alpha_data}
    >>> beta = ${beta_data}
    >>> a = ${a_data_1}
    >>> b = ${b_data_1}
    >>> c = ${c_data_1}
    >>> a_gpu = gpuarray.to_gpu(a)
    >>> b_gpu = gpuarray.to_gpu(b)
    >>> c_gpu = gpuarray.empty(c.shape, c.dtype)
    >>> h = cublasCreate()
    >>> ${func}(h, 'n', 'n', c.shape[0], c.shape[1], alpha, a_gpu.gpudata, a.shape[0], beta, b_gpu.gpudata, b.shape[0], c_gpu.gpudata, c.shape[0])
    >>> np.allclose(c_gpu.get(), c)
    True
    >>> a = ${a_data_2}
    >>> b = ${b_data_2}
    >>> c = ${c_data_2}
    >>> a_gpu = gpuarray.to_gpu(a.T.copy())
    >>> b_gpu = gpuarray.to_gpu(b.T.copy())
    >>> c_gpu = gpuarray.empty(c.T.shape, c.dtype)
    >>> transa = 'c' if np.iscomplexobj(a) else 't'
    >>> ${func}(h, transa, 'n', c.shape[0], c.shape[1], alpha, a_gpu.gpudata, a.shape[0], beta, b_gpu.gpudata, b.shape[0], c_gpu.gpudata, c.shape[0])
    >>> np.allclose(c_gpu.get().T, c)
    True
    >>> cublasDestroy(h)

    References
    ----------
    `cublas<t>geam <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-geam>`_
""")
if _cublas_version >= 5000:
    # Only declare the prototype when the detected library version is high
    # enough for the guard above.
    _libcublas.cublasSgeam.restype = int
    _libcublas.cublasSgeam.argtypes = [
        _types.handle,                   # handle
        ctypes.c_int, ctypes.c_int,      # transa, transb
        ctypes.c_int, ctypes.c_int,      # m, n
        ctypes.c_void_p,                 # alpha
        ctypes.c_void_p, ctypes.c_int,   # A, lda
        ctypes.c_void_p,                 # beta
        ctypes.c_void_p, ctypes.c_int,   # B, ldb
        ctypes.c_void_p, ctypes.c_int,   # C, ldc
    ]


@_cublas_version_req(5.0)
def cublasSgeam(handle, transa, transb,
                m, n, alpha, A, lda, beta, B, ldb, C, ldc):
    """
    Single precision real matrix-matrix addition/transposition.

    This text is a placeholder: `__doc__` is replaced right after the
    definition with the rendered `_GEAM_doc` template.
    """
    call_args = (
        handle, _CUBLAS_OP[transa], _CUBLAS_OP[transb], m, n,
        ctypes.byref(ctypes.c_float(alpha)),
        int(A), lda,
        ctypes.byref(ctypes.c_float(beta)),
        int(B), ldb,
        int(C), ldc,
    )
    cublasCheckStatus(_libcublas.cublasSgeam(*call_args))


cublasSgeam.__doc__ = _GEAM_doc.substitute({
    'precision': 'single precision',
    'real': 'real',
    'num_type': 'numpy.float32',
    'alpha_data': 'np.float32(np.random.rand())',
    'beta_data': 'np.float32(np.random.rand())',
    'a_data_1': 'np.random.rand(2, 3).astype(np.float32)',
    'b_data_1': 'np.random.rand(2, 3).astype(np.float32)',
    'a_data_2': 'np.random.rand(2, 3).astype(np.float32)',
    'b_data_2': 'np.random.rand(3, 2).astype(np.float32)',
    'c_data_1': 'alpha*a+beta*b',
    'c_data_2': 'alpha*a.T+beta*b',
    'func': 'cublasSgeam',
})
if _cublas_version >= 5000:
    # ctypes prototype; one entry per argument of the wrapper below.
    _libcublas.cublasDgeam.restype = int
    _libcublas.cublasDgeam.argtypes = [
        _types.handle,    # handle
        ctypes.c_int,     # transa
        ctypes.c_int,     # transb
        ctypes.c_int,     # m
        ctypes.c_int,     # n
        ctypes.c_void_p,  # alpha (by reference)
        ctypes.c_void_p,  # A
        ctypes.c_int,     # lda
        ctypes.c_void_p,  # beta (by reference)
        ctypes.c_void_p,  # B
        ctypes.c_int,     # ldb
        ctypes.c_void_p,  # C
        ctypes.c_int,     # ldc
    ]


@_cublas_version_req(5.0)
def cublasDgeam(handle, transa, transb,
                m, n, alpha, A, lda, beta, B, ldb, C, ldc):
    """
    Real matrix-matrix addition/transposition.

    This text is a placeholder: `__doc__` is overwritten immediately
    after the definition with the rendered `_GEAM_doc` template.
    """
    # `transa`/`transb` are translated through `_CUBLAS_OP`; the scalars
    # are boxed as C doubles so they can be handed over by reference, and
    # the matrix arguments are reduced to plain integer addresses.
    status = _libcublas.cublasDgeam(
        handle,
        _CUBLAS_OP[transa], _CUBLAS_OP[transb],
        m, n,
        ctypes.byref(ctypes.c_double(alpha)),
        int(A), lda,
        ctypes.byref(ctypes.c_double(beta)),
        int(B), ldb,
        int(C), ldc)
    cublasCheckStatus(status)


cublasDgeam.__doc__ = _GEAM_doc.substitute(
    precision='double precision',
    real='real',
    num_type='numpy.float64',
    alpha_data='np.float64(np.random.rand())',
    beta_data='np.float64(np.random.rand())',
    a_data_1='np.random.rand(2, 3).astype(np.float64)',
    b_data_1='np.random.rand(2, 3).astype(np.float64)',
    a_data_2='np.random.rand(2, 3).astype(np.float64)',
    b_data_2='np.random.rand(3, 2).astype(np.float64)',
    c_data_1='alpha*a+beta*b',
    c_data_2='alpha*a.T+beta*b',
    func='cublasDgeam')
if _cublas_version >= 5000:
    # ctypes prototype; one entry per argument of the wrapper below.
    _libcublas.cublasCgeam.restype = int
    _libcublas.cublasCgeam.argtypes = [
        _types.handle,    # handle
        ctypes.c_int,     # transa
        ctypes.c_int,     # transb
        ctypes.c_int,     # m
        ctypes.c_int,     # n
        ctypes.c_void_p,  # alpha (by reference)
        ctypes.c_void_p,  # A
        ctypes.c_int,     # lda
        ctypes.c_void_p,  # beta (by reference)
        ctypes.c_void_p,  # B
        ctypes.c_int,     # ldb
        ctypes.c_void_p,  # C
        ctypes.c_int,     # ldc
    ]


@_cublas_version_req(5.0)
def cublasCgeam(handle, transa, transb,
                m, n, alpha, A, lda, beta, B, ldb, C, ldc):
    """
    Complex matrix-matrix addition/transposition.

    This text is a placeholder: `__doc__` is overwritten immediately
    after the definition with the rendered `_GEAM_doc` template.
    """
    # The complex scalars are rebuilt as `cuda.cuFloatComplex` from their
    # real/imaginary parts so they can be handed over by reference; the
    # matrix arguments are reduced to plain integer addresses.
    status = _libcublas.cublasCgeam(
        handle,
        _CUBLAS_OP[transa], _CUBLAS_OP[transb],
        m, n,
        ctypes.byref(cuda.cuFloatComplex(alpha.real, alpha.imag)),
        int(A), lda,
        ctypes.byref(cuda.cuFloatComplex(beta.real, beta.imag)),
        int(B), ldb,
        int(C), ldc)
    cublasCheckStatus(status)


cublasCgeam.__doc__ = _GEAM_doc.substitute(
    precision='single precision',
    real='complex',
    num_type='numpy.complex64',
    alpha_data='np.complex64(np.random.rand()+1j*np.random.rand())',
    beta_data='np.complex64(np.random.rand()+1j*np.random.rand())',
    a_data_1='(np.random.rand(2, 3)+1j*np.random.rand(2, 3)).astype(np.complex64)',
    a_data_2='(np.random.rand(2, 3)+1j*np.random.rand(2, 3)).astype(np.complex64)',
    b_data_1='(np.random.rand(2, 3)+1j*np.random.rand(2, 3)).astype(np.complex64)',
    b_data_2='(np.random.rand(3, 2)+1j*np.random.rand(3, 2)).astype(np.complex64)',
    c_data_1='alpha*a+beta*b',
    c_data_2='alpha*np.conj(a).T+beta*b',
    func='cublasCgeam')
if _cublas_version >= 5000:
    # ctypes prototype; one entry per argument of the wrapper below.
    _libcublas.cublasZgeam.restype = int
    _libcublas.cublasZgeam.argtypes = [
        _types.handle,    # handle
        ctypes.c_int,     # transa
        ctypes.c_int,     # transb
        ctypes.c_int,     # m
        ctypes.c_int,     # n
        ctypes.c_void_p,  # alpha (by reference)
        ctypes.c_void_p,  # A
        ctypes.c_int,     # lda
        ctypes.c_void_p,  # beta (by reference)
        ctypes.c_void_p,  # B
        ctypes.c_int,     # ldb
        ctypes.c_void_p,  # C
        ctypes.c_int,     # ldc
    ]


@_cublas_version_req(5.0)
def cublasZgeam(handle, transa, transb,
                m, n, alpha, A, lda, beta, B, ldb, C, ldc):
    """
    Complex matrix-matrix addition/transposition.

    This text is a placeholder: `__doc__` is overwritten immediately
    after the definition with the rendered `_GEAM_doc` template.
    """
    # The complex scalars are rebuilt as `cuda.cuDoubleComplex` from their
    # real/imaginary parts so they can be handed over by reference; the
    # matrix arguments are reduced to plain integer addresses.
    status = _libcublas.cublasZgeam(
        handle,
        _CUBLAS_OP[transa], _CUBLAS_OP[transb],
        m, n,
        ctypes.byref(cuda.cuDoubleComplex(alpha.real, alpha.imag)),
        int(A), lda,
        ctypes.byref(cuda.cuDoubleComplex(beta.real, beta.imag)),
        int(B), ldb,
        int(C), ldc)
    cublasCheckStatus(status)


cublasZgeam.__doc__ = _GEAM_doc.substitute(
    precision='double precision',
    real='complex',
    num_type='numpy.complex128',
    alpha_data='np.complex128(np.random.rand()+1j*np.random.rand())',
    beta_data='np.complex128(np.random.rand()+1j*np.random.rand())',
    a_data_1='(np.random.rand(2, 3)+1j*np.random.rand(2, 3)).astype(np.complex128)',
    a_data_2='(np.random.rand(2, 3)+1j*np.random.rand(2, 3)).astype(np.complex128)',
    b_data_1='(np.random.rand(2, 3)+1j*np.random.rand(2, 3)).astype(np.complex128)',
    b_data_2='(np.random.rand(3, 2)+1j*np.random.rand(3, 2)).astype(np.complex128)',
    c_data_1='alpha*a+beta*b',
    c_data_2='alpha*np.conj(a).T+beta*b',
    func='cublasZgeam')
### Batched routines ###
# SgemmBatched, DgemmBatched
if _cublas_version >= 5000:
    # ctypes prototype; one entry per argument of the wrapper below.
    _libcublas.cublasSgemmBatched.restype = int
    _libcublas.cublasSgemmBatched.argtypes = [
        _types.handle,    # handle
        ctypes.c_int,     # transa
        ctypes.c_int,     # transb
        ctypes.c_int,     # m
        ctypes.c_int,     # n
        ctypes.c_int,     # k
        ctypes.c_void_p,  # alpha (by reference)
        ctypes.c_void_p,  # A
        ctypes.c_int,     # lda
        ctypes.c_void_p,  # B
        ctypes.c_int,     # ldb
        ctypes.c_void_p,  # beta (by reference)
        ctypes.c_void_p,  # C
        ctypes.c_int,     # ldc
        ctypes.c_int,     # batchCount
    ]


@_cublas_version_req(5.0)
def cublasSgemmBatched(handle, transa, transb, m, n, k,
                       alpha, A, lda, B, ldb, beta, C, ldc, batchCount):
    """
    Matrix-matrix product for arrays of real single precision general matrices.

    Parameters
    ----------
    handle : int
        CUBLAS context.
    transa, transb : char
        Keys into `_CUBLAS_OP` ('n', 't' or 'c').
    m, n, k : int
        Problem dimensions, forwarded unchanged.
    alpha, beta : float
        Scalars; boxed as `ctypes.c_float` and passed by reference.
    A, B, C : int-convertible
        Pointer values, converted with `int()`. The cuBLAS reference
        describes them as arrays of per-matrix pointers.
    lda, ldb, ldc : int
        Leading dimensions.
    batchCount : int
        Number of problems in the batch.

    Notes
    -----
    The returned status is handed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>gemmBatched <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemmbatched>`_
    """
    status = _libcublas.cublasSgemmBatched(
        handle,
        _CUBLAS_OP[transa], _CUBLAS_OP[transb],
        m, n, k,
        ctypes.byref(ctypes.c_float(alpha)),
        int(A), lda,
        int(B), ldb,
        ctypes.byref(ctypes.c_float(beta)),
        int(C), ldc,
        batchCount)
    cublasCheckStatus(status)
if _cublas_version >= 5000:
    # ctypes prototype; one entry per argument of the wrapper below.
    _libcublas.cublasDgemmBatched.restype = int
    _libcublas.cublasDgemmBatched.argtypes = [
        _types.handle,    # handle
        ctypes.c_int,     # transa
        ctypes.c_int,     # transb
        ctypes.c_int,     # m
        ctypes.c_int,     # n
        ctypes.c_int,     # k
        ctypes.c_void_p,  # alpha (by reference)
        ctypes.c_void_p,  # A
        ctypes.c_int,     # lda
        ctypes.c_void_p,  # B
        ctypes.c_int,     # ldb
        ctypes.c_void_p,  # beta (by reference)
        ctypes.c_void_p,  # C
        ctypes.c_int,     # ldc
        ctypes.c_int,     # batchCount
    ]


@_cublas_version_req(5.0)
def cublasDgemmBatched(handle, transa, transb, m, n, k,
                       alpha, A, lda, B, ldb, beta, C, ldc, batchCount):
    """
    Matrix-matrix product for arrays of real double precision general matrices.

    Parameters
    ----------
    handle : int
        CUBLAS context.
    transa, transb : char
        Keys into `_CUBLAS_OP` ('n', 't' or 'c').
    m, n, k : int
        Problem dimensions, forwarded unchanged.
    alpha, beta : float
        Scalars; boxed as `ctypes.c_double` and passed by reference.
    A, B, C : int-convertible
        Pointer values, converted with `int()`. The cuBLAS reference
        describes them as arrays of per-matrix pointers.
    lda, ldb, ldc : int
        Leading dimensions.
    batchCount : int
        Number of problems in the batch.

    Notes
    -----
    The returned status is handed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>gemmBatched <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemmbatched>`_
    """
    status = _libcublas.cublasDgemmBatched(
        handle,
        _CUBLAS_OP[transa], _CUBLAS_OP[transb],
        m, n, k,
        ctypes.byref(ctypes.c_double(alpha)),
        int(A), lda,
        int(B), ldb,
        ctypes.byref(ctypes.c_double(beta)),
        int(C), ldc,
        batchCount)
    cublasCheckStatus(status)
# CgemmBatched, ZgemmBatched
if _cublas_version >= 5000:
    # ctypes prototype; one entry per argument of the wrapper below.
    _libcublas.cublasCgemmBatched.restype = int
    _libcublas.cublasCgemmBatched.argtypes = [
        _types.handle,    # handle
        ctypes.c_int,     # transa
        ctypes.c_int,     # transb
        ctypes.c_int,     # m
        ctypes.c_int,     # n
        ctypes.c_int,     # k
        ctypes.c_void_p,  # alpha (by reference)
        ctypes.c_void_p,  # A
        ctypes.c_int,     # lda
        ctypes.c_void_p,  # B
        ctypes.c_int,     # ldb
        ctypes.c_void_p,  # beta (by reference)
        ctypes.c_void_p,  # C
        ctypes.c_int,     # ldc
        ctypes.c_int,     # batchCount
    ]


@_cublas_version_req(5.0)
def cublasCgemmBatched(handle, transa, transb, m, n, k,
                       alpha, A, lda, B, ldb, beta, C, ldc, batchCount):
    """
    Matrix-matrix product for arrays of complex single precision general matrices.

    Parameters
    ----------
    handle : int
        CUBLAS context.
    transa, transb : char
        Keys into `_CUBLAS_OP` ('n', 't' or 'c').
    m, n, k : int
        Problem dimensions, forwarded unchanged.
    alpha, beta : complex
        Scalars; their `.real`/`.imag` parts are packed into
        `cuda.cuFloatComplex` and passed by reference.
    A, B, C : int-convertible
        Pointer values, converted with `int()`. The cuBLAS reference
        describes them as arrays of per-matrix pointers.
    lda, ldb, ldc : int
        Leading dimensions.
    batchCount : int
        Number of problems in the batch.

    Notes
    -----
    The returned status is handed to `cublasCheckStatus`, as in the real
    single/double precision variants.

    References
    ----------
    `cublas<t>gemmBatched <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemmbatched>`_
    """
    status = _libcublas.cublasCgemmBatched(
        handle,
        _CUBLAS_OP[transa], _CUBLAS_OP[transb],
        m, n, k,
        ctypes.byref(cuda.cuFloatComplex(alpha.real, alpha.imag)),
        int(A), lda,
        int(B), ldb,
        ctypes.byref(cuda.cuFloatComplex(beta.real, beta.imag)),
        int(C), ldc,
        batchCount)
    # Previously the status was computed but never inspected, so a failed
    # call went unnoticed.
    cublasCheckStatus(status)
if _cublas_version >= 5000:
    # ctypes prototype; one entry per argument of the wrapper below.
    _libcublas.cublasZgemmBatched.restype = int
    _libcublas.cublasZgemmBatched.argtypes = [
        _types.handle,    # handle
        ctypes.c_int,     # transa
        ctypes.c_int,     # transb
        ctypes.c_int,     # m
        ctypes.c_int,     # n
        ctypes.c_int,     # k
        ctypes.c_void_p,  # alpha (by reference)
        ctypes.c_void_p,  # A
        ctypes.c_int,     # lda
        ctypes.c_void_p,  # B
        ctypes.c_int,     # ldb
        ctypes.c_void_p,  # beta (by reference)
        ctypes.c_void_p,  # C
        ctypes.c_int,     # ldc
        ctypes.c_int,     # batchCount
    ]


@_cublas_version_req(5.0)
def cublasZgemmBatched(handle, transa, transb, m, n, k,
                       alpha, A, lda, B, ldb, beta, C, ldc, batchCount):
    """
    Matrix-matrix product for arrays of complex double precision general matrices.

    Parameters
    ----------
    handle : int
        CUBLAS context.
    transa, transb : char
        Keys into `_CUBLAS_OP` ('n', 't' or 'c').
    m, n, k : int
        Problem dimensions, forwarded unchanged.
    alpha, beta : complex
        Scalars; their `.real`/`.imag` parts are packed into
        `cuda.cuDoubleComplex` and passed by reference.
    A, B, C : int-convertible
        Pointer values, converted with `int()`. The cuBLAS reference
        describes them as arrays of per-matrix pointers.
    lda, ldb, ldc : int
        Leading dimensions.
    batchCount : int
        Number of problems in the batch.

    Notes
    -----
    The returned status is handed to `cublasCheckStatus`, as in the real
    single/double precision variants.

    References
    ----------
    `cublas<t>gemmBatched <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemmbatched>`_
    """
    status = _libcublas.cublasZgemmBatched(
        handle,
        _CUBLAS_OP[transa], _CUBLAS_OP[transb],
        m, n, k,
        ctypes.byref(cuda.cuDoubleComplex(alpha.real, alpha.imag)),
        int(A), lda,
        int(B), ldb,
        ctypes.byref(cuda.cuDoubleComplex(beta.real, beta.imag)),
        int(C), ldc,
        batchCount)
    # Previously the status was computed but never inspected, so a failed
    # call went unnoticed.
    cublasCheckStatus(status)
# StrsmBatched, DtrsmBatched
if _cublas_version >= 5000:
    # ctypes prototype; one entry per argument of the wrapper below.
    _libcublas.cublasStrsmBatched.restype = int
    _libcublas.cublasStrsmBatched.argtypes = [
        _types.handle,    # handle
        ctypes.c_int,     # side
        ctypes.c_int,     # uplo
        ctypes.c_int,     # trans
        ctypes.c_int,     # diag
        ctypes.c_int,     # m
        ctypes.c_int,     # n
        ctypes.c_void_p,  # alpha (by reference)
        ctypes.c_void_p,  # A
        ctypes.c_int,     # lda
        ctypes.c_void_p,  # B
        ctypes.c_int,     # ldb
        ctypes.c_int,     # batchCount
    ]


@_cublas_version_req(5.0)
def cublasStrsmBatched(handle, side, uplo, trans, diag, m, n, alpha,
                       A, lda, B, ldb, batchCount):
    """
    This function solves an array of triangular linear systems with multiple right-hand-sides.

    Parameters
    ----------
    handle : int
        CUBLAS context.
    side, uplo, trans, diag
        Keys into `_CUBLAS_SIDE_MODE`, `_CUBLAS_FILL_MODE`, `_CUBLAS_OP`
        and `_CUBLAS_DIAG` respectively.
    m, n : int
        Problem dimensions, forwarded unchanged.
    alpha : float
        Scalar; boxed as `ctypes.c_float` and passed by reference.
    A, B : int-convertible
        Pointer values, converted with `int()`.
    lda, ldb : int
        Leading dimensions.
    batchCount : int
        Number of systems in the batch.

    Notes
    -----
    The returned status is handed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>trsmBatched <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-trsmbatched>`_
    """
    status = _libcublas.cublasStrsmBatched(
        handle,
        _CUBLAS_SIDE_MODE[side],
        _CUBLAS_FILL_MODE[uplo],
        _CUBLAS_OP[trans],
        _CUBLAS_DIAG[diag],
        m, n,
        ctypes.byref(ctypes.c_float(alpha)),
        int(A), lda,
        int(B), ldb,
        batchCount)
    cublasCheckStatus(status)
if _cublas_version >= 5000:
    # ctypes prototype; one entry per argument of the wrapper below.
    _libcublas.cublasDtrsmBatched.restype = int
    _libcublas.cublasDtrsmBatched.argtypes = [
        _types.handle,    # handle
        ctypes.c_int,     # side
        ctypes.c_int,     # uplo
        ctypes.c_int,     # trans
        ctypes.c_int,     # diag
        ctypes.c_int,     # m
        ctypes.c_int,     # n
        ctypes.c_void_p,  # alpha (by reference)
        ctypes.c_void_p,  # A
        ctypes.c_int,     # lda
        ctypes.c_void_p,  # B
        ctypes.c_int,     # ldb
        ctypes.c_int,     # batchCount
    ]


@_cublas_version_req(5.0)
def cublasDtrsmBatched(handle, side, uplo, trans, diag, m, n, alpha,
                       A, lda, B, ldb, batchCount):
    """
    This function solves an array of triangular linear systems with multiple right-hand-sides.

    Parameters
    ----------
    handle : int
        CUBLAS context.
    side, uplo, trans, diag
        Keys into `_CUBLAS_SIDE_MODE`, `_CUBLAS_FILL_MODE`, `_CUBLAS_OP`
        and `_CUBLAS_DIAG` respectively.
    m, n : int
        Problem dimensions, forwarded unchanged.
    alpha : float
        Scalar; boxed as `ctypes.c_double` and passed by reference.
    A, B : int-convertible
        Pointer values, converted with `int()`.
    lda, ldb : int
        Leading dimensions.
    batchCount : int
        Number of systems in the batch.

    Notes
    -----
    The returned status is handed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>trsmBatched <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-trsmbatched>`_
    """
    status = _libcublas.cublasDtrsmBatched(
        handle,
        _CUBLAS_SIDE_MODE[side],
        _CUBLAS_FILL_MODE[uplo],
        _CUBLAS_OP[trans],
        _CUBLAS_DIAG[diag],
        m, n,
        ctypes.byref(ctypes.c_double(alpha)),
        int(A), lda,
        int(B), ldb,
        batchCount)
    cublasCheckStatus(status)
# SgetrfBatched, DgetrfBatched, CgetrfBatched, ZgetrfBatched
if _cublas_version >= 5000:
    # ctypes prototype; one entry per argument of the wrapper below.
    _libcublas.cublasSgetrfBatched.restype = int
    _libcublas.cublasSgetrfBatched.argtypes = [
        _types.handle,    # handle
        ctypes.c_int,     # n
        ctypes.c_void_p,  # A
        ctypes.c_int,     # lda
        ctypes.c_void_p,  # P
        ctypes.c_void_p,  # info
        ctypes.c_int,     # batchSize
    ]


@_cublas_version_req(5.0)
def cublasSgetrfBatched(handle, n, A, lda, P, info, batchSize):
    """
    This function performs the LU factorization of an array of n x n matrices.

    Parameters
    ----------
    handle : int
        CUBLAS context.
    n : int
        Order of each matrix.
    A, P, info : int-convertible
        Pointer values, converted with `int()`: the matrix pointer array,
        the pivot buffer and the per-matrix status buffer (roles as given
        in the cuBLAS reference).
    lda : int
        Leading dimension of each matrix.
    batchSize : int
        Number of matrices in the batch.

    Notes
    -----
    The returned status is handed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>getrfBatched <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-getrfbatched>`_
    """
    status = _libcublas.cublasSgetrfBatched(
        handle, n,
        int(A), lda,
        int(P), int(info),
        batchSize)
    cublasCheckStatus(status)
if _cublas_version >= 5000:
    # ctypes prototype; one entry per argument of the wrapper below.
    _libcublas.cublasDgetrfBatched.restype = int
    _libcublas.cublasDgetrfBatched.argtypes = [
        _types.handle,    # handle
        ctypes.c_int,     # n
        ctypes.c_void_p,  # A
        ctypes.c_int,     # lda
        ctypes.c_void_p,  # P
        ctypes.c_void_p,  # info
        ctypes.c_int,     # batchSize
    ]


@_cublas_version_req(5.0)
def cublasDgetrfBatched(handle, n, A, lda, P, info, batchSize):
    """
    This function performs the LU factorization of an array of n x n matrices.

    Parameters
    ----------
    handle : int
        CUBLAS context.
    n : int
        Order of each matrix.
    A, P, info : int-convertible
        Pointer values, converted with `int()`: the matrix pointer array,
        the pivot buffer and the per-matrix status buffer (roles as given
        in the cuBLAS reference).
    lda : int
        Leading dimension of each matrix.
    batchSize : int
        Number of matrices in the batch.

    Notes
    -----
    The returned status is handed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>getrfBatched <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-getrfbatched>`_
    """
    status = _libcublas.cublasDgetrfBatched(
        handle, n,
        int(A), lda,
        int(P), int(info),
        batchSize)
    cublasCheckStatus(status)
if _cublas_version >= 5000:
    # ctypes prototype; one entry per argument of the wrapper below.
    _libcublas.cublasCgetrfBatched.restype = int
    _libcublas.cublasCgetrfBatched.argtypes = [
        _types.handle,    # handle
        ctypes.c_int,     # n
        ctypes.c_void_p,  # A
        ctypes.c_int,     # lda
        ctypes.c_void_p,  # P
        ctypes.c_void_p,  # info
        ctypes.c_int,     # batchSize
    ]


@_cublas_version_req(5.0)
def cublasCgetrfBatched(handle, n, A, lda, P, info, batchSize):
    """
    This function performs the LU factorization of an array of n x n matrices.

    Parameters
    ----------
    handle : int
        CUBLAS context.
    n : int
        Order of each matrix.
    A, P, info : int-convertible
        Pointer values, converted with `int()`: the matrix pointer array,
        the pivot buffer and the per-matrix status buffer (roles as given
        in the cuBLAS reference).
    lda : int
        Leading dimension of each matrix.
    batchSize : int
        Number of matrices in the batch.

    Notes
    -----
    The returned status is handed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>getrfBatched <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-getrfbatched>`_
    """
    getrf = _libcublas.cublasCgetrfBatched
    # Reduce the pointer-like arguments to plain integer addresses.
    a_ptr, pivot_ptr, info_ptr = int(A), int(P), int(info)
    cublasCheckStatus(
        getrf(handle, n, a_ptr, lda, pivot_ptr, info_ptr, batchSize))
if _cublas_version >= 5000:
    # ctypes prototype; one entry per argument of the wrapper below.
    _libcublas.cublasZgetrfBatched.restype = int
    _libcublas.cublasZgetrfBatched.argtypes = [
        _types.handle,    # handle
        ctypes.c_int,     # n
        ctypes.c_void_p,  # A
        ctypes.c_int,     # lda
        ctypes.c_void_p,  # P
        ctypes.c_void_p,  # info
        ctypes.c_int,     # batchSize
    ]


@_cublas_version_req(5.0)
def cublasZgetrfBatched(handle, n, A, lda, P, info, batchSize):
    """
    This function performs the LU factorization of an array of n x n matrices.

    Parameters
    ----------
    handle : int
        CUBLAS context.
    n : int
        Order of each matrix.
    A, P, info : int-convertible
        Pointer values, converted with `int()`: the matrix pointer array,
        the pivot buffer and the per-matrix status buffer (roles as given
        in the cuBLAS reference).
    lda : int
        Leading dimension of each matrix.
    batchSize : int
        Number of matrices in the batch.

    Notes
    -----
    The returned status is handed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>getrfBatched <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-getrfbatched>`_
    """
    getrf = _libcublas.cublasZgetrfBatched
    # Reduce the pointer-like arguments to plain integer addresses.
    a_ptr, pivot_ptr, info_ptr = int(A), int(P), int(info)
    cublasCheckStatus(
        getrf(handle, n, a_ptr, lda, pivot_ptr, info_ptr, batchSize))
# SgetrsBatched, DgetrsBatched, CgetrsBatched, ZgetrsBatched
if _cublas_version >= 5000:
    # ctypes prototype; one entry per argument of the wrapper below.
    _libcublas.cublasSgetrsBatched.restype = int
    _libcublas.cublasSgetrsBatched.argtypes = [
        _types.handle,    # handle
        ctypes.c_int,     # trans
        ctypes.c_int,     # n
        ctypes.c_int,     # nrhs
        ctypes.c_void_p,  # Aarray
        ctypes.c_int,     # lda
        ctypes.c_void_p,  # devIpiv
        ctypes.c_void_p,  # Barray
        ctypes.c_int,     # ldb
        ctypes.c_void_p,  # info
        ctypes.c_int,     # batchSize
    ]


@_cublas_version_req(5.0)
def cublasSgetrsBatched(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray,
                        ldb, info, batchSize):
    """
    This function solves an array of LU factored linear systems.

    Parameters
    ----------
    handle : int
        CUBLAS context.
    trans : char
        Key into `_CUBLAS_OP`.
    n, nrhs : int
        Forwarded unchanged (system order and number of right-hand sides
        per the cuBLAS reference).
    Aarray, devIpiv, Barray : int-convertible
        Pointer values, converted with `int()`.
    lda, ldb : int
        Leading dimensions.
    info
        Forwarded as-is, without `int()` conversion.
        NOTE(review): the cuBLAS reference lists this as a host-side
        output; confirm what callers are expected to pass.
    batchSize : int
        Number of systems in the batch.

    Notes
    -----
    The returned status is handed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>getrsBatched <https://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-getrsbatched>`_
    """
    getrs = _libcublas.cublasSgetrsBatched
    op = _CUBLAS_OP[trans]
    # Reduce the pointer-like arguments to plain integer addresses.
    a_ptr, pivot_ptr, b_ptr = int(Aarray), int(devIpiv), int(Barray)
    cublasCheckStatus(
        getrs(handle, op, n, nrhs, a_ptr, lda, pivot_ptr, b_ptr, ldb,
              info, batchSize))
if _cublas_version >= 5000:
    # ctypes prototype; one entry per argument of the wrapper below.
    _libcublas.cublasDgetrsBatched.restype = int
    _libcublas.cublasDgetrsBatched.argtypes = [
        _types.handle,    # handle
        ctypes.c_int,     # trans
        ctypes.c_int,     # n
        ctypes.c_int,     # nrhs
        ctypes.c_void_p,  # Aarray
        ctypes.c_int,     # lda
        ctypes.c_void_p,  # devIpiv
        ctypes.c_void_p,  # Barray
        ctypes.c_int,     # ldb
        ctypes.c_void_p,  # info
        ctypes.c_int,     # batchSize
    ]


@_cublas_version_req(5.0)
def cublasDgetrsBatched(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray,
                        ldb, info, batchSize):
    """
    This function solves an array of LU factored linear systems.

    Parameters
    ----------
    handle : int
        CUBLAS context.
    trans : char
        Key into `_CUBLAS_OP`.
    n, nrhs : int
        Forwarded unchanged (system order and number of right-hand sides
        per the cuBLAS reference).
    Aarray, devIpiv, Barray : int-convertible
        Pointer values, converted with `int()`.
    lda, ldb : int
        Leading dimensions.
    info
        Forwarded as-is, without `int()` conversion.
        NOTE(review): the cuBLAS reference lists this as a host-side
        output; confirm what callers are expected to pass.
    batchSize : int
        Number of systems in the batch.

    Notes
    -----
    The returned status is handed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>getrsBatched <https://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-getrsbatched>`_
    """
    getrs = _libcublas.cublasDgetrsBatched
    op = _CUBLAS_OP[trans]
    # Reduce the pointer-like arguments to plain integer addresses.
    a_ptr, pivot_ptr, b_ptr = int(Aarray), int(devIpiv), int(Barray)
    cublasCheckStatus(
        getrs(handle, op, n, nrhs, a_ptr, lda, pivot_ptr, b_ptr, ldb,
              info, batchSize))
if _cublas_version >= 5000:
    # ctypes prototype; one entry per argument of the wrapper below.
    _libcublas.cublasCgetrsBatched.restype = int
    _libcublas.cublasCgetrsBatched.argtypes = [
        _types.handle,    # handle
        ctypes.c_int,     # trans
        ctypes.c_int,     # n
        ctypes.c_int,     # nrhs
        ctypes.c_void_p,  # Aarray
        ctypes.c_int,     # lda
        ctypes.c_void_p,  # devIpiv
        ctypes.c_void_p,  # Barray
        ctypes.c_int,     # ldb
        ctypes.c_void_p,  # info
        ctypes.c_int,     # batchSize
    ]


@_cublas_version_req(5.0)
def cublasCgetrsBatched(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray,
                        ldb, info, batchSize):
    """
    This function solves an array of LU factored linear systems.

    Parameters
    ----------
    handle : int
        CUBLAS context.
    trans : char
        Key into `_CUBLAS_OP`.
    n, nrhs : int
        Forwarded unchanged (system order and number of right-hand sides
        per the cuBLAS reference).
    Aarray, devIpiv, Barray : int-convertible
        Pointer values, converted with `int()`.
    lda, ldb : int
        Leading dimensions.
    info
        Forwarded as-is, without `int()` conversion.
        NOTE(review): the cuBLAS reference lists this as a host-side
        output; confirm what callers are expected to pass.
    batchSize : int
        Number of systems in the batch.

    Notes
    -----
    The returned status is handed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>getrsBatched <https://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-getrsbatched>`_
    """
    getrs = _libcublas.cublasCgetrsBatched
    op = _CUBLAS_OP[trans]
    # Reduce the pointer-like arguments to plain integer addresses.
    a_ptr, pivot_ptr, b_ptr = int(Aarray), int(devIpiv), int(Barray)
    cublasCheckStatus(
        getrs(handle, op, n, nrhs, a_ptr, lda, pivot_ptr, b_ptr, ldb,
              info, batchSize))
if _cublas_version >= 5000:
    # ctypes prototype; one entry per argument of the wrapper below.
    _libcublas.cublasZgetrsBatched.restype = int
    _libcublas.cublasZgetrsBatched.argtypes = [
        _types.handle,    # handle
        ctypes.c_int,     # trans
        ctypes.c_int,     # n
        ctypes.c_int,     # nrhs
        ctypes.c_void_p,  # Aarray
        ctypes.c_int,     # lda
        ctypes.c_void_p,  # devIpiv
        ctypes.c_void_p,  # Barray
        ctypes.c_int,     # ldb
        ctypes.c_void_p,  # info
        ctypes.c_int,     # batchSize
    ]


@_cublas_version_req(5.0)
def cublasZgetrsBatched(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray,
                        ldb, info, batchSize):
    """
    This function solves an array of LU factored linear systems.

    Parameters
    ----------
    handle : int
        CUBLAS context.
    trans : char
        Key into `_CUBLAS_OP`.
    n, nrhs : int
        Forwarded unchanged (system order and number of right-hand sides
        per the cuBLAS reference).
    Aarray, devIpiv, Barray : int-convertible
        Pointer values, converted with `int()`.
    lda, ldb : int
        Leading dimensions.
    info
        Forwarded as-is, without `int()` conversion.
        NOTE(review): the cuBLAS reference lists this as a host-side
        output; confirm what callers are expected to pass.
    batchSize : int
        Number of systems in the batch.

    Notes
    -----
    The returned status is handed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>getrsBatched <https://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-getrsbatched>`_
    """
    getrs = _libcublas.cublasZgetrsBatched
    op = _CUBLAS_OP[trans]
    # Reduce the pointer-like arguments to plain integer addresses.
    a_ptr, pivot_ptr, b_ptr = int(Aarray), int(devIpiv), int(Barray)
    cublasCheckStatus(
        getrs(handle, op, n, nrhs, a_ptr, lda, pivot_ptr, b_ptr, ldb,
              info, batchSize))
# SgetriBatched, DgetriBatched, CgetriBatched, ZgetriBatched
if _cublas_version >= 5050:
    # ctypes prototype; one entry per argument of the wrapper below.
    _libcublas.cublasSgetriBatched.restype = int
    _libcublas.cublasSgetriBatched.argtypes = [
        _types.handle,    # handle
        ctypes.c_int,     # n
        ctypes.c_void_p,  # A
        ctypes.c_int,     # lda
        ctypes.c_void_p,  # P
        ctypes.c_void_p,  # C
        ctypes.c_int,     # ldc
        ctypes.c_void_p,  # info
        ctypes.c_int,     # batchSize
    ]


@_cublas_version_req(5.5)
def cublasSgetriBatched(handle, n, A, lda, P, C, ldc, info, batchSize):
    """
    This function performs the inversion of an array of n x n matrices.

    Parameters
    ----------
    handle : int
        CUBLAS context.
    n : int
        Order of each matrix.
    A, P, C, info : int-convertible
        Pointer values, converted with `int()`.
    lda, ldc : int
        Leading dimensions.
    batchSize : int
        Number of matrices in the batch.

    Notes
    -----
    The matrices must be factorized first using cublasSgetrfBatched.

    The returned status is handed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>getriBatched <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-getribatched>`_
    """
    getri = _libcublas.cublasSgetriBatched
    # Reduce the pointer-like arguments to plain integer addresses.
    a_ptr, pivot_ptr = int(A), int(P)
    c_ptr, info_ptr = int(C), int(info)
    cublasCheckStatus(
        getri(handle, n, a_ptr, lda, pivot_ptr, c_ptr, ldc, info_ptr,
              batchSize))
if _cublas_version >= 5050:
    # ctypes prototype; one entry per argument of the wrapper below.
    _libcublas.cublasDgetriBatched.restype = int
    _libcublas.cublasDgetriBatched.argtypes = [
        _types.handle,    # handle
        ctypes.c_int,     # n
        ctypes.c_void_p,  # A
        ctypes.c_int,     # lda
        ctypes.c_void_p,  # P
        ctypes.c_void_p,  # C
        ctypes.c_int,     # ldc
        ctypes.c_void_p,  # info
        ctypes.c_int,     # batchSize
    ]


@_cublas_version_req(5.5)
def cublasDgetriBatched(handle, n, A, lda, P, C, ldc, info, batchSize):
    """
    This function performs the inversion of an array of n x n matrices.

    Parameters
    ----------
    handle : int
        CUBLAS context.
    n : int
        Order of each matrix.
    A, P, C, info : int-convertible
        Pointer values, converted with `int()`.
    lda, ldc : int
        Leading dimensions.
    batchSize : int
        Number of matrices in the batch.

    Notes
    -----
    The matrices must be factorized first using cublasDgetrfBatched.

    The returned status is handed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>getriBatched <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-getribatched>`_
    """
    getri = _libcublas.cublasDgetriBatched
    # Reduce the pointer-like arguments to plain integer addresses.
    a_ptr, pivot_ptr = int(A), int(P)
    c_ptr, info_ptr = int(C), int(info)
    cublasCheckStatus(
        getri(handle, n, a_ptr, lda, pivot_ptr, c_ptr, ldc, info_ptr,
              batchSize))
if _cublas_version >= 5050:
    # ctypes prototype; one entry per argument of the wrapper below.
    _libcublas.cublasCgetriBatched.restype = int
    _libcublas.cublasCgetriBatched.argtypes = [
        _types.handle,    # handle
        ctypes.c_int,     # n
        ctypes.c_void_p,  # A
        ctypes.c_int,     # lda
        ctypes.c_void_p,  # P
        ctypes.c_void_p,  # C
        ctypes.c_int,     # ldc
        ctypes.c_void_p,  # info
        ctypes.c_int,     # batchSize
    ]


@_cublas_version_req(5.5)
def cublasCgetriBatched(handle, n, A, lda, P, C, ldc, info, batchSize):
    """
    This function performs the inversion of an array of n x n matrices.

    Parameters
    ----------
    handle : int
        CUBLAS context.
    n : int
        Order of each matrix.
    A, P, C, info : int-convertible
        Pointer values, converted with `int()`.
    lda, ldc : int
        Leading dimensions.
    batchSize : int
        Number of matrices in the batch.

    Notes
    -----
    The matrices must be factorized first using cublasCgetrfBatched.

    The returned status is handed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>getriBatched <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-getribatched>`_
    """
    getri = _libcublas.cublasCgetriBatched
    # Reduce the pointer-like arguments to plain integer addresses.
    a_ptr, pivot_ptr = int(A), int(P)
    c_ptr, info_ptr = int(C), int(info)
    cublasCheckStatus(
        getri(handle, n, a_ptr, lda, pivot_ptr, c_ptr, ldc, info_ptr,
              batchSize))
if _cublas_version >= 5050:
    # ctypes prototype; one entry per argument of the wrapper below.
    _libcublas.cublasZgetriBatched.restype = int
    _libcublas.cublasZgetriBatched.argtypes = [
        _types.handle,    # handle
        ctypes.c_int,     # n
        ctypes.c_void_p,  # A
        ctypes.c_int,     # lda
        ctypes.c_void_p,  # P
        ctypes.c_void_p,  # C
        ctypes.c_int,     # ldc
        ctypes.c_void_p,  # info
        ctypes.c_int,     # batchSize
    ]


@_cublas_version_req(5.5)
def cublasZgetriBatched(handle, n, A, lda, P, C, ldc, info, batchSize):
    """
    This function performs the inversion of an array of n x n matrices.

    Parameters
    ----------
    handle : int
        CUBLAS context.
    n : int
        Order of each matrix.
    A, P, C, info : int-convertible
        Pointer values, converted with `int()`.
    lda, ldc : int
        Leading dimensions.
    batchSize : int
        Number of matrices in the batch.

    Notes
    -----
    The matrices must be factorized first using cublasZgetrfBatched.

    The returned status is handed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>getriBatched <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-getribatched>`_
    """
    status = _libcublas.cublasZgetriBatched(
        handle, n,
        int(A), lda,
        int(P),
        int(C), ldc,
        int(info),
        batchSize)
    cublasCheckStatus(status)
if _cublas_version >= 5000:
    # The four dgmm precision variants share one ctypes prototype: an int
    # status return and the same argument layout (one list object is bound
    # to all four functions).
    _libcublas.cublasSdgmm.restype = \
        _libcublas.cublasDdgmm.restype = \
        _libcublas.cublasCdgmm.restype = \
        _libcublas.cublasZdgmm.restype = int
    _libcublas.cublasSdgmm.argtypes = \
        _libcublas.cublasDdgmm.argtypes = \
        _libcublas.cublasCdgmm.argtypes = \
        _libcublas.cublasZdgmm.argtypes = [
            _types.handle,    # handle
            ctypes.c_int,     # side
            ctypes.c_int,     # m
            ctypes.c_int,     # n
            ctypes.c_void_p,  # A
            ctypes.c_int,     # lda
            ctypes.c_void_p,  # x
            ctypes.c_int,     # incx
            ctypes.c_void_p,  # C
            ctypes.c_int,     # ldc
        ]
@_cublas_version_req(5.0)
def cublasSdgmm(handle, side, m, n, A, lda, x, incx, C, ldc):
    """
    Multiplies a matrix with a diagonal matrix.

    Parameters
    ----------
    handle : int
        CUBLAS context.
    side
        Key into `_CUBLAS_SIDE_MODE`.
    m, n : int
        Matrix dimensions, forwarded unchanged.
    A, x, C : int-convertible
        Pointer values, converted with `int()`.
    lda, ldc : int
        Leading dimensions of `A` and `C`.
    incx : int
        Stride argument for `x`, forwarded unchanged.

    Notes
    -----
    The returned status is handed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>dgmm <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-dgmm>`_
    """
    status = _libcublas.cublasSdgmm(
        handle, _CUBLAS_SIDE_MODE[side],
        m, n,
        int(A), lda,
        int(x), incx,
        int(C), ldc)
    cublasCheckStatus(status)
@_cublas_version_req(5.0)
def cublasDdgmm(handle, side, m, n, A, lda, x, incx, C, ldc):
    """
    Multiplies a matrix with a diagonal matrix.

    Parameters
    ----------
    handle : int
        CUBLAS context.
    side
        Key into `_CUBLAS_SIDE_MODE`.
    m, n : int
        Matrix dimensions, forwarded unchanged.
    A, x, C : int-convertible
        Pointer values, converted with `int()`.
    lda, ldc : int
        Leading dimensions of `A` and `C`.
    incx : int
        Stride argument for `x`, forwarded unchanged.

    Notes
    -----
    The returned status is handed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>dgmm <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-dgmm>`_
    """
    status = _libcublas.cublasDdgmm(
        handle, _CUBLAS_SIDE_MODE[side],
        m, n,
        int(A), lda,
        int(x), incx,
        int(C), ldc)
    cublasCheckStatus(status)
@_cublas_version_req(5.0)
def cublasCdgmm(handle, side, m, n, A, lda, x, incx, C, ldc):
    """
    Multiplies a matrix with a diagonal matrix.

    Parameters
    ----------
    handle : int
        CUBLAS context.
    side
        Key into `_CUBLAS_SIDE_MODE`.
    m, n : int
        Matrix dimensions, forwarded unchanged.
    A, x, C : int-convertible
        Pointer values, converted with `int()`.
    lda, ldc : int
        Leading dimensions of `A` and `C`.
    incx : int
        Stride argument for `x`, forwarded unchanged.

    Notes
    -----
    The returned status is handed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>dgmm <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-dgmm>`_
    """
    status = _libcublas.cublasCdgmm(
        handle, _CUBLAS_SIDE_MODE[side],
        m, n,
        int(A), lda,
        int(x), incx,
        int(C), ldc)
    cublasCheckStatus(status)
@_cublas_version_req(5.0)
def cublasZdgmm(handle, side, m, n, A, lda, x, incx, C, ldc):
    """
    Multiplies a matrix with a diagonal matrix.

    Parameters
    ----------
    handle : int
        CUBLAS context.
    side
        Key into `_CUBLAS_SIDE_MODE`.
    m, n : int
        Matrix dimensions, forwarded unchanged.
    A, x, C : int-convertible
        Pointer values, converted with `int()`.
    lda, ldc : int
        Leading dimensions of `A` and `C`.
    incx : int
        Stride argument for `x`, forwarded unchanged.

    Notes
    -----
    The returned status is handed to `cublasCheckStatus`.

    References
    ----------
    `cublas<t>dgmm <http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-dgmm>`_
    """
    status = _libcublas.cublasZdgmm(
        handle, _CUBLAS_SIDE_MODE[side],
        m, n,
        int(A), lda,
        int(x), incx,
        int(C), ldc)
    cublasCheckStatus(status)
if __name__ == "__main__":
    # Executed as a script: run the doctest examples found in this
    # module's docstrings.
    import doctest

    doctest.testmod()