/*
 * dbdiv_cuda / sbdiv_cuda
 *
 * Double- and single-precision variants of the same entry point; the two
 * prototypes differ only in the element type of A, W, H and beta.
 *
 *   m, n, k : integer sizes. NOTE(review): presumably A is m x n, W is m x k
 *             and H is k x n -- confirm against the definition.
 *   A       : const-qualified pointer, i.e. read-only for the callee.
 *   W, H    : non-const pointers, so the callee may modify what they point to.
 *   beta    : scalar parameter. NOTE(review): the "bdiv" name suggests a
 *             beta-divergence computation -- confirm.
 *   uType   : integer selector. NOTE(review): meaning not visible here.
 *   nIter   : integer count. NOTE(review): presumably the iteration count.
 *
 * Returns an int. NOTE(review): presumably a status code -- confirm.
 */
39 int dbdiv_cuda(
const int m,
const int n,
const int k,
const double *A,
double *W,
double *H,
const double beta,
const int uType,
const int nIter);
/* Single-precision (float) counterpart of dbdiv_cuda; same parameter list. */
40 int sbdiv_cuda(
const int m,
const int n,
const int k,
const float *A,
float *W,
float *H,
const float beta,
const int uType,
const int nIter);
/*
 * dbdivg_cuda / sbdivg_cuda
 *
 * Double- and single-precision variants sharing one parameter list, which is
 * identical to that of dbdiv_cuda / sbdiv_cuda.
 * NOTE(review): the trailing "g" presumably marks a general-case variant of
 * the bdiv routine -- confirm against the definition.
 *
 *   m, n, k : integer sizes. NOTE(review): presumably matrix dimensions.
 *   A       : const-qualified pointer, read-only for the callee.
 *   W, H    : non-const pointers, so the callee may modify their contents.
 *   beta    : scalar parameter of the same element type as A.
 *   uType   : integer selector. NOTE(review): meaning not visible here.
 *   nIter   : integer count. NOTE(review): presumably the iteration count.
 *
 * Returns an int. NOTE(review): presumably a status code -- confirm.
 */
44 int dbdivg_cuda(
const int m,
const int n,
const int k,
const double *A,
double *W,
double *H,
const double beta,
const int uType,
const int nIter);
/* Single-precision (float) counterpart of dbdivg_cuda; same parameter list. */
45 int sbdivg_cuda(
const int m,
const int n,
const int k,
const float *A,
float *W,
float *H,
const float beta,
const int uType,
const int nIter);
/*
 * dbdivone_cuda / sbdivone_cuda
 *
 * Double- and single-precision variants sharing one parameter list. Unlike
 * dbdiv_cuda / dbdivg_cuda, these take no beta argument.
 * NOTE(review): the "one" suffix presumably means the beta == 1 special
 * case -- confirm against the definition.
 *
 *   m, n, k : integer sizes. NOTE(review): presumably matrix dimensions.
 *   A       : const-qualified pointer, read-only for the callee.
 *   W, H    : non-const pointers, so the callee may modify their contents.
 *   uType   : integer selector. NOTE(review): meaning not visible here.
 *   nIter   : integer count. NOTE(review): presumably the iteration count.
 *
 * Returns an int. NOTE(review): presumably a status code -- confirm.
 */
48 int dbdivone_cuda(
const int m,
const int n,
const int k,
const double *A,
double *W,
double *H,
const int uType,
const int nIter);
/* Single-precision (float) counterpart of dbdivone_cuda; same parameter list. */
49 int sbdivone_cuda(
const int m,
const int n,
const int k,
const float *A,
float *W,
float *H,
const int uType,
const int nIter);
/*
 * dkernelH_cuda / skernelH_cuda
 *
 * Double- and single-precision functions declared without __global__, so
 * they are ordinary host-callable functions.
 * NOTE(review): presumably each one launches the matching v?kernelH_cuda
 * kernel on the given stream -- confirm against the definition.
 *
 *   m, n   : integer sizes. NOTE(review): presumably array dimensions.
 *   L, A   : const-qualified pointers, read-only for the callee.
 *   R      : non-const pointer, so the callee may write through it.
 *   expo   : scalar parameter. NOTE(review): presumably an exponent.
 *   stream : CUDA stream handle passed by the caller.
 */
53 void dkernelH_cuda(
const int m,
const int n,
const double *L,
const double *A,
double *R,
const double expo, cudaStream_t stream);
/* Single-precision (float) counterpart of dkernelH_cuda; same parameter list. */
54 void skernelH_cuda(
const int m,
const int n,
const float *L,
const float *A,
float *R,
const float expo, cudaStream_t stream);
/*
 * dkernelW_cuda / skernelW_cuda
 *
 * Double- and single-precision functions declared without __global__, so
 * they are ordinary host-callable functions. The parameter list is the same
 * as that of dkernelH_cuda / skernelH_cuda.
 * NOTE(review): presumably each one launches the matching v?kernelW_cuda
 * kernel on the given stream -- confirm against the definition.
 *
 *   m, n   : integer sizes. NOTE(review): presumably array dimensions.
 *   L, A   : const-qualified pointers, read-only for the callee.
 *   R      : non-const pointer, so the callee may write through it.
 *   expo   : scalar parameter. NOTE(review): presumably an exponent.
 *   stream : CUDA stream handle passed by the caller.
 */
56 void dkernelW_cuda(
const int m,
const int n,
const double *L,
const double *A,
double *R,
const double expo, cudaStream_t stream);
/* Single-precision (float) counterpart of dkernelW_cuda; same parameter list. */
57 void skernelW_cuda(
const int m,
const int n,
const float *L,
const float *A,
float *R,
const float expo, cudaStream_t stream);
/*
 * dupdate1H_cuda / supdate1H_cuda
 *
 * Double- and single-precision host-callable functions (no __global__).
 * NOTE(review): presumably each one launches the matching v?update1H_cuda
 * kernel on the given stream -- confirm against the definition.
 *
 *   n      : single integer size. NOTE(review): presumably the total number
 *            of elements processed -- confirm.
 *   X      : const-qualified pointer, read-only for the callee.
 *   H      : non-const pointer, so the callee may modify its contents.
 *   stream : CUDA stream handle passed by the caller.
 */
59 void dupdate1H_cuda(
const int n,
const double *X,
double *H, cudaStream_t stream);
/* Single-precision (float) counterpart of dupdate1H_cuda; same parameter list. */
60 void supdate1H_cuda(
const int n,
const float *X,
float *H, cudaStream_t stream);
/*
 * dupdate2H_cuda / supdate2H_cuda
 *
 * Double- and single-precision host-callable functions (no __global__).
 * Compared with dupdate1H_cuda they take an extra size (m) and an extra
 * read-only array (B).
 * NOTE(review): presumably each one launches the matching v?update2H_cuda
 * kernel on the given stream -- confirm against the definition.
 *
 *   m, n   : integer sizes. NOTE(review): presumably array dimensions.
 *   X, B   : const-qualified pointers, read-only for the callee.
 *   H      : non-const pointer, so the callee may modify its contents.
 *   stream : CUDA stream handle passed by the caller.
 */
62 void dupdate2H_cuda(
const int m,
const int n,
const double *X,
const double *B,
double *H, cudaStream_t stream);
/* Single-precision (float) counterpart of dupdate2H_cuda; same parameter list. */
63 void supdate2H_cuda(
const int m,
const int n,
const float *X,
const float *B,
float *H, cudaStream_t stream);
/*
 * dupdate1W_cuda / supdate1W_cuda
 *
 * Double- and single-precision host-callable functions (no __global__).
 * Unlike dupdate1H_cuda, which takes a single size, these take two (m, n).
 * NOTE(review): presumably each one launches the matching v?update1W_cuda
 * kernel on the given stream -- confirm against the definition.
 *
 *   m, n   : integer sizes. NOTE(review): presumably array dimensions.
 *   X      : const-qualified pointer, read-only for the callee.
 *   W      : non-const pointer, so the callee may modify its contents.
 *   stream : CUDA stream handle passed by the caller.
 */
65 void dupdate1W_cuda(
const int m,
const int n,
const double *X,
double *W, cudaStream_t stream);
/* Single-precision (float) counterpart of dupdate1W_cuda; same parameter list. */
66 void supdate1W_cuda(
const int m,
const int n,
const float *X,
float *W, cudaStream_t stream);
/*
 * dupdate2W_cuda / supdate2W_cuda
 *
 * Double- and single-precision host-callable functions (no __global__).
 * Compared with dupdate1W_cuda they take an extra read-only array (B).
 * NOTE(review): presumably each one launches the matching v?update2W_cuda
 * kernel on the given stream -- confirm against the definition. The kernel
 * prototypes name their second size k where these name it n; verify that
 * the two refer to the same quantity.
 *
 *   m, n   : integer sizes. NOTE(review): presumably array dimensions.
 *   X, B   : const-qualified pointers, read-only for the callee.
 *   W      : non-const pointer, so the callee may modify its contents.
 *   stream : CUDA stream handle passed by the caller.
 */
68 void dupdate2W_cuda(
const int m,
const int n,
const double *X,
const double *B,
double *W, cudaStream_t stream);
/* Single-precision (float) counterpart of dupdate2W_cuda; same parameter list. */
69 void supdate2W_cuda(
const int m,
const int n,
const float *X,
const float *B,
float *W, cudaStream_t stream);
/*
 * vdkernelH_cuda / vskernelH_cuda
 *
 * CUDA kernel entry points (__global__), in double and single precision.
 * NOTE(review): the expected grid/block layout is not visible in these
 * prototypes -- document it next to the definition.
 *
 *   m, n : integer sizes. NOTE(review): presumably array dimensions.
 *   L, A : const __restrict__ pointers: read-only, and declared not to alias
 *          any other pointer the kernel accesses.
 *   R    : non-const pointer (not __restrict__), so the kernel may write
 *          through it.
 *   expo : scalar parameter. NOTE(review): presumably an exponent.
 */
72 __global__
void vdkernelH_cuda(
const int m,
const int n,
const double* __restrict__ L,
const double* __restrict__ A,
double *R,
const double expo);
/* Single-precision (float) counterpart of vdkernelH_cuda; same parameter list. */
73 __global__
void vskernelH_cuda(
const int m,
const int n,
const float* __restrict__ L,
const float* __restrict__ A,
float *R,
const float expo);
/*
 * vdkernelW_cuda / vskernelW_cuda
 *
 * CUDA kernel entry points (__global__), in double and single precision.
 * The parameter list matches that of vdkernelH_cuda / vskernelH_cuda.
 * NOTE(review): the expected grid/block layout is not visible in these
 * prototypes -- document it next to the definition.
 *
 *   m, n : integer sizes. NOTE(review): presumably array dimensions.
 *   L, A : const __restrict__ pointers: read-only, and declared not to alias
 *          any other pointer the kernel accesses.
 *   R    : non-const pointer (not __restrict__), so the kernel may write
 *          through it.
 *   expo : scalar parameter. NOTE(review): presumably an exponent.
 */
75 __global__
void vdkernelW_cuda(
const int m,
const int n,
const double* __restrict__ L,
const double* __restrict__ A,
double *R,
const double expo);
/* Single-precision (float) counterpart of vdkernelW_cuda; same parameter list. */
76 __global__
void vskernelW_cuda(
const int m,
const int n,
const float* __restrict__ L,
const float* __restrict__ A,
float *R,
const float expo);
/*
 * vdupdate1H_cuda / vsupdate1H_cuda
 *
 * CUDA kernel entry points (__global__), in double and single precision.
 * NOTE(review): the expected grid/block layout is not visible in these
 * prototypes -- document it next to the definition.
 *
 *   n : single integer size. NOTE(review): presumably the total number of
 *       elements processed -- confirm.
 *   X : const __restrict__ pointer: read-only, and declared not to alias any
 *       other pointer the kernel accesses.
 *   H : non-const pointer (not __restrict__), so the kernel may modify its
 *       contents.
 */
78 __global__
void vdupdate1H_cuda(
const int n,
const double* __restrict__ X,
double *H);
/* Single-precision (float) counterpart of vdupdate1H_cuda; same parameter list. */
79 __global__
void vsupdate1H_cuda(
const int n,
const float* __restrict__ X,
float *H);
/*
 * vdupdate1W_cuda / vsupdate1W_cuda
 *
 * CUDA kernel entry points (__global__), in double and single precision.
 * Unlike vdupdate1H_cuda, which takes a single size, these take two (m, n).
 * NOTE(review): the expected grid/block layout is not visible in these
 * prototypes -- document it next to the definition.
 *
 *   m, n : integer sizes. NOTE(review): presumably array dimensions.
 *   X    : const __restrict__ pointer: read-only, and declared not to alias
 *          any other pointer the kernel accesses.
 *   W    : non-const pointer (not __restrict__), so the kernel may modify
 *          its contents.
 */
81 __global__
void vdupdate1W_cuda(
const int m,
const int n,
const double* __restrict__ X,
double *W);
/* Single-precision (float) counterpart of vdupdate1W_cuda; same parameter list. */
82 __global__
void vsupdate1W_cuda(
const int m,
const int n,
const float* __restrict__ X,
float *W);
/*
 * vdupdate2H_cuda / vsupdate2H_cuda
 *
 * CUDA kernel entry points (__global__), in double and single precision.
 * Compared with vdupdate1H_cuda they take an extra size (m) and an extra
 * read-only array (B).
 * NOTE(review): the expected grid/block layout is not visible in these
 * prototypes -- document it next to the definition.
 *
 *   m, n : integer sizes. NOTE(review): presumably array dimensions.
 *   X, B : const __restrict__ pointers: read-only, and declared not to alias
 *          any other pointer the kernel accesses.
 *   H    : non-const pointer (not __restrict__), so the kernel may modify
 *          its contents.
 */
84 __global__
void vdupdate2H_cuda(
const int m,
const int n,
const double* __restrict__ X,
const double* __restrict__ B,
double *H);
/* Single-precision (float) counterpart of vdupdate2H_cuda; same parameter list. */
85 __global__
void vsupdate2H_cuda(
const int m,
const int n,
const float* __restrict__ X,
const float* __restrict__ B,
float *H);
/*
 * vdupdate2W_cuda / vsupdate2W_cuda
 *
 * CUDA kernel entry points (__global__), in double and single precision.
 * NOTE(review): the expected grid/block layout is not visible in these
 * prototypes -- document it next to the definition.
 *
 *   m, k : integer sizes. NOTE(review): the second size is named k here,
 *          whereas the host-side dupdate2W_cuda / supdate2W_cuda prototypes
 *          name it n -- verify both refer to the same quantity.
 *   X, B : const __restrict__ pointers: read-only, and declared not to alias
 *          any other pointer the kernel accesses.
 *   W    : non-const pointer (not __restrict__), so the kernel may modify
 *          its contents.
 */
87 __global__
void vdupdate2W_cuda(
const int m,
const int k,
const double* __restrict__ X,
const double* __restrict__ B,
double *W);
/* Single-precision (float) counterpart of vdupdate2W_cuda; same parameter list. */
88 __global__
void vsupdate2W_cuda(
const int m,
const int k,
const float* __restrict__ X,
const float* __restrict__ B,
float *W);
Header file for running the mlsa algorithm on GPUs through CUDA functions.
Header file for using the utility modules from CUDA source code.
__global__ void vdupdate2H_cuda(const int m, const int n, const double *__restrict__ X, const double *__restrict__ B, double *H)
void dkernelW_cuda(const int m, const int n, const double *L, const double *A, double *R, const double expo, cudaStream_t stream)
int sbdivg_cuda(const int m, const int n, const int k, const float *A, float *W, float *H, const float beta, const int uType, const int nIter)
void supdate1H_cuda(const int n, const float *X, float *H, cudaStream_t stream)
void dupdate1W_cuda(const int m, const int n, const double *X, double *W, cudaStream_t stream)
__global__ void vsupdate2H_cuda(const int m, const int n, const float *__restrict__ X, const float *__restrict__ B, float *H)
__global__ void vsupdate1H_cuda(const int n, const float *__restrict__ X, float *H)
int sbdivone_cuda(const int m, const int n, const int k, const float *A, float *W, float *H, const int uType, const int nIter)
__global__ void vsupdate1W_cuda(const int m, const int n, const float *__restrict__ X, float *W)
__global__ void vdkernelW_cuda(const int m, const int n, const double *__restrict__ L, const double *__restrict__ A, double *R, const double expo)
void supdate1W_cuda(const int m, const int n, const float *X, float *W, cudaStream_t stream)
void dupdate2W_cuda(const int m, const int n, const double *X, const double *B, double *W, cudaStream_t stream)
__global__ void vdupdate1W_cuda(const int m, const int n, const double *__restrict__ X, double *W)
void supdate2W_cuda(const int m, const int n, const float *X, const float *B, float *W, cudaStream_t stream)
void skernelH_cuda(const int m, const int n, const float *L, const float *A, float *R, const float expo, cudaStream_t stream)
void dupdate1H_cuda(const int n, const double *X, double *H, cudaStream_t stream)
__global__ void vskernelH_cuda(const int m, const int n, const float *__restrict__ L, const float *__restrict__ A, float *R, const float expo)
int dbdiv_cuda(const int m, const int n, const int k, const double *A, double *W, double *H, const double beta, const int uType, const int nIter)
__global__ void vdkernelH_cuda(const int m, const int n, const double *__restrict__ L, const double *__restrict__ A, double *R, const double expo)
int sbdiv_cuda(const int m, const int n, const int k, const float *A, float *W, float *H, const float beta, const int uType, const int nIter)
void skernelW_cuda(const int m, const int n, const float *L, const float *A, float *R, const float expo, cudaStream_t stream)
void dkernelH_cuda(const int m, const int n, const double *L, const double *A, double *R, const double expo, cudaStream_t stream)
__global__ void vdupdate2W_cuda(const int m, const int k, const double *__restrict__ X, const double *__restrict__ B, double *W)
__global__ void vsupdate2W_cuda(const int m, const int k, const float *__restrict__ X, const float *__restrict__ B, float *W)
__global__ void vskernelW_cuda(const int m, const int n, const float *__restrict__ L, const float *__restrict__ A, float *R, const float expo)
int dbdivg_cuda(const int m, const int n, const int k, const double *A, double *W, double *H, const double beta, const int uType, const int nIter)
int dbdivone_cuda(const int m, const int n, const int k, const double *A, double *W, double *H, const int uType, const int nIter)
void dupdate2H_cuda(const int m, const int n, const double *X, const double *B, double *H, cudaStream_t stream)
void supdate2H_cuda(const int m, const int n, const float *X, const float *B, float *H, cudaStream_t stream)
__global__ void vdupdate1H_cuda(const int n, const double *__restrict__ X, double *H)