NnmfPack  2.1
bdiv_cuda.h
Go to the documentation of this file.
1 /***************************************************************************
2  * Copyright (C) 2014 by PIR (University of Oviedo) and *
3  * INCO2 (Polytechnic University of Valencia) groups. *
4  * nnmfpack@gmail.com *
5  * *
6  * This program is free software; you can redistribute it and/or modify *
7  * it under the terms of the GNU General Public License as published by *
8  * the Free Software Foundation; either version 2 of the License, or *
9  * (at your option) any later version. *
10  * *
11  * This program is distributed in the hope that it will be useful, *
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14  * GNU General Public License for more details. *
15  * *
16  * You should have received a copy of the GNU General Public License *
17  * along with this program; if not, write to the *
18  * Free Software Foundation, Inc., *
19  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
20  ***************************************************************************
21 */
32 #ifndef BDIV_CUDA_H
33 #define BDIV_CUDA_H
34 
35 #include "utils_cuda.h"
36 #include "mlsa_cuda.h"
37 
38 /* wrappers */
/* Top-level entry points, declared in two precisions: dbdiv_cuda takes double
 * data and a double beta, sbdiv_cuda takes the same argument list as float.
 * A is const-qualified (read-only as declared); W and H are non-const, so the
 * callee may write through them.
 * NOTE(review): m, n, k are presumably the matrix dimensions, uType an update
 * selector, nIter an iteration count and the int result a status code -- none
 * of this is visible in this header; confirm against the implementation. */
39 int dbdiv_cuda(const int m, const int n, const int k, const double *A, double *W, double *H, const double beta, const int uType, const int nIter);
40 int sbdiv_cuda(const int m, const int n, const int k, const float *A, float *W, float *H, const float beta, const int uType, const int nIter);
41 
42 /* inner functions */
43 /* general cases */
/* General-case inner routines (double / float). Their parameter lists are
 * identical to those of dbdiv_cuda / sbdiv_cuda, including the beta argument.
 * NOTE(review): presumably the wrappers dispatch here when no specialised
 * beta path applies -- the dispatch logic is not visible in this header. */
44 int dbdivg_cuda(const int m, const int n, const int k, const double *A, double *W, double *H, const double beta, const int uType, const int nIter);
45 int sbdivg_cuda(const int m, const int n, const int k, const float *A, float *W, float *H, const float beta, const int uType, const int nIter);
46 
47 /* beta=1 */
/* Inner routines for the beta=1 case (double / float). Same parameters as the
 * general-case routines except that no beta argument is taken.
 * NOTE(review): meaning of the int return value is not shown here -- confirm. */
48 int dbdivone_cuda(const int m, const int n, const int k, const double *A, double *W, double *H, const int uType, const int nIter);
49 int sbdivone_cuda(const int m, const int n, const int k, const float *A, float *W, float *H, const int uType, const int nIter);
50 
51 
52 /* support functions */
/* Host-callable support functions (double / float) taking a cudaStream_t.
 * L and A are const-qualified inputs, R is the only non-const pointer, and
 * expo is a scalar of the matching precision.
 * NOTE(review): presumably launches vdkernelH_cuda / vskernelH_cuda on
 * `stream` with m, n as the operand dimensions -- confirm in the .cu file. */
53 void dkernelH_cuda(const int m, const int n, const double *L, const double *A, double *R, const double expo, cudaStream_t stream);
54 void skernelH_cuda(const int m, const int n, const float *L, const float *A, float *R, const float expo, cudaStream_t stream);
55 
/* Host-callable support functions (double / float) with the same parameter
 * list as dkernelH_cuda / skernelH_cuda: const inputs L and A, non-const R,
 * scalar expo, and a cudaStream_t.
 * NOTE(review): presumably the W-update counterpart that launches
 * vdkernelW_cuda / vskernelW_cuda on `stream` -- confirm in the .cu file. */
56 void dkernelW_cuda(const int m, const int n, const double *L, const double *A, double *R, const double expo, cudaStream_t stream);
57 void skernelW_cuda(const int m, const int n, const float *L, const float *A, float *R, const float expo, cudaStream_t stream);
58 
/* Host-callable support functions (double / float) taking a single size n,
 * a const input X, a non-const H and a cudaStream_t.
 * NOTE(review): presumably n is the element count processed and H is updated
 * in place from X -- the operation itself is not visible here; confirm. */
59 void dupdate1H_cuda(const int n, const double *X, double *H, cudaStream_t stream);
60 void supdate1H_cuda(const int n, const float *X, float *H, cudaStream_t stream);
61 
/* Host-callable support functions (double / float) taking sizes m and n, two
 * const inputs X and B, a non-const H and a cudaStream_t.
 * NOTE(review): presumably H is updated in place from X and B -- the formula
 * and the layout implied by m, n are not visible in this header; confirm. */
62 void dupdate2H_cuda(const int m, const int n, const double *X, const double *B, double *H, cudaStream_t stream);
63 void supdate2H_cuda(const int m, const int n, const float *X, const float *B, float *H, cudaStream_t stream);
64 
/* Host-callable support functions (double / float) taking sizes m and n, a
 * const input X, a non-const W and a cudaStream_t. Unlike dupdate1H_cuda /
 * supdate1H_cuda, two size arguments are taken rather than one.
 * NOTE(review): presumably W is updated in place from X -- confirm. */
65 void dupdate1W_cuda(const int m, const int n, const double *X, double *W, cudaStream_t stream);
66 void supdate1W_cuda(const int m, const int n, const float *X, float *W, cudaStream_t stream);
67 
/* Host-callable support functions (double / float) taking sizes m and n, two
 * const inputs X and B, a non-const W and a cudaStream_t.
 * NOTE(review): presumably W is updated in place from X and B -- confirm.
 * The kernels vdupdate2W_cuda / vsupdate2W_cuda name their second size
 * argument k rather than n; verify which dimension is actually meant. */
68 void dupdate2W_cuda(const int m, const int n, const double *X, const double *B, double *W, cudaStream_t stream);
69 void supdate2W_cuda(const int m, const int n, const float *X, const float *B, float *W, cudaStream_t stream);
70 
71 /*Kernels*/
/* CUDA kernels (__global__), double / float. L and A are declared
 * const __restrict__ (read-only, non-aliased inputs); R is the only non-const
 * pointer; expo is a scalar of the matching precision.
 * NOTE(review): expected grid/block configuration and the element layout
 * implied by m, n are not visible in this header -- see the definition. */
72 __global__ void vdkernelH_cuda(const int m, const int n, const double* __restrict__ L, const double* __restrict__ A, double *R, const double expo);
73 __global__ void vskernelH_cuda(const int m, const int n, const float* __restrict__ L, const float* __restrict__ A, float *R, const float expo);
74 
/* CUDA kernels (__global__), double / float, with the same parameter list as
 * vdkernelH_cuda / vskernelH_cuda: const __restrict__ inputs L and A,
 * non-const R, scalar expo.
 * NOTE(review): launch configuration and data layout are not visible in this
 * header -- see the definition. */
75 __global__ void vdkernelW_cuda(const int m, const int n, const double* __restrict__ L, const double* __restrict__ A, double *R, const double expo);
76 __global__ void vskernelW_cuda(const int m, const int n, const float* __restrict__ L, const float* __restrict__ A, float *R, const float expo);
77 
/* CUDA kernels (__global__), double / float: one size argument n, a
 * const __restrict__ input X and a non-const H.
 * NOTE(review): presumably n bounds the elements touched and H is modified in
 * place -- the kernel body and launch configuration are not visible here. */
78 __global__ void vdupdate1H_cuda(const int n, const double* __restrict__ X, double *H);
79 __global__ void vsupdate1H_cuda(const int n, const float* __restrict__ X, float *H);
80 
/* CUDA kernels (__global__), double / float: sizes m and n, a
 * const __restrict__ input X and a non-const W.
 * NOTE(review): presumably W is modified in place from X -- the kernel body
 * and launch configuration are not visible in this header; confirm. */
81 __global__ void vdupdate1W_cuda(const int m, const int n, const double* __restrict__ X, double *W);
82 __global__ void vsupdate1W_cuda(const int m, const int n, const float* __restrict__ X, float *W);
83 
/* CUDA kernels (__global__), double / float: sizes m and n, two
 * const __restrict__ inputs X and B, and a non-const H.
 * NOTE(review): presumably H is modified in place from X and B -- the kernel
 * body and launch configuration are not visible in this header; confirm. */
84 __global__ void vdupdate2H_cuda(const int m, const int n, const double* __restrict__ X, const double* __restrict__ B, double *H);
85 __global__ void vsupdate2H_cuda(const int m, const int n, const float* __restrict__ X, const float* __restrict__ B, float *H);
86 
/* CUDA kernels (__global__), double / float: sizes m and k, two
 * const __restrict__ inputs X and B, and a non-const W.
 * NOTE(review): the second size is named k here, whereas the host functions
 * dupdate2W_cuda / supdate2W_cuda name theirs n -- verify that both refer to
 * the same dimension. Kernel body and launch configuration are not visible
 * in this header. */
87 __global__ void vdupdate2W_cuda(const int m, const int k, const double* __restrict__ X, const double* __restrict__ B, double *W);
88 __global__ void vsupdate2W_cuda(const int m, const int k, const float* __restrict__ X, const float* __restrict__ B, float *W);
89 
90 #endif
Header file for running the mlsa algorithm on GPUs through CUDA functions.
Header file for using the utility modules from CUDA source code.
__global__ void vdupdate2H_cuda(const int m, const int n, const double *__restrict__ X, const double *__restrict__ B, double *H)
void dkernelW_cuda(const int m, const int n, const double *L, const double *A, double *R, const double expo, cudaStream_t stream)
int sbdivg_cuda(const int m, const int n, const int k, const float *A, float *W, float *H, const float beta, const int uType, const int nIter)
void supdate1H_cuda(const int n, const float *X, float *H, cudaStream_t stream)
void dupdate1W_cuda(const int m, const int n, const double *X, double *W, cudaStream_t stream)
__global__ void vsupdate2H_cuda(const int m, const int n, const float *__restrict__ X, const float *__restrict__ B, float *H)
__global__ void vsupdate1H_cuda(const int n, const float *__restrict__ X, float *H)
int sbdivone_cuda(const int m, const int n, const int k, const float *A, float *W, float *H, const int uType, const int nIter)
__global__ void vsupdate1W_cuda(const int m, const int n, const float *__restrict__ X, float *W)
__global__ void vdkernelW_cuda(const int m, const int n, const double *__restrict__ L, const double *__restrict__ A, double *R, const double expo)
void supdate1W_cuda(const int m, const int n, const float *X, float *W, cudaStream_t stream)
void dupdate2W_cuda(const int m, const int n, const double *X, const double *B, double *W, cudaStream_t stream)
__global__ void vdupdate1W_cuda(const int m, const int n, const double *__restrict__ X, double *W)
void supdate2W_cuda(const int m, const int n, const float *X, const float *B, float *W, cudaStream_t stream)
void skernelH_cuda(const int m, const int n, const float *L, const float *A, float *R, const float expo, cudaStream_t stream)
void dupdate1H_cuda(const int n, const double *X, double *H, cudaStream_t stream)
__global__ void vskernelH_cuda(const int m, const int n, const float *__restrict__ L, const float *__restrict__ A, float *R, const float expo)
int dbdiv_cuda(const int m, const int n, const int k, const double *A, double *W, double *H, const double beta, const int uType, const int nIter)
__global__ void vdkernelH_cuda(const int m, const int n, const double *__restrict__ L, const double *__restrict__ A, double *R, const double expo)
int sbdiv_cuda(const int m, const int n, const int k, const float *A, float *W, float *H, const float beta, const int uType, const int nIter)
void skernelW_cuda(const int m, const int n, const float *L, const float *A, float *R, const float expo, cudaStream_t stream)
void dkernelH_cuda(const int m, const int n, const double *L, const double *A, double *R, const double expo, cudaStream_t stream)
__global__ void vdupdate2W_cuda(const int m, const int k, const double *__restrict__ X, const double *__restrict__ B, double *W)
__global__ void vsupdate2W_cuda(const int m, const int k, const float *__restrict__ X, const float *__restrict__ B, float *W)
__global__ void vskernelW_cuda(const int m, const int n, const float *__restrict__ L, const float *__restrict__ A, float *R, const float expo)
int dbdivg_cuda(const int m, const int n, const int k, const double *A, double *W, double *H, const double beta, const int uType, const int nIter)
int dbdivone_cuda(const int m, const int n, const int k, const double *A, double *W, double *H, const int uType, const int nIter)
void dupdate2H_cuda(const int m, const int n, const double *X, const double *B, double *H, cudaStream_t stream)
void supdate2H_cuda(const int m, const int n, const float *X, const float *B, float *H, cudaStream_t stream)
__global__ void vdupdate1H_cuda(const int n, const double *__restrict__ X, double *H)