NMFPack/bdiv__cuda_8h_source.html

 /***************************************************************************
  *   Copyright (C) 2014 by PIR (University of Oviedo) and                  *
  *   INCO2 (Polytechnic University of Valencia) groups.                    *
  *   nnmfpack@gmail.com                                                    *
  *                                                                         *
  *   This program is free software; you can redistribute it and/or modify  *
  *   it under the terms of the GNU General Public License as published by  *
  *   the Free Software Foundation; either version 2 of the License, or     *
  *   (at your option) any later version.                                   *
  *                                                                         *
  *   This program is distributed in the hope that it will be useful,       *
  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
  *   GNU General Public License for more details.                          *
  *                                                                         *
  *   You should have received a copy of the GNU General Public License     *
  *   along with this program; if not, write to the                         *
  *   Free Software Foundation, Inc.,                                       *
  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
  ***************************************************************************
 */
 #ifndef BDIV_CUDA_H
 #define BDIV_CUDA_H

 #include "utils_cuda.h"
 #include "mlsa_cuda.h"

 /* wrappers */
 /*
  * Public entry points, one per precision: d = double, s = float.
  * NOTE(review): the "bdiv" name together with the beta argument suggests a
  * factorization of A into W and H under a beta-divergence -- confirm against
  * the implementation file.
  *
  * Parameters, as far as the signature shows:
  *   m, n, k : integer sizes (presumably A is m x n, W is m x k, H is k x n
  *             -- TODO confirm)
  *   A       : read-only input (pointer to const)
  *   W, H    : pointers to non-const data, so presumably updated in place
  *   beta    : scalar parameter of the same precision as the data
  *   uType   : integer selector (meaning not visible in this header)
  *   nIter   : presumably the number of iterations -- verify
  *
  * Returns an int; presumably a status code -- verify.
  * Whether the pointers refer to host or device memory is not visible here.
  */
 int dbdiv_cuda(const int m, const int n, const int k, const double *A, double *W, double *H, const double beta, const int uType, const int nIter);
 int sbdiv_cuda(const int m, const int n, const int k, const float  *A, float  *W, float  *H, const float  beta, const int uType, const int nIter);

 /* inner functions */
 /* general cases   */
 /*
  * Double (d) and float (s) variants for the general case. They take exactly
  * the same argument list as dbdiv_cuda / sbdiv_cuda.
  * NOTE(review): presumably the wrappers dispatch here when beta has no
  * dedicated special-case routine -- confirm in the implementation.
  *
  *   m, n, k : integer sizes
  *   A       : read-only input (pointer to const)
  *   W, H    : pointers to non-const data, presumably updated in place
  *   beta    : scalar parameter of the same precision as the data
  *   uType   : integer selector (meaning not visible in this header)
  *   nIter   : presumably the number of iterations -- verify
  *
  * Returns an int; presumably a status code -- verify.
  */
 int dbdivg_cuda(const int m, const int n, const int k, const double *A, double *W, double *H, const double beta, const int uType, const int nIter);
 int sbdivg_cuda(const int m, const int n, const int k, const float  *A, float  *W, float  *H, const float  beta, const int uType, const int nIter);

 /*  beta=1 */
 /*
  * Double (d) and float (s) variants specialised for beta = 1; unlike the
  * general-case routines they take no beta argument.
  *
  *   m, n, k : integer sizes
  *   A       : read-only input (pointer to const)
  *   W, H    : pointers to non-const data, presumably updated in place
  *   uType   : integer selector (meaning not visible in this header)
  *   nIter   : presumably the number of iterations -- verify
  *
  * Returns an int; presumably a status code -- verify.
  */
 int dbdivone_cuda(const int m, const int n, const int k, const double *A, double *W, double *H, const int uType, const int nIter);
 int sbdivone_cuda(const int m, const int n, const int k, const float  *A, float  *W, float  *H, const int uType, const int nIter);


 /* support functions  */
 /*
  * Host-callable helpers (no __global__ qualifier) that take a cudaStream_t.
  * NOTE(review): presumably each one launches the kernel with the same name
  * prefixed by "v" on the given stream -- confirm in the implementation.
  *
  * dkernelH_cuda / skernelH_cuda (double / float):
  *   m, n   : integer sizes
  *   L, A   : read-only inputs (pointers to const)
  *   R      : pointer to non-const data, presumably the output
  *   expo   : scalar of the same precision (presumably an exponent -- verify)
  *   stream : CUDA stream passed by the caller
  */
 void dkernelH_cuda(const int m, const int n, const double *L, const double *A, double *R, const double expo, cudaStream_t stream);
 void skernelH_cuda(const int m, const int n, const float  *L, const float  *A, float  *R, const float  expo, cudaStream_t stream);

 /*
  * dkernelW_cuda / skernelW_cuda (double / float): same argument list as the
  * kernelH helpers.
  *   m, n   : integer sizes
  *   L, A   : read-only inputs (pointers to const)
  *   R      : pointer to non-const data, presumably the output
  *   expo   : scalar of the same precision (presumably an exponent -- verify)
  *   stream : CUDA stream passed by the caller
  * NOTE(review): presumably launches vdkernelW_cuda / vskernelW_cuda -- confirm.
  */
 void dkernelW_cuda(const int m, const int n, const double *L, const double *A, double *R, const double expo, cudaStream_t stream);
 void skernelW_cuda(const int m, const int n, const float  *L, const float  *A, float  *R, const float  expo, cudaStream_t stream);

 /*
  * dupdate1H_cuda / supdate1H_cuda (double / float).
  *   n      : single integer size (presumably the element count -- verify)
  *   X      : read-only input (pointer to const)
  *   H      : pointer to non-const data, presumably updated in place
  *   stream : CUDA stream passed by the caller
  * NOTE(review): presumably launches vdupdate1H_cuda / vsupdate1H_cuda -- confirm.
  */
 void dupdate1H_cuda(const int n, const double *X, double *H, cudaStream_t stream);
 void supdate1H_cuda(const int n, const float  *X, float  *H, cudaStream_t stream);

 /*
  * dupdate2H_cuda / supdate2H_cuda (double / float).
  *   m, n   : integer sizes
  *   X, B   : read-only inputs (pointers to const)
  *   H      : pointer to non-const data, presumably updated in place
  *   stream : CUDA stream passed by the caller
  * NOTE(review): presumably launches vdupdate2H_cuda / vsupdate2H_cuda -- confirm.
  */
 void dupdate2H_cuda(const int m, const int n, const double *X, const double *B, double *H, cudaStream_t stream);
 void supdate2H_cuda(const int m, const int n, const float  *X, const float  *B, float  *H, cudaStream_t stream);

 /*
  * dupdate1W_cuda / supdate1W_cuda (double / float).
  * Unlike the update1H helpers, these take two sizes (m and n).
  *   m, n   : integer sizes
  *   X      : read-only input (pointer to const)
  *   W      : pointer to non-const data, presumably updated in place
  *   stream : CUDA stream passed by the caller
  * NOTE(review): presumably launches vdupdate1W_cuda / vsupdate1W_cuda -- confirm.
  */
 void dupdate1W_cuda(const int m, const int n, const double *X, double *W, cudaStream_t stream);
 void supdate1W_cuda(const int m, const int n, const float  *X, float  *W, cudaStream_t stream);

 /*
  * dupdate2W_cuda / supdate2W_cuda (double / float).
  *   m, n   : integer sizes
  *   X, B   : read-only inputs (pointers to const)
  *   W      : pointer to non-const data, presumably updated in place
  *   stream : CUDA stream passed by the caller
  * NOTE(review): the second size is named n here but k in the prototypes of
  * vdupdate2W_cuda / vsupdate2W_cuda. Parameter names in declarations do not
  * affect linkage, but confirm which dimension callers are expected to pass.
  */
 void dupdate2W_cuda(const int m, const int n, const double *X, const double *B, double *W, cudaStream_t stream);
 void supdate2W_cuda(const int m, const int n, const float  *X, const float  *B, float  *W, cudaStream_t stream);

 /*Kernels*/
 /*
  * Device kernels (declared __global__). The expected grid/block layout is
  * not visible in this header -- see the implementation.
  *
  * vdkernelH_cuda / vskernelH_cuda (double / float):
  *   m, n : integer sizes
  *   L, A : read-only inputs declared __restrict__, i.e. the caller promises
  *          they are not aliased by other pointers used in the kernel
  *   R    : pointer to non-const data (not __restrict__), presumably the output
  *   expo : scalar of the same precision (presumably an exponent -- verify)
  */
 __global__ void vdkernelH_cuda(const int m, const int n, const double* __restrict__ L, const double* __restrict__ A, double *R, const double expo);
 __global__ void vskernelH_cuda(const int m, const int n, const float*  __restrict__ L, const float*  __restrict__ A, float  *R, const float  expo);

 /*
  * vdkernelW_cuda / vskernelW_cuda (double / float) device kernels; same
  * argument list as the vkernelH kernels.
  *   m, n : integer sizes
  *   L, A : read-only inputs declared __restrict__ (caller promises no aliasing)
  *   R    : pointer to non-const data, presumably the output
  *   expo : scalar of the same precision (presumably an exponent -- verify)
  * Launch configuration is not visible in this header.
  */
 __global__ void vdkernelW_cuda(const int m, const int n, const double* __restrict__ L, const double* __restrict__ A, double *R, const double expo);
 __global__ void vskernelW_cuda(const int m, const int n, const float*  __restrict__ L, const float*  __restrict__ A, float  *R, const float  expo);

 /*
  * vdupdate1H_cuda / vsupdate1H_cuda (double / float) device kernels.
  *   n : single integer size (presumably the element count -- verify)
  *   X : read-only input declared __restrict__ (caller promises no aliasing)
  *   H : pointer to non-const data, presumably updated in place
  * Launch configuration is not visible in this header.
  */
 __global__ void vdupdate1H_cuda(const int n, const double* __restrict__ X, double *H);
 __global__ void vsupdate1H_cuda(const int n, const float*  __restrict__ X, float  *H);

 /*
  * vdupdate1W_cuda / vsupdate1W_cuda (double / float) device kernels.
  *   m, n : integer sizes
  *   X    : read-only input declared __restrict__ (caller promises no aliasing)
  *   W    : pointer to non-const data, presumably updated in place
  * Launch configuration is not visible in this header.
  */
 __global__ void vdupdate1W_cuda(const int m, const int n, const double* __restrict__ X, double *W);
 __global__ void vsupdate1W_cuda(const int m, const int n, const float*  __restrict__ X, float  *W);

 /*
  * vdupdate2H_cuda / vsupdate2H_cuda (double / float) device kernels.
  *   m, n : integer sizes
  *   X, B : read-only inputs declared __restrict__ (caller promises no aliasing)
  *   H    : pointer to non-const data, presumably updated in place
  * Launch configuration is not visible in this header.
  */
 __global__ void vdupdate2H_cuda(const int m, const int n, const double* __restrict__ X, const double* __restrict__ B, double *H);
 __global__ void vsupdate2H_cuda(const int m, const int n, const float*  __restrict__ X, const float*  __restrict__ B, float  *H);

 /*
  * vdupdate2W_cuda / vsupdate2W_cuda (double / float) device kernels.
  *   m, k : integer sizes
  *   X, B : read-only inputs declared __restrict__ (caller promises no aliasing)
  *   W    : pointer to non-const data, presumably updated in place
  * NOTE(review): the second size is named k here, whereas the host-side
  * dupdate2W_cuda / supdate2W_cuda prototypes name it n -- confirm which
  * dimension is intended.
  * Launch configuration is not visible in this header.
  */
 __global__ void vdupdate2W_cuda(const int m, const int k, const double* __restrict__ X, const double* __restrict__ B, double *W);
 __global__ void vsupdate2W_cuda(const int m, const int k, const float*  __restrict__ X, const float*  __restrict__ B, float  *W);

 #endif
mlsa_cuda.h
Header file for the mlsa algorithm implemented with CUDA functions on GPUs.

utils_cuda.h
Header file for using utility modules from CUDA source codes.

vdupdate2H_cuda
__global__ void vdupdate2H_cuda(const int m, const int n, const double *__restrict__ X, const double *__restrict__ B, double *H)

dkernelW_cuda
void dkernelW_cuda(const int m, const int n, const double *L, const double *A, double *R, const double expo, cudaStream_t stream)

sbdivg_cuda
int sbdivg_cuda(const int m, const int n, const int k, const float *A, float *W, float *H, const float beta, const int uType, const int nIter)

supdate1H_cuda
void supdate1H_cuda(const int n, const float *X, float *H, cudaStream_t stream)

dupdate1W_cuda
void dupdate1W_cuda(const int m, const int n, const double *X, double *W, cudaStream_t stream)

vsupdate2H_cuda
__global__ void vsupdate2H_cuda(const int m, const int n, const float *__restrict__ X, const float *__restrict__ B, float *H)

vsupdate1H_cuda
__global__ void vsupdate1H_cuda(const int n, const float *__restrict__ X, float *H)

sbdivone_cuda
int sbdivone_cuda(const int m, const int n, const int k, const float *A, float *W, float *H, const int uType, const int nIter)

vsupdate1W_cuda
__global__ void vsupdate1W_cuda(const int m, const int n, const float *__restrict__ X, float *W)

vdkernelW_cuda
__global__ void vdkernelW_cuda(const int m, const int n, const double *__restrict__ L, const double *__restrict__ A, double *R, const double expo)

supdate1W_cuda
void supdate1W_cuda(const int m, const int n, const float *X, float *W, cudaStream_t stream)

dupdate2W_cuda
void dupdate2W_cuda(const int m, const int n, const double *X, const double *B, double *W, cudaStream_t stream)

vdupdate1W_cuda
__global__ void vdupdate1W_cuda(const int m, const int n, const double *__restrict__ X, double *W)

supdate2W_cuda
void supdate2W_cuda(const int m, const int n, const float *X, const float *B, float *W, cudaStream_t stream)

skernelH_cuda
void skernelH_cuda(const int m, const int n, const float *L, const float *A, float *R, const float expo, cudaStream_t stream)

dupdate1H_cuda
void dupdate1H_cuda(const int n, const double *X, double *H, cudaStream_t stream)

vskernelH_cuda
__global__ void vskernelH_cuda(const int m, const int n, const float *__restrict__ L, const float *__restrict__ A, float *R, const float expo)

dbdiv_cuda
int dbdiv_cuda(const int m, const int n, const int k, const double *A, double *W, double *H, const double beta, const int uType, const int nIter)

vdkernelH_cuda
__global__ void vdkernelH_cuda(const int m, const int n, const double *__restrict__ L, const double *__restrict__ A, double *R, const double expo)

sbdiv_cuda
int sbdiv_cuda(const int m, const int n, const int k, const float *A, float *W, float *H, const float beta, const int uType, const int nIter)

skernelW_cuda
void skernelW_cuda(const int m, const int n, const float *L, const float *A, float *R, const float expo, cudaStream_t stream)

dkernelH_cuda
void dkernelH_cuda(const int m, const int n, const double *L, const double *A, double *R, const double expo, cudaStream_t stream)

vdupdate2W_cuda
__global__ void vdupdate2W_cuda(const int m, const int k, const double *__restrict__ X, const double *__restrict__ B, double *W)

vsupdate2W_cuda
__global__ void vsupdate2W_cuda(const int m, const int k, const float *__restrict__ X, const float *__restrict__ B, float *W)

vskernelW_cuda
__global__ void vskernelW_cuda(const int m, const int n, const float *__restrict__ L, const float *__restrict__ A, float *R, const float expo)

dbdivg_cuda
int dbdivg_cuda(const int m, const int n, const int k, const double *A, double *W, double *H, const double beta, const int uType, const int nIter)

dbdivone_cuda
int dbdivone_cuda(const int m, const int n, const int k, const double *A, double *W, double *H, const int uType, const int nIter)

dupdate2H_cuda
void dupdate2H_cuda(const int m, const int n, const double *X, const double *B, double *H, cudaStream_t stream)

supdate2H_cuda
void supdate2H_cuda(const int m, const int n, const float *X, const float *B, float *H, cudaStream_t stream)

vdupdate1H_cuda
__global__ void vdupdate1H_cuda(const int n, const double *__restrict__ X, double *H)