NMFPack/utils__cuda_8h_source.html

 /***************************************************************************
  *   Copyright (C) 2014 by PIR (University of Oviedo) and                  *
  *   INCO2 (Polytechnic University of Valencia) groups.                    *
  *   nnmfpack@gmail.com                                                    *
  *                                                                         *
  *   This program is free software; you can redistribute it and/or modify  *
  *   it under the terms of the GNU General Public License as published by  *
  *   the Free Software Foundation; either version 2 of the License, or     *
  *   (at your option) any later version.                                   *
  *                                                                         *
  *   This program is distributed in the hope that it will be useful,       *
  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
  *   GNU General Public License for more details.                          *
  *                                                                         *
  *   You should have received a copy of the GNU General Public License     *
  *   along with this program; if not, write to the                         *
  *   Free Software Foundation, Inc.,                                       *
  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
  ***************************************************************************
 */
 #ifndef UTILSCUDA_H
 #define UTILSCUDA_H

 #include <stdio.h>
 #include <stdlib.h>
 #include <math.h>
 #include <cuda.h>
 #include <curand.h>
 #include <cuda_runtime.h>
 #include <cublas_v2.h>
 #include <assert.h>

 /*
  * CUDAERR(x): check the status returned by a CUDA runtime call.
  *
  * x is evaluated exactly once and its value is kept in a local variable, so
  * a call passed as the argument (e.g. CUDAERR(cudaMalloc(...))) is not
  * executed a second time when the error message is built.
  *
  * On any status other than cudaSuccess the macro prints the CUDA error
  * string together with the file name and line number, and then executes
  * `return EXIT_FAILURE;` in the ENCLOSING function. It can therefore only be
  * used inside functions whose return type accepts an int.
  */
 #define CUDAERR(x) do { \
     cudaError_t cudaerr_status_ = (x); \
     if(cudaerr_status_!=cudaSuccess) { \
         printf("CUDA error: %s : %s, line %d\n", cudaGetErrorString(cudaerr_status_), __FILE__, __LINE__);\
         return EXIT_FAILURE;}} while(0)

 /*
  * CUBLASERR(x): check the status returned by a cuBLAS call.
  *
  * If x is not CUBLAS_STATUS_SUCCESS, the file name and line number are
  * printed and `return EXIT_FAILURE;` is executed in the ENCLOSING function,
  * so the macro is only usable in functions whose return type accepts an int.
  * The status value itself is not included in the message.
  */
 #define CUBLASERR(x) \
     do { \
         if (CUBLAS_STATUS_SUCCESS != (x)) { \
             printf("CUBLAS error: %s, line %d\n", __FILE__, __LINE__); \
             return EXIT_FAILURE; \
         } \
     } while (0)

 /*
  * max(a,b) / min(a,b): larger / smaller of the two arguments.
  *
  * Function-like macros: the selected argument is evaluated twice, so do not
  * pass expressions with side effects (e.g. max(i++, j)).
  *
  * Each definition is guarded with #ifndef so that including this header
  * after another header that already defines max/min does not trigger a
  * macro redefinition. The replacement lists are intentionally left exactly
  * as they were, so a later identical definition remains a benign
  * redefinition.
  */
 #ifndef max
 #define max(a,b) (((a)>(b ))?( a):(b))
 #endif
 #ifndef min
 #define min(a,b) (((a)<(b ))?( a):(b))
 #endif

 /*
  * fpe(x): non-zero when the floating-point value x is NaN or +/-infinity,
  * zero when it is finite.
  *
  * Written as !isfinite(x), which is equivalent to isnan(x) || isinf(x) but
  * expands the argument only once, so an argument with side effects is not
  * evaluated a second time for finite values.
  */
 #define fpe(x) (!isfinite(x))

 /* kernels */
 /*
  * Prototypes only: the kernel bodies are not part of this header, so the
  * behavioural notes below are inferred from names and signatures and must be
  * confirmed against the definitions.
  *
  * Naming visible here: each kernel comes in a pair, "vd..." operating on
  * double arrays and "vs..." operating on float arrays. Every kernel takes an
  * int n as its first argument (presumably the element count -- TODO confirm).
  * Pointers declared `const ... __restrict__` are read-only inputs; the
  * remaining non-const pointer is the one the kernel is able to write.
  */

 /* Presumably stores val in each of the n elements of x -- TODO confirm. */
 __global__ void vdmemset_cuda(const int n, double *x, const double val);
 __global__ void vsmemset_cuda(const int n, float  *x, const float  val);

 /* Two read-only inputs x, y and one writable array z; presumably an
    element-wise division whose result goes to z -- operand order to be
    confirmed against the definition. */
 __global__ void vddiv_cuda(const int n, const double* __restrict__ x, const double* __restrict__ y, double *z);
 __global__ void vsdiv_cuda(const int n, const float*  __restrict__ x, const float*  __restrict__ y, float  *z);

 /* One read-only input x and one writable array y; presumably an element-wise
    subtraction that updates y in place -- TODO confirm. */
 __global__ void vdsub_cuda(const int n, const double* __restrict__ x, double *y);
 __global__ void vssub_cuda(const int n, const float*  __restrict__ x, float  *y);

 /* "errorbd0" / "errorbd1" / "errorbdg": presumably per-element error terms
    for three cases of a beta parameter (0, 1 and general), with y updated in
    place -- TODO confirm against the definitions. */
 __global__ void vderrorbd0_cuda(const int n, const double* __restrict__ x, double *y);
 __global__ void vserrorbd0_cuda(const int n, const float*  __restrict__ x, float  *y);

 __global__ void vderrorbd1_cuda(const int n, const double* __restrict__ x, double *y);
 __global__ void vserrorbd1_cuda(const int n, const float*  __restrict__ x, float  *y);

 /* NOTE(review): the single-precision variant below takes beta as
    `const double`, whereas the host function serrorbd_cuda declared in this
    header takes `const float beta`. Confirm this is intentional; the
    prototype must keep matching the kernel definition. */
 __global__ void vderrorbdg_cuda(const int n, const double* __restrict__ x, double *y, const double beta);
 __global__ void vserrorbdg_cuda(const int n, const float*  __restrict__ x, float  *y, const double beta);


 /* wrappers for the kernels */
 /*
  * Host-side entry points (no __global__ qualifier). Their bodies are not part
  * of this header; the notes below are inferred from names and signatures and
  * must be confirmed against the definitions. "d..." variants use double,
  * "s..." variants use float.
  */

 /* Take a cudaStream_t; presumably launch the matching v?memset kernel on
    that stream -- TODO confirm. */
 void dmemset_cuda(const int n, double *x, const double val, cudaStream_t stream);
 void smemset_cuda(const int n, float  *x, const float  val, cudaStream_t stream);

 /* Note: these take two arrays (x read-only, y writable) while the v?div
    kernels take three; presumably y serves as both an operand and the
    destination -- TODO confirm. */
 void ddiv_cuda(const int n, const double *x, double *y, cudaStream_t stream);
 void sdiv_cuda(const int n, const float  *x, float  *y, cudaStream_t stream);

 /* No stream parameter here, unlike the memset/div wrappers above. */
 void dsub_cuda(const int n, const double *x, double *y);
 void ssub_cuda(const int n, const float  *x, float  *y);

 /* Presumably fills an m-by-n array x with random values generated from
    seed -- distribution and generator to be confirmed against the
    definition. */
 void dlarngenn_cuda(const int m, const int n, const int seed, double *x);
 void slarngenn_cuda(const int m, const int n, const int seed, float  *x);

 /* Return a scalar computed from three read-only arrays and the dimensions
    m, n, k; presumably an approximation error between x and the product of
    y and z -- TODO confirm which norm and which operand shapes. */
 double derror_cuda(const int m, const int n, const int k, const double *x, const double *y, const double *z);
 float  serror_cuda(const int m, const int n, const int k, const float  *x, const float  *y, const float  *z);

 /* Same shape of interface as ?error_cuda plus a beta parameter; presumably
    a beta-dependent error measure between A and the product of W and H --
    TODO confirm. */
 double derrorbd_cuda(const int m, const int n, const int k, const double *A, const double *W, const double *H, const double beta);
 float  serrorbd_cuda(const int m, const int n, const int k, const float  *A, const float  *W, const float  *H, const float  beta);

 #endif
smemset_cuda
void smemset_cuda(const int n, float *x, const float val, cudaStream_t stream)

ssub_cuda
void ssub_cuda(const int n, const float *x, float *y)

vserrorbdg_cuda
__global__ void vserrorbdg_cuda(const int n, const float *__restrict__ x, float *y, const double beta)

vdmemset_cuda
__global__ void vdmemset_cuda(const int n, double *x, const double val)

dmemset_cuda
void dmemset_cuda(const int n, double *x, const double val, cudaStream_t stream)

vdsub_cuda
__global__ void vdsub_cuda(const int n, const double *__restrict__ x, double *y)

serror_cuda
float serror_cuda(const int m, const int n, const int k, const float *x, const float *y, const float *z)

vserrorbd1_cuda
__global__ void vserrorbd1_cuda(const int n, const float *__restrict__ x, float *y)

serrorbd_cuda
float serrorbd_cuda(const int m, const int n, const int k, const float *A, const float *W, const float *H, const float beta)

dsub_cuda
void dsub_cuda(const int n, const double *x, double *y)

vderrorbdg_cuda
__global__ void vderrorbdg_cuda(const int n, const double *__restrict__ x, double *y, const double beta)

derror_cuda
double derror_cuda(const int m, const int n, const int k, const double *x, const double *y, const double *z)

slarngenn_cuda
void slarngenn_cuda(const int m, const int n, const int seed, float *x)

vssub_cuda
__global__ void vssub_cuda(const int n, const float *__restrict__ x, float *y)

vserrorbd0_cuda
__global__ void vserrorbd0_cuda(const int n, const float *__restrict__ x, float *y)

vderrorbd0_cuda
__global__ void vderrorbd0_cuda(const int n, const double *__restrict__ x, double *y)

vsdiv_cuda
__global__ void vsdiv_cuda(const int n, const float *__restrict__ x, const float *__restrict__ y, float *z)

derrorbd_cuda
double derrorbd_cuda(const int m, const int n, const int k, const double *A, const double *W, const double *H, const double beta)

vsmemset_cuda
__global__ void vsmemset_cuda(const int n, float *x, const float val)

ddiv_cuda
void ddiv_cuda(const int n, const double *x, double *y, cudaStream_t stream)

vddiv_cuda
__global__ void vddiv_cuda(const int n, const double *__restrict__ x, const double *__restrict__ y, double *z)

sdiv_cuda
void sdiv_cuda(const int n, const float *x, float *y, cudaStream_t stream)

vderrorbd1_cuda
__global__ void vderrorbd1_cuda(const int n, const double *__restrict__ x, double *y)

dlarngenn_cuda
void dlarngenn_cuda(const int m, const int n, const int seed, double *x)