NnmfPack  2.1
utils_cuda.h
Go to the documentation of this file.
1 /***************************************************************************
2  * Copyright (C) 2014 by PIR (University of Oviedo) and *
3  * INCO2 (Polytechnic University of Valencia) groups. *
4  * nnmfpack@gmail.com *
5  * *
6  * This program is free software; you can redistribute it and/or modify *
7  * it under the terms of the GNU General Public License as published by *
8  * the Free Software Foundation; either version 2 of the License, or *
9  * (at your option) any later version. *
10  * *
11  * This program is distributed in the hope that it will be useful, *
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14  * GNU General Public License for more details. *
15  * *
16  * You should have received a copy of the GNU General Public License *
17  * along with this program; if not, write to the *
18  * Free Software Foundation, Inc., *
19  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
20  ***************************************************************************
21 */
32 #ifndef UTILSCUDA_H
33 #define UTILSCUDA_H
34 
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <math.h>
38 #include <cuda.h>
39 #include <curand.h>
40 #include <cuda_runtime.h>
41 #include <cublas_v2.h>
42 #include <assert.h>
43 
/*
 * CUDAERR(x): check the status produced by a CUDA runtime call.
 *
 * x is evaluated exactly once and its value is kept in a local variable.
 * (Expanding x a second time inside cudaGetErrorString() would re-run a
 * failing call such as cudaMalloc()/cudaMemcpy() on the error path and could
 * report a different status than the one that was tested.)
 *
 * When the status differs from cudaSuccess the macro prints the error string,
 * the file name and the line number to stdout and makes the ENCLOSING function
 * return EXIT_FAILURE; it can therefore only be used inside functions whose
 * return type accepts an int.
 */
#define CUDAERR(x) do { \
    const cudaError_t cudaerr_status_ = (x); \
    if (cudaerr_status_ != cudaSuccess) { \
        printf("CUDA error: %s : %s, line %d\n", \
               cudaGetErrorString(cudaerr_status_), __FILE__, __LINE__); \
        return EXIT_FAILURE; \
    } \
} while(0)
47 
/*
 * CUBLASERR(x): check the status produced by a cuBLAS call.
 *
 * x is compared once against CUBLAS_STATUS_SUCCESS. On any other value the
 * macro prints the file name and line number to stdout (the status value
 * itself is not printed) and makes the ENCLOSING function return
 * EXIT_FAILURE, so it can only be used inside functions whose return type
 * accepts an int.
 */
#define CUBLASERR(x)                                                   \
    do {                                                               \
        if (CUBLAS_STATUS_SUCCESS != (x)) {                            \
            printf("CUBLAS error: %s, line %d\n", __FILE__, __LINE__); \
            return EXIT_FAILURE;                                       \
        }                                                              \
    } while (0)
51 
/*
 * max(a,b): yields a when a > b, otherwise b (so b is the result on a tie).
 * Plain textual macro: the selected argument is expanded twice, so do not
 * pass expressions with side effects (e.g. max(i++, j)).
 */
#define max(a,b) ((a) > (b) ? (a) : (b))
/*
 * min(a,b): yields a when a < b, otherwise b (so b is the result on a tie).
 * Plain textual macro: the selected argument is expanded twice, so do not
 * pass expressions with side effects (e.g. min(i++, j)).
 */
#define min(a,b) ((a) < (b) ? (a) : (b))
54 
/*
 * fpe(x): true (non-zero) when x is NaN or +/-infinity, false when x is a
 * finite value.
 *
 * Written as a single !isfinite(x) test so that x is expanded only once;
 * the equivalent isnan(x) || isinf(x) form expands x twice whenever x is
 * not NaN, which repeats any side effect or costly sub-expression in x.
 */
#define fpe(x) (!isfinite(x))
56 
57 /* kernels */
/*
 * Device kernel declarations; the definitions are not part of this header.
 * Every kernel is declared in two flavours: a `vd...` one working on double
 * data and a `vs...` one working on float data. All of them take the element
 * count n as first argument; pointers marked `const ... __restrict__` are
 * read-only inputs, the remaining pointer is writable.
 *
 * NOTE(review): the one-line summaries below are inferred from the names and
 * signatures only -- confirm each against the kernel definition.
 */
/* Presumably fill the n elements of x with val -- TODO confirm. */
58 __global__ void vdmemset_cuda(const int n, double *x, const double val);
59 __global__ void vsmemset_cuda(const int n, float *x, const float val);
60 
/* Two read-only inputs x, y and one writable array z; presumably an
 * element-wise division of x and y stored in z -- TODO confirm operand order. */
61 __global__ void vddiv_cuda(const int n, const double* __restrict__ x, const double* __restrict__ y, double *z);
62 __global__ void vsdiv_cuda(const int n, const float* __restrict__ x, const float* __restrict__ y, float *z);
63 
/* One read-only input x and one writable array y; presumably an element-wise
 * subtraction updating y in place -- TODO confirm operand order. */
64 __global__ void vdsub_cuda(const int n, const double* __restrict__ x, double *y);
65 __global__ void vssub_cuda(const int n, const float* __restrict__ x, float *y);
66 
/* NOTE(review): the errorbd0 / errorbd1 / errorbdg names look like the
 * beta = 0, beta = 1 and general-beta cases of an error computation (only the
 * `g` variant takes a beta argument); the formula is not visible here. */
67 __global__ void vderrorbd0_cuda(const int n, const double* __restrict__ x, double *y);
68 __global__ void vserrorbd0_cuda(const int n, const float* __restrict__ x, float *y);
69 
70 __global__ void vderrorbd1_cuda(const int n, const double* __restrict__ x, double *y);
71 __global__ void vserrorbd1_cuda(const int n, const float* __restrict__ x, float *y);
72 
/* NOTE(review): the float variant below takes beta as `const double`, whereas
 * the host-side serrorbd_cuda() declared in this header takes `const float
 * beta` -- check whether the mixed precision is intentional. */
73 __global__ void vderrorbdg_cuda(const int n, const double* __restrict__ x, double *y, const double beta);
74 __global__ void vserrorbdg_cuda(const int n, const float* __restrict__ x, float *y, const double beta);
75 
76 
77 /* wrappers for the kernels */
/*
 * Host-callable function declarations; the definitions are not part of this
 * header. As with the kernels, a `d...` name works on double data and an
 * `s...` name on float data.
 *
 * NOTE(review): the summaries below are inferred from the names and
 * signatures only -- confirm against the definitions. In particular, whether
 * the pointer arguments must be device pointers is not visible here.
 */
/* Take a cudaStream_t; presumably launch the v?memset kernel on that stream. */
78 void dmemset_cuda(const int n, double *x, const double val, cudaStream_t stream);
79 void smemset_cuda(const int n, float *x, const float val, cudaStream_t stream);
80 
/* NOTE(review): these take two arrays (x read-only, y writable) while the
 * v?div kernels take three (x, y, z) -- presumably y is both operand and
 * result; confirm. */
81 void ddiv_cuda(const int n, const double *x, double *y, cudaStream_t stream);
82 void sdiv_cuda(const int n, const float *x, float *y, cudaStream_t stream);
83 
/* Unlike the memset and div wrappers, these take no cudaStream_t argument. */
84 void dsub_cuda(const int n, const double *x, double *y);
85 void ssub_cuda(const int n, const float *x, float *y);
86 
/* NOTE(review): the name and the `seed` parameter suggest filling an m-by-n
 * array x with generated random values (this header includes <curand.h>) --
 * confirm distribution and layout against the definition. */
87 void dlarngenn_cuda(const int m, const int n, const int seed, double *x);
88 void slarngenn_cuda(const int m, const int n, const int seed, float *x);
89 
/* Return a scalar in the precision of the inputs. Presumably an error measure
 * computed from the read-only arrays x, y, z with dimensions m, n, k -- the
 * exact norm and the role of each array are not visible here. */
90 double derror_cuda(const int m, const int n, const int k, const double *x, const double *y, const double *z);
91 float serror_cuda(const int m, const int n, const int k, const float *x, const float *y, const float *z);
92 
/* Same shape as ?error_cuda plus a beta parameter; presumably a
 * beta-divergence error between A and the product of W and H -- TODO confirm. */
93 double derrorbd_cuda(const int m, const int n, const int k, const double *A, const double *W, const double *H, const double beta);
94 float serrorbd_cuda(const int m, const int n, const int k, const float *A, const float *W, const float *H, const float beta);
95 
96 #endif
/*
 * The lines below repeat, in a different order and without terminating
 * semicolons, the signatures already declared above; they add no new
 * declarations. NOTE(review): they appear to be cross-reference entries left
 * over from the generated documentation page rather than part of the header.
 */
void smemset_cuda(const int n, float *x, const float val, cudaStream_t stream)
void ssub_cuda(const int n, const float *x, float *y)
__global__ void vserrorbdg_cuda(const int n, const float *__restrict__ x, float *y, const double beta)
__global__ void vdmemset_cuda(const int n, double *x, const double val)
void dmemset_cuda(const int n, double *x, const double val, cudaStream_t stream)
__global__ void vdsub_cuda(const int n, const double *__restrict__ x, double *y)
float serror_cuda(const int m, const int n, const int k, const float *x, const float *y, const float *z)
__global__ void vserrorbd1_cuda(const int n, const float *__restrict__ x, float *y)
float serrorbd_cuda(const int m, const int n, const int k, const float *A, const float *W, const float *H, const float beta)
void dsub_cuda(const int n, const double *x, double *y)
__global__ void vderrorbdg_cuda(const int n, const double *__restrict__ x, double *y, const double beta)
double derror_cuda(const int m, const int n, const int k, const double *x, const double *y, const double *z)
void slarngenn_cuda(const int m, const int n, const int seed, float *x)
__global__ void vssub_cuda(const int n, const float *__restrict__ x, float *y)
__global__ void vserrorbd0_cuda(const int n, const float *__restrict__ x, float *y)
__global__ void vderrorbd0_cuda(const int n, const double *__restrict__ x, double *y)
__global__ void vsdiv_cuda(const int n, const float *__restrict__ x, const float *__restrict__ y, float *z)
double derrorbd_cuda(const int m, const int n, const int k, const double *A, const double *W, const double *H, const double beta)
__global__ void vsmemset_cuda(const int n, float *x, const float val)
void ddiv_cuda(const int n, const double *x, double *y, cudaStream_t stream)
__global__ void vddiv_cuda(const int n, const double *__restrict__ x, const double *__restrict__ y, double *z)
void sdiv_cuda(const int n, const float *x, float *y, cudaStream_t stream)
__global__ void vderrorbd1_cuda(const int n, const double *__restrict__ x, double *y)
void dlarngenn_cuda(const int m, const int n, const int seed, double *x)