46 #include <cuda_runtime.h> 52 #include <cublas_v2.h> 55 #include <asoundlib.h> 58 #include "../common/defines.h" 64 inline bool IsPow2(
unsigned int);
68 int FFTGPU(MyType*, MyType*, MyFFTGPUType*);
70 void InitSxD(MyType *, MyType *,
const MyType* __restrict__,
const int* __restrict__,
const int,
const int);
72 int AllocAuxiGPU(MyType **,
short **,
short **, MyType **, MyType **,
const int,
const int,
const int);
74 int AllocDataGPU(MyType **,
int **,
int **,
int **,
int **,
int **,
int *,
const int,
const int,
DTWfiles);
76 int AllocDTWGPU(MyType **, MyType **, MyType **,
const int,
const int,
const int);
78 int AllocFFTGPU(MyFFTGPUType *, MyType **, MyType **, MyType **,
int*,
int*,
const int,
DTWfiles);
82 int OneImin (MyType *,
int *, MyType *,
const int,
const int);
83 int FirstImin(MyType *,
int *, MyType *,
const int,
const int);
84 int LastImin (MyType *,
int *, MyType *,
const int,
const int);
89 int ReadAlsaGPU1st(
short *,
short *, snd_pcm_t *, FILE *);
90 int ReadAlsaGPU (
short *,
short *, snd_pcm_t *, FILE *);
94 __global__
void kernel_ApplyWindow(MyType* __restrict__,
const short* __restrict__,
const MyType* __restrict__,
const int,
const int);
96 __global__
void kernel_InitDTW(MyType* __restrict__,
const int,
const int);
98 __global__
void kernel_CompNorB0(MyType* __restrict__,
const MyType,
const int);
100 __global__
void kernel_CompNorB1(MyType* __restrict__,
const MyType* __restrict__,
const int,
const int);
102 __global__
void kernel_CompNorBG(MyType* __restrict__, MyType* __restrict__,
const MyType* __restrict__,
const int,
const MyType,
const int);
104 __global__
void kernel_PowToReal(MyType* __restrict__,
const MyType* __restrict__,
const MyType,
const int);
106 __global__
void kernel_Cfreq(MyType* __restrict__,
const MyType* __restrict__);
108 __global__
void kernel_Modul(MyType* __restrict__,
const MyType* __restrict__,
const int);
112 __global__
void kernel_InitSxD(MyType* __restrict__, MyType* __restrict__,
const MyType* __restrict__,
113 const int* __restrict__,
const int,
const bool,
const int);
115 __global__
void kernel_Sum(MyType* __restrict__,
const MyType* __restrict__,
const int,
const bool,
const int);
119 __global__
void kernel_UpdateSxD(MyType* __restrict__,
const MyType,
const MyType* __restrict__,
const int);
121 __global__
void kernel_DTW(
const MyType* __restrict__, MyType* __restrict__, MyType* __restrict__,
int* __restrict__,
122 const int,
const int,
const int);
124 __global__
void kernel_CompDisB0(MyType* __restrict__,
const MyType* __restrict__,
const MyType* __restrict__,
125 const MyType* __restrict__,
const int,
const int);
127 __global__
void kernel_CompDisB1(MyType* __restrict__,
const MyType* __restrict__,
const MyType* __restrict__,
128 const MyType* __restrict__,
const int,
const int);
130 __global__
void kernel_CompDisBG(MyType* __restrict__,
const MyType* __restrict__,
const MyType* __restrict__,
131 const MyType* __restrict__,
const MyType* __restrict__,
const MyType* __restrict__,
132 const MyType,
const int,
const int);
134 __global__
void kernel_Shift(
short* __restrict__,
const int,
const int);
136 __global__
void kernel_OneImin(MyType* __restrict__,
int* __restrict__,
const MyType* __restrict__,
const int,
137 const bool,
const int);
139 __global__
void kernel_FirstImin(MyType* __restrict__,
int* __restrict__,
const MyType* __restrict__,
const int,
140 const bool,
const int);
142 __global__
void kernel_LastImin(MyType* __restrict__,
int* __restrict__,
const MyType* __restrict__,
const int,
143 const bool,
const int);
145 __global__
void kernel_OneIminLast(MyType* __restrict__,
int* __restrict__,
const MyType* __restrict__,
146 const int* __restrict__,
const int,
const bool,
const int);
148 __global__
void kernel_FirstIminLast(MyType* __restrict__,
int* __restrict__,
const MyType* __restrict__,
149 const int* __restrict__,
const int,
const bool,
const int);
151 __global__
void kernel_LastIminLast(MyType* __restrict__,
int* __restrict__,
const MyType* __restrict__,
152 const int* __restrict__,
const int,
const bool,
const int);
__global__ void kernel_PowToReal(MyType *__restrict__ dest, const MyType *__restrict__ src, const MyType ex, const int size)
kernel_PowToReal This CUDA kernel raises the elements of a vector to a real power and stores them in...
int AllocDTWGPU(MyType **, MyType **, MyType **, const int, const int, const int)
AllocDTWGPU Allocates memory for DTW vectors and auxiliary structures.
Struct for storing the names of input/verification files. Each composition needs a file with values for ...
__global__ void kernel_Sum(MyType *__restrict__ odata, const MyType *__restrict__ idata, const int blockSize, const bool SizeIsPow2, const int size)
kernel_Sum This cuda kernel adds the elements of a vector.
int LastImin(MyType *, int *, MyType *, const int, const int)
This function launches CUDA kernels to find the last minimum and its position.
__global__ void kernel_LastIminLast(MyType *__restrict__ odata, int *__restrict__ opos, const MyType *__restrict__ idata, const int *__restrict__ ipos, const int blockSize, const bool SizeIsPow2, const int size)
kernel_LastIminLast is used with kernel_LastImin to calculate the position of the last minimum in a vec...
__global__ void kernel_ApplyWindow(MyType *__restrict__ X_fft, const short *__restrict__ frame, const MyType *__restrict__ v_hanning, const int TTRA, const int NFFT)
kernel_ApplyWindow scales and sets the elements of the audio vector X_fft
void BlocksAndThreads(int *, int *, int *, const int, const int)
BlocksAndThreads calculates the suitable number of blocks and threads, and the needed shared memory...
__global__ void kernel_FirstIminLast(MyType *__restrict__ odata, int *__restrict__ opos, const MyType *__restrict__ idata, const int *__restrict__ ipos, const int blockSize, const bool SizeIsPow2, const int size)
kernel_FirstIminLast is used with kernel_OneImin to calculate the position of the first minimum in a ve...
int HaveCompatibleGPU(int &)
HaveCompatibleGPU checks if the system has an appropriate GPU for ReMAS.
__global__ void kernel_Shift(short *__restrict__ frame, const int TTRAMA, const int TMUEST)
kernel_Shift shifts the vector elements TMUEST positions to the left
__global__ void kernel_Vnorm(MyType *__restrict__ odata)
kernel_Vnorm This cuda kernel initializes position 0 of a vector
int ReadWavGPU1st(short *, short *, FILE *)
ReadWavGPU1st reads first audio (frame) from WAV file when NVIDIA GPU is used.
__global__ void kernel_LastImin(MyType *__restrict__ odata, int *__restrict__ opos, const MyType *__restrict__ idata, const int blockSize, const bool SizeIsPow2, const int size)
kernel_LastImin calculates the position of the last minimum in a vector
int ReadWavGPU(short *, short *, FILE *)
ReadWavGPU reads the current audio (frame) from a WAV file when an NVIDIA GPU is used.
int FFTGPU(MyType *, MyType *, MyFFTGPUType *)
FFTGPU computes FFT.
__global__ void kernel_CompNorB1(MyType *__restrict__ norms, const MyType *__restrict__ s_fk, const int NMIDI, const int size)
kernel_CompNorB1 This cuda kernel computes the norm of a vector when BETA=1
__global__ void kernel_CompNorBG(MyType *__restrict__ norms, MyType *__restrict__ ts_fk, const MyType *__restrict__ s_fk, const int NMIDI, const MyType BETA, const int size)
kernel_CompNorBG This cuda kernel computes the norm of a vector when BETA <> 0 and BETA <> 1 ...
unsigned int NextPow2(unsigned int)
NextPow2 returns the next power of 2 of a given number.
__global__ void kernel_InitSxD(MyType *__restrict__ odata, MyType *__restrict__ v_SxD, const MyType *__restrict__ v_dxState, const int *__restrict__ I_SxD, const int blockSize, const bool SizeIsPow2, const int size)
kernel_InitSxD This cuda kernel sets up the vector SxD.
int OneImin(MyType *, int *, MyType *, const int, const int)
This function launches CUDA kernels to find one minimum and its position.
__global__ void kernel_OneIminLast(MyType *__restrict__ odata, int *__restrict__ opos, const MyType *__restrict__ idata, const int *__restrict__ ipos, const int blockSize, const bool SizeIsPow2, const int size)
kernel_OneIminLast is used with kernel_OneImin to calculate the position of one minimum in a vector ...
int AllocAuxiGPU(MyType **, short **, short **, MyType **, MyType **, const int, const int, const int)
AllocAuxiGPU reserves memory for the norms, frame, v_cfreq and v_dxState vectors.
__global__ void kernel_OneImin(MyType *__restrict__ odata, int *__restrict__ opos, const MyType *__restrict__ idata, const int blockSize, const bool SizeIsPow2, const int size)
kernel_OneImin calculates the position of one minimum in a vector
__global__ void kernel_Cfreq(MyType *__restrict__ dest, const MyType *__restrict__ src)
kernel_Cfreq This cuda kernel computes sqrt(sum of elements of a vector) and stores it in dest[0] ...
void InitSxD(MyType *odata, MyType *v_SxD, const MyType *__restrict__ v_dxState, const int *__restrict__ I_SxD, const int maxGrid, const int size)
InitSxD launches the cuda kernel that sets up the vector SxD when "Unified" GPU memory is used...
int AllocDataGPU(MyType **, int **, int **, int **, int **, int **, int *, const int, const int, DTWfiles)
AllocDataGPU Allocates memory and initializes some structures reading info from files.
__global__ void kernel_FirstImin(MyType *__restrict__ odata, int *__restrict__ opos, const MyType *__restrict__ idata, const int blockSize, const bool SizeIsPow2, const int size)
kernel_FirstImin calculates the position of the first minimum in a vector
__global__ void kernel_Modul(MyType *__restrict__ dest, const MyType *__restrict__ src, const int size)
kernel_Modul This CUDA kernel computes the modulus of the elements of a vector and stores them in another v...
__global__ void kernel_CompNorB0(MyType *__restrict__ norms, const MyType value, const int size)
kernel_CompNorB0 This cuda kernel computes the norm of a vector when BETA=0
__global__ void kernel_UpdateSxD(MyType *__restrict__ dest, const MyType ALPHA, const MyType *__restrict__ norm, const int size)
kernel_UpdateSxD This CUDA kernel updates the elements of the SxD vector
bool IsPow2(unsigned int)
IsPow2 decides if a number is power of 2.
int AllocS_fkGPU(MyType **, MyType **, MyType **, const MyType, const int, const int, DTWfiles)
AllocS_fkGPU Allocates memory for the S_fk vector, reads its data from file and initializes other auxiliar...
int FirstImin(MyType *, int *, MyType *, const int, const int)
This function launches CUDA kernels to find the first minimum and its position.
__global__ void kernel_InitDTW(MyType *__restrict__ pV, const int pos, const int size)
kernel_InitDTW This cuda kernel initializes DTW vector
__global__ void kernel_DTW(const MyType *__restrict__ Sequence, MyType *__restrict__ pD, const int NSeq, const int Where, const int NST)
kernel_DTW This cuda kernel performs the Online-DTW process for the current frame ...
__global__ void kernel_Reduction(MyType *__restrict__ dest, const int size)
kernel_Reduction This cuda kernel performs a typical sum-reduction of a vector
int AllocFFTGPU(MyFFTGPUType *, MyType **, MyType **, MyType **, int *, int *, const int, DTWfiles)
AllocFFTGPU Allocates "Unified" GPU memory for FFT vector and reads some fft information from files...