ReMAS  1.5
Real-time Musical Accompaniment System
GPUFunctions.h
Go to the documentation of this file.
1 /**************************************************************************
2  * Copyright (C) 2017 by "Information Retrieval and Parallel Computing" *
3  * group (University of Oviedo, Spain), "Interdisciplinary Computation *
4  * and Communication" group (Polytechnic University of Valencia, Spain) *
5  * and "Signal Processing and Telecommunication Systems Research" group *
6  * (University of Jaen, Spain) *
7  * Contact: remaspack@gmail.com *
8  * *
9  * This program is free software; you can redistribute it and/or modify *
10  * it under the terms of the GNU General Public License as published by *
11  * the Free Software Foundation; either version 2 of the License, or *
12  * (at your option) any later version. *
13  * *
14  * This program is distributed in the hope that it will be useful, *
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
17  * GNU General Public License for more details. *
18  * *
19  * You should have received a copy of the GNU General Public License *
20  * along with this program; if not, write to the *
21  * Free Software Foundation, Inc., *
22  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
23  **************************************************************************
24 */
34 #pragma once
35 
36 #ifndef PREPROCESS_H
37 #define PREPROCESS_H
38 
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <math.h>
43 #include <float.h>
44 
45 /* CUDA-C includes */
46 #include <cuda_runtime.h>
47 
48 /* CuFFT includes */
49 #include <cufft.h>
50 
51 /* CuBLAS includes */
52 #include <cublas_v2.h>
53 
54 #ifdef ALSA
55  #include <asoundlib.h>
56 #endif
57 
58 #include "../common/defines.h"
59 
60 
61 /* ******************************** Preprocessing Function Prototypes **************************** */
62 void BlocksAndThreads(int*, int*, int*, const int, const int);
63 int HaveCompatibleGPU(int &);
64 inline bool IsPow2(unsigned int);
65 unsigned int NextPow2(unsigned int);
66 
67 
68 int FFTGPU(MyType*, MyType*, MyFFTGPUType*);
69 
70 void InitSxD(MyType *, MyType *, const MyType* __restrict__, const int* __restrict__, const int, const int);
71 
72 int AllocAuxiGPU(MyType **, short **, short **, MyType **, MyType **, const int, const int, const int);
73 
74 int AllocDataGPU(MyType **, int **, int **, int **, int **, int **, int *, const int, const int, DTWfiles);
75 
76 int AllocDTWGPU(MyType **, MyType **, MyType **, const int, const int, const int);
77 
78 int AllocFFTGPU(MyFFTGPUType *, MyType **, MyType **, MyType **, int*, int*, const int, DTWfiles);
79 
80 int AllocS_fkGPU(MyType **, MyType **, MyType **, const MyType, const int, const int, DTWfiles);
81 
82 int OneImin (MyType *, int *, MyType *, const int, const int);
83 int FirstImin(MyType *, int *, MyType *, const int, const int);
84 int LastImin (MyType *, int *, MyType *, const int, const int);
85 
86 int ReadWavGPU1st(short *, short *, FILE *);
87 int ReadWavGPU (short *, short *, FILE *);
88 #ifdef ALSA
89  int ReadAlsaGPU1st(short *, short *, snd_pcm_t *, FILE *);
90  int ReadAlsaGPU (short *, short *, snd_pcm_t *, FILE *);
91 #endif
92 
93 /* ********************************* Preprocessing Kernel Prototypes ***************************** */
94 __global__ void kernel_ApplyWindow(MyType* __restrict__, const short* __restrict__, const MyType* __restrict__, const int, const int);
95 
96 __global__ void kernel_InitDTW(MyType* __restrict__, const int, const int);
97 
98 __global__ void kernel_CompNorB0(MyType* __restrict__, const MyType, const int);
99 
100 __global__ void kernel_CompNorB1(MyType* __restrict__, const MyType* __restrict__, const int, const int);
101 
102 __global__ void kernel_CompNorBG(MyType* __restrict__, MyType* __restrict__, const MyType* __restrict__, const int, const MyType, const int);
103 
104 __global__ void kernel_PowToReal(MyType* __restrict__, const MyType* __restrict__, const MyType, const int);
105 
106 __global__ void kernel_Cfreq(MyType* __restrict__, const MyType* __restrict__);
107 
108 __global__ void kernel_Modul(MyType* __restrict__, const MyType* __restrict__, const int);
109 
110 __global__ void kernel_Reduction(MyType* __restrict__, const int);
111 
112 __global__ void kernel_InitSxD(MyType* __restrict__, MyType* __restrict__, const MyType* __restrict__,
113  const int* __restrict__, const int, const bool, const int);
114 
115 __global__ void kernel_Sum(MyType* __restrict__, const MyType* __restrict__, const int, const bool, const int);
116 
117 __global__ void kernel_Vnorm(MyType* __restrict__);
118 
119 __global__ void kernel_UpdateSxD(MyType* __restrict__, const MyType, const MyType* __restrict__, const int);
120 
121 __global__ void kernel_DTW(const MyType* __restrict__, MyType* __restrict__, MyType* __restrict__, int* __restrict__,
122  const int, const int, const int);
123 
124 __global__ void kernel_CompDisB0(MyType* __restrict__, const MyType* __restrict__, const MyType* __restrict__,
125  const MyType* __restrict__, const int, const int);
126 
127 __global__ void kernel_CompDisB1(MyType* __restrict__, const MyType* __restrict__, const MyType* __restrict__,
128  const MyType* __restrict__, const int, const int);
129 
130 __global__ void kernel_CompDisBG(MyType* __restrict__, const MyType* __restrict__, const MyType* __restrict__,
131  const MyType* __restrict__, const MyType* __restrict__, const MyType* __restrict__,
132  const MyType, const int, const int);
133 
134 __global__ void kernel_Shift(short* __restrict__, const int, const int);
135 
136 __global__ void kernel_OneImin(MyType* __restrict__, int* __restrict__, const MyType* __restrict__, const int,
137  const bool, const int);
138 
139 __global__ void kernel_FirstImin(MyType* __restrict__, int* __restrict__, const MyType* __restrict__, const int,
140  const bool, const int);
141 
142 __global__ void kernel_LastImin(MyType* __restrict__, int* __restrict__, const MyType* __restrict__, const int,
143  const bool, const int);
144 
145 __global__ void kernel_OneIminLast(MyType* __restrict__, int* __restrict__, const MyType* __restrict__,
146  const int* __restrict__, const int, const bool, const int);
147 
148 __global__ void kernel_FirstIminLast(MyType* __restrict__, int* __restrict__, const MyType* __restrict__,
149  const int* __restrict__, const int, const bool, const int);
150 
151 __global__ void kernel_LastIminLast(MyType* __restrict__, int* __restrict__, const MyType* __restrict__,
152  const int* __restrict__, const int, const bool, const int);
153 
154 #endif
__global__ void kernel_PowToReal(MyType *__restrict__ dest, const MyType *__restrict__ src, const MyType ex, const int size)
kernel_PowToReal This cuda kernel raises the elements of a vector to a real power and stores them in...
Definition: kernels.cuh:481
int AllocDTWGPU(MyType **, MyType **, MyType **, const int, const int, const int)
AllocDTWGPU Allocates memory for DTW vectors and auxiliary structures.
Struct for storing the names of input/verification files. Each composition needs a file with values for ...
Definition: defines.h:228
__global__ void kernel_Sum(MyType *__restrict__ odata, const MyType *__restrict__ idata, const int blockSize, const bool SizeIsPow2, const int size)
kernel_Sum This cuda kernel adds the elements of a vector.
Definition: kernels.cuh:260
int LastImin(MyType *, int *, MyType *, const int, const int)
This function launches cuda kernels to find the last minimum and its position.
__global__ void kernel_LastIminLast(MyType *__restrict__ odata, int *__restrict__ opos, const MyType *__restrict__ idata, const int *__restrict__ ipos, const int blockSize, const bool SizeIsPow2, const int size)
kernel_LastIminLast used with kernel_LastImin to calculate the position of the last minimum in a vec...
Definition: kernels.cuh:1376
__global__ void kernel_ApplyWindow(MyType *__restrict__ X_fft, const short *__restrict__ frame, const MyType *__restrict__ v_hanning, const int TTRA, const int NFFT)
kernel_ApplyWindow scales and sets the elements of the audio vector X_fft
Definition: kernels.cuh:345
void BlocksAndThreads(int *, int *, int *, const int, const int)
BlocksAndThreads calculates the suitable number of blocks and threads, and the needed shared memory...
__global__ void kernel_FirstIminLast(MyType *__restrict__ odata, int *__restrict__ opos, const MyType *__restrict__ idata, const int *__restrict__ ipos, const int blockSize, const bool SizeIsPow2, const int size)
kernel_FirstIminLast used with kernel_OneImin to calculate the position of the first minimum in a ve...
Definition: kernels.cuh:1192
int HaveCompatibleGPU(int &)
HaveCompatibleGPU checks if the system has an appropriate GPU for ReMAS.
Definition: GPUFunctions.cu:75
__global__ void kernel_Shift(short *__restrict__ frame, const int TTRAMA, const int TMUEST)
kernel_Shift shifts the vector elements TMUEST positions on the left
Definition: kernels.cuh:880
__global__ void kernel_Vnorm(MyType *__restrict__ odata)
kernel_Vnorm This cuda kernel initializes position 0 of a vector
Definition: kernels.cuh:326
int ReadWavGPU1st(short *, short *, FILE *)
ReadWavGPU1st reads first audio (frame) from WAV file when NVIDIA GPU is used.
__global__ void kernel_LastImin(MyType *__restrict__ odata, int *__restrict__ opos, const MyType *__restrict__ idata, const int blockSize, const bool SizeIsPow2, const int size)
kernel_LastImin calculates the position of the last minimum in a vector
Definition: kernels.cuh:1283
int ReadWavGPU(short *, short *, FILE *)
ReadWavGPU reads the current audio (frame) from a WAV file when an NVIDIA GPU is used.
int FFTGPU(MyType *, MyType *, MyFFTGPUType *)
FFTGPU computes FFT.
__global__ void kernel_CompNorB1(MyType *__restrict__ norms, const MyType *__restrict__ s_fk, const int NMIDI, const int size)
kernel_CompNorB1 This cuda kernel computes the norm of a vector when BETA=1
Definition: kernels.cuh:404
__global__ void kernel_CompNorBG(MyType *__restrict__ norms, MyType *__restrict__ ts_fk, const MyType *__restrict__ s_fk, const int NMIDI, const MyType BETA, const int size)
kernel_CompNorBG This cuda kernel computes the norm of a vector when BETA <> 0 and BETA <> 1 ...
Definition: kernels.cuh:440
unsigned int NextPow2(unsigned int)
NextPow2 returns the next power of 2 of a given number.
Definition: GPUFunctions.cu:47
__global__ void kernel_InitSxD(MyType *__restrict__ odata, MyType *__restrict__ v_SxD, const MyType *__restrict__ v_dxState, const int *__restrict__ I_SxD, const int blockSize, const bool SizeIsPow2, const int size)
kernel_InitSxD This cuda kernel sets up the vector SxD.
Definition: kernels.cuh:185
int OneImin(MyType *, int *, MyType *, const int, const int)
This function launches cuda kernels to find one minimum and its position.
__global__ void kernel_OneIminLast(MyType *__restrict__ odata, int *__restrict__ opos, const MyType *__restrict__ idata, const int *__restrict__ ipos, const int blockSize, const bool SizeIsPow2, const int size)
kernel_OneIminLast used with kernel_OneImin to calculate the position of one minimum in a vector ...
Definition: kernels.cuh:1012
int AllocAuxiGPU(MyType **, short **, short **, MyType **, MyType **, const int, const int, const int)
AllocAuxiGPU memory reservation for norms, frame, v_cfreq and v_dxState vectors.
__global__ void kernel_OneImin(MyType *__restrict__ odata, int *__restrict__ opos, const MyType *__restrict__ idata, const int blockSize, const bool SizeIsPow2, const int size)
kernel_OneImin calculates the position of one minimum in a vector
Definition: kernels.cuh:923
__global__ void kernel_Cfreq(MyType *__restrict__ dest, const MyType *__restrict__ src)
kernel_Cfreq This cuda kernel computes sqrt(sum of elements of a vector) and stores it in dest[0] ...
Definition: kernels.cuh:527
void InitSxD(MyType *odata, MyType *v_SxD, const MyType *__restrict__ v_dxState, const int *__restrict__ I_SxD, const int maxGrid, const int size)
InitSxD launches the cuda kernel that sets up the vector SxD when "Unified" GPU memory is used...
int AllocDataGPU(MyType **, int **, int **, int **, int **, int **, int *, const int, const int, DTWfiles)
AllocDataGPU Allocates memory and initializes some structures reading info from files.
__global__ void kernel_FirstImin(MyType *__restrict__ odata, int *__restrict__ opos, const MyType *__restrict__ idata, const int blockSize, const bool SizeIsPow2, const int size)
kernel_FirstImin calculates the position of the first minimum in a vector
Definition: kernels.cuh:1099
__global__ void kernel_Modul(MyType *__restrict__ dest, const MyType *__restrict__ src, const int size)
kernel_Modul This cuda kernel computes the modulus of elements of a vector and stores them in other v...
Definition: kernels.cuh:503
__global__ void kernel_CompNorB0(MyType *__restrict__ norms, const MyType value, const int size)
kernel_CompNorB0 This cuda kernel computes the norm of a vector when BETA=0
Definition: kernels.cuh:386
__global__ void kernel_UpdateSxD(MyType *__restrict__ dest, const MyType ALPHA, const MyType *__restrict__ norm, const int size)
kernel_UpdateSxD This cuda kernel updates the elements of the SxD vector
Definition: kernels.cuh:364
bool IsPow2(unsigned int)
IsPow2 decides if a number is power of 2.
Definition: GPUFunctions.cu:66
int AllocS_fkGPU(MyType **, MyType **, MyType **, const MyType, const int, const int, DTWfiles)
AllocS_fkGPU Allocates memory for S_fk vector, reads its data from file and initializes other auxiliary...
int FirstImin(MyType *, int *, MyType *, const int, const int)
This function launches cuda kernels to find the first minimum and its position.
__global__ void kernel_InitDTW(MyType *__restrict__ pV, const int pos, const int size)
kernel_InitDTW This cuda kernel initializes DTW vector
Definition: kernels.cuh:108
__global__ void kernel_DTW(const MyType *__restrict__ Sequence, MyType *__restrict__ pD, const int NSeq, const int Where, const int NST)
kernel_DTW This cuda kernel performs the Online-DTW process for the current frame ...
Definition: kernels.cuh:135
__global__ void kernel_Reduction(MyType *__restrict__ dest, const int size)
kernel_Reduction This cuda kernel performs a typical sum-reduction of a vector
Definition: kernels.cuh:560
int AllocFFTGPU(MyFFTGPUType *, MyType **, MyType **, MyType **, int *, int *, const int, DTWfiles)
AllocFFTGPU Allocates "Unified" GPU memory for FFT vector and reads some fft information from files...