// Device constant-memory table of T_COSTS values of type MyType.
// main() fills it from the host array Costs with cudaMemcpyToSymbol.
35 #include "../common/TimeFunctions.h" 40 #include "../common/FileFunctions.h" 43 __constant__ MyType CCosts[T_COSTS];
// Device constant-memory array of N_MIDI ints; main() copies the host array
// kmax_fft into it with cudaMemcpyToSymbol.
// NOTE(review): presumably the upper FFT bin index per MIDI note -- confirm against the kernels that read it.
44 __constant__
int Ckmax_fft[N_MIDI];
// Device constant-memory array of N_MIDI ints; main() copies the host array
// kmin_fft into it with cudaMemcpyToSymbol.
// NOTE(review): presumably the lower FFT bin index per MIDI note -- confirm against the kernels that read it.
45 __constant__
int Ckmin_fft[N_MIDI];
49 #include "../common/SoundFunctions.h" 50 #include "../common/TempoFunctions.h" 51 #include "../common/NetFunctions.h" 60 int main (
int argc ,
char *argv[])
65 GridNBASES, GridNFFTd2,
67 GridDTWSize, halfWarp=sizeWarp/2;
101 prob_silen = (MyType)0.75,
102 prob_audio = (MyType)0.25;
121 *states_time_i = NULL,
122 *states_time_e = NULL,
128 *frame=NULL, *tmpframe=NULL;
135 snd_pcm_t *SoundHandle=NULL;
136 snd_pcm_uframes_t SoundBufferSize;
141 lo_address DirOSC[MaxOSC];
148 TEMPORL.NextFrame=0; TEMPORL.PrevState=0; TEMPORL.SynthSpeed=1.0; TEMPORL.SoloSpeed=1.0; TEMPORL.numap=1;
149 TEMPORL.SynthTime=0.0; TEMPORL.matched=1; TEMPORL.AudioTimeAP[0]=0; TEMPORL.ScoreTimeAP[0]=0;
152 bool UseOSC=
false, UseMic=
false;
158 int i, j, NumTramas=0;
163 MyType *blockdat=NULL;
172 BETA=strtof(argv[1], NULL);
174 BETA=strtod(argv[1], NULL);
176 TPBlock=atoi(argv[3]);
186 BETA=strtof(argv[1], NULL);
188 BETA=strtod(argv[1], NULL);
190 TPBlock=atoi(argv[3]);
191 UseOSC =atoi(argv[4]);
192 UseMic =atoi(argv[5]);
195 printf(
"General usage: %s <BETA> <configuration file> <threadsPerBlock>\n", argv[0]);
196 printf(
" Example: %s 1.5 parametes.dat 64\n\n", argv[0]);
197 printf(
"Docker usage: %s <BETA> <configuration file> <threadsPerBlock> <OSC yes|no [1|0]> <alsa yes|no [1|0]>\n", argv[0]);
198 printf(
" Example: %s 1.5 parametes.dat 64 1 1\n\n", argv[0]);
206 CUDAERR(cudaDeviceSetSharedMemConfig(cudaSharedMemBankSizeEightByte));
209 CUBLASERR(cublasCreate(&handle));
210 CUDAERR(cudaEventCreate(&start));
211 CUDAERR(cudaEventCreate(&stop));
217 CHECKERR(
AllocDataGPU(&v_hanning, &states_time_i, &states_time_e, &states_seq, &states_corr, &I_SxD, &DTWSize, TAMTRAMA, Param.
N_STATES, NameFiles));
220 DTWSizePlusPad =(DTWSize + N_COSTS) * (N_COSTS + 1);
221 CHECKERR(
AllocDTWGPU(&pD, &v_SxD, &sdata, maxGrid, DTWSize, DTWSizePlusPad));
227 CHECKERR(
AllocFFTGPU(&plan, &X_fft, &Out_fft, &Mod_fft, kmin_fft, kmax_fft, N_FFT, NameFiles));
230 CHECKERR(
AllocAuxiGPU(&norms, &frame, &tmpframe, &v_cfreq, &v_dxState, Param.
N_BASES, TAMTRAMA, N_MIDI));
242 CUDAERR(cudaMemcpyToSymbol(Ckmax_fft, kmax_fft,
sizeof(kmax_fft)));
243 CUDAERR(cudaMemcpyToSymbol(Ckmin_fft, kmin_fft,
sizeof(kmin_fft)));
244 CUDAERR(cudaMemcpyToSymbol(CCosts, Costs,
sizeof(Costs)));
247 CHECKNULL(preload=(
int *)calloc(DTWSize,
sizeof(
int)));
249 for (i=0; i<DTWSize; i++)
251 if (i > states_time_e[j]) j++;
257 GridNBASES = (Param.
N_BASES + TPBlock - 1) / TPBlock;
258 GridNFFT = (N_FFT + TPBlock - 1) / TPBlock;
259 GridNFFTd2 = (N_FFTdiv2 + TPBlock) / TPBlock;
260 GridDTWSize = (DTWSize + TPBlock - 1) / TPBlock;
261 GridNMID32 = (N_MIDI + sizeWarp- 1) / sizeWarp;
263 dim3 Grid2D((Param.
N_BASES + halfWarp - 1)/halfWarp, 1);
264 dim3 TPBl2D(sizeWarp, halfWarp);
269 for (i=0; i<Param.
NCliOSC; i++)
272 CHECKERR(lo_address_errno(DirOSC[i]=lo_address_new(Param.
HostIP[i], Param.
HostPort[i])));
283 SoundBufferSize=SetMicParams(&SoundHandle, Param);
284 if (SoundBufferSize <=0) CHECKERR(ErrAlsaHw);
287 CHECKNULL(fp=fopen(
"FramesMicRecorded.pcm",
"w"));
289 NumTramas = (Param.
Time_MIC * AlsaRate) / TAMMUESTRA;
291 CHECKNULL(fp=fopen(NameFiles.
file_frame,
"rb"));
294 NumTramas = (WavHeader.
num_samples - TAMTRAMA) / TAMMUESTRA;
297 CHECKNULL(fp=fopen(NameFiles.
file_frame,
"rb"));
300 NumTramas = (WavHeader.
num_samples - TAMTRAMA) / TAMMUESTRA;
308 kernel_InitDTW<<<(DTWSizePlusPad+TPBlock-1)/TPBlock, TPBlock>>>(pD, N_COSTS, DTWSizePlusPad);
311 if (BETA>=0.0 && BETA<=0.0)
312 kernel_CompNorB0<<<GridNBASES, TPBlock>>>(norms, (MyType)N_MIDI, Param.
N_BASES);
313 else if (BETA>=1.0 && BETA<=1.0)
314 kernel_CompNorB1<<<Grid2D, TPBl2D>>>(norms, s_fk, N_MIDI, Param.
N_BASES);
316 kernel_CompNorBG<<<Grid2D, TPBl2D>>>(norms, ts_fk, s_fk, N_MIDI, BETA, Param.
N_BASES);
319 int blockSize, numThreads, sharedSize;
321 CUDAERR(cudaMallocManaged((
void **)&blockdat,
sizeof(MyType)*blockSize,cudaMemAttachGlobal));
322 CUDAERR(cudaMallocManaged((
void **)&blockpos,
sizeof(
int) *blockSize,cudaMemAttachGlobal));
328 CHECKERR(ReadAlsaGPU1st(frame, tmpframe, SoundHandle, fp));
336 printf(
"Listening ...\n");
344 CHECKERR(ReadAlsaGPU(frame, tmpframe, SoundHandle, fp));
352 kernel_ApplyWindow<<<GridNFFT, TPBlock>>>(X_fft, frame, v_hanning, TAMTRAMA, N_FFT);
354 CHECKERR(
FFTGPU(X_fft, Out_fft, &plan));
355 kernel_Modul<<<GridNFFTd2, TPBlock>>>(Mod_fft, Out_fft, N_FFTdiv2);
356 kernel_Cfreq<<<N_MIDI, sizeWarp>>>(v_cfreq, Mod_fft);
358 if (BETA>=0.0 && BETA<=0.0) {
360 kernel_Reduction<<<1, sizeWarp>>>(v_cfreq, N_MIDI);
361 kernel_BetaNorm<<<1, N_MIDI>>>(v_cfreq, N_MIDI);
362 kernel_CompDisB0<<<Grid2D, TPBl2D>>>(v_dxState, v_cfreq, norms, s_fk, N_MIDI, Param.
N_BASES);
364 else if (BETA>=1.0 && BETA<=1.0) {
365 kernel_Reduction<<<1, sizeWarp>>>(v_cfreq, N_MIDI);
366 kernel_BetaNorm <<<1, N_MIDI>>>(v_cfreq, N_MIDI);
367 kernel_Reduction<<<1, sizeWarp>>>(v_cfreq, N_MIDI);
368 kernel_CompDisB1<<<Grid2D, TPBl2D>>>(v_dxState, v_cfreq, norms, s_fk, N_MIDI, Param.
N_BASES);
371 kernel_ReductionPowBeta<<<1, sizeWarp>>>(v_cfreq, BETA, N_MIDI);
372 kernel_BetaNorm <<<1, N_MIDI>>>(v_cfreq, N_MIDI);
373 kernel_PowToReal<<<GridNMID32, sizeWarp>>>(tauxi, v_cfreq, BETA, N_MIDI);
374 kernel_CompDisBG<<<Grid2D, TPBl2D>>>(v_dxState, v_cfreq, norms, s_fk, ts_fk, tauxi, BETA, N_MIDI, Param.
N_BASES);
376 cudaDeviceSynchronize();
377 Silence =
DetectSilence((v_dxState[1]-v_dxState[0]), &prob_silen, &prob_audio);
382 if (UseOSC)
for (i=0; i<Param.
NCliOSC; i++) { CHECKERR(SendPlay(DirOSC[i])); CHECKERR(SendTempo(DirOSC[i], 110)); }
387 CUDAERR(cudaDeviceSynchronize());
388 CUDAERR(cudaEventRecord(start, 0));
391 for(i=1; i<=NumTramas; i++)
393 kernel_ApplyWindow<<<GridNFFT, TPBlock>>>(X_fft, frame, v_hanning, TAMTRAMA, N_FFT);
395 CHECKERR(
FFTGPU(X_fft, Out_fft, &plan));
396 kernel_Modul<<<GridNFFTd2, TPBlock>>>(Mod_fft, Out_fft, N_FFTdiv2);
397 kernel_Cfreq<<<N_MIDI, sizeWarp>>>(v_cfreq, Mod_fft);
399 if (BETA>=0.0 && BETA<=0.0) {
400 kernel_CompDisB0<<<Grid2D, TPBl2D>>>(v_dxState, v_cfreq, norms, s_fk, N_MIDI, Param.
N_BASES);
402 else if (BETA>=1.0 && BETA<=1.0) {
403 kernel_Reduction<<<1, sizeWarp>>>(v_cfreq, N_MIDI);
404 kernel_CompDisB1<<<Grid2D, TPBl2D>>>(v_dxState, v_cfreq, norms, s_fk, N_MIDI, Param.
N_BASES);
407 kernel_PowToReal<<<GridNMID32, sizeWarp>>>(tauxi, v_cfreq, BETA, N_MIDI);
408 kernel_CompDisBG<<<Grid2D, TPBl2D>>>(v_dxState, v_cfreq, norms, s_fk, ts_fk, tauxi, BETA, N_MIDI, Param.
N_BASES);
411 InitSxD(sdata, v_SxD, v_dxState, I_SxD, maxGrid, DTWSize);
412 kernel_UpdateSxD<<<GridDTWSize, TPBlock>>>(v_SxD, Param.
ALPHA, sdata, DTWSize);
414 DTWWhere=(i % TBLOCK) * (N_COSTS + DTWSize) + N_COSTS;
415 kernel_DTW<<<GridDTWSize, TPBlock>>>(v_SxD, pD, i, DTWWhere, DTWSize);
417 CUBLASERR(cublasIsamin(handle, DTWSize, &pD[DTWWhere], 1, &pos_min));
419 CUBLASERR(cublasIdamin(handle, DTWSize, &pD[DTWWhere], 1, &pos_min));
429 numThreads=
OneImin(blockdat, blockpos, &pD[DTWWhere], maxGrid, DTWSize);
430 blockSize =
FirstImin(blockdat, blockpos, &pD[DTWWhere], maxGrid, DTWSize);
431 sharedSize=
LastImin(blockdat, blockpos, &pD[DTWWhere], maxGrid, DTWSize);
433 printf(
"Frame %d, pos cuBlas %d, pos FirstMin %d, pos LastMin %d, pos OneMin %d\n",
434 i, pos_min, blockSize, sharedSize, numThreads);
441 CHECKERR(ComputeTempoOSCRL(&TEMPORL, i, pos_min, preload[pos_min], states_corr, DirOSC, Param.
NCliOSC));
443 ComputeTempoRL(&TEMPORL, i, pos_min, preload[pos_min], states_corr);
445 ComputeTempoRL(&TEMPORL, i, pos_min, preload[pos_min], states_corr);
456 CHECKERR(ReadAlsaGPU(frame, tmpframe, SoundHandle, fp));
463 CUDAERR(cudaEventRecord(stop, 0));
464 CUDAERR(cudaEventSynchronize(stop));
465 CUDAERR(cudaEventElapsedTime(&time, start, stop));
467 printf(
"%f msec.\n", time);
472 for (i=0; i<Param.
NCliOSC; i++) { CHECKERR(SendTempo(DirOSC[i], 100)); CHECKERR(SendPlay(DirOSC[i])); }
483 CHECKERR(snd_pcm_close(SoundHandle));
484 }
else { fclose(fp); }
496 cudaEventDestroy(start);
497 cudaEventDestroy(stop);
499 CUBLASERR(cublasDestroy(handle));
501 CUFFTERR(cufftDestroy(plan));
503 if (!(BETA>=(MyType)0.0 && BETA<=(MyType)0.0) && !(BETA>=(MyType)1.0 && BETA<=(MyType)1.0)) {
504 CUDAERR(cudaFree(tauxi));
505 CUDAERR(cudaFree(ts_fk));
507 CUDAERR(cudaFree(I_SxD));
508 CUDAERR(cudaFree(Mod_fft));
509 CUDAERR(cudaFree(norms));
510 CUDAERR(cudaFree(Out_fft));
511 CUDAERR(cudaFree(pD));
512 CUDAERR(cudaFree(sdata));
513 CUDAERR(cudaFree(v_cfreq));
514 CUDAERR(cudaFree(v_dxState));
515 CUDAERR(cudaFree(v_hanning));
516 CUDAERR(cudaFree(s_fk));
517 CUDAERR(cudaFree(frame));
518 CUDAERR(cudaFreeHost(tmpframe));
519 CUDAERR(cudaFree(v_SxD));
520 CUDAERR(cudaFree(X_fft));
Struct for storing the names of input/verification files. Each composition needs a file with values for ...
Struct for computing tempos.
int ReadWavGPU1st(short *GPUframe, short *CPUframe, FILE *fp)
ReadWavGPU1st reads the first audio (frame) from a WAV file when an NVIDIA GPU is used.
int Read_WAVHeader(WAVHeader *, FILE *)
Read_WAVHeader reads the header of a WAVE file, checks its compatibility and fills the Header struct...
int FirstImin(MyType *odata, int *opos, MyType *idata, const int maxGrid, const int size)
This function launches CUDA kernels to find the first minimum and its position.
int OneImin(MyType *odata, int *opos, MyType *idata, const int maxGrid, const int size)
This function launches CUDA kernels to find one minimum and its position.
void FreeFiles(DTWfiles *NameFiles)
FreeFiles frees the reserved memory of a struct.
int FFTGPU(MyType *X_fft, MyType *Out_fft, MyFFTGPUType *plan)
FFTGPU computes FFT.
Header file for using ReMAS with Nvidia GPUs.
int AllocS_fkGPU(MyType **s_fk, MyType **tauxi, MyType **ts_fk, const MyType BETA, const int nmidi, const int nbases, DTWfiles NameFiles)
AllocS_fkGPU allocates memory for the S_fk vector, reads its data from file and initializes other auxiliary...
int AllocFFTGPU(MyFFTGPUType *plan, MyType **X_fft, MyType **Out_fft, MyType **Mod_fft, int *kmin_fft, int *kmax_fft, const int nfft, DTWfiles NameFiles)
AllocFFTGPU allocates "Unified" GPU memory for the FFT vectors and reads some FFT information from files...
int AllocAuxiGPU(MyType **norms, short **GPUframe, short **CPUframe, MyType **v_cfreq, MyType **v_dxState, const int nbases, const int tamframe, const int nmidi)
AllocAuxiGPU reserves memory for the norms, frame, v_cfreq and v_dxState vectors.
int ReadWavGPU(short *GPUframe, short *CPUframe, FILE *fp)
ReadWavGPU reads the current audio (frame) from a WAV file when an NVIDIA GPU is used.
int AllocDataGPU(MyType **v_hanning, int **states_time_i, int **states_time_e, int **states_seq, int **states_corr, int **I_SxD, int *DTWSize, const int tamtrama, const int nstates, DTWfiles NameFiles)
AllocDataGPU allocates memory and initializes some structures by reading info from files.
void ComputeTempoRL(STempoRL *, int, int, int, int *)
ComputeTempoRL calculates tempo and controls synthesizer speed using linear regression.
int main(int argc, char *argv[])
main is the entry point to ReMAS, classical C main program.
int HaveCompatibleGPU(int &maxGrid)
HaveCompatibleGPU checks if the system has an appropriate GPU for ReMAS.
bool DetectSilence(MyType, MyType *, MyType *)
DetectSilence checks whether audio (frame) is silence or audio.
void InitSxD(MyType *odata, MyType *v_SxD, const MyType *__restrict__ v_dxState, const int *__restrict__ I_SxD, const int maxGrid, const int size)
InitSxD launches the cuda kernel that sets up the vector SxD when "Unified" GPU memory is used...
int ReadParameters(DTWconst *Param, DTWfiles *NameFiles, const char *filename)
ReadParameters reads ReMAS global parameters from file.
int LastImin(MyType *odata, int *opos, MyType *idata, const int maxGrid, const int size)
This function launches CUDA kernels to find the last minimum and its position.
int AllocDTWGPU(MyType **pV, MyType **v_SxD, MyType **sdata, const int maxGrid, const int DTWSize, const int DTWSizePlusPad)
AllocDTWGPU allocates memory for DTW vectors and auxiliary structures.
Struct for storing global information of the problem. Each composition needs a file with values for th...
File with ReMAS kernels for Nvidia GPUs.
__global__ void kernel_InitDTW(MyType *__restrict__ pV, const int pos, const int size)
kernel_InitDTW: this CUDA kernel initializes the DTW vector.
void BlocksAndThreads(int *blocks, int *threads, int *sharedsize, const int maxGrid, const int size)
BlocksAndThreads calculates the suitable number of blocks and threads, and the needed shared memory...