42 void dmemset_x86(
const int n,
double *__restrict__ x,
const double val)
47 #pragma loop_count min=1024 51 #pragma omp parallel for 66 void smemset_x86(
const int n,
float *__restrict__ x,
const float val)
71 #pragma loop_count min=1024 75 #pragma omp parallel for 91 void ddiv_x86(
const int n,
const double *x,
double *__restrict__ y)
98 #pragma loop_count min=32 102 #pragma omp parallel for 110 assert(isfinite(y[i]));
127 void sdiv_x86(
const int n,
const float *x ,
float *__restrict__ y)
134 #pragma loop_count min=32 138 #pragma omp parallel for 146 assert(isfinite(y[i]));
162 void dsub_x86(
const int n,
const double *x,
double *__restrict__ y)
169 #pragma loop_count min=512 173 #pragma omp parallel for 190 void ssub_x86(
const int n,
const float *x,
float *__restrict__ y)
197 #pragma loop_count min=512 201 #pragma omp parallel for 227 distribucion_elem=seed % 4095;
229 if ((distribucion_elem % 2) == 0) distribucion_elem++;
231 iseed[0]=iseed[1]=iseed[2]=iseed[3]=distribucion_elem;
232 distribucion_elem = m*n;
235 dlarnv(&distribucion_tipo, iseed, &distribucion_elem, X);
237 dlarnv_(&distribucion_tipo, iseed, &distribucion_elem, X);
258 distribucion_elem=seed % 4095;
260 if ((distribucion_elem % 2) == 0) distribucion_elem++;
262 iseed[0]=iseed[1]=iseed[2]=iseed[3]=distribucion_elem;
263 distribucion_elem = m*n;
266 slarnv(&distribucion_tipo, iseed, &distribucion_elem, X);
268 slarnv_(&distribucion_tipo, iseed, &distribucion_elem, X);
283 double derror_x86(
const int m,
const int n,
const int k,
const double *A,
const double *W,
const double *H)
290 tmp = (
double *)mkl_malloc(m*n*
sizeof(
double), WRDLEN);
293 tmp = (
double *)malloc(m*n*
sizeof(
double));
295 tmp = (
double *)_mm_malloc(m*n*
sizeof(
double), WRDLEN);
299 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0, W, m, H, k, 0.0, tmp, m);
303 error=cblas_dnrm2(m*n, tmp, 1);
315 return (error / sqrt(m*n));
329 float serror_x86(
const int m,
const int n,
const int k,
const float *A,
const float *W,
const float *H)
336 tmp = (
float *)mkl_malloc(m*n*
sizeof(
float), WRDLEN);
339 tmp = (
float *)malloc(m*n*
sizeof(
float));
341 tmp = (
float *)_mm_malloc(m*n*
sizeof(
float), WRDLEN);
345 cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0, W, m, H, k, 0.0, tmp, m);
349 error=cblas_snrm2(m*n, tmp, 1);
361 return (error / sqrtf((
float)m*n));
void dlarnv_(int *, int *, int *, double *)
float serror_x86(const int m, const int n, const int k, const float *A, const float *W, const float *H)
serror_x86 returns single precision "2norm(A - WH) / sqrt(m x n)"
void dlarngenn_x86(const int m, const int n, const int seed, double *X)
dlarngenn_x86 returns an (m x n) random double precision matrix. A uniform (0, 1) distribution is us...
void dsub_x86(const int n, const double *x, double *__restrict__ y)
This function performs double precision element-wise subtraction y[i]=x[i]-y[i]. ...
void ssub_x86(const int n, const float *x, float *__restrict__ y)
This function performs single precision element-wise subtraction y[i]=x[i]-y[i]. ...
Header file for using utility modules from CPU/MIC source codes.
void slarnv_(int *, int *, int *, float *)
void ddiv_x86(const int n, const double *x, double *__restrict__ y)
This function calls the appropriate functions to perform double precision element-wise y[i]=x[i]/y[i] ...
double derror_x86(const int m, const int n, const int k, const double *A, const double *W, const double *H)
derror_x86 returns double precision "2norm(A - WH) / sqrt(m x n)"
void sdiv_x86(const int n, const float *x, float *__restrict__ y)
This function calls the appropriate functions to perform single precision element-wise y[i]=x[i]/y[i] ...
void smemset_x86(const int n, float *__restrict__ x, const float val)
This function fills all positions of x with val.
void dmemset_x86(const int n, double *__restrict__ x, const double val)
This function fills all positions of x with val.
void slarngenn_x86(const int m, const int n, const int seed, float *X)
slarngenn_x86 returns an (m x n) random single precision matrix. A uniform (0, 1) distribution is us...