NMFPack/utils__x86_8c_source.html

 /***************************************************************************
  *   Copyright (C) 2014 by PIR (University of Oviedo) and                  *
  *   INCO2 (Polytechnic University of Valencia) groups.                    *
  *   nnmfpack@gmail.com                                                    *
  *                                                                         *
  *   This program is free software; you can redistribute it and/or modify  *
  *   it under the terms of the GNU General Public License as published by  *
  *   the Free Software Foundation; either version 2 of the License, or     *
  *   (at your option) any later version.                                   *
  *                                                                         *
  *   This program is distributed in the hope that it will be useful,       *
  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
  *   GNU General Public License for more details.                          *
  *                                                                         *
  *   You should have received a copy of the GNU General Public License     *
  *   along with this program; if not, write to the                         *
  *   Free Software Foundation, Inc.,                                       *
  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
  ***************************************************************************
 */
 #include "utils_x86.h"
 #include <math.h>


 /**
  *  Store val into x[0] .. x[n-1] (double precision).
  *
  *  \param n   number of elements to write; nothing is written when n <= 0
  *  \param x   destination vector
  *  \param val value copied into every position
  *
  *  When With_ICC is defined the loop carries loop_count/simd hints;
  *  otherwise, when With_OMP is defined, it is an OpenMP parallel for.
  */
 void dmemset_x86(const int n, double *__restrict__ x, const double val)
 {
   int pos;

   #if defined(With_ICC)
     #pragma loop_count min=1024
     #pragma simd
   #elif defined(With_OMP)
     #pragma omp parallel for
   #endif
   for (pos = 0; pos < n; ++pos)
   {
     x[pos] = val;
   }
 }


 /**
  *  Store val into x[0] .. x[n-1] (single precision).
  *
  *  \param n   number of elements to write; nothing is written when n <= 0
  *  \param x   destination vector
  *  \param val value copied into every position
  *
  *  When With_ICC is defined the loop carries loop_count/simd hints;
  *  otherwise, when With_OMP is defined, it is an OpenMP parallel for.
  */
 void smemset_x86(const int n, float *__restrict__ x, const float val)
 {
   int pos;

   #if defined(With_ICC)
     #pragma loop_count min=1024
     #pragma simd
   #elif defined(With_OMP)
     #pragma omp parallel for
   #endif
   for (pos = 0; pos < n; ++pos)
   {
     x[pos] = val;
   }
 }


 /**
  *  Element-wise double precision division, in place on y:
  *  y[i] = x[i] / y[i] for i = 0 .. n-1.
  *
  *  \param n number of elements
  *  \param x numerators (read only)
  *  \param y denominators on entry, quotients on exit
  *
  *  With With_MKL defined the work is delegated to vdDiv(n, x, y, y).
  *  Otherwise a plain loop is used, and when With_Check is defined every
  *  quotient is asserted to be finite.
  */
 void ddiv_x86(const int n, const double *x,  double *__restrict__ y)
 {
   #ifdef With_MKL
     vdDiv(n, x, y, y);
   #else
     int pos;

     #if defined(With_ICC)
       #pragma loop_count min=32
       #pragma simd
     #elif defined(With_OMP)
       #pragma omp parallel for
     #endif
     for (pos = 0; pos < n; ++pos)
     {
       /* A zero in x[pos] and/or y[pos] can produce NaN or Inf here */
       y[pos] = x[pos] / y[pos];
       #ifdef With_Check
         assert(isfinite(y[pos]));
       #endif
     }
   #endif
 }

 /**
  *  Element-wise single precision division, in place on y:
  *  y[i] = x[i] / y[i] for i = 0 .. n-1.
  *
  *  \param n number of elements
  *  \param x numerators (read only)
  *  \param y denominators on entry, quotients on exit
  *
  *  With With_MKL defined the work is delegated to vsDiv(n, x, y, y).
  *  Otherwise a plain loop is used, and when With_Check is defined every
  *  quotient is asserted to be finite.
  */
 void sdiv_x86(const int n, const float *x , float *__restrict__ y)
 {
   #ifdef With_MKL
     vsDiv(n, x, y, y);
   #else
     int pos;

     #if defined(With_ICC)
       #pragma loop_count min=32
       #pragma simd
     #elif defined(With_OMP)
       #pragma omp parallel for
     #endif
     for (pos = 0; pos < n; ++pos)
     {
       /* A zero in x[pos] and/or y[pos] can produce NaN or Inf here */
       y[pos] = x[pos] / y[pos];
       #ifdef With_Check
         assert(isfinite(y[pos]));
       #endif
     }
   #endif
 }


 /**
  *  Element-wise double precision subtraction, in place on y:
  *  y[i] = x[i] - y[i] for i = 0 .. n-1.
  *
  *  \param n number of elements
  *  \param x minuends (read only)
  *  \param y subtrahends on entry, differences on exit
  *
  *  With With_MKL defined the work is delegated to vdSub(n, x, y, y);
  *  otherwise a plain (optionally vectorised or OpenMP) loop is used.
  */
 void dsub_x86(const int n, const double *x, double *__restrict__ y)
 {
   #ifdef With_MKL
     vdSub(n, x, y, y);
   #else
     int pos;

     #if defined(With_ICC)
       #pragma loop_count min=512
       #pragma simd
     #elif defined(With_OMP)
       #pragma omp parallel for
     #endif
     for (pos = 0; pos < n; ++pos)
     {
       /* Testing x[pos] or y[pos] against 0.0 first gave no improvement, */
       /* so every element is subtracted unconditionally.                 */
       y[pos] = x[pos] - y[pos];
     }
   #endif
 }


 /**
  *  Element-wise single precision subtraction, in place on y:
  *  y[i] = x[i] - y[i] for i = 0 .. n-1.
  *
  *  \param n number of elements
  *  \param x minuends (read only)
  *  \param y subtrahends on entry, differences on exit
  *
  *  With With_MKL defined the work is delegated to vsSub(n, x, y, y);
  *  otherwise a plain (optionally vectorised or OpenMP) loop is used.
  */
 void ssub_x86(const int n, const float *x, float *__restrict__ y)
 {
   #ifdef With_MKL
     vsSub(n, x, y, y);
   #else
     int pos;

     #if defined(With_ICC)
       #pragma loop_count min=512
       #pragma simd
     #elif defined(With_OMP)
       #pragma omp parallel for
     #endif
     for (pos = 0; pos < n; ++pos)
     {
       /* Testing x[pos] or y[pos] against 0.0 first gave no improvement, */
       /* so every element is subtracted unconditionally.                 */
       y[pos] = x[pos] - y[pos];
     }
   #endif
 }


 /**
  *  Fill X with m*n double precision pseudo-random values by calling
  *  LAPACK dlarnv with distribution type 1 (uniform on (0, 1)).
  *
  *  \param m    number of rows of X
  *  \param n    number of columns of X
  *  \param seed user seed; any int is accepted, it is reduced to a valid
  *              LAPACK seed as described below
  *  \param X    output buffer with room for m*n doubles
  *
  *  dlarnv requires every iseed[] entry to lie in [0, 4095] and the last
  *  one to be odd. The four entries are all set to the same value, derived
  *  from seed: the remainder modulo 4095 is shifted into the non-negative
  *  range (the C remainder of a negative seed is negative) and then bumped
  *  to the next odd number when it is even.
  */
 void dlarngenn_x86(const int m, const int n, const int seed, double *X)
 {
   int
     iseed[4],
     distribucion_tipo=1,
     distribucion_elem=0;

   distribucion_elem=seed % 4095;

   /* seed < 0 leaves a negative remainder, which LAPACK does not allow */
   if (distribucion_elem < 0) distribucion_elem += 4095;

   if ((distribucion_elem % 2) == 0) distribucion_elem++;

   iseed[0]=iseed[1]=iseed[2]=iseed[3]=distribucion_elem;

   /* from here on the variable holds the number of values to generate */
   distribucion_elem = m*n;

   #ifdef With_MKL
     dlarnv(&distribucion_tipo, iseed, &distribucion_elem, X);
   #else
     dlarnv_(&distribucion_tipo, iseed, &distribucion_elem, X);
   #endif
 }


 /**
  *  Fill X with m*n single precision pseudo-random values by calling
  *  LAPACK slarnv with distribution type 1 (uniform on (0, 1)).
  *
  *  \param m    number of rows of X
  *  \param n    number of columns of X
  *  \param seed user seed; any int is accepted, it is reduced to a valid
  *              LAPACK seed as described below
  *  \param X    output buffer with room for m*n floats
  *
  *  slarnv requires every iseed[] entry to lie in [0, 4095] and the last
  *  one to be odd. The four entries are all set to the same value, derived
  *  from seed: the remainder modulo 4095 is shifted into the non-negative
  *  range (the C remainder of a negative seed is negative) and then bumped
  *  to the next odd number when it is even.
  */
 void slarngenn_x86(const int m, const int n, const int seed, float *X)
 {
   int
     iseed[4],
     distribucion_tipo=1,
     distribucion_elem=0;

   distribucion_elem=seed % 4095;

   /* seed < 0 leaves a negative remainder, which LAPACK does not allow */
   if (distribucion_elem < 0) distribucion_elem += 4095;

   if ((distribucion_elem % 2) == 0) distribucion_elem++;

   iseed[0]=iseed[1]=iseed[2]=iseed[3]=distribucion_elem;

   /* from here on the variable holds the number of values to generate */
   distribucion_elem = m*n;

   #ifdef With_MKL
     slarnv(&distribucion_tipo, iseed, &distribucion_elem, X);
   #else
     slarnv_(&distribucion_tipo, iseed, &distribucion_elem, X);
   #endif
 }


 /**
  *  Double precision approximation error of a factorisation:
  *  2norm(A - W*H) / sqrt(m*n).
  *
  *  \param m number of rows of A and W
  *  \param n number of columns of A and H
  *  \param k number of columns of W and rows of H
  *  \param A (m x n) matrix, column-major, leading dimension m
  *  \param W (m x k) matrix, column-major, leading dimension m
  *  \param H (k x n) matrix, column-major, leading dimension k
  *  \return the scaled norm, or NaN when the (m x n) scratch buffer cannot
  *          be allocated
  *
  *  A temporary m*n buffer is allocated on every call (mkl_malloc, malloc
  *  or _mm_malloc depending on With_MKL / With_ARM) and released before
  *  returning.
  */
 double derror_x86(const int m, const int n, const int k, const double *A, const double *W, const double *H)
 {
   double
     error=0.0,
     *tmp =NULL;

   #ifdef With_MKL
     tmp = (double *)mkl_malloc(m*n*sizeof(double), WRDLEN);
   #else
     #ifdef With_ARM
       tmp = (double *)malloc(m*n*sizeof(double));
     #else
       tmp = (double *)_mm_malloc(m*n*sizeof(double), WRDLEN);
     #endif
   #endif

   /* Without the scratch buffer the BLAS calls below would write through */
   /* a null pointer; report "no result" instead of crashing.             */
   if (tmp == NULL)
     return NAN;

   /* tmp = W*H */
   cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0, W, m, H, k, 0.0, tmp, m);

   /* tmp = A - tmp */
   dsub_x86(m*n, A, tmp);

   error=cblas_dnrm2(m*n, tmp, 1);

   #ifdef With_MKL
     mkl_free(tmp);
   #else
     #ifdef With_ARM
       free(tmp);
     #else
       _mm_free(tmp);
     #endif
   #endif

   /* multiply in floating point, as serror_x86 does */
   return (error / sqrt((double)m*n));
 }


 /**
  *  Single precision approximation error of a factorisation:
  *  2norm(A - W*H) / sqrt(m*n).
  *
  *  \param m number of rows of A and W
  *  \param n number of columns of A and H
  *  \param k number of columns of W and rows of H
  *  \param A (m x n) matrix, column-major, leading dimension m
  *  \param W (m x k) matrix, column-major, leading dimension m
  *  \param H (k x n) matrix, column-major, leading dimension k
  *  \return the scaled norm, or NaN when the (m x n) scratch buffer cannot
  *          be allocated
  *
  *  A temporary m*n buffer is allocated on every call (mkl_malloc, malloc
  *  or _mm_malloc depending on With_MKL / With_ARM) and released before
  *  returning.
  */
 float serror_x86(const int m, const int n, const int k, const float *A, const float *W, const float *H)
 {
   float
     error=0.0,
     *tmp =NULL;

   #ifdef With_MKL
     tmp = (float *)mkl_malloc(m*n*sizeof(float), WRDLEN);
   #else
     #ifdef With_ARM
       tmp = (float *)malloc(m*n*sizeof(float));
     #else
       tmp = (float *)_mm_malloc(m*n*sizeof(float), WRDLEN);
     #endif
   #endif

   /* Without the scratch buffer the BLAS calls below would write through */
   /* a null pointer; report "no result" instead of crashing.             */
   if (tmp == NULL)
     return NAN;

   /* tmp = W*H */
   cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0, W, m, H, k, 0.0, tmp, m);

   /* tmp = A - tmp */
   ssub_x86(m*n, A, tmp);

   error=cblas_snrm2(m*n, tmp, 1);

   #ifdef With_MKL
     mkl_free(tmp);
   #else
     #ifdef With_ARM
       free(tmp);
     #else
       _mm_free(tmp);
     #endif
   #endif

   return (error / sqrtf((float)m*n));
 }


dlarnv_
void dlarnv_(int *, int *, int *, double *)

serror_x86
float serror_x86(const int m, const int n, const int k, const float *A, const float *W, const float *H)
serror_x86 returns single precision "2norm(A - WH) / sqrt(m x n)"
Definition: utils_x86.c:329

dlarngenn_x86
void dlarngenn_x86(const int m, const int n, const int seed, double *X)
dlarngenn_x86 returns an (m x n) random double precision matrix. A uniform (0, 1) distribution is us...
Definition: utils_x86.c:220

dsub_x86
void dsub_x86(const int n, const double *x, double *__restrict__ y)
This function performs double precision element-wise subtraction y[i]=x[i]-y[i]. ...
Definition: utils_x86.c:162

ssub_x86
void ssub_x86(const int n, const float *x, float *__restrict__ y)
This function performs single precision element-wise subtraction y[i]=x[i]-y[i]. ...
Definition: utils_x86.c:190

utils_x86.h
Header file for using utility modules from CPU/MIC source codes.

slarnv_
void slarnv_(int *, int *, int *, float *)

ddiv_x86
void ddiv_x86(const int n, const double *x, double *__restrict__ y)
This function calls the appropriate functions to perform double precision element-wise y[i]=x[i]/y[i] ...
Definition: utils_x86.c:91

derror_x86
double derror_x86(const int m, const int n, const int k, const double *A, const double *W, const double *H)
derror_x86 returns double precision "2norm(A - WH) / sqrt(m x n)"
Definition: utils_x86.c:283

sdiv_x86
void sdiv_x86(const int n, const float *x, float *__restrict__ y)
This function calls the appropriate functions to perform single precision element-wise y[i]=x[i]/y[i] ...
Definition: utils_x86.c:127

smemset_x86
void smemset_x86(const int n, float *__restrict__ x, const float val)
This function fills all positions of x with val.
Definition: utils_x86.c:66

dmemset_x86
void dmemset_x86(const int n, double *__restrict__ x, const double val)
This function fills all positions of x with val.
Definition: utils_x86.c:42

slarngenn_x86
void slarngenn_x86(const int m, const int n, const int seed, float *X)
slarngenn_x86 returns an (m x n) random single precision matrix. A uniform (0, 1) distribution is us...
Definition: utils_x86.c:251