NnmfPack  2.1
utils_x86.c
Go to the documentation of this file.
1 /***************************************************************************
2  * Copyright (C) 2014 by PIR (University of Oviedo) and *
3  * INCO2 (Polytechnic University of Valencia) groups. *
4  * nnmfpack@gmail.com *
5  * *
6  * This program is free software; you can redistribute it and/or modify *
7  * it under the terms of the GNU General Public License as published by *
8  * the Free Software Foundation; either version 2 of the License, or *
9  * (at your option) any later version. *
10  * *
11  * This program is distributed in the hope that it will be useful, *
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14  * GNU General Public License for more details. *
15  * *
16  * You should have received a copy of the GNU General Public License *
17  * along with this program; if not, write to the *
18  * Free Software Foundation, Inc., *
19  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
20  ***************************************************************************
21 */
32 #include "utils_x86.h"
33 
34 
42 void dmemset_x86(const int n, double *__restrict__ x, const double val)
43 {
44  int i;
45 
46  #ifdef With_ICC
47  #pragma loop_count min=1024
48  #pragma simd
49  #else
50  #ifdef With_OMP
51  #pragma omp parallel for
52  #endif
53  #endif
54  for (i=0; i<n; i++)
55  x[i]=val;
56 }
57 
58 
66 void smemset_x86(const int n, float *__restrict__ x, const float val)
67 {
68  int i;
69 
70  #ifdef With_ICC
71  #pragma loop_count min=1024
72  #pragma simd
73  #else
74  #ifdef With_OMP
75  #pragma omp parallel for
76  #endif
77  #endif
78  for (i=0; i<n; i++)
79  x[i]=val;
80 }
81 
82 
91 void ddiv_x86(const int n, const double *x, double *__restrict__ y)
92 {
93  #ifdef With_MKL
94  vdDiv(n, x, y, y);
95  #else
96  int i;
97  #ifdef With_ICC
98  #pragma loop_count min=32
99  #pragma simd
100  #else
101  #ifdef With_OMP
102  #pragma omp parallel for
103  #endif
104  #endif
105  for (i=0; i<n; i++)
106  {
107  #ifdef With_Check
108  /* Here we can have NaN and Inf if y(i) and/or x(i)=0 */
109  y[i]=x[i] / y[i];
110  assert(isfinite(y[i]));
111  #else
112  y[i]=x[i] / y[i];
113  #endif
114  }
115  #endif
116 }
117 
127 void sdiv_x86(const int n, const float *x , float *__restrict__ y)
128 {
129  #ifdef With_MKL
130  vsDiv(n, x, y, y);
131  #else
132  int i;
133  #ifdef With_ICC
134  #pragma loop_count min=32
135  #pragma simd
136  #else
137  #ifdef With_OMP
138  #pragma omp parallel for
139  #endif
140  #endif
141  for (i=0; i<n; i++)
142  {
143  #ifdef With_Check
144  /* Here we can have NaN and Inf if y(i) and/or x(i)=0 */
145  y[i]=x[i] / y[i];
146  assert(isfinite(y[i]));
147  #else
148  y[i]=x[i] / y[i];
149  #endif
150  }
151  #endif
152 }
153 
154 
162 void dsub_x86(const int n, const double *x, double *__restrict__ y)
163 {
164  #ifdef With_MKL
165  vdSub(n, x, y, y);
166  #else
167  int i;
168  #ifdef With_ICC
169  #pragma loop_count min=512
170  #pragma simd
171  #else
172  #ifdef With_OMP
173  #pragma omp parallel for
174  #endif
175  #endif
176  for (i=0; i<n; i++)
177  /* ask for x[i] or y[i] = 0.0 don't give improvements. We don't do it */
178  y[i] = x[i] - y[i];
179  #endif
180 }
181 
182 
190 void ssub_x86(const int n, const float *x, float *__restrict__ y)
191 {
192  #ifdef With_MKL
193  vsSub(n, x, y, y);
194  #else
195  int i;
196  #ifdef With_ICC
197  #pragma loop_count min=512
198  #pragma simd
199  #else
200  #ifdef With_OMP
201  #pragma omp parallel for
202  #endif
203  #endif
204  for (i=0; i<n; i++)
205  /* ask for x[i] or y[i] = 0.0 don't give improvements. We don't do it */
206  y[i] = x[i] - y[i];
207  #endif
208 }
209 
210 
220 void dlarngenn_x86(const int m, const int n, const int seed, double *X)
221 {
222  int
223  iseed[4],
224  distribucion_tipo=1,
225  distribucion_elem=0;
226 
227  distribucion_elem=seed % 4095;
228 
229  if ((distribucion_elem % 2) == 0) distribucion_elem++;
230 
231  iseed[0]=iseed[1]=iseed[2]=iseed[3]=distribucion_elem;
232  distribucion_elem = m*n;
233 
234  #ifdef With_MKL
235  dlarnv(&distribucion_tipo, iseed, &distribucion_elem, X);
236  #else
237  dlarnv_(&distribucion_tipo, iseed, &distribucion_elem, X);
238  #endif
239 }
240 
241 
251 void slarngenn_x86(const int m, const int n, const int seed, float *X)
252 {
253  int
254  iseed[4],
255  distribucion_tipo=1,
256  distribucion_elem=0;
257 
258  distribucion_elem=seed % 4095;
259 
260  if ((distribucion_elem % 2) == 0) distribucion_elem++;
261 
262  iseed[0]=iseed[1]=iseed[2]=iseed[3]=distribucion_elem;
263  distribucion_elem = m*n;
264 
265  #ifdef With_MKL
266  slarnv(&distribucion_tipo, iseed, &distribucion_elem, X);
267  #else
268  slarnv_(&distribucion_tipo, iseed, &distribucion_elem, X);
269  #endif
270 }
271 
272 
283 double derror_x86(const int m, const int n, const int k, const double *A, const double *W, const double *H)
284 {
285  double
286  error=0.0,
287  *tmp =NULL;
288 
289  #ifdef With_MKL
290  tmp = (double *)mkl_malloc(m*n*sizeof(double), WRDLEN);
291  #else
292  #ifdef With_ARM
293  tmp = (double *)malloc(m*n*sizeof(double));
294  #else
295  tmp = (double *)_mm_malloc(m*n*sizeof(double), WRDLEN);
296  #endif
297  #endif
298 
299  cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0, W, m, H, k, 0.0, tmp, m);
300 
301  dsub_x86(m*n, A, tmp);
302 
303  error=cblas_dnrm2(m*n, tmp, 1);
304 
305  #ifdef With_MKL
306  mkl_free(tmp);
307  #else
308  #ifdef With_ARM
309  free(tmp);
310  #else
311  _mm_free(tmp);
312  #endif
313  #endif
314 
315  return (error / sqrt(m*n));
316 }
317 
318 
329 float serror_x86(const int m, const int n, const int k, const float *A, const float *W, const float *H)
330 {
331  float
332  error=0.0,
333  *tmp =NULL;
334 
335  #ifdef With_MKL
336  tmp = (float *)mkl_malloc(m*n*sizeof(float), WRDLEN);
337  #else
338  #ifdef With_ARM
339  tmp = (float *)malloc(m*n*sizeof(float));
340  #else
341  tmp = (float *)_mm_malloc(m*n*sizeof(float), WRDLEN);
342  #endif
343  #endif
344 
345  cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0, W, m, H, k, 0.0, tmp, m);
346 
347  ssub_x86(m*n, A, tmp);
348 
349  error=cblas_snrm2(m*n, tmp, 1);
350 
351  #ifdef With_MKL
352  mkl_free(tmp);
353  #else
354  #ifdef With_ARM
355  free(tmp);
356  #else
357  _mm_free(tmp);
358  #endif
359  #endif
360 
361  return (error / sqrtf((float)m*n));
362 }
363 
364 
365 
void dlarnv_(int *, int *, int *, double *)
float serror_x86(const int m, const int n, const int k, const float *A, const float *W, const float *H)
serror_x86 returns simple precision "2norm(A - WH) / sqrt(m x n)"
Definition: utils_x86.c:329
void dlarngenn_x86(const int m, const int n, const int seed, double *X)
dlarngenn_x86 returns an (m x n) random double precision matrix. An uniform (0, 1) distribution is us...
Definition: utils_x86.c:220
void dsub_x86(const int n, const double *x, double *__restrict__ y)
This function performs double precision element-wise substraction y[i]=x[i]-y[i]. ...
Definition: utils_x86.c:162
void ssub_x86(const int n, const float *x, float *__restrict__ y)
This function performs simple precision element-wise substraction y[i]=x[i]-y[i]. ...
Definition: utils_x86.c:190
Header file for using utility modules from CPU/MIC source codes.
void slarnv_(int *, int *, int *, float *)
void ddiv_x86(const int n, const double *x, double *__restrict__ y)
This function calls the appropiate funtions to performs double precision element-wise y[i]=x[i]/y[i] ...
Definition: utils_x86.c:91
double derror_x86(const int m, const int n, const int k, const double *A, const double *W, const double *H)
derror_x86 returns double precision "2norm(A - WH) / sqrt(m x n)"
Definition: utils_x86.c:283
void sdiv_x86(const int n, const float *x, float *__restrict__ y)
This function calls the appropiate funtions to performs simple precision element-wise x[i]=x[i]/y[i] ...
Definition: utils_x86.c:127
void smemset_x86(const int n, float *__restrict__ x, const float val)
This function fills all positions of x with val.
Definition: utils_x86.c:66
void dmemset_x86(const int n, double *__restrict__ x, const double val)
This function fills all positions of x with val.
Definition: utils_x86.c:42
void slarngenn_x86(const int m, const int n, const int seed, float *X)
slarngenn_x86 returns an (m x n) random simple precision matrix. An uniform (0, 1) distribution is us...
Definition: utils_x86.c:251