74 int dmlsa_cpu(
const int m,
const int n,
const int k,
const double *A,
double *W,
double *H,
const int uType,
const int nIter)
87 B = (
double *)mkl_malloc(max(m,n) * k *
sizeof(double), WRDLEN);
88 C = (
double *)mkl_malloc(max(m,n) * k *
sizeof(double), WRDLEN);
89 L = (
double *)mkl_malloc( k * k *
sizeof(
double), WRDLEN);
92 B = (
double *)malloc(max(m,n) * k *
sizeof(double));
93 C = (
double *)malloc(max(m,n) * k *
sizeof(double));
94 L = (
double *)malloc( k * k *
sizeof(
double));
96 B = (
double *)_mm_malloc(max(m,n) * k *
sizeof(double), WRDLEN);
97 C = (
double *)_mm_malloc(max(m,n) * k *
sizeof(double), WRDLEN);
98 L = (
double *)_mm_malloc( k * k *
sizeof(
double), WRDLEN);
102 if (B == NULL || C == NULL || L == NULL)
109 cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, k, n, m, 1.0, W, m, A, m, 0.0, B, k);
112 cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, k, k, m, 1.0, W, m, W, m, 0.0, L, k);
115 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, k, n, k, 1.0, L, k, H, k, 0.0, C, k);
123 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, m, k, n, 1.0, A, m, H, k, 0.0, B, m);
126 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, k, k, n, 1.0, H, k, H, k, 0.0, L, k);
129 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, k, k, 1.0, W, m, L, k, 0.0, C, m);
153 B = (
double *)mkl_malloc(m * k *
sizeof(
double), WRDLEN);
154 C = (
double *)mkl_malloc(m * k *
sizeof(
double), WRDLEN);
155 L = (
double *)mkl_malloc(k * k *
sizeof(
double), WRDLEN);
158 B = (
double *)malloc(m * k *
sizeof(
double));
159 C = (
double *)malloc(m * k *
sizeof(
double));
160 L = (
double *)malloc(k * k *
sizeof(
double));
162 B = (
double *)_mm_malloc(m * k *
sizeof(
double), WRDLEN);
163 C = (
double *)_mm_malloc(m * k *
sizeof(
double), WRDLEN);
164 L = (
double *)_mm_malloc(k * k *
sizeof(
double), WRDLEN);
168 if (B == NULL || C == NULL || L == NULL)
175 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, m, k, n, 1.0, A, m, H, k, 0.0, B, m);
178 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, k, k, n, 1.0, H, k, H, k, 0.0, L, k);
181 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, k, k, 1.0, W, m, L, k, 0.0, C, m);
205 B = (
double *)mkl_malloc(n * k *
sizeof(
double), WRDLEN);
206 C = (
double *)mkl_malloc(n * k *
sizeof(
double), WRDLEN);
207 L = (
double *)mkl_malloc(k * k *
sizeof(
double), WRDLEN);
210 B = (
double *)malloc(n * k *
sizeof(
double));
211 C = (
double *)malloc(n * k *
sizeof(
double));
212 L = (
double *)malloc(k * k *
sizeof(
double));
214 B = (
double *)_mm_malloc(n * k *
sizeof(
double), WRDLEN);
215 C = (
double *)_mm_malloc(n * k *
sizeof(
double), WRDLEN);
216 L = (
double *)_mm_malloc(k * k *
sizeof(
double), WRDLEN);
220 if (B == NULL || C == NULL || L == NULL)
227 cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, k, n, m, 1.0, W, m, A, m, 0.0, B, k);
230 cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, k, k, m, 1.0, W, m, W, m, 0.0, L, k);
233 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, k, n, k, 1.0, L, k, H, k, 0.0, C, k);
275 int smlsa_cpu(
const int m,
const int n,
const int k,
const float *A,
float *W,
float *H,
const int uType,
const int nIter)
288 B = (
float *)mkl_malloc(max(m,n) * k *
sizeof(float), WRDLEN);
289 C = (
float *)mkl_malloc(max(m,n) * k *
sizeof(float), WRDLEN);
290 L = (
float *)mkl_malloc( m * n *
sizeof(
float), WRDLEN);
293 B = (
float *)malloc(max(m,n) * k *
sizeof(float));
294 C = (
float *)malloc(max(m,n) * k *
sizeof(float));
295 L = (
float *)malloc( m * n *
sizeof(
float));
297 B = (
float *)_mm_malloc(max(m,n) * k *
sizeof(float), WRDLEN);
298 C = (
float *)_mm_malloc(max(m,n) * k *
sizeof(float), WRDLEN);
299 L = (
float *)_mm_malloc( m * n *
sizeof(
float), WRDLEN);
303 if (B == NULL || C == NULL || L == NULL)
310 cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans, k, n, m, 1.0, W, m, A, m, 0.0, B, k);
313 cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans, k, k, m, 1.0, W, m, W, m, 0.0, L, k);
316 cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, k, n, k, 1.0, L, k, H, k, 0.0, C, k);
323 cblas_sgemm(CblasColMajor, CblasNoTrans, CblasTrans, m, k, n, 1.0, A, m, H, k, 0.0, B, m);
326 cblas_sgemm(CblasColMajor, CblasNoTrans, CblasTrans, k, k, n, 1.0, H, k, H, k, 0.0, L, k);
329 cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, k, k, 1.0, W, m, L, k, 0.0, C, m);
353 B = (
float *)mkl_malloc(m * k *
sizeof(
float), WRDLEN);
354 C = (
float *)mkl_malloc(m * k *
sizeof(
float), WRDLEN);
355 L = (
float *)mkl_malloc(m * n *
sizeof(
float), WRDLEN);
358 B = (
float *)malloc(m * k *
sizeof(
float));
359 C = (
float *)malloc(m * k *
sizeof(
float));
360 L = (
float *)malloc(m * n *
sizeof(
float));
362 B = (
float *)_mm_malloc(m * k *
sizeof(
float), WRDLEN);
363 C = (
float *)_mm_malloc(m * k *
sizeof(
float), WRDLEN);
364 L = (
float *)_mm_malloc(m * n *
sizeof(
float), WRDLEN);
368 if (B == NULL || C == NULL || L == NULL)
375 cblas_sgemm(CblasColMajor, CblasNoTrans, CblasTrans, m, k, n, 1.0, A, m, H, k, 0.0, B, m);
378 cblas_sgemm(CblasColMajor, CblasNoTrans, CblasTrans, k, k, n, 1.0, H, k, H, k, 0.0, L, k);
381 cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, k, k, 1.0, W, m, L, k, 0.0, C, m);
405 B = (
float *)mkl_malloc(n * k *
sizeof(
float), WRDLEN);
406 C = (
float *)mkl_malloc(n * k *
sizeof(
float), WRDLEN);
407 L = (
float *)mkl_malloc(m * n *
sizeof(
float), WRDLEN);
410 B = (
float *)malloc(n * k *
sizeof(
float));
411 C = (
float *)malloc(n * k *
sizeof(
float));
412 L = (
float *)malloc(m * n *
sizeof(
float));
414 B = (
float *)_mm_malloc(n * k *
sizeof(
float), WRDLEN);
415 C = (
float *)_mm_malloc(n * k *
sizeof(
float), WRDLEN);
416 L = (
float *)_mm_malloc(m * n *
sizeof(
float), WRDLEN);
420 if (B == NULL || C == NULL || L == NULL)
427 cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans, k, n, m, 1.0, W, m, A, m, 0.0, B, k);
430 cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans, k, k, m, 1.0, W, m, W, m, 0.0, L, k);
433 cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, k, n, k, 1.0, L, k, H, k, 0.0, C, k);
471 void ddotdiv_x86(
const int n,
const double *x,
const double *y,
double *__restrict__ z)
479 #pragma loop_count min=32 483 #pragma omp parallel for 490 z[i]=z[i] * x[i] / y[i];
491 assert(isfinite(z[i]));
493 z[i]=z[i] * x[i] / y[i];
509 void sdotdiv_x86(
const int n,
const float *x,
const float *y,
float *__restrict__ z)
517 #pragma loop_count min=32 521 #pragma omp parallel for 528 z[i]=z[i] * x[i] / y[i];
529 assert(isfinite(z[i]));
531 z[i]=z[i] * x[i] / y[i];
int dmlsa_cpu(const int m, const int n, const int k, const double *A, double *W, double *H, const int uType, const int nIter)
dmlsa_cpu performs NNMF using the beta-divergence with beta=2, in double precision
void ddotdiv_x86(const int n, const double *x, const double *y, double *__restrict__ z)
This function calls the appropriate functions to perform the double-precision element-wise operation z[i]=z[i]*x[i]/y[i]
File with functions to calculate NNMF using the mlsa algorithm for CPUs.
int smlsa_cpu(const int m, const int n, const int k, const float *A, float *W, float *H, const int uType, const int nIter)
smlsa_cpu performs NNMF using the beta-divergence with beta=2, in single precision
void sdotdiv_x86(const int n, const float *x, const float *y, float *__restrict__ z)
This function calls the appropriate functions to perform the single-precision element-wise operation z[i]=z[i]*x[i]/y[i]