/*
 * Copy row `sm` of `src` into row `dm` of `dest`.
 *
 * The number of floats transferred is src->step (not src->n).
 * memmove is used, so the two rows are allowed to overlap.
 */
void nv_vector_copy(nv_matrix_t *dest, int dm, const nv_matrix_t *src, int sm)
{
	float *to;
	const float *from;

	NV_ASSERT(dest->n >= src->n);

	to = &NV_MAT_V(dest, dm, 0);
	from = &NV_MAT_V(src, sm, 0);
	memmove(to, from, src->step * sizeof(float));
}
/*
 * Set every element of row `m` of `mat` to `v`.
 *
 * Rows shorter than 64 elements are filled one element at a time.
 * Longer rows are seeded with a single value and then grown by copying
 * the already-filled prefix onto the unfilled part, doubling the copied
 * length each step; the remainder is finished with progressively
 * smaller copies.
 */
void nv_vector_fill(nv_matrix_t *mat, int m, float v)
{
	float *row;
	int filled;
	int chunk;

	if (mat->n < 64) {
		int idx;
		for (idx = 0; idx < mat->n; ++idx) {
			NV_MAT_V(mat, m, idx) = v;
		}
		return;
	}

	row = &NV_MAT_V(mat, m, 0);
	row[0] = v;
	filled = 1;

	/* doubling phase: each copy appends `chunk` more elements */
	for (chunk = 1; chunk * 2 < mat->n; chunk *= 2) {
		memmove(row + filled, row, chunk * sizeof(float));
		filled += chunk;
	}
	if (chunk > 1) {
		chunk /= 2;
	}

	/* tail phase: shrink the chunk until it fits in the remaining space */
	while (filled < mat->n) {
		if (filled + chunk > mat->n) {
			if (chunk > 1) {
				chunk /= 2;
			}
			continue;
		}
		memmove(row + filled, row, chunk * sizeof(float));
		filled += chunk;
	}
}
/*
 * Linear contrast adjustment of one channel via a 256-entry lookup table.
 *
 * For every table index i the mapped value is
 *     tan(angle) * (i - 127) + 127, clamped to [0, 255],
 * and each source value (truncated to int) is replaced by its table entry.
 *
 * dest, dch : destination matrix and destination column
 * src, sch  : source matrix and source column; values are used as table
 *             indices and must therefore lie in [0, 256)
 * angle     : slope angle in degrees, 0.0f-90.0f
 */
void nv_contrast(nv_matrix_t *dest, int dch, const nv_matrix_t *src, int sch, float angle)
{
	int i, j;
	float deg_scale = NV_PI / 180.0f;
	/* loop-invariant: the slope does not depend on the table index */
	const float slope = tanf(angle * deg_scale);
	nv_matrix_t *conv = nv_matrix_alloc(256, 1);

	NV_ASSERT(dest->m == src->m);
	/* valid column indices are 0 .. n-1 */
	NV_ASSERT(dch < dest->n);
	NV_ASSERT(sch < src->n);

	for (i = 0; i < conv->n; ++i) {
		float mapped = slope * ((float)i - 127.0f) + 127.0f;
		if (mapped > 255.0f) {
			mapped = 255.0f;
		}
		if (mapped < 0.0f) {
			mapped = 0.0f;
		}
		NV_MAT_V(conv, 0, i) = mapped;
	}

	for (j = 0; j < src->m; ++j) {
		NV_ASSERT(NV_MAT_V(src, j, sch) < 256.0f);
		/* 0 is a valid table index, so only negative values are rejected */
		NV_ASSERT(NV_MAT_V(src, j, sch) >= 0.0f);
		NV_MAT_V(dest, j, dch) = NV_MAT_V(conv, 0, (int)NV_MAT_V(src, j, sch));
	}

	/* the lookup table is local to this call; release it */
	nv_matrix_free(&conv);
}
static nv_matrix_t * conv_image2vec(const nv_bgseg_t *bg, const nv_matrix_t *image) { nv_matrix_t *vec; nv_matrix_t *smooth; nv_matrix_t *resize = NULL, *gray = NULL; int i; float scale = (float)bg->size / (float)NV_MAX(image->rows, image->cols); if (scale != 1.0f) { resize = nv_matrix3d_alloc(image->n, NV_ROUND_INT(image->rows * scale), NV_ROUND_INT(image->cols * scale)); nv_resize(resize, image); image = resize; } if (image->n != 1) { gray = nv_matrix3d_alloc(1, image->rows, image->cols); nv_gray(gray, image); image = gray; } vec = nv_matrix_alloc(image->rows * image->cols, 1); smooth = nv_matrix_clone(image); nv_gaussian5x5(smooth, 0, image, 0); for (i = 0; i < image->m; ++i) { NV_MAT_V(vec, 0, i) = NV_MAT_V(smooth, i, 0); } nv_matrix_free(&smooth); nv_matrix_free(&gray); nv_matrix_free(&resize); return vec; }
// Mahalanobis distance float nv_mahalanobis(const nv_cov_t *cov, const nv_matrix_t *x, int xm) { int n; nv_matrix_t *y = nv_matrix_alloc(x->n, 1); nv_matrix_t *x2 = nv_matrix_alloc(x->n, 1); float distance; float delta2 = 0.0f; nv_matrix_zero(y); nv_matrix_zero(x2); for (n = 0; n < x2->n; ++n) { NV_MAT_V(x2, 0, n) = NV_MAT_V(x, xm, n) - NV_MAT_V(cov->u, 0, n); } nv_gemv(y, 0, NV_MAT_TR, cov->eigen_vec, x2, xm); for (n = 0; n < x->n; ++n) { float ev = NV_MAT_V(cov->eigen_val, n, 0); float xv = NV_MAT_V(y, 0, n); delta2 += (xv * xv) / ev; } distance = sqrtf(delta2); nv_matrix_free(&x2); nv_matrix_free(&y); return distance; }
/*
 * Sigmoid contrast adjustment of one channel via a 256-entry lookup table.
 *
 * The minimum and maximum of the source column are measured first. For each
 * table index i:
 *     x       = 255 * (i - min) / max
 *     conv[i] = 255 / (1 + exp(-gain * (x - 128) * 0.078125))
 * Each source value (truncated to int) is then replaced by its table entry.
 *
 * dest, dch : destination matrix and destination column
 * src, sch  : source matrix and source column; values are used as table
 *             indices and must therefore lie in [0, 256)
 * gain      : sigmoid gain, 0.1f-1.0f
 */
void nv_contrast_sigmoid(nv_matrix_t *dest, int dch, const nv_matrix_t *src, int sch, float gain)
{
	int i, j;
	nv_matrix_t *conv = nv_matrix_alloc(256, 1);
	float max_v = -FLT_MAX;
	float min_v = FLT_MAX;

	NV_ASSERT(dest->m == src->m);
	/* valid column indices are 0 .. n-1 */
	NV_ASSERT(dch < dest->n);
	NV_ASSERT(sch < src->n);

	for (j = 0; j < src->m; ++j) {
		if (max_v < NV_MAT_V(src, j, sch)) {
			max_v = NV_MAT_V(src, j, sch);
		}
		if (min_v > NV_MAT_V(src, j, sch)) {
			min_v = NV_MAT_V(src, j, sch);
		}
	}

	for (i = 0; i < conv->n; ++i) {
		/* NOTE(review): divides by max_v, not (max_v - min_v), and max_v may be 0 — confirm intent */
		float x = 255.0f * ((float)i - min_v) / max_v;
		NV_MAT_V(conv, 0, i) = 255.0f * (1.0f / (1.0f + expf(-gain * (x - 128.0f) * 0.078125f)));
	}

	for (j = 0; j < src->m; ++j) {
		NV_ASSERT(NV_MAT_V(src, j, sch) < 256.0f);
		/* 0 is a valid table index, so only negative values are rejected */
		NV_ASSERT(NV_MAT_V(src, j, sch) >= 0.0f);
		NV_MAT_V(dest, j, dch) = NV_MAT_V(conv, 0, (int)NV_MAT_V(src, j, sch));
	}

	/* the lookup table is local to this call; release it */
	nv_matrix_free(&conv);
}
/*
 * Regression.
 *
 * Forward pass through the network: the first layer applies nv_mlp_sigmoid
 * to (bias * NV_MLP_BIAS + w . x); the second layer is left linear. The
 * second-layer outputs are copied into row `om` of `out`.
 */
void nv_mlp_regression(const nv_mlp_t *mlp, const nv_matrix_t *x, int xm, nv_matrix_t *out, int om)
{
	nv_matrix_t *layer1 = nv_matrix_alloc(mlp->input_w->m, 1);
	nv_matrix_t *layer2 = nv_matrix_alloc(mlp->hidden_w->m, 1);
	int unit;

#ifdef _OPENMP
#pragma omp parallel for
#endif
	for (unit = 0; unit < mlp->input_w->m; ++unit) {
		/* declared inside the loop, so each iteration has its own copy */
		float act = NV_MAT_V(mlp->input_bias, unit, 0) * NV_MLP_BIAS;
		act += nv_vector_dot(x, xm, mlp->input_w, unit);
		NV_MAT_V(layer1, 0, unit) = nv_mlp_sigmoid(act);
	}

	for (unit = 0; unit < mlp->hidden_w->m; ++unit) {
		float act = NV_MAT_V(mlp->hidden_bias, unit, 0) * NV_MLP_BIAS;
		act += nv_vector_dot(layer1, 0, mlp->hidden_w, unit);
		NV_MAT_V(layer2, 0, unit) = act;
	}

	nv_vector_copy(out, om, layer2, 0);

	nv_matrix_free(&layer1);
	nv_matrix_free(&layer2);
}
/*
 * Dot product of row `m1` of `vec1` with row `m2` of `vec2`.
 *
 * One of three implementations is selected at compile time:
 *   NV_ENABLE_AVX  - 8 floats per step with 256-bit intrinsics
 *   NV_ENABLE_SSE2 - 4 floats per step with 128-bit intrinsics
 *   otherwise      - plain scalar loop
 * In the SIMD variants the elements past the last full vector are
 * accumulated by a scalar tail loop.
 */
float nv_vector_dot(const nv_matrix_t *vec1, int m1, const nv_matrix_t *vec2, int m2)
{
	NV_ASSERT(vec1->n == vec2->n);

#if NV_ENABLE_AVX
	{
		NV_ALIGNED(float, mm[8], 32);
		__m256 x, u;
		int n;
		/* clear the low three bits: element count covered by full 8-wide steps */
		int pk_lp = (vec1->n & 0xfffffff8);
		float dp = 0.0f;

		u = _mm256_setzero_ps();
		for (n = 0; n < pk_lp; n += 8) {
			/* aligned load; assumes matrix rows are 32-byte aligned — TODO confirm against the allocator */
			x = _mm256_load_ps(&NV_MAT_V(vec2, m2, n));
			u = _mm256_add_ps(u, _mm256_mul_ps(x, *(__m256 *)&NV_MAT_V(vec1, m1, n)));
		}
		/* horizontal sum of the eight partial sums */
		_mm256_store_ps(mm, u);
		dp = mm[0] + mm[1] + mm[2] + mm[3] + mm[4] + mm[5] + mm[6] + mm[7];
		/* scalar tail for the remaining (n % 8) elements */
		for (n = pk_lp; n < vec1->n; ++n) {
			dp += NV_MAT_V(vec1, m1, n) * NV_MAT_V(vec2, m2, n);
		}
		return dp;
	}
#elif NV_ENABLE_SSE2
	{
		NV_ALIGNED(float, mm[4], 16);
		__m128 x, u;
		int n;
		/* clear the low two bits: element count covered by full 4-wide steps */
		int pk_lp = (vec1->n & 0xfffffffc);
		float dp = 0.0f;

		u = _mm_setzero_ps();
		for (n = 0; n < pk_lp; n += 4) {
			/* aligned load; assumes matrix rows are 16-byte aligned — TODO confirm against the allocator */
			x = _mm_load_ps(&NV_MAT_V(vec2, m2, n));
			u = _mm_add_ps(u, _mm_mul_ps(x, *(__m128 *)&NV_MAT_V(vec1, m1, n)));
		}
		/* horizontal sum of the four partial sums */
		_mm_store_ps(mm, u);
		dp = mm[0] + mm[1] + mm[2] + mm[3];
		/* scalar tail for the remaining (n % 4) elements */
		for (n = pk_lp; n < vec1->n; ++n) {
			dp += NV_MAT_V(vec1, m1, n) * NV_MAT_V(vec2, m2, n);
		}
		return dp;
	}
#else
	{
		/* portable fallback */
		int n;
		float dp = 0.0f;
		for (n = 0; n < vec1->n; ++n) {
			dp += NV_MAT_V(vec1, m1, n) * NV_MAT_V(vec2, m2, n);
		}
		return dp;
	}
#endif
}
/*
 * Forward pass that writes the softmax of the output layer into row `p_j`
 * of `p`.
 *
 * First layer:  sigmoid(bias * NV_MLP_BIAS + (w . x) * (1 - noise)) * (1 - dropout)
 * Second layer: sigmoid(bias * NV_MLP_BIAS + (w . first_layer))
 */
void nv_mlp_predict_vector(const nv_mlp_t *mlp, nv_matrix_t *p, int p_j, const nv_matrix_t *x, int x_j)
{
	nv_matrix_t *layer1 = nv_matrix_alloc(mlp->hidden, 1);
	nv_matrix_t *layer2 = nv_matrix_alloc(mlp->output, 1);
	const float keep_scale = 1.0f - mlp->dropout;
	const float signal_scale = 1.0f - mlp->noise;
	int unit;

#ifdef _OPENMP
#pragma omp parallel for
#endif
	for (unit = 0; unit < mlp->hidden; ++unit) {
		/* declared inside the loop, so each iteration has its own copy */
		float act = NV_MAT_V(mlp->input_bias, unit, 0) * NV_MLP_BIAS;
		act += nv_vector_dot(x, x_j, mlp->input_w, unit) * signal_scale;
		NV_MAT_V(layer1, 0, unit) = nv_mlp_sigmoid(act) * keep_scale;
	}

	for (unit = 0; unit < mlp->output; ++unit) {
		float act = NV_MAT_V(mlp->hidden_bias, unit, 0) * NV_MLP_BIAS;
		act += nv_vector_dot(layer1, 0, mlp->hidden_w, unit);
		NV_MAT_V(layer2, 0, unit) = nv_mlp_sigmoid(act);
	}

	nv_mlp_softmax(p, p_j, layer2, 0);

	nv_matrix_free(&layer1);
	nv_matrix_free(&layer2);
}
float nv_mlp_predict(const nv_mlp_t *mlp, const nv_matrix_t *x, int xm, int cls) { int m; float y; nv_matrix_t *input_y = nv_matrix_alloc(mlp->input_w->m, 1); nv_matrix_t *hidden_y = nv_matrix_alloc(mlp->output, 1); nv_matrix_t *output_y = nv_matrix_alloc(mlp->output, 1); float p; double dropout_scale = 1.0 - mlp->dropout; float noise_scale = 1.0f - mlp->noise; #ifdef _OPENMP #pragma omp parallel for private(y) #endif for (m = 0; m < mlp->hidden; ++m) { y = NV_MAT_V(mlp->input_bias, m, 0) * NV_MLP_BIAS; y += nv_vector_dot(x, xm, mlp->input_w, m) * noise_scale; NV_MAT_V(input_y, 0, m) = nv_mlp_sigmoid(y) * dropout_scale; } for (m = 0; m < mlp->output; ++m) { y = NV_MAT_V(mlp->hidden_bias, m, 0) * NV_MLP_BIAS; y += nv_vector_dot(input_y, 0, mlp->hidden_w, m); NV_MAT_V(hidden_y, 0, m) = nv_mlp_sigmoid(y); } nv_mlp_softmax(output_y, 0, hidden_y, 0); p = NV_MAT_V(output_y, 0, cls); nv_matrix_free(&input_y); nv_matrix_free(&hidden_y); nv_matrix_free(&output_y); return p; }
int nv_mlp_predict_label(const nv_mlp_t *mlp, const nv_matrix_t *x, int xm) { int m; int label = -1; float max_output = -FLT_MAX; nv_matrix_t *input_y = nv_matrix_alloc(mlp->input_w->m, 1); float dropout_scale = 1.0f - mlp->dropout; float noise_scale = 1.0f - mlp->noise; #ifdef _OPENMP #pragma omp parallel for #endif for (m = 0; m < mlp->hidden; ++m) { float y = NV_MAT_V(mlp->input_bias, m, 0) * NV_MLP_BIAS; y += nv_vector_dot(x, xm, mlp->input_w, m) * noise_scale; NV_MAT_V(input_y, 0, m) = nv_mlp_sigmoid(y) * dropout_scale; } for (m = 0; m < mlp->output; ++m) { float y = NV_MAT_V(mlp->hidden_bias, m, 0) * NV_MLP_BIAS; y += nv_vector_dot(input_y, 0, mlp->hidden_w, m); if (max_output < y) { label = m; max_output = y; } } nv_matrix_free(&input_y); return label; }
/* この関数は次元が高いと確率が小さくなりすぎて数値計算できないので * 次元が高い場合は, この関数値の対数であるnv_gaussian_log_predictを使うこと. */ float nv_gaussian_predict(const nv_cov_t *cov, const nv_matrix_t *x, int xm) { int n; nv_matrix_t *y = nv_matrix_alloc(x->n, 2); float p = 1.0f; float d = (float)x->n; float delta2 = 0.0f; float lambda = 1.0f; nv_vector_sub(y, 0, x, xm, cov->u, 0); nv_matrix_mulv(y, 1, cov->eigen_vec, NV_MAT_TR, y, 0); for (n = 0; n < x->n; ++n) { float ev = NV_MAT_V(cov->eigen_val, n, 0); float xv = NV_MAT_V(y, 1, n); if (ev > 0.0f) { delta2 += (xv * xv) / ev; lambda *= sqrtf(ev); } } p = (1.0f / powf(2.0f * NV_PI, d / 2.0f)) * (1.0f / lambda) * expf(-0.5f * delta2); nv_matrix_free(&y); return p; }
/*
 * Sort the rows of `mat` in place by the value in column `sort_column_n`.
 *
 * A two-column key matrix (key value, original row index) is sorted with
 * qsort; the rows are then gathered into a temporary matrix in the new
 * order and copied back.
 */
void nv_matrix_sort(nv_matrix_t *mat, int sort_column_n, nv_sort_dir_e dir)
{
	nv_matrix_t *keys = nv_matrix_alloc(2, mat->m);
	nv_matrix_t *sorted = nv_matrix_alloc(mat->n, mat->m);
	int (*compare)(const void *, const void *) =
		(dir == NV_SORT_DIR_ASC) ? nv_column_cmp_asc : nv_column_cmp_desc;
	int row;

	for (row = 0; row < mat->m; ++row) {
		NV_MAT_V(keys, row, 0) = NV_MAT_V(mat, row, sort_column_n);
		NV_MAT_V(keys, row, 1) = (float)row;
	}

	/* each qsort element is one whole key row (step floats) */
	qsort(keys->v, keys->m, keys->step * sizeof(float), compare);

	for (row = 0; row < mat->m; ++row) {
		const int from = (int)NV_MAT_V(keys, row, 1);
		nv_vector_copy(sorted, row, mat, from);
	}
	nv_matrix_copy(mat, 0, sorted, 0, mat->m);

	nv_matrix_free(&keys);
	nv_matrix_free(&sorted);
}
/*
 * Integral image rotated by 45 degrees.
 *
 * integral : output; must be one row and one column larger than `img`
 *            (asserted below). It is zeroed before being filled.
 * img      : input image
 * channel  : channel of `img` that is read
 *
 * The image is walked along down-right diagonals: the first loop starts a
 * diagonal at every column of the top row (right to left), the second at
 * every row of the left column from the second row on. `prev_tilted` holds,
 * per column, the running diagonal sum most recently written for that
 * column.
 */
void nv_integral_tilted(nv_matrix_t *integral, const nv_matrix_t *img, int channel)
{
	int row, col, scol, srow;
	/* exclusive upper bounds in integral coordinates (image size + 1) */
	int erow = img->rows + 1;
	int ecol = img->cols + 1;
	nv_matrix_t *prev_tilted = nv_matrix_alloc(img->cols + 1, 1);

	NV_ASSERT(
		integral->rows - 1 == img->rows
		&& integral->cols - 1 == img->cols
	);

	nv_matrix_zero(prev_tilted);
	nv_matrix_zero(integral);

	/* diagonals starting on the top row, rightmost column first */
	for (scol = img->cols; scol > 0; --scol) {
		float tilted_sum = 0.0f;
		for (row = 1, col = scol; row < erow && col < ecol; ++row, ++col) {
			/* integral coordinates are offset by one from image coordinates */
			float tilted_val = NV_MAT3D_V(img, row - 1, col - 1, channel);
			if (col + 1 == ecol) {
				/* last column: there is no upper-right neighbour to read */
				NV_MAT3D_V(integral, row, col, 0) =
					NV_MAT3D_V(integral, row - 1, col, 0)
					+ tilted_sum + tilted_val;
			} else {
				NV_MAT3D_V(integral, row, col, 0) =
					NV_MAT3D_V(integral, row - 1, col + 1, 0)
					+ NV_MAT_V(prev_tilted, 0, col)
					+ tilted_sum + tilted_val;
			}
			tilted_sum += tilted_val;
			NV_MAT_V(prev_tilted, 0, col) = tilted_sum;
		}
	}
	/* diagonals starting on the left column, from the second row downward */
	for (srow = 2; srow < erow; ++srow) {
		float tilted_sum = 0.0f;
		for (row = srow, col = 1; row < erow && col < ecol; ++row, ++col) {
			float tilted_val = NV_MAT3D_V(img, row - 1, col - 1, channel);
			if (col + 1 == ecol) {
				/* last column: there is no upper-right neighbour to read */
				NV_MAT3D_V(integral, row, col, 0) =
					NV_MAT3D_V(integral, row - 1, col, 0)
					+ tilted_sum + tilted_val;
			} else {
				NV_MAT3D_V(integral, row, col, 0) =
					NV_MAT3D_V(integral, row - 1, col + 1, 0)
					+ NV_MAT_V(prev_tilted, 0, col)
					+ tilted_sum + tilted_val;
			}
			tilted_sum += tilted_val;
			NV_MAT_V(prev_tilted, 0, col) = tilted_sum;
		}
	}

	nv_matrix_free(&prev_tilted);
}
/*
 * Convert one 3-channel BGR row into the 3-component "euclidean" colour
 * representation:
 *     ec[0] = floor((R + G + B) / 3)
 *     ec[1] = floor((R + (255 - B)) / 2)
 *     ec[2] = floor((R + 2 * (255 - G) + B) / 4)
 *
 * ec, ec_m   : destination matrix and row (n must be 3)
 * bgr, bgr_m : source matrix and row (n must be 3)
 *
 * All three input channels are read before anything is written, so the
 * call also gives the right result when the destination row is the source
 * row (in-place conversion).
 */
void nv_color_bgr2euclidean_scalar(nv_matrix_t *ec, int ec_m, const nv_matrix_t *bgr, int bgr_m)
{
	float r, g, b;

	assert(ec->n == bgr->n && ec->n == 3);

	r = NV_MAT_V(bgr, bgr_m, NV_CH_R);
	g = NV_MAT_V(bgr, bgr_m, NV_CH_G);
	b = NV_MAT_V(bgr, bgr_m, NV_CH_B);

	NV_MAT_V(ec, ec_m, 0) = floorf((r + g + b) / 3.0f);
	NV_MAT_V(ec, ec_m, 1) = floorf((r + (255.0f - b)) / 2.0f);
	NV_MAT_V(ec, ec_m, 2) = floorf((r + 2.0f * (255.0f - g) + b) / 4.0f);
}
/*
 * Copy column `src_n` of `mat2` into column `dest_n` of `mat1`, row by row.
 * Both matrices must have the same number of rows.
 */
void nv_matrix_split(nv_matrix_t *mat1, int dest_n, const nv_matrix_t *mat2, int src_n)
{
	int row = 0;

	NV_ASSERT(mat1->m == mat2->m);

	while (row < mat1->m) {
		NV_MAT_V(mat1, row, dest_n) = NV_MAT_V(mat2, row, src_n);
		++row;
	}
}
/*
 * Flatten `mat` into row `vec_j` of `vec`: row r of `mat` lands at element
 * offset mat->n * r. `vec` must have exactly mat->n * mat->m columns.
 */
void nv_matrix_reshape_vec(nv_matrix_t *vec, int vec_j, const nv_matrix_t *mat)
{
	const size_t row_bytes = sizeof(float) * mat->n;
	int r;

	NV_ASSERT(mat->n * mat->m == vec->n);

	for (r = 0; r < mat->m; ++r) {
		float *to = &NV_MAT_V(vec, vec_j, mat->n * r);
		const float *from = &NV_MAT_V(mat, r, 0);
		memmove(to, from, row_bytes);
	}
}
/*
 * Cross-entropy style error between row `oj` of `output_y` and row `dj`
 * of the target `t`: the sum of -t[n] * log(output[n]).
 * Outputs that are not greater than FLT_EPSILON are skipped.
 */
static float nv_mlp_error(const nv_matrix_t *output_y, int oj, const nv_matrix_t *t, int dj)
{
	float total = 0.0f;
	int k;

	for (k = 0; k < output_y->n; ++k) {
		if (!(NV_MAT_V(output_y, oj, k) > FLT_EPSILON)) {
			continue;
		}
		total += -NV_MAT_V(t, dj, k) * logf(NV_MAT_V(output_y, oj, k));
	}

	return total;
}
// Euclidean distance^2 float nv_euclidean2(const nv_matrix_t *vec1, int m1, const nv_matrix_t *vec2, int m2) { int n; float dist = 0.0f; assert(vec1->n == vec2->n); for (n = 0; n < vec1->n; ++n) { dist += (NV_MAT_V(vec1, m1, n) - NV_MAT_V(vec2, m2, n)) * (NV_MAT_V(vec1, m1, n) - NV_MAT_V(vec2, m2, n)); } return dist; }
/*
 * Softmax of row `hj` of `hidden_y`, written to row `oj` of `output_y`.
 * The row maximum is subtracted before exponentiation, so the largest
 * exponent is 0.
 */
static void nv_mlp_softmax(nv_matrix_t *output_y, int oj, const nv_matrix_t *hidden_y, int hj)
{
	const int peak = nv_vector_max_n(hidden_y, hj);
	const float shift = NV_MAT_V(hidden_y, hj, peak);
	float total = 0.0f;
	int k;

	for (k = 0; k < output_y->n; ++k) {
		const float e = expf(NV_MAT_V(hidden_y, hj, k) - shift);
		NV_MAT_V(output_y, oj, k) = e;
		total += e;
	}

	/* normalise in place */
	nv_vector_divs(output_y, oj, output_y, oj, total);
}
/*
 * Negated sum of t[n] * log(y[n]) over row `tm` of `t` and row `ym` of `y`.
 * Predictions that are not greater than FLT_EPSILON are skipped.
 */
static float nv_lr_error(const nv_matrix_t *t, int tm, const nv_matrix_t *y, int ym)
{
	float log_sum = 0.0f;
	int k;

	for (k = 0; k < t->n; ++k) {
		const float prob = NV_MAT_V(y, ym, k);

		if (!(prob > FLT_EPSILON)) {
			continue;
		}
		log_sum += NV_MAT_V(t, tm, k) * logf(prob);
	}

	return -log_sum;
}
/*
 * Element-wise square root: vec0[m0][k] = sqrtf(vec1[m1][k]).
 * Both matrices must have the same number of columns.
 */
void nv_vector_sqrt(nv_matrix_t *vec0, int m0, const nv_matrix_t *vec1, int m1)
{
	int k = 0;

	NV_ASSERT(vec0->n == vec1->n);

	while (k < vec0->n) {
		const float value = NV_MAT_V(vec1, m1, k);
		NV_MAT_V(vec0, m0, k) = sqrtf(value);
		++k;
	}
}
/*
 * Column-wise mean of all rows of `mat`, written to row `mean_m` of `mean`.
 * Every element is scaled by 1 / mat->m before it is accumulated.
 */
void nv_vector_avg(nv_matrix_t *mean, int mean_m, const nv_matrix_t *mat)
{
	const float weight = 1.0f / mat->m;
	int row, col;

	NV_ASSERT(mean->n == mat->n);

	nv_vector_zero(mean, mean_m);

	for (row = 0; row < mat->m; ++row) {
		for (col = 0; col < mat->n; ++col) {
			const float contribution = weight * NV_MAT_V(mat, row, col);
			NV_MAT_V(mean, mean_m, col) += contribution;
		}
	}
}
/*
 * Index of the minimum element in row `m` of `v`: the minimum value is
 * obtained from nv_vector_min and then located with nv_float_find_index.
 */
int nv_vector_min_n(const nv_matrix_t *v, int m)
{
	const float smallest = nv_vector_min(v, m);
	const float *row = &NV_MAT_V(v, m, 0);

	return (int)nv_float_find_index(row, 0, v->n, smallest);
}
/*
 * Element-wise power with a scalar exponent:
 *     vec0[m0][n] = powf(vec1[m1][n], x)
 *
 * vec0, m0 : destination matrix and row
 * vec1, m1 : source matrix and row
 * x        : exponent applied to every element
 *
 * Both matrices must have the same number of columns.
 */
void nv_vector_pows(nv_matrix_t *vec0, int m0, const nv_matrix_t *vec1, int m1, float x)
{
	int n;

	/* compare destination against source (the old check compared vec1 with itself) */
	NV_ASSERT(vec0->n == vec1->n);

	for (n = 0; n < vec0->n; ++n) {
		NV_MAT_V(vec0, m0, n) = powf(NV_MAT_V(vec1, m1, n), x);
	}
}
/*
 * Print per-class accuracy of `mlp` on `data`.
 *
 * Every row of `data` is classified with nv_mlp_predict_label and compared
 * with the label in column 0 of the matching row of `label`. For each class
 * the number of correct and incorrect predictions and their ratio are
 * printed, or a notice when the class has no samples.
 */
static void nv_mlp_train_accuracy(const nv_mlp_t *mlp, const nv_matrix_t *data, const nv_matrix_t *label)
{
	const int classes = mlp->output;
	int *hits = nv_alloc_type(int, classes);
	int *misses = nv_alloc_type(int, classes);
	int k;

	memset(hits, 0, sizeof(int) * classes);
	memset(misses, 0, sizeof(int) * classes);

	for (k = 0; k < data->m; ++k) {
		const int predicted = nv_mlp_predict_label(mlp, data, k);
		const int expected = (int)NV_MAT_V(label, k, 0);
		/* counters are always indexed by the expected class */
		int *tally = (predicted == expected) ? hits : misses;

		++tally[expected];
	}

	for (k = 0; k < classes; ++k) {
		const int total = hits[k] + misses[k];

		if (total <= 0) {
			printf("%d: no data found\n", k);
			continue;
		}
		printf("%d: correct: %d, ng: %d, %f\n",
			   k, hits[k], misses[k],
			   (float)hits[k] / (float)total);
	}

	nv_free(hits);
	nv_free(misses);
}
/*
 * Train `tree` on `data` and check the clustering purity against `labels`.
 *
 * When `base` is NULL the tree is trained from scratch with
 * nv_klr_tree_train; otherwise nv_klr_tree_inherit_train is used with
 * `base`. The tree dimensions and the resulting purity are printed, and
 * the purity is asserted to be above 0.5.
 */
static void nv_test_klr_tree_inherit(const nv_matrix_t *data,const nv_matrix_t *labels, nv_klr_tree_t *tree, const nv_klr_tree_t *base)
{
	nv_matrix_t *assigned = nv_matrix_alloc(1, data->m);
	float score;
	int k;

	NV_TEST_NAME;

	printf("tree: ");
	for (k = 0; k < tree->height; ++k) {
		if (k > 0) {
			printf(", ");
		}
		printf("%d", tree->dim[k]);
	}
	printf("\n");

	if (base != NULL) {
		nv_klr_tree_inherit_train(tree, base, data,
								  NV_LR_PARAM(4, 0.1f, NV_LR_REG_L1, 0.0001f, 1),
								  100);
	} else {
		nv_klr_tree_train(tree, data,
						  NV_LR_PARAM(4, 0.1f, NV_LR_REG_L1, 0.0001f, 1),
						  100);
	}

	for (k = 0; k < data->m; ++k) {
		const int cluster = nv_klr_tree_predict_label(tree, data, k);
		NV_MAT_V(assigned, k, 0) = (float)cluster;
	}

	score = nv_purity(K, NV_TEST_DATA_K, assigned, labels);
	printf("purity: %f\n", score);
	NV_ASSERT(score > 0.5f);

	nv_matrix_free(&assigned);
}
static void nv_test_klr_tree_ex(const nv_matrix_t *data, const nv_matrix_t *labels, int *width, int height) { nv_matrix_t *cluster_labels = nv_matrix_alloc(1, data->m); nv_klr_tree_t *tree = nv_klr_tree_alloc(data->n, width, height); float purity; int i; NV_TEST_NAME; printf("tree: "); for (i = 0; i < height; ++i) { if (i != 0) { printf(", "); } printf("%d", width[i]); } printf("\n"); nv_klr_tree_train(tree, data, NV_LR_PARAM(4, 0.1f, NV_LR_REG_L1, 0.0001f, 1), 100); for (i = 0; i < data->m; ++i) { NV_MAT_V(cluster_labels, i, 0) = (float)nv_klr_tree_predict_label(tree, data, i); } purity = nv_purity(K, NV_TEST_DATA_K, cluster_labels, labels); printf("purity: %f\n", purity); NV_ASSERT(purity > 0.5f); nv_klr_tree_free(&tree); nv_matrix_free(&cluster_labels); }
float nv_gaussian_log_predict(int npca, const nv_cov_t *cov, const nv_matrix_t *x, int xm) { static const float LOG_PI_0_5_NEGA = -0.5723649f; nv_matrix_t *y = nv_matrix_alloc(x->n, 1); int n; float p; NV_ASSERT(npca <= x->n); if (npca == 0) { npca = (int)(x->n * 0.4f); } p = LOG_PI_0_5_NEGA * npca; nv_vector_sub(y, 0, x, xm, cov->u, 0); for (n = 0; n < npca; ++n) { float xv = nv_vector_dot(cov->eigen_vec, n, y, 0); float ev = NV_MAT_V(cov->eigen_val, n, 0) * 2.0f; p += -(0.5f * logf(ev)) - (xv * xv) / (ev); } nv_matrix_free(&y); return p; }
/*
 * Create a 3D matrix header (n x rows x cols) that points at row `vec_j`
 * of `vec` without copying the data.
 *
 * Returns NULL when vec->n != n * rows * cols, or when n >= 8 and n is not
 * a multiple of 8 (AVX does not work in that case). The returned header is
 * marked as an alias (alias = 1) and uses step == n.
 */
nv_matrix_t *nv_vector_shallow_reshape3d(nv_matrix_t *vec, int vec_j, int n, int rows, int cols)
{
	nv_matrix_t *view;

	NV_ASSERT(vec->n == n * rows * cols);
	NV_ASSERT(n < 8 || n % 8 == 0); // n >= 8 && n % 8 != 0, AVX does not work

	/* same conditions as the asserts, enforced when asserts are compiled out */
	if (vec->n != n * rows * cols) {
		return NULL;
	}
	if (n >= 8 && n % 8 != 0) {
		return NULL;
	}

	view = (nv_matrix_t *)nv_malloc(sizeof(nv_matrix_t));

	/* shape */
	view->list = 1;
	view->n = n;
	view->rows = rows;
	view->cols = cols;
	view->m = rows * cols;

	/* storage: borrowed from `vec`, rows packed back to back */
	view->v = &NV_MAT_V(vec, vec_j, 0);
	view->step = n;
	view->list_step = (int64_t)view->step * view->m;
	view->alias = 1;

	return view;
}