// Mahalanobis distance float nv_mahalanobis(const nv_cov_t *cov, const nv_matrix_t *x, int xm) { int n; nv_matrix_t *y = nv_matrix_alloc(x->n, 1); nv_matrix_t *x2 = nv_matrix_alloc(x->n, 1); float distance; float delta2 = 0.0f; nv_matrix_zero(y); nv_matrix_zero(x2); for (n = 0; n < x2->n; ++n) { NV_MAT_V(x2, 0, n) = NV_MAT_V(x, xm, n) - NV_MAT_V(cov->u, 0, n); } nv_gemv(y, 0, NV_MAT_TR, cov->eigen_vec, x2, xm); for (n = 0; n < x->n; ++n) { float ev = NV_MAT_V(cov->eigen_val, n, 0); float xv = NV_MAT_V(y, 0, n); delta2 += (xv * xv) / ev; } distance = sqrtf(delta2); nv_matrix_free(&x2); nv_matrix_free(&y); return distance; }
/*
 * Integral image rotated by 45 degrees.
 *
 * `integral` must be one row and one column larger than `img` (asserted).
 * Only plane 0 of `integral` is written; values are read from plane
 * `channel` of `img`.
 *
 * The image is swept one down-right diagonal at a time: first the diagonals
 * that start on the top row (right-most first), then the diagonals that
 * start on the left column (from the second row downwards). `prev_tilted`
 * keeps, per column, the running sum left by the previously swept diagonal.
 */
void
nv_integral_tilted(nv_matrix_t *integral, const nv_matrix_t *img, int channel)
{
	const int erow = img->rows + 1;
	const int ecol = img->cols + 1;
	nv_matrix_t *prev_tilted = nv_matrix_alloc(img->cols + 1, 1);
	int diag;

	NV_ASSERT(
		integral->rows - 1 == img->rows
		&& integral->cols - 1 == img->cols
	);
	nv_matrix_zero(prev_tilted);
	nv_matrix_zero(integral);

	/*
	 * diag >= 0 : diagonal starting at (row 1, col 1 + diag)
	 * diag <  0 : diagonal starting at (row 1 - diag, col 1)
	 */
	for (diag = img->cols - 1; diag > -img->rows; --diag) {
		int row = (diag >= 0) ? 1 : 1 - diag;
		int col = (diag >= 0) ? 1 + diag : 1;
		float tilted_sum = 0.0f;

		while (row < erow && col < ecol) {
			const float tilted_val = NV_MAT3D_V(img, row - 1, col - 1, channel);

			if (col + 1 != ecol) {
				NV_MAT3D_V(integral, row, col, 0) =
					NV_MAT3D_V(integral, row - 1, col + 1, 0)
					+ NV_MAT_V(prev_tilted, 0, col)
					+ tilted_sum
					+ tilted_val;
			} else {
				/* last column: there is no upper-right neighbour */
				NV_MAT3D_V(integral, row, col, 0) =
					NV_MAT3D_V(integral, row - 1, col, 0)
					+ tilted_sum
					+ tilted_val;
			}
			tilted_sum += tilted_val;
			NV_MAT_V(prev_tilted, 0, col) = tilted_sum;

			++row;
			++col;
		}
	}

	nv_matrix_free(&prev_tilted);
}
void extract_dense(nv_matrix_t *vlad, int j, const nv_matrix_t *image, nv_keypoint_dense_t *dense, int ndense ) { NV_ASSERT(vlad->n == DIM); int desc_m; nv_matrix_t *key_vec; nv_matrix_t *desc_vec; nv_matrix_t *resize, *gray, *smooth; int i; int km = 0; if (m_fit_area == 0) { float scale = IMG_SIZE() / (float)NV_MAX(image->rows, image->cols); resize = nv_matrix3d_alloc(3, (int)(image->rows * scale), (int)(image->cols * scale)); } else { float axis_ratio = (float)image->rows / image->cols; int new_cols = (int)sqrtf(m_fit_area / axis_ratio); int new_rows = (int)((float)m_fit_area / new_cols); resize = nv_matrix3d_alloc(3, new_rows, new_cols); } gray = nv_matrix3d_alloc(1, resize->rows, resize->cols); smooth = nv_matrix3d_alloc(1, resize->rows, resize->cols); for (i = 0; i < ndense; ++i) { km += dense[i].rows * dense[i].cols; } km *= 2; key_vec = nv_matrix_alloc(NV_KEYPOINT_KEYPOINT_N, km); desc_vec = nv_matrix_alloc(NV_KEYPOINT_DESC_N, km); nv_resize(resize, image); nv_gray(gray, resize); nv_gaussian5x5(smooth, 0, gray, 0); nv_matrix_zero(desc_vec); nv_matrix_zero(key_vec); desc_m = nv_keypoint_dense_ex(m_ctx, key_vec, desc_vec, smooth, 0, dense, ndense); feature_vector(vlad, j, key_vec, desc_vec, desc_m); nv_matrix_free(&gray); nv_matrix_free(&resize); nv_matrix_free(&smooth); nv_matrix_free(&key_vec); nv_matrix_free(&desc_vec); }
nv_bgseg_t * nv_bgseg_alloc(int frame_rows, int frame_cols, float zeta, float bg_v, float fg_v, int size ) { nv_bgseg_t *bg = nv_alloc_type(nv_bgseg_t, 1); float scale = (float)size / (float)NV_MAX(frame_rows, frame_cols); bg->init_1st = 0; bg->init_2nd = 0; bg->init_1st_finished = 0; bg->init_2nd_finished = 0; bg->frame_rows = frame_rows; bg->frame_cols = frame_cols; bg->rows = NV_ROUND_INT(frame_rows * scale); bg->cols = NV_ROUND_INT(frame_cols * scale); bg->zeta = zeta; bg->bg_v = bg_v; bg->fg_v = fg_v; bg->size = size; bg->av = nv_matrix_alloc(1 * bg->rows * bg->cols, 1); nv_matrix_zero(bg->av); bg->sgm = nv_matrix_dup(bg->av); return bg; }
/*
 * Run nv_kmeans() on `data` with CENTROIDS clusters and at most 50 epochs,
 * writing the result to `centroids`. Progress output is switched on via
 * nv_kmeans_progress(1). The label and count buffers are temporaries.
 */
void
kmeans(nv_matrix_t *centroids, const nv_matrix_t *data)
{
	nv_matrix_t *cluster_labels = nv_matrix_alloc(1, data->m);
	nv_matrix_t *count = nv_matrix_alloc(1, CENTROIDS);

	/* start from cleared outputs */
	nv_matrix_zero(cluster_labels);
	nv_matrix_zero(count);
	nv_matrix_zero(centroids);

	nv_kmeans_progress(1);
	nv_kmeans(centroids, count, cluster_labels, data, CENTROIDS, 50);

	nv_matrix_free(&count);
	nv_matrix_free(&cluster_labels);
}
nv_lr_t * nv_lr_alloc(int n, int k) { nv_lr_t *lr = (nv_lr_t *)nv_malloc(sizeof(nv_lr_t)); lr->n = n; lr->k = k; lr->w = nv_matrix_alloc(lr->n, k); nv_matrix_zero(lr->w); return lr; }
/*
 * Initialise the network with random weights.
 *
 * Both weight matrices are filled by nv_matrix_rand() with a symmetric range
 * around zero. The input-layer range is divided by the mean vector norm of
 * `data` and by sqrt(0.8 * (input_w->m + 1)); the hidden-layer range only by
 * sqrt(0.8 * (hidden_w->m + 1)). Both bias vectors are set to zero.
 */
void
nv_mlp_init_rand(nv_mlp_t *mlp, const nv_matrix_t *data)
{
	const float data_scale = 1.0f / data->m;
	const float input_norm_mean = sqrtf(0.8f * (mlp->input_w->m + 1));
	const float hidden_norm_mean = sqrtf(0.8f * (mlp->hidden_w->m + 1));
	float data_norm_mean = 0.0f;
	float input_scale;
	float hidden_scale;
	int row;

	/* mean norm of the data vectors */
	for (row = 0; row < data->m; ++row) {
		data_norm_mean += nv_vector_norm(data, row) * data_scale;
	}

	input_scale = 1.0f / (data_norm_mean * input_norm_mean);
	hidden_scale = 1.0f / hidden_norm_mean;

	nv_matrix_zero(mlp->input_bias);
	nv_matrix_zero(mlp->hidden_bias);

	/* input weights first, then hidden weights */
	nv_matrix_rand(mlp->input_w, -0.5f * input_scale, 0.5f * input_scale);
	nv_matrix_rand(mlp->hidden_w, -0.5f * hidden_scale, 0.5f * hidden_scale);
}
void nv_klr_init(nv_lr_t *lr, // k nv_matrix_t *count, // k nv_matrix_t *labels, // data->m const nv_matrix_t *data, const nv_lr_param_t param) { nv_matrix_t *means = nv_matrix_alloc(lr->n, lr->k); long t; NV_ASSERT(labels->m >= data->m); nv_matrix_zero(means); nv_matrix_zero(labels); nv_matrix_zero(count); if (nv_klr_progress_flag) { printf("nv_klr: 0: init++\n"); } t = nv_clock(); nv_kmeans(means, count, labels, data, lr->k, 50); //nv_lbgu(means, count, labels, data, lr->k, 5, 10); if (nv_klr_progress_flag) { printf("nv_klr: 0: init end: %ldms\n", nv_clock() - t); } nv_lr_init(lr, data); nv_lr_train(lr, data, labels, param); nv_matrix_free(&means); if (nv_klr_progress_flag) { printf("nv_klr: 0: first step: %ldms\n", nv_clock() - t); fflush(stdout); } }
void feature_vector(nv_matrix_t *vec, int vec_j, nv_matrix_t *key_vec, nv_matrix_t *desc_vec, int desc_m ) { int i; int procs = nv_omp_procs(); nv_matrix_t *vec_tmp = nv_matrix_alloc(vec->n, procs); const nv_matrix_t *posi = POSI(); const nv_matrix_t *nega = NEGA(); nv_matrix_zero(vec_tmp); #ifdef _OPENMP #pragma omp parallel for num_threads(procs) #endif for (i = 0; i < desc_m; ++i) { int j; int thread_id = nv_omp_thread_id(); nv_vector_normalize(desc_vec, i); if (NV_MAT_V(key_vec, i, NV_KEYPOINT_RESPONSE_IDX) > 0.0f) { int label = nv_nn(posi, desc_vec, i); for (j = 0; j < posi->n; ++j) { NV_MAT_V(vec_tmp, thread_id, label * NV_KEYPOINT_DESC_N + j) += NV_MAT_V(desc_vec, i, j) - NV_MAT_V(posi, label, j); } } else { int label = nv_nn(nega, desc_vec, i); int vl = (KP + label) * NV_KEYPOINT_DESC_N; for (j = 0; j < nega->n; ++j) { NV_MAT_V(vec_tmp, thread_id, (vl + j)) += NV_MAT_V(desc_vec, i, j) - NV_MAT_V(nega, label, j); } } } nv_vector_zero(vec, vec_j); for (i = 0; i < procs; ++i) { nv_vector_add(vec, vec_j, vec, vec_j, vec_tmp, i); } nv_vector_normalize(vec, vec_j); nv_matrix_free(&vec_tmp); }
/*
 * Compute the per-cluster utility used by LBG-U and store it in `u`.
 *
 * For every data vector the distance (nv_euclidean2) to its own centroid is
 * subtracted from the smallest distance to any other centroid; the
 * difference is added to the entry of `u` that belongs to the vector's
 * cluster. With a single centroid the "other" distance stays FLT_MAX.
 *
 * u:      output, one entry per cluster, cleared first
 * means:  centroids
 * data:   input vectors
 * labels: cluster index of every data vector
 * count:  not needed by the computation; the parameter is kept so that
 *         existing callers keep compiling
 */
static void
nv_lbgu_u(nv_matrix_t *u,
		  const nv_matrix_t *means,
		  const nv_matrix_t *data,
		  const nv_matrix_t *labels,
		  const nv_matrix_t *count)
{
	int m;

	(void)count;

	nv_matrix_zero(u);

#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic, 1)
#endif
	for (m = 0; m < data->m; ++m) {
		int k;
		float diff, min_error = FLT_MAX;
		int i = NV_MAT_VI(labels, m, 0);

		/* nearest centroid other than the assigned one */
		for (k = 0; k < means->m; ++k) {
			float dist;
			if (k == i) {
				continue;
			}
			dist = nv_euclidean2(means, k, data, m);
			if (min_error > dist) {
				min_error = dist;
			}
		}
		diff = min_error - nv_euclidean2(means, i, data, m);

		/* several threads may hit the same cluster entry */
#ifdef _OPENMP
#pragma omp critical (nv_lbgu_u)
#endif
		{
			NV_MAT_V(u, i, 0) += diff;
		}
	}
}
void patch_sampling(nv_matrix_t *samples, std::vector<fileinfo_t> &list) { nv_matrix_t *data = nv_matrix_alloc(PATCH_SIZE * PATCH_SIZE * 3, (int)((IMG_SIZE-PATCH_SIZE) * (IMG_SIZE-PATCH_SIZE) * list.size())); int data_index = 0; int i; nv_matrix_zero(data); #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1) #endif for (i = 0; i < (int)list.size(); ++i) { nv_matrix_t *src; nv_matrix_t *patches; src = nv_load_image(list[i].file.c_str()); if (!src) { fprintf(stderr, "open filed: %s\n", list[i].file.c_str()); exit(-1); } patches = nv_patch_matrix_alloc(src, PATCH_SIZE); nv_patch_extract(patches, src, PATCH_SIZE); #ifdef _OPENMP #pragma omp critical (patch_sampling) #endif { int j; for (j = 0; j < patches->m; ++j) { nv_vector_copy(data, data_index, patches, j); data_index += 1; } } nv_matrix_free(&src); nv_matrix_free(&patches); } nv_vector_shuffle(data); nv_matrix_m(data, NV_MIN(samples->m, data_index)); nv_matrix_copy_all(samples, data); nv_matrix_free(&data); }
/*
 * Map a pixel value to a histogram bin in 0..255.
 * Values inside the range are truncated exactly like a plain (int) cast;
 * negative values and NaN go to bin 0, values of 255 and above to bin 255.
 */
static int
nv_histgram_bin(float v)
{
	if (!(v > 0.0f)) {
		return 0;
	}
	if (v >= 255.0f) {
		return 255;
	}
	return (int)v;
}

/*
 * Histogram equalisation of one channel.
 *
 * Builds the cumulative, normalised 256-bin histogram of `channel` in `img`
 * and writes the remapped value
 *     (cdf[v] - min_cdf) * 255 / (1 - min_cdf)
 * clamped to [0, 255] into the same channel of `eq`.
 *
 * eq and img must have the same number of rows (asserted). When img has no
 * rows, eq is zeroed and the function returns. Pixel values outside 0..255
 * are clamped to the nearest bin instead of indexing outside the histogram.
 */
void
nv_histgram_equalization(nv_matrix_t *eq, const nv_matrix_t *img, int channel)
{
	float freq[256] = {0};
	float fm;
	int m, i;
	float min_freq = FLT_MAX;

	NV_ASSERT(eq->m == img->m);

	if (img->m == 0) {
		nv_matrix_zero(eq);
		return ;
	}

	/* histogram */
	fm = 1.0f / (float )img->m;
	for (m = 0; m < img->m; ++m) {
		int idx = nv_histgram_bin(NV_MAT_V(img, m, channel));
		freq[idx] += 1.0f;
	}
	/* cumulative histogram */
	for (i = 1; i < 256; ++i) {
		freq[i] = freq[i] + freq[i - 1];
	}
	/* normalise to [0, 1] and remember the smallest value */
	for (i = 0; i < 256; ++i) {
		freq[i] *= fm;
		if (freq[i] < min_freq) {
			min_freq = freq[i];
		}
	}
	/* avoid a zero denominator below */
	if (min_freq == 1.0) {
		min_freq = 0.999999f;
	}

	/* equalization */
	for (m = 0; m < img->m; ++m) {
		int idx = nv_histgram_bin(NV_MAT_V(img, m, channel));
		float v = (freq[idx] - min_freq) * 255.0f / (1.0f - min_freq);
		v = NV_MIN(NV_MAX(v, 0.0f), 255.0f);
		NV_MAT_V(eq, m, channel) = v;
	}
}
/*
 * Replace two centroids by splitting the cluster with the largest error.
 *
 * The members of `max_error_class` are copied into a temporary matrix and
 * clustered into two groups with nv_kmeans(). The two resulting centroids
 * overwrite the centroid of `max_error_class` and the centroid of
 * `min_error_class`.
 * (This differs from the procedure in Bernd Fritzke's paper.)
 *
 * count must hold the current size of every cluster; the number of members
 * found for max_error_class is asserted to match it.
 */
static void
nv_lbgu_update(nv_matrix_t *means,
			   const nv_matrix_t *data,
			   const nv_matrix_t *labels,
			   const nv_matrix_t *count,
			   int max_error_class,
			   int min_error_class,
			   int kmeans_max_epoch)
{
	int m, j;
	int c = NV_MAT_VI(count, max_error_class, 0);
	nv_matrix_t *data_tmp = nv_matrix_alloc(means->n, c);
	nv_matrix_t *means_tmp = nv_matrix_alloc(means->n, 2);
	nv_matrix_t *labels_tmp = nv_matrix_alloc(1, c);
	nv_matrix_t *count_tmp = nv_matrix_alloc(1, 2);

	/* hand nv_kmeans cleared buffers, as the other nv_kmeans callers do */
	nv_matrix_zero(data_tmp);
	nv_matrix_zero(means_tmp);
	nv_matrix_zero(labels_tmp);
	nv_matrix_zero(count_tmp);

	/* gather the members of the cluster that is going to be split */
	for (m = j = 0; m < data->m; ++m) {
		if (max_error_class == NV_MAT_VI(labels, m, 0)) {
			nv_vector_copy(data_tmp, j++, data, m);
		}
	}
	NV_ASSERT(j == c);

	nv_kmeans(means_tmp, count_tmp, labels_tmp, data_tmp, 2, kmeans_max_epoch);

	nv_vector_copy(means, max_error_class, means_tmp, 0);
	nv_vector_copy(means, min_error_class, means_tmp, 1);

	nv_matrix_free(&data_tmp);
	nv_matrix_free(&means_tmp);
	nv_matrix_free(&labels_tmp);
	nv_matrix_free(&count_tmp);
}
/*
 * Integral image (summed-area table).
 *
 * `integral` must be one row and one column larger than `img` (asserted).
 * Row 0 and column 0 of `integral` stay zero; entry (r, c) of plane 0
 * receives the sum of plane `channel` of `img` over rows < r and cols < c.
 */
void
nv_integral(nv_matrix_t *integral, const nv_matrix_t *img, int channel)
{
	const int row_end = img->rows + 1;
	const int col_end = img->cols + 1;
	int r;

	NV_ASSERT(
		integral->rows - 1 == img->rows
		&& integral->cols - 1 == img->cols
	);

	nv_matrix_zero(integral);

	for (r = 1; r < row_end; ++r) {
		float row_acc = 0.0f; /* sum of this image row left of the current column */
		int c = 1;

		while (c < col_end) {
			const float pixel = NV_MAT3D_V(img, r - 1, c - 1, channel);

			NV_MAT3D_V(integral, r, c, 0) =
				NV_MAT3D_V(integral, r - 1, c, 0) + row_acc + pixel;
			row_acc += pixel;
			++c;
		}
	}
}
/*
 * Accumulate the per-cluster error into `e` and return the mean error.
 *
 * For every data vector, nv_euclidean2() to its assigned centroid is added
 * to the entry of `e` for that cluster. The return value is the sum of all
 * those distances divided by data->m. `count` is not read.
 */
static float
nv_lbgu_e(nv_matrix_t *e,
		  const nv_matrix_t *means,
		  const nv_matrix_t *data,
		  const nv_matrix_t *labels,
		  const nv_matrix_t *count)
{
	float total = 0.0f;
	int row;

	nv_matrix_zero(e);

	for (row = 0; row < data->m; ++row) {
		const int cls = NV_MAT_VI(labels, row, 0);
		const float dist = nv_euclidean2(means, cls, data, row);

		NV_MAT_V(e, cls, 0) += dist;
		total += dist;
	}

	return total / data->m;
}
/*
 * Reset a logistic-regression model by zeroing its weight matrix.
 * `data` is accepted for interface symmetry but is not read.
 */
void
nv_lr_init(nv_lr_t *lr, const nv_matrix_t *data)
{
	(void)data;

	nv_matrix_zero(lr->w);
}
/*
 * Reset a passive-aggressive model: the weight matrix pa->w is zeroed.
 */
void
nv_pa_init(nv_pa_t *pa)
{
	nv_matrix_zero(pa->w);
}
int nv_klr_em(nv_lr_t *lr, // k nv_matrix_t *count, // k nv_matrix_t *labels, // data->m const nv_matrix_t *data, const nv_lr_param_t param, const int max_epoch) { int j, l; int processing = 1, last_processing = 0; int converge, epoch; long t; int relabel_count; int empty_class; float relabel_per; int num_threads = nv_omp_procs(); nv_matrix_t *old_labels = nv_matrix_alloc(1, data->m); nv_matrix_t *count_tmp = nv_matrix_list_alloc(1, lr->k, num_threads); NV_ASSERT(labels->m >= data->m); NV_ASSERT(count->m >= lr->k); nv_matrix_copy(old_labels, 0, labels, 0, old_labels->m); epoch = 0; do { if (last_processing) { processing = 0; } t = nv_clock(); nv_matrix_zero(count); nv_matrix_zero(count_tmp); #ifdef _OPENMP #pragma omp parallel for num_threads(num_threads) #endif for (j = 0; j < data->m; ++j) { int label = nv_lr_predict_label(lr, data, j); int thread_idx = nv_omp_thread_id(); NV_ASSERT(label < lr->k); NV_MAT_V(labels, j, 0) = (float)label; NV_MAT_LIST_V(count_tmp, thread_idx, label, 0) += 1.0f; } for (l = 0; l < num_threads; ++l) { for (j = 0; j < count->m; ++j) { NV_MAT_V(count, j, 0) += NV_MAT_LIST_V(count_tmp, l, j, 0); } } ++epoch; /* 終了判定 */ relabel_count = 0; for (j = 0; j < data->m; ++j) { if (NV_MAT_V(labels, j, 0) != NV_MAT_V(old_labels, j, 0)) { ++relabel_count; } } empty_class = 0; for (j = 0; j < lr->k; ++j) { empty_class += (NV_MAT_V(count, j, 0) > 0.0f ? 0:1); } relabel_per = (float)relabel_count / data->m; if (epoch > 1) { converge = (relabel_per < 0.001f) ? 
1:0; } else { converge =0; } if (nv_klr_progress_flag) { printf("nv_klr: %d: relabel: %f, empty_class: %d, %ldms\n", epoch, relabel_per, empty_class, nv_clock() -t); fflush(stdout); } t = nv_clock(); if (converge) { /* 終了 */ if (nv_klr_progress_flag) { printf("nv_klr: %d: finish:\n", epoch); fflush(stdout); } processing = 0; } else { /* ラベル更新 */ nv_matrix_copy(old_labels, 0, labels, 0, old_labels->m); /* LR再計算 */ nv_lr_train(lr, data, labels, param); /* 最大試行回数判定 */ if (max_epoch != 0 && epoch >= max_epoch) { /* 終了 */ processing = 0; } if (nv_klr_progress_flag) { printf("nv_klr: %d: train: %ldms\n", epoch, nv_clock() -t); fflush(stdout); } } } while (processing); nv_matrix_free(&old_labels); nv_matrix_free(&count_tmp); return converge; }
void kmeans_feature(nv_matrix_t *fv, int fv_j, const nv_matrix_t *src, const nv_matrix_t *zca_m, const nv_matrix_t *zca_u, const nv_matrix_t *centroids) { nv_matrix_t *patches; nv_matrix_t *conv; int y, i; NV_ASSERT(fv->n == DATA_N); patches = nv_patch_matrix_alloc(src, PATCH_SIZE); nv_patch_extract(patches, src, PATCH_SIZE); nv_standardize_local_all(patches, 10.0f); nv_zca_whitening_all(patches, zca_m, 0, zca_u); conv = nv_matrix_alloc(centroids->m, GRID); nv_matrix_zero(conv); for (y = 0; y < patches->rows; ++y) { int x; for (x = 0; x < patches->cols; ++x) { nv_matrix_t *z = nv_matrix_alloc(centroids->m, 1); nv_matrix_t *d = nv_matrix_alloc(patches->n, 1); int conv_index; int r = (int)sqrtf(GRID); int x_idx = (x / (patches->cols / r)); int y_idx = (y / (patches->rows / r)); if (x_idx >= r) { x_idx = r -1; } if (y_idx >= r) { y_idx = r -1; } conv_index = y_idx * r + x_idx; if (conv_index >= GRID) { conv_index = GRID - 1; } #if TRIANGLE_DISTANCE { float mean; float min_z = FLT_MAX; int k; for (k = 0; k < centroids->m; ++k) { NV_MAT_V(z, 0, k) = nv_euclidean(centroids, k, patches, NV_MAT_M(patches, y, x)); if (NV_MAT_V(z, 0, k) < min_z) { min_z = NV_MAT_V(z, 0, k); } } mean = nv_vector_mean(z, 0); #if TRIANGLE_DISTANCE_HALF mean = mean - (mean - min_z) / 4.0f; #endif for (k = 0; k < centroids->m; ++k) { float v = mean - NV_MAT_V(z, 0, k); if (0.0f < v) { #if TRIANGLE_DISTANCE_MAX if (NV_MAT_V(conv, conv_index, k) < v) { NV_MAT_V(conv, conv_index, k) = v; } #else NV_MAT_V(conv, conv_index, k) += v; #endif } } } #else { int nn = nv_nn(centroids, patches, NV_MAT_M(patches, y, x)); NV_MAT_V(conv, conv_index, nn) += 1.0f; } #endif nv_matrix_free(&z); nv_matrix_free(&d); } } for (i = 0; i < GRID; ++i) { memmove(&NV_MAT_V(fv, fv_j, i * conv->n), &NV_MAT_V(conv, i, 0), conv->n * sizeof(float)); } nv_matrix_free(&patches); nv_matrix_free(&conv); }
/*
 * Reset an AROW model: the weights arow->w and arow->bias are both zeroed.
 */
void
nv_arow_init(nv_arow_t *arow)
{
	nv_matrix_zero(arow->w);
	nv_matrix_zero(arow->bias);
}
/*
 * Compute the shape-context descriptor of `img` into `sctx`.
 *
 * 1. An edge image is produced by nv_shapecontext_edge_image(); every pixel
 *    above 50 becomes a candidate point (row, col).
 * 2. sctx->sctx->list points are chosen through a shuffled index; when there
 *    are fewer candidates, the index is padded with random repeats.
 * 3. For every chosen point a log-polar histogram (NV_SC_LOG_R_BIN radial x
 *    NV_SC_THETA_BIN angular bins) of the other chosen points is
 *    accumulated, and the point's coordinate and radius are stored.
 *
 * The `r` argument is overwritten with half the image height before use.
 * Pairs that fall on the same coordinate (possible after padding) are not
 * counted: their log-distance is -inf and has no valid radial bin.
 */
void
nv_shapecontext_feature(nv_shapecontext_t *sctx,
						const nv_matrix_t *img,
						float r
						)
{
	int m, row, col, pc, i, l;
	nv_matrix_t *edge = nv_matrix3d_alloc(1, img->rows, img->cols);
	nv_matrix_t *points = nv_matrix_alloc(2, img->m);
	int *rand_idx = (int *)nv_malloc(sizeof(int) * img->m);
	float u_x, u_y, p_x, p_y, r_e;
	int pn;

	/* thinning / edge extraction */
	nv_matrix_zero(points);
	nv_shapecontext_edge_image(edge, img);
	pc = 0;
	u_x = 0.0f;
	u_y = 0.0f;
	for (row = 0; row < edge->rows; ++row) {
		for (col = 0; col < edge->cols; ++col) {
			if (NV_MAT3D_V(edge, row, col, 0) > 50.0f) {
				NV_MAT_V(points, pc, 0) = (float)row;
				NV_MAT_V(points, pc, 1) = (float)col;
				++pc;
				u_y += (float)row;
				u_x += (float)col;
			}
		}
	}
	u_x /= pc;
	u_y /= pc;

	/* reduce to the requested number of features (random selection) */
	pn = NV_MIN(pc, sctx->sctx->list);
	nv_shuffle_index(rand_idx, 0, pc);
#if 1
	{
		float max_x, max_y;

		if (pc < sctx->sctx->list) {
			/* too few points: pad with random repeats */
			for (i = pc; i < sctx->sctx->list; ++i) {
				rand_idx[i] = (int)(nv_rand() * pn);
			}
		}
		pc = pn = sctx->sctx->list;

		/* extent of the selected points around their centroid */
		max_x = 0.0f;
		max_y = 0.0f;
		for (m = 0; m < pn; ++m) {
			float yd = fabsf(NV_MAT_V(points, rand_idx[m], 0) - u_y);
			float xd = fabsf(NV_MAT_V(points, rand_idx[m], 1) - u_x);
			max_x = NV_MAX(max_x, xd);
			max_y = NV_MAX(max_y, yd);
		}
		/* the radius is fixed to half the image height
		 * (the extent computed above is currently not used) */
		r = (float)img->rows/2.0f;
	}
#endif

	/* base of the logarithm such that log(r) spans NV_SC_LOG_R_BIN bins */
	r_e = powf(r, 1.0f / NV_SC_LOG_R_BIN);

	/* compute the histograms */
	sctx->n = pn;
	nv_matrix_zero(sctx->sctx);
	nv_matrix_zero(sctx->tan_angle);
	for (l = 0; l < pn; ++l) {
		/* tangent angle (disabled: always 0) */
#if 0
		float max_bin = 0.0f, min_bin = FLT_MAX;
		float tan_angle = tangent_angle(
			r,
			NV_MAT_V(points, rand_idx[l], 0),
			NV_MAT_V(points, rand_idx[l], 1),
			points, pc);
#else
		float tan_angle = 0.0f;
#endif
		p_y = NV_MAT_V(points, rand_idx[l], 0);
		p_x = NV_MAT_V(points, rand_idx[l], 1);
		NV_MAT_V(sctx->tan_angle, l, 0) = tan_angle;
		NV_MAT_V(sctx->coodinate, l, 0) = p_y;
		NV_MAT_V(sctx->coodinate, l, 1) = p_x;
		NV_MAT_V(sctx->radius, l, 0) = r;

		/* shape context of point l */
		for (i = 0; i < pn; ++i) {
			float xd = NV_MAT_V(points, rand_idx[i], 1) - p_x;
			float yd = NV_MAT_V(points, rand_idx[i], 0) - p_y;
			float theta;
			float log_r = logf(sqrtf(xd * xd + yd * yd)) / logf(r_e);
			float atan_r = atan2f(xd, yd);

			if (i == l) {
				continue;
			}
			/* bring the angle into [0, 2*pi) and rotate by the tangent angle */
			if (atan_r < 0.0f) {
				atan_r = 2.0f * NV_PI + atan_r;
			}
			if (tan_angle > 0.0f) {
				if (atan_r + tan_angle > 2.0f * NV_PI) {
					atan_r = atan_r + tan_angle - 2.0f * NV_PI;
				} else {
					atan_r += tan_angle;
				}
			} else {
				if (atan_r + tan_angle < 0.0f) {
					atan_r = 2.0f * NV_PI + (atan_r + tan_angle);
				} else {
					atan_r += tan_angle;
				}
			}
			theta = atan_r / (2.0f * NV_PI / NV_SC_THETA_BIN);

			/* log_r > -1 keeps (int)log_r >= 0; it rejects the -inf that a
			 * zero distance produces (and NaN), which would otherwise be
			 * converted to an out-of-range bin index. */
			if (theta < NV_SC_THETA_BIN
				&& log_r > -1.0f
				&& log_r < NV_SC_LOG_R_BIN)
			{
				NV_MAT3D_LIST_V(sctx->sctx, l, (int)log_r, (int)theta, 0) += 1.0f;
			}
		}
#if 0
		for (row = 0; row < NV_SC_LOG_R_BIN; ++row) {
			for (col = 0; col < NV_SC_THETA_BIN; ++col) {
				max_bin = NV_MAX(max_bin, NV_MAT3D_LIST_V(sctx->sctx, l, row, col, 0));
				min_bin = NV_MIN(min_bin, NV_MAT3D_LIST_V(sctx->sctx, l, row, col, 0));
			}
		}
		if (max_bin > 0.0f) {
			for (row = 0; row < NV_SC_LOG_R_BIN; ++row) {
				for (col = 0; col < NV_SC_THETA_BIN; ++col) {
					NV_MAT3D_LIST_V(sctx->sctx, l, row, col, 0)
						= (NV_MAT3D_LIST_V(sctx->sctx, l, row, col, 0) - min_bin) / (max_bin - min_bin);
				}
			}
		}
#endif
	}

	nv_matrix_free(&edge);
	nv_matrix_free(&points);
	nv_free(rand_idx);
}
float nv_shapecontext_distance(const nv_shapecontext_t *sctx1, const nv_shapecontext_t *sctx2) { float distance = 0.0f; int points = NV_MIN(sctx1->n, sctx2->n); int m, n; nv_matrix_t *cost_matrix = nv_matrix_alloc(points, points); nv_matrix_t *mincost = nv_matrix_alloc(points, 1); #ifdef _DEBUG FILE *f1 = fopen("1.dat", "w"); FILE *f2 = fopen("2.dat", "w"); FILE *fd = fopen("d.dat", "w"); if (sctx1->n != points) { const nv_shapecontext_t *t1 = sctx1; sctx1 = sctx2; sctx2 = t1; } #endif // cosine distance nv_matrix_zero(cost_matrix); for (m = 0; m < points; ++m) { for (n = 0; n < points; ++n) { float cosdist = x2_test(sctx1->sctx, m, sctx2->sctx, n);//cos_distance(sctx1->sctx, m, sctx2->sctx, n); float dy = NV_MAT_V(sctx1->coodinate, m, 0) - NV_MAT_V(sctx2->coodinate, n, 0); float dx = NV_MAT_V(sctx1->coodinate, m, 1) - NV_MAT_V(sctx2->coodinate, n, 1); float rx2 = (NV_MAT_V(sctx1->radius, m, 0) + NV_MAT_V(sctx2->radius, n, 0)); float eudist = sqrtf(dy * dy + dx * dx)/sqrtf(rx2*rx2); float v = 1.0f * eudist + 0.9f * cosdist; NV_MAT_V(cost_matrix, m, n) = v; } } distance += nv_munkres(mincost, cost_matrix) / points; #ifdef _DEBUG for (m = 0; m < sctx1->n; ++m) { fprintf(f1, "%f %f\n", NV_MAT_V(sctx1->coodinate, m, 1), NV_MAT_V(sctx1->coodinate, m, 0)); } for (n = 0; n < sctx2->n; ++n) { fprintf(f2, "%f %f\n", NV_MAT_V(sctx2->coodinate, n, 1), NV_MAT_V(sctx2->coodinate, n, 0)); } for (n = 0; n < sctx2->n;++n) { fprintf(fd, "%f %f\n", NV_MAT_V(sctx2->coodinate, n, 1), NV_MAT_V(sctx2->coodinate, n, 0)); fprintf(fd, "%f %f\n", NV_MAT_V(sctx1->coodinate, NV_MAT_VI(mincost, 0, n), 1), NV_MAT_V(sctx1->coodinate, NV_MAT_VI(mincost, 0, n), 0)); fprintf(fd, "\n\n"); } fclose(f1); fclose(f2); fclose(fd); #endif nv_matrix_free(&cost_matrix); nv_matrix_free(&mincost); return distance; }
/*
 * Backward pass and weight update for one mini-batch of NV_MLP_BATCH_SIZE
 * samples.
 *
 * mlp:              network whose weights and biases are updated in place
 * *_momentum:       momentum buffers, one per weight/bias matrix; read and
 *                   updated here
 * output_y:         network outputs, one row per batch slot
 * input_y:          first-layer activations, one row per batch slot
 * corrupted_data:   network inputs, one row per batch slot
 * t:                target vectors, indexed through dj
 * dj:               dj[j] is the row of `t` that belongs to batch slot j
 * ir, hr:           step sizes applied to the input-layer and hidden-layer
 *                   gradients respectively
 *
 * All gradient and back-propagation buffers are temporaries that are
 * allocated and released inside the call.
 */
static void
nv_mlp_backward(
	nv_mlp_t *mlp,
	nv_matrix_t *input_w_momentum,
	nv_matrix_t *input_bias_momentum,
	nv_matrix_t *hidden_w_momentum,
	nv_matrix_t *hidden_bias_momentum,
	const nv_matrix_t *output_y,
	const nv_matrix_t *input_y,
	const nv_matrix_t *corrupted_data,
	const nv_matrix_t *t,
	int *dj,
	const float ir,
	const float hr)
{
	int n, m, j;
	nv_matrix_t *output_bp = nv_matrix_alloc(mlp->output, NV_MLP_BATCH_SIZE);
	nv_matrix_t *hidden_bp = nv_matrix_alloc(mlp->input_w->m, NV_MLP_BATCH_SIZE);
	nv_matrix_t *input_w_grad = nv_matrix_alloc(mlp->input_w->n, mlp->input_w->m);
	nv_matrix_t *input_bias_grad = nv_matrix_alloc(mlp->input_bias->n, mlp->input_bias->m);
	nv_matrix_t *hidden_w_grad = nv_matrix_alloc(mlp->hidden_w->n, mlp->hidden_w->m);
	nv_matrix_t *hidden_bias_grad = nv_matrix_alloc(mlp->hidden_bias->n, mlp->hidden_bias->m);

	nv_matrix_zero(input_w_grad);
	nv_matrix_zero(hidden_w_grad);
	nv_matrix_zero(input_bias_grad);
	nv_matrix_zero(hidden_bias_grad);

	/* Step 1: per-sample error signals. Each batch slot j only writes its
	 * own rows of output_bp / hidden_bp. */
#ifdef _OPENMP
#pragma omp parallel for private(m, n)
#endif
	for (j = 0; j < NV_MLP_BATCH_SIZE; ++j) {
		/* output error: prediction minus target */
		for (n = 0; n < output_bp->n; ++n) {
			float y_t = NV_MAT_V(output_y, j, n) - NV_MAT_V(t, dj[j], n);
			float bp = y_t;
			NV_MAT_V(output_bp, j, n) = bp;
		}
		/* propagate through hidden_w and multiply by a * (1 - a) of the
		 * first-layer activation a */
		for (m = 0; m < mlp->hidden_w->n; ++m) {
			float y = 0.0f;
			for (n = 0; n < mlp->output; ++n) {
				y += NV_MAT_V(output_bp, j, n) * NV_MAT_V(mlp->hidden_w, n, m);
			}
			NV_MAT_V(hidden_bp, j, m) = y * (1.0f - NV_MAT_V(input_y, j, m)) * NV_MAT_V(input_y, j, m);
		}
	}

	/* Step 2: hidden-layer gradient, summed over the batch and scaled by hr.
	 * Parallel over weight rows, so each thread owns its gradient row. */
#ifdef _OPENMP
#pragma omp parallel for private(m, j)
#endif
	for (n = 0; n < mlp->hidden_w->m; ++n) {
		for (j = 0; j < NV_MLP_BATCH_SIZE; ++j) {
			const float w = hr * NV_MAT_V(output_bp, j, n);
			for (m = 0; m < mlp->hidden_w->n; ++m) {
				NV_MAT_V(hidden_w_grad, n, m) += w * NV_MAT_V(input_y, j, m);
			}
			NV_MAT_V(hidden_bias_grad, n, 0) += w * NV_MLP_BIAS;
		}
	}

	/* Step 3: input-layer gradient, summed over the batch and scaled by ir. */
#ifdef _OPENMP
#pragma omp parallel for private(m, j)
#endif
	for (n = 0; n < mlp->input_w->m; ++n) {
		for (j = 0; j < NV_MLP_BATCH_SIZE; ++j) {
			const float w = ir * NV_MAT_V(hidden_bp, j, n);
			if (w != 0.0f) {
				for (m = 0; m < mlp->input_w->n; ++m) {
					NV_MAT_V(input_w_grad, n, m) += w * NV_MAT_V(corrupted_data, j, m);
				}
				NV_MAT_V(input_bias_grad, n, 0) += w * NV_MLP_BIAS;
			} /* w == 0 is skipped; the original note here says "dropout" */
		}
	}

	/* Step 4: momentum update of the hidden layer. The weight-decay term is
	 * added for hidden_w only; the applied step is momentum * (1 - NV_MLP_MOMENTUM). */
#ifdef _OPENMP
#pragma omp parallel for private(m)
#endif
	for (n = 0; n < mlp->hidden_w->m; ++n) {
		for (m = 0; m < mlp->hidden_w->n; ++m) {
			NV_MAT_V(hidden_w_momentum, n, m) =
				NV_MLP_MOMENTUM * NV_MAT_V(hidden_w_momentum, n, m)
				+ NV_MLP_WEIGHT_DECAY * hr * NV_MAT_V(mlp->hidden_w, n, m)
				+ NV_MAT_V(hidden_w_grad, n, m);
			NV_MAT_V(mlp->hidden_w, n, m) -= NV_MAT_V(hidden_w_momentum, n, m) * (1.0f - NV_MLP_MOMENTUM);
		}
		NV_MAT_V(hidden_bias_momentum, n, 0) =
			NV_MLP_MOMENTUM * NV_MAT_V(hidden_bias_momentum, n, 0)
			+ NV_MAT_V(hidden_bias_grad, n, 0);
		NV_MAT_V(mlp->hidden_bias, n, 0) -= NV_MAT_V(hidden_bias_momentum, n, 0) * (1.0f - NV_MLP_MOMENTUM);
	}

	/* Step 5: momentum update of the input layer (no weight-decay term). */
#ifdef _OPENMP
#pragma omp parallel for private(m)
#endif
	for (n = 0; n < mlp->input_w->m; ++n) {
		for (m = 0; m < mlp->input_w->n; ++m) {
			NV_MAT_V(input_w_momentum, n, m) =
				NV_MLP_MOMENTUM * NV_MAT_V(input_w_momentum, n, m)
				+ NV_MAT_V(input_w_grad, n, m);
			NV_MAT_V(mlp->input_w, n, m) -= NV_MAT_V(input_w_momentum, n, m) * (1.0f - NV_MLP_MOMENTUM);
		}
		NV_MAT_V(input_bias_momentum, n, 0) =
			NV_MLP_MOMENTUM * NV_MAT_V(input_bias_momentum, n, 0)
			+ NV_MAT_V(input_bias_grad, n, 0);
		NV_MAT_V(mlp->input_bias, n, 0) -= NV_MAT_V(input_bias_momentum, n, 0) * (1.0f - NV_MLP_MOMENTUM);
	}

	nv_matrix_free(&input_w_grad);
	nv_matrix_free(&hidden_w_grad);
	nv_matrix_free(&input_bias_grad);
	nv_matrix_free(&hidden_bias_grad);
	nv_matrix_free(&output_bp);
	nv_matrix_free(&hidden_bp);
}
/*
 * Mini-batch training loop.
 *
 * Epochs are numbered from start_epoch + 1; the loop body always runs at
 * least once and stops after the epoch numbered end_epoch (or after the
 * first epoch when that number is already past end_epoch). In every epoch
 * the sample order is reshuffled and data->m / NV_MLP_BATCH_SIZE full
 * batches are processed: each sample is corrupted, forwarded and scored in
 * parallel, then nv_mlp_backward() updates the weights for the batch.
 *
 * data:      training vectors; must have more than NV_MLP_BATCH_SIZE rows
 * label:     class index of every training vector
 * t:         target vector of every training vector
 * ir, hr:    step sizes handed to nv_mlp_backward()
 * max_epoch: not read by this function
 *
 * Returns the training accuracy (correct / processed) of the last epoch.
 * Progress is printed when nv_mlp_progress_flag is set.
 */
float
nv_mlp_train_lex(nv_mlp_t *mlp,
				 const nv_matrix_t *data,
				 const nv_matrix_t *label,
				 const nv_matrix_t *t,
				 float ir, float hr,
				 int start_epoch, int end_epoch, int max_epoch)
{
	int epoch;
	float p;
	/* per-batch activations */
	nv_matrix_t *input_y = nv_matrix_alloc(mlp->input_w->m, NV_MLP_BATCH_SIZE);
	nv_matrix_t *hidden_y = nv_matrix_alloc(mlp->hidden_w->m, NV_MLP_BATCH_SIZE);
	nv_matrix_t *output_y = nv_matrix_alloc(mlp->output, NV_MLP_BATCH_SIZE);
	nv_matrix_t *corrupted_data = nv_matrix_alloc(mlp->input, NV_MLP_BATCH_SIZE);
	/* momentum state, kept across batches and epochs */
	nv_matrix_t *input_w_momentum = nv_matrix_alloc(mlp->input_w->n, mlp->input_w->m);
	nv_matrix_t *input_bias_momentum = nv_matrix_alloc(mlp->input_bias->n, mlp->input_bias->m);
	nv_matrix_t *hidden_w_momentum = nv_matrix_alloc(mlp->hidden_w->n, mlp->hidden_w->m);
	nv_matrix_t *hidden_bias_momentum = nv_matrix_alloc(mlp->hidden_bias->n, mlp->hidden_bias->m);
	int *djs = nv_alloc_type(int, NV_MLP_BATCH_SIZE);
	int *rand_idx = nv_alloc_type(int, data->m);

	(void)max_epoch;

	NV_ASSERT(data->m > NV_MLP_BATCH_SIZE);

	nv_matrix_zero(input_w_momentum);
	nv_matrix_zero(hidden_w_momentum);
	nv_matrix_zero(input_bias_momentum);
	nv_matrix_zero(hidden_bias_momentum);

	for (epoch = start_epoch + 1; ; ++epoch) {
		const long tm = nv_clock();
		int correct = 0;
		int count = 0;
		float e = 0.0f;
		int batch;

		nv_shuffle_index(rand_idx, 0, data->m);

		for (batch = 0; batch < data->m / NV_MLP_BATCH_SIZE; ++batch) {
			int j;

#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic, 1) reduction(+:correct, count, e)
#endif
			for (j = 0; j < NV_MLP_BATCH_SIZE; ++j) {
				const int dj = rand_idx[batch * NV_MLP_BATCH_SIZE + j];
				int label_correct;

				djs[j] = dj;

				nv_mlp_corrupt(mlp, corrupted_data, j, data, dj);
				nv_mlp_forward(mlp, input_y, j, hidden_y, j, corrupted_data, j);
				nv_mlp_softmax(output_y, j, hidden_y, j);
				e += nv_mlp_error(output_y, j, t, dj);

				label_correct = (int)NV_MAT_V(label, dj, 0);
				if (nv_vector_max_n(output_y, j) == label_correct) {
					++correct;
				}
				count += 1;
			}

			nv_mlp_backward(
				mlp,
				input_w_momentum, input_bias_momentum,
				hidden_w_momentum, hidden_bias_momentum,
				output_y, input_y, corrupted_data,
				t, djs, ir, hr);
		}

		p = (float)correct / count;

		if (nv_mlp_progress_flag) {
			printf("%d: E:%E, %f (%d/%d), %ldms\n",
				epoch, e / count / mlp->output,
				p, correct, count,
				nv_clock() - tm);
			if (nv_mlp_progress_flag >= 2) {
				nv_mlp_train_accuracy(mlp, data, label);
			}
			fflush(stdout);
		}

		if (!(epoch < end_epoch)) {
			break;
		}
	}

	nv_free(rand_idx);
	nv_free(djs);
	nv_matrix_free(&input_y);
	nv_matrix_free(&hidden_y);
	nv_matrix_free(&output_y);
	nv_matrix_free(&corrupted_data);
	nv_matrix_free(&input_w_momentum);
	nv_matrix_free(&input_bias_momentum);
	nv_matrix_free(&hidden_w_momentum);
	nv_matrix_free(&hidden_bias_momentum);

	return p;
}
/*
 * Compute the first `n` eigenpairs of the square matrix `mat` by repeated
 * matrix-vector multiplication followed by deflation.
 *
 * eigen_vec: output, row i receives the i-th vector; needs at least n rows
 *            and mat->m columns (asserted)
 * eigen_val: output, row i receives the i-th value; needs at least n rows
 *            (asserted). The stored value is the vector norm after the last
 *            multiplication, so it is never negative.
 * mat:       input matrix, must be square (asserted); it is not modified,
 *            the work is done on a duplicate
 * n:         number of eigenpairs, 0 < n <= mat->m (asserted)
 * max_epoch: maximum number of multiplications per eigenpair
 *
 * Always returns 0.
 */
int
nv_eigen(nv_matrix_t *eigen_vec,
		 nv_matrix_t *eigen_val,
		 const nv_matrix_t *mat,
		 int n,
		 int max_epoch)
{
	int i;
	nv_matrix_t *a = nv_matrix_dup(mat);
	nv_matrix_t *vec_tmp = nv_matrix_alloc(a->m, 1);
#if NV_ENABLE_SSE2
	/* number of columns covered by whole groups of 4 floats */
	const int pk_lp = (a->n & 0xfffffffc);
#endif

	NV_ASSERT(n > 0);
	NV_ASSERT(n <= mat->m);
	NV_ASSERT(n <= eigen_vec->m);
	NV_ASSERT(n <= eigen_val->m);
	NV_ASSERT(mat->m == mat->n);
	NV_ASSERT(mat->m == eigen_vec->n);

	/* every vector starts as the normalised all-ones vector */
	nv_matrix_zero(eigen_val);
	nv_matrix_fill(eigen_vec, 1.0f);
	nv_vector_normalize_all(eigen_vec);

	for (i = 0; i < n; ++i) {
		int k, jj;
		float lambda_old; /* only read when k > 0, i.e. after it has been set */

		for (k = 0; k < max_epoch; ++k) {
			int j;
			float lambda;

			/* vec_tmp <- a * eigen_vec[i] */
#ifdef _OPENMP
#pragma omp parallel for
#endif
			for (j = 0; j < a->m; ++j) {
				NV_MAT_V(vec_tmp, 0, j) = nv_vector_dot(a, j, eigen_vec, i);
			}
			/* the norm is the current eigenvalue estimate */
			lambda = nv_vector_norm(vec_tmp, 0);
			if (lambda > 0.0f) {
				nv_vector_muls(vec_tmp, 0, vec_tmp, 0, 1.0f / lambda);
			}

			NV_MAT_V(eigen_val, i, 0) = lambda;
			nv_vector_copy(eigen_vec, i, vec_tmp, 0);

			/* stop once the estimate no longer changes */
			if (k > 0) {
				if (fabsf(lambda_old - lambda) < FLT_EPSILON) {
					break;
				}
			}
			lambda_old = NV_MAT_V(eigen_val, i, 0);
		}

		/* deflation: a <- a - lambda * v * v^T, so that the next pass finds
		 * the next eigenpair */
#if NV_ENABLE_SSE2
		{
			const __m128 val = _mm_set1_ps(NV_MAT_V(eigen_val, i, 0));
#ifdef _OPENMP
#pragma omp parallel for
#endif
			for (jj = 0; jj < a->m; ++jj) {
				int ii;
				const __m128 vjj = _mm_set1_ps(NV_MAT_V(eigen_vec, i, jj));
				/* 4 columns at a time; uses aligned loads/stores, so this
				 * assumes matrix rows are 16-byte aligned — TODO confirm */
				for (ii = 0; ii < pk_lp; ii += 4) {
					_mm_store_ps(&NV_MAT_V(a, jj, ii),
						_mm_sub_ps(*(const __m128 *)&NV_MAT_V(a, jj, ii),
						_mm_mul_ps(val,_mm_mul_ps(vjj, *(const __m128 *)&NV_MAT_V(eigen_vec, i, ii)))));
				}
				/* remaining columns */
				for (; ii < a->n; ++ii) {
					NV_MAT_V(a, jj, ii) -=
						NV_MAT_V(eigen_val, i, 0)
						* NV_MAT_V(eigen_vec, i, ii)
						* NV_MAT_V(eigen_vec, i, jj);
				}
			}
		}
#else
#ifdef _OPENMP
#pragma omp parallel for
#endif
		for (jj = 0; jj < a->m; ++jj) {
			int ii;
			for (ii = 0; ii < a->n; ++ii) {
				NV_MAT_V(a, jj, ii) -=
					NV_MAT_V(eigen_val, i, 0)
					* NV_MAT_V(eigen_vec, i, ii)
					* NV_MAT_V(eigen_vec, i, jj);
			}
		}
#endif
	}

	nv_matrix_free(&vec_tmp);
	nv_matrix_free(&a);

	return 0;
}
void nv_lr_train(nv_lr_t *lr, const nv_matrix_t *data, const nv_matrix_t *label, nv_lr_param_t param) { int m, n, i, j, k, l; long tm, tm_all = nv_clock(); float oe = FLT_MAX, er = 1.0f, we; float sum_e = 0.0f; int epoch = 0; int pn = (data->m > 256) ? 128:1; int step = data->m / (pn); int threads = nv_omp_procs(); nv_matrix_t *y = nv_matrix_alloc(lr->k, threads); nv_matrix_t *t = nv_matrix_alloc(lr->k, threads); nv_matrix_t *dw = nv_matrix_list_alloc(lr->n, lr->k, threads); nv_matrix_t *count = nv_matrix_alloc(lr->k, 1); nv_matrix_t *label_weight = nv_matrix_alloc(lr->k, 1); float count_max_log; nv_matrix_zero(count); nv_matrix_fill(label_weight, 1.0f); if (param.auto_balance) { /* クラスごとに数が違う場合に更新重みをスケーリングする */ for (m = 0; m < data->m; ++m) { NV_MAT_V(count, 0, (int)NV_MAT_V(label, m, 0)) += 1.0f; } count_max_log = logf(3.0f + NV_MAT_V(count, 0, nv_vector_max_n(count, 0))); for (n = 0; n < count->n; ++n) { if (NV_MAT_V(count, 0, n) > 0.0f) { float count_log = logf(3.0f + NV_MAT_V(count, 0, n)); NV_MAT_V(label_weight, 0, n) = powf(count_max_log, NV_LR_CLASS_COUNT_PENALTY_EXP) / powf(count_log, NV_LR_CLASS_COUNT_PENALTY_EXP); } else { NV_MAT_V(label_weight, 0, n) = 1.0f; } } } do { we = 1.0f / er; tm = nv_clock(); sum_e = 0.0f; for (m = 0; m < step; ++m) { nv_matrix_zero(dw); #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 4) reduction(+:sum_e) num_threads(threads) #endif for (i = 0; i < pn; ++i) { int rand_m = NV_ROUND_INT((data->m - 1) * nv_rand()); int thread_num = nv_omp_thread_id(); int label_i = (int)NV_MAT_V(label, rand_m, 0); float weight = NV_MAT_V(label_weight, 0, label_i); float yp; nv_vector_zero(t, thread_num); NV_MAT_V(t, thread_num, label_i) = 1.0f; nv_lr_predict_vector(lr, y, thread_num, data, rand_m); yp = NV_MAT_V(y, thread_num, (int)NV_MAT_V(label, rand_m, 0)); if (yp < 1.0 - NV_LR_MARGIN) { nv_lr_dw(lr, weight, dw, thread_num, data, rand_m, t, thread_num, y, thread_num); sum_e += nv_lr_error(t, thread_num, y, thread_num); } } for (l = 
1; l < threads; ++l) { for (j = 0; j < dw->m; ++j) { for (i = 0; i < dw->n; ++i) { NV_MAT_LIST_V(dw, 0, j, i) += NV_MAT_LIST_V(dw, l, j, i); } } } #ifdef _OPENMP #pragma omp parallel for private(n) num_threads(threads) if (lr->k > 32) #endif for (k = 0; k < lr->k; ++k) { switch (param.reg_type) { case NV_LR_REG_NONE: for (n = 0; n < lr->n; ++n) { NV_MAT_V(lr->w, k, n) -= we * param.grad_w * NV_MAT_LIST_V(dw, 0, k, n); } break; case NV_LR_REG_L1: // FOBOS L1 for (n = 0; n < lr->n; ++n) { NV_MAT_V(lr->w, k, n) -= we * param.grad_w * NV_MAT_LIST_V(dw, 0, k, n); } for (n = 0; n < lr->n; ++n) { float w_i = NV_MAT_V(lr->w, k, n); float lambda = we * param.reg_w * (1.0f / (1.0f + epoch)); NV_MAT_V(lr->w, k, n) = nv_sign(w_i) * NV_MAX(0.0f, (fabsf(w_i) - lambda)); } break; case NV_LR_REG_L2: for (n = 0; n < lr->n; ++n) { NV_MAT_V(lr->w, k, n) -= we * (param.grad_w * (NV_MAT_LIST_V(dw, 0, k, n) + param.reg_w * NV_MAT_V(lr->w, k, n))); } break; } } } if (nv_lr_progress_flag) { printf("nv_lr:%d: E: %E, %ldms\n", epoch, sum_e / (pn * step), nv_clock() - tm); } if (nv_lr_progress_flag > 1) { int *ok = nv_alloc_type(int, lr->k); int *ng = nv_alloc_type(int, lr->k); memset(ok, 0, sizeof(int) * lr->k); memset(ng, 0, sizeof(int) * lr->k); for (i = 0; i < data->m; ++i) { int predict = nv_lr_predict_label(lr, data, i); int teach = (int)NV_MAT_V(label, i, 0); if (predict == teach) { ++ok[teach]; } else { ++ng[teach]; } } for (i = 0; i < lr->k; ++i) { printf("%d: ok: %d, ng: %d, %f\n", i, ok[i], ng[i], (float)ok[i] / (float)(ok[i] + ng[i])); } nv_free(ok); nv_free(ng); } if (nv_lr_progress_flag) { fflush(stdout); } if (sum_e > oe) { er += 1.0f; } if (er >= 20.0f) { break; } if (sum_e < FLT_EPSILON) { break; } oe = sum_e; } while (param.max_epoch > ++epoch);