Пример #1
0
	/*
	 * Builds a VLAD-style residual feature vector from local descriptors.
	 * Each descriptor is L2-normalized, assigned to its nearest codeword
	 * (POSI() codebook for positive-response keypoints, NEGA() otherwise),
	 * and the residual (descriptor - codeword) is accumulated into the slot
	 * for that codeword. Per-thread accumulator rows keep the OpenMP loop
	 * race-free; rows are summed and the result L2-normalized at the end.
	 *
	 * vec/vec_j   : output matrix and row index (row is overwritten)
	 * key_vec     : keypoint attributes; row i's response sign picks the codebook
	 * desc_vec    : descriptors, one per row (modified in place by normalization)
	 * desc_m      : number of descriptor rows to process
	 */
	void
	feature_vector(nv_matrix_t *vec,
				   int vec_j,
				   nv_matrix_t *key_vec,
				   nv_matrix_t *desc_vec,
				   int desc_m
		)
	{
		const nv_matrix_t *posi = POSI();
		const nv_matrix_t *nega = NEGA();
		int nthreads = nv_omp_procs();
		/* one accumulator row per thread to avoid concurrent writes */
		nv_matrix_t *partial = nv_matrix_alloc(vec->n, nthreads);
		int d;

		nv_matrix_zero(partial);

#ifdef _OPENMP
#pragma omp parallel for num_threads(nthreads)
#endif	
		for (d = 0; d < desc_m; ++d) {
			int tid = nv_omp_thread_id();
			int k;

			nv_vector_normalize(desc_vec, d);

			if (!(NV_MAT_V(key_vec, d, NV_KEYPOINT_RESPONSE_IDX) > 0.0f)) {
				/* non-positive response: assign to the negative codebook;
				   its slots live after the KP positive-codeword slots */
				int nn = nv_nn(nega, desc_vec, d);
				int base = (KP + nn) * NV_KEYPOINT_DESC_N;
				for (k = 0; k < nega->n; ++k) {
					NV_MAT_V(partial, tid, base + k) +=
						NV_MAT_V(desc_vec, d, k) - NV_MAT_V(nega, nn, k);
				}
			} else {
				/* positive response: assign to the positive codebook */
				int nn = nv_nn(posi, desc_vec, d);
				int base = nn * NV_KEYPOINT_DESC_N;
				for (k = 0; k < posi->n; ++k) {
					NV_MAT_V(partial, tid, base + k) +=
						NV_MAT_V(desc_vec, d, k) - NV_MAT_V(posi, nn, k);
				}
			}
		}

		/* reduce the per-thread rows into the caller's row, then normalize */
		nv_vector_zero(vec, vec_j);
		for (d = 0; d < nthreads; ++d) {
			nv_vector_add(vec, vec_j, vec, vec_j, partial, d);
		}
		nv_vector_normalize(vec, vec_j);

		nv_matrix_free(&partial);
	}
Пример #2
0
/*
 * Computes the element-wise mean of all rows of mat and stores it in
 * row mean_m of mean. mean->n must equal mat->n (asserted).
 *
 * Fix: the original computed `1.0f / mat->m` unconditionally, which is a
 * floating-point division by zero when mat has no rows (raises FE_DIVBYZERO
 * and produces +inf, even though the loop then never runs). The output row
 * is now zeroed first and an empty matrix returns the zero vector.
 */
void 
nv_vector_avg(nv_matrix_t *mean, int mean_m, const nv_matrix_t *mat)
{
	float factor;
	int m;

	NV_ASSERT(mean->n == mat->n);

	nv_vector_zero(mean, mean_m);
	if (mat->m <= 0) {
		/* empty input: mean of nothing is defined here as the zero vector */
		return;
	}
	factor = 1.0f / mat->m;
	for (m = 0; m < mat->m; ++m) {
		int n;
		for (n = 0; n < mat->n; ++n) {
			NV_MAT_V(mean, mean_m, n) += factor * NV_MAT_V(mat, m, n);
		}
	}
}
/*
 * Extracts a Haar-like feature vector for a face region from an integral
 * image (`sum`, a summed-area table; NV_MAT3D_V(sum, y, x, 0) is assumed to
 * hold the sum of pixels above-left of (x, y) — TODO confirm against the
 * integral-image builder).
 *
 * The (x, y, width, height) window is mapped onto a virtual 32x32 grid and
 * scanned with 8x8 cells at a stride of 2, giving a 12x12 grid of cells.
 * For each cell, 4 filter pairs are evaluated (vertical 2-rect, horizontal
 * 2-rect, and two diagonal filters), and each response is stored into one of
 * 8 slots per cell depending on its sign — so only the positive-signed slot
 * of each pair is written, the other stays 0.
 *
 * normalize_type selects the final post-processing:
 *   NV_NORMALIZE_MAX  - rescale nonzero entries to [0, 1]
 *   NV_NORMALIZE_NORM - scale to unit L2 norm
 *   NV_NORMALIZE_NONE - leave raw responses
 *
 * feature/feature_m : output matrix and row index (row is zeroed first)
 */
void nv_face_haarlike(nv_face_haarlike_normalize_e normalize_type,
	nv_matrix_t *feature, 
	int feature_m,
	const nv_matrix_t *sum,
	int x, int y, int width, int height)
{
	int ix, iy, n;
	float v, vmax, vmin;
	/* scale factors mapping the virtual 32x32 grid onto the real window */
	float xscale = width / 32.0f;
	float yscale = height / 32.0f;
	float ystep = yscale;
	float xstep = xscale;
	/* 8 feature slots per cell, (32-8)/2 = 12 cells per grid row */
	int hystep = (32 - 8) / 2 * 8;
	/* half-cell extent (4 virtual pixels) in real pixels, for 2-rect splits */
	int sy = NV_ROUND_INT(4.0f * ystep);
	int sx = NV_ROUND_INT(4.0f * xstep);
	int hy, hx;

	nv_vector_zero(feature, feature_m);

	// level1
#ifdef _OPENMP
	//#pragma omp parallel for private(ix)
#endif
	for (iy = 0, hy = 0; iy < 32-8; iy += 2, ++hy) {
		int py = y + NV_ROUND_INT(ystep * iy);
		int ey = py + NV_ROUND_INT(8.0f * ystep);
		/* normalizer for horizontal splits: cell height * max pixel value */
		const float pty = (ey - py) * 255.0f;
		for (ix = 0, hx = 0; ix < 32-8; ix += 2, ++hx) {
			int px = x + NV_ROUND_INT(xstep * ix);
			int ex = px + NV_ROUND_INT(8.0f * xstep);
			float p1, p2, area, ptx;

			// whole-cell pixel sum via the integral image
			area = NV_MAT3D_V(sum, ey, ex, 0)
				- NV_MAT3D_V(sum, ey, px, 0)
				- (NV_MAT3D_V(sum, py, ex, 0) - NV_MAT3D_V(sum, py, px, 0));

			// 1: vertical 2-rect filter
			// [+]
			// [-]
			p1 = NV_MAT3D_V(sum, py + sy, ex, 0)
				- NV_MAT3D_V(sum, py + sy, px, 0)
				- (NV_MAT3D_V(sum, py, ex, 0) - NV_MAT3D_V(sum, py, px, 0));
			p2 = area - p1;
			ptx = (ex - px) * 255.0f;
			/* normalize each half by its own area (in 255-scaled units) */
			p1 /= ((py + sy) - py) * ptx;
			p2 /= (ey - (py + sy)) * ptx;
			/* store |p1 - p2| in slot 0 or 1 depending on sign */
			if (p1 > p2) {
				NV_MAT_V(feature, feature_m, hy * hystep + hx * 8 + 0) = p1 - p2;
			} else {
				NV_MAT_V(feature, feature_m, hy * hystep + hx * 8 + 1) = p2 - p1;
			}

			// 2: horizontal 2-rect filter
			// [+][-]
			p1 = NV_MAT3D_V(sum, ey, px + sx, 0)
				- NV_MAT3D_V(sum, ey, px, 0)
				- (NV_MAT3D_V(sum, py, px + sx, 0) - NV_MAT3D_V(sum, py, px, 0));
			p2 = area - p1;
			p1 /= ((px + sx) - px) * pty;
			p2 /= (ex - (px + sx)) * pty;
			/* slot 2 or 3 depending on which half dominates */
			if (p1 > p2) {
				NV_MAT_V(feature, feature_m, hy * hystep + hx * 8 + 2) = p1 - p2;
			} else {
				NV_MAT_V(feature, feature_m, hy * hystep + hx * 8 + 3) = p2 - p1;
			}

			// 3: first diagonal filter (helper's direction=1), slots 4/5
			p1 = nv_face_haarlike_diagonal_filter(1, sum, px, py, xscale, yscale);
			if (p1 > 0.0f) {
				NV_MAT_V(feature, feature_m, hy * hystep + hx * 8 + 4) = p1;
			} else {
				NV_MAT_V(feature, feature_m, hy * hystep + hx * 8 + 5) = -p1;
			}

			// 4: second diagonal filter (helper's direction=2), slots 6/7
			p1 = nv_face_haarlike_diagonal_filter(2, sum, px, py, xscale, yscale);
			if (p1 > 0.0f) {
				NV_MAT_V(feature, feature_m, hy * hystep + hx * 8 + 6) = p1;
			} else {
				NV_MAT_V(feature, feature_m, hy * hystep + hx * 8 + 7) = -p1;
			}
		}
	}

	// normalization
	switch (normalize_type) {
	case NV_NORMALIZE_MAX:
		// Maximum=1.0
		/* min/max over nonzero entries only (zeros mean "slot not written") */
		vmax = 0.0f;
		vmin = FLT_MAX;
		for (n = 0; n < feature->n; ++n) {
			if (NV_MAT_V(feature, feature_m, n) > vmax) {
				vmax = NV_MAT_V(feature, feature_m, n);
			}
			if (NV_MAT_V(feature, feature_m, n) != 0.0f
				&& NV_MAT_V(feature, feature_m, n) < vmin) 
			{
				vmin = NV_MAT_V(feature, feature_m, n);
			}
		}
		if (vmax != 0.0f && vmax > vmin) {
			/* affine-rescale nonzero entries into [0, 1] */
			v = 1.0f / (vmax - vmin);
			for (n = 0; n < feature->n; ++n) {
				if (NV_MAT_V(feature, feature_m, n) != 0.0f) {
					NV_MAT_V(feature, feature_m, n) = (NV_MAT_V(feature, feature_m, n) - vmin) * v;
				}
			}
		}
		break;
	case NV_NORMALIZE_NORM:
		// Vector Norm=1.0
		v = 0.0f;
		for (n = 0; n < feature->n; ++n) {
			v += NV_MAT_V(feature, feature_m, n) * NV_MAT_V(feature, feature_m, n);
		}
		if (v != 0.0) {
			v = 1.0f / sqrtf(v);
			for (n = 0; n < feature->n; ++n) {
				NV_MAT_V(feature, feature_m, n) *= v;
			}
		}
		break;
	case NV_NORMALIZE_NONE:
	default:
		break;
	}
}
Пример #4
0
void 
nv_lr_train(nv_lr_t *lr,
			const nv_matrix_t *data, const nv_matrix_t *label,
			nv_lr_param_t param)
{
	int m, n, i, j, k, l;
	long tm, tm_all = nv_clock();
	float oe = FLT_MAX, er = 1.0f, we;
	float sum_e = 0.0f;
	int epoch = 0;
	int pn = (data->m > 256) ? 128:1;
	int step = data->m / (pn);
	int threads = nv_omp_procs();
	nv_matrix_t *y = nv_matrix_alloc(lr->k, threads);
	nv_matrix_t *t = nv_matrix_alloc(lr->k, threads);
	nv_matrix_t *dw = nv_matrix_list_alloc(lr->n, lr->k, threads);
	nv_matrix_t *count = nv_matrix_alloc(lr->k, 1);
	nv_matrix_t *label_weight = nv_matrix_alloc(lr->k, 1);
	float count_max_log;
	
	nv_matrix_zero(count);
	nv_matrix_fill(label_weight, 1.0f);
	if (param.auto_balance) {
		/* クラスごとに数が違う場合に更新重みをスケーリングする */
		for (m = 0; m < data->m; ++m) {
			NV_MAT_V(count, 0, (int)NV_MAT_V(label, m, 0)) += 1.0f;
		}
		count_max_log = logf(3.0f + NV_MAT_V(count, 0, nv_vector_max_n(count, 0)));
		for (n = 0; n < count->n; ++n) {
			if (NV_MAT_V(count, 0, n) > 0.0f) {
				float count_log = logf(3.0f + NV_MAT_V(count, 0, n));
				NV_MAT_V(label_weight, 0, n) = 
					powf(count_max_log, NV_LR_CLASS_COUNT_PENALTY_EXP) 
					/ powf(count_log, NV_LR_CLASS_COUNT_PENALTY_EXP);
			} else {
				NV_MAT_V(label_weight, 0, n) = 1.0f;
			}
		}
	}
	do {
		we = 1.0f / er;
		tm = nv_clock();
		sum_e = 0.0f;

		for (m = 0; m < step; ++m) {
			nv_matrix_zero(dw);

#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic, 4) reduction(+:sum_e) num_threads(threads) 
#endif
			for (i = 0; i < pn; ++i) {
				int rand_m = NV_ROUND_INT((data->m - 1) * nv_rand());
				int thread_num = nv_omp_thread_id();
				int label_i = (int)NV_MAT_V(label, rand_m, 0);
				float weight = NV_MAT_V(label_weight, 0, label_i);
				float yp;

				nv_vector_zero(t, thread_num);
				NV_MAT_V(t, thread_num, label_i) = 1.0f;
				nv_lr_predict_vector(lr, y, thread_num, data, rand_m);
				yp = NV_MAT_V(y, thread_num, (int)NV_MAT_V(label, rand_m, 0));
				
				if (yp < 1.0 - NV_LR_MARGIN) {
					nv_lr_dw(lr, weight, dw, thread_num, data, rand_m, t, thread_num, y, thread_num);
					sum_e += nv_lr_error(t, thread_num, y, thread_num);
				}
			}

			for (l = 1; l < threads; ++l) {
				for (j = 0; j < dw->m; ++j) {
					for (i = 0; i < dw->n; ++i) {
						NV_MAT_LIST_V(dw, 0, j, i) += NV_MAT_LIST_V(dw, l, j, i);
					}
				}
			}
#ifdef _OPENMP
#pragma omp parallel for private(n)  num_threads(threads) if (lr->k > 32)
#endif
			for (k = 0; k < lr->k; ++k) {
				switch (param.reg_type) {
                case NV_LR_REG_NONE:
					for (n = 0; n < lr->n; ++n) {
						NV_MAT_V(lr->w, k, n) -= 
							we * param.grad_w * NV_MAT_LIST_V(dw, 0, k, n);
					}
					break;
				case NV_LR_REG_L1:
					// FOBOS L1
					for (n = 0; n < lr->n; ++n) {
						NV_MAT_V(lr->w, k, n) -= 
							we * param.grad_w * NV_MAT_LIST_V(dw, 0, k, n);
					}
					for (n = 0; n < lr->n; ++n) {
						float w_i = NV_MAT_V(lr->w, k, n);
						float lambda = we * param.reg_w * (1.0f / (1.0f + epoch));
						NV_MAT_V(lr->w, k, n) = nv_sign(w_i) * NV_MAX(0.0f, (fabsf(w_i) - lambda));
					}
					break;
				case NV_LR_REG_L2:
					for (n = 0; n < lr->n; ++n) {
						NV_MAT_V(lr->w, k, n) -= 
							we * (param.grad_w * (NV_MAT_LIST_V(dw, 0, k, n)
												  + param.reg_w * NV_MAT_V(lr->w, k, n)));
					}
					break;
				}
			}
		}
		if (nv_lr_progress_flag) {
			printf("nv_lr:%d: E: %E, %ldms\n",
				epoch, sum_e / (pn * step), nv_clock() - tm);
		}
		if (nv_lr_progress_flag > 1) {
			int *ok = nv_alloc_type(int, lr->k);
			int *ng = nv_alloc_type(int, lr->k);

			memset(ok, 0, sizeof(int) * lr->k);
			memset(ng, 0, sizeof(int) * lr->k);
			for (i = 0; i < data->m; ++i) {
				int predict = nv_lr_predict_label(lr, data, i);
				int teach = (int)NV_MAT_V(label, i, 0);
				if (predict == teach) {
					++ok[teach];
				} else {
					++ng[teach];
				}
			}
			for (i = 0; i < lr->k; ++i) {
				printf("%d: ok: %d, ng: %d, %f\n", i, ok[i], ng[i], (float)ok[i] / (float)(ok[i] + ng[i]));
			}
			nv_free(ok);
			nv_free(ng);
		}
		if (nv_lr_progress_flag) {
			fflush(stdout);
		}

		if (sum_e > oe) {
			er += 1.0f;
		}
		if (er >= 20.0f) {
			break;
		}
		
		if (sum_e < FLT_EPSILON) {
			break;
		}
		oe = sum_e;
	} while (param.max_epoch > ++epoch);