Ejemplo n.º 1
0
/*
 * Turn a frame into a single-row vector holding one smoothed value per pixel.
 *
 * Steps, in order:
 *   1. If the longer side of `image` differs from bg->size, resample the
 *      frame so that the longer side becomes bg->size.
 *   2. If the (possibly resampled) frame has more than one channel, reduce
 *      it to one channel with nv_gray().
 *   3. Run nv_gaussian5x5() from the frame into a same-shaped scratch matrix
 *      (presumably a 5x5 Gaussian blur of channel 0 -- confirm against the
 *      nv_gaussian5x5 declaration).
 *   4. Copy element 0 of every scratch row into consecutive columns of
 *      row 0 of the result.
 *
 * bg:    segmentation context; only bg->size is read here.
 * image: input frame; it is not modified.
 *
 * Returns a newly allocated matrix with rows * cols columns and one row.
 * Every temporary is released before returning; the result itself is not,
 * so the caller is presumably expected to release it with nv_matrix_free().
 */
static nv_matrix_t *
conv_image2vec(const nv_bgseg_t *bg,
			   const nv_matrix_t *image)
{
	/* `src` always points at the most recently produced stage. */
	const nv_matrix_t *src = image;
	nv_matrix_t *scaled = NULL;
	nv_matrix_t *mono = NULL;
	nv_matrix_t *blurred;
	nv_matrix_t *out;
	int idx;
	float ratio = (float)bg->size / (float)NV_MAX(image->rows, image->cols);

	/* Stage 1: resample unless the frame already has the target size. */
	if (ratio != 1.0f) {
		scaled = nv_matrix3d_alloc(src->n,
								   NV_ROUND_INT(src->rows * ratio),
								   NV_ROUND_INT(src->cols * ratio));
		nv_resize(scaled, src);
		src = scaled;
	}

	/* Stage 2: collapse multi-channel input to a single channel. */
	if (src->n != 1) {
		mono = nv_matrix3d_alloc(1, src->rows, src->cols);
		nv_gray(mono, src);
		src = mono;
	}

	/* Stage 3: filter into a scratch matrix shaped like the source. */
	out = nv_matrix_alloc(src->rows * src->cols, 1);
	blurred = nv_matrix_clone(src);
	nv_gaussian5x5(blurred, 0, src, 0);

	/* Stage 4: one value per source row goes into row 0 of the result. */
	idx = 0;
	while (idx < src->m) {
		NV_MAT_V(out, 0, idx) = NV_MAT_V(blurred, idx, 0);
		++idx;
	}

	/* `mono` and `scaled` may still be NULL if their stage was skipped. */
	nv_matrix_free(&blurred);
	nv_matrix_free(&mono);
	nv_matrix_free(&scaled);

	return out;
}
Ejemplo n.º 2
0
/*
 * Allocate and initialise a background-segmentation context.
 *
 * frame_rows, frame_cols: size of the frames that will be processed.
 * zeta, bg_v, fg_v:       stored unchanged in the context; their meaning is
 *                         defined by the code that consumes them.
 * size:                   target length of the longer side of the internal
 *                         working image.
 *
 * The working dimensions (rows, cols) are the frame dimensions multiplied by
 * size / max(frame_rows, frame_cols) and rounded with NV_ROUND_INT. The `av`
 * matrix gets one column per working pixel and is zeroed; `sgm` is produced
 * from it with nv_matrix_dup() (presumably a copy, hence also zero -- confirm).
 * All four init flags start at 0.
 *
 * Returns the new context.
 * NOTE(review): the result of nv_alloc_type() is used without a NULL check;
 * confirm whether that allocator can return NULL.
 */
nv_bgseg_t *
nv_bgseg_alloc(int frame_rows, int frame_cols,
			   float zeta, float bg_v, float fg_v,
			   int size
	)
{
	float ratio = (float)size / (float)NV_MAX(frame_rows, frame_cols);
	nv_bgseg_t *ctx = nv_alloc_type(nv_bgseg_t, 1);

	/* Tunables supplied by the caller. */
	ctx->size = size;
	ctx->zeta = zeta;
	ctx->bg_v = bg_v;
	ctx->fg_v = fg_v;

	/* Original frame geometry and the scaled working geometry. */
	ctx->frame_rows = frame_rows;
	ctx->frame_cols = frame_cols;
	ctx->rows = NV_ROUND_INT(frame_rows * ratio);
	ctx->cols = NV_ROUND_INT(frame_cols * ratio);

	/* Neither initialisation phase has started or finished yet. */
	ctx->init_1st = 0;
	ctx->init_1st_finished = 0;
	ctx->init_2nd = 0;
	ctx->init_2nd_finished = 0;

	/* One value per working pixel, single row. */
	ctx->av = nv_matrix_alloc(ctx->rows * ctx->cols, 1);
	nv_matrix_zero(ctx->av);
	ctx->sgm = nv_matrix_dup(ctx->av);

	return ctx;
}
/*
 * Diagonal Haar-like response for one window.
 *
 * The window starts at (px, py) and is treated as an 8x8 grid of cells, each
 * xscale by yscale pixels. It is split along a diagonal into two
 * staircase-shaped halves built from one-cell-high horizontal strips. For
 * each half the NV_INTEGRAL_V values of its strips are summed and divided by
 * (pixel area of the half * 255); the result is first half minus second half.
 *
 *   type == 1  ("|\|"): upper-right half minus lower-left half
 *   otherwise  ("|/|"): upper-left half minus lower-right half
 *
 * type:           1 selects the "\" split, any other value the "/" split.
 * sum:            matrix handed to NV_INTEGRAL_V (presumably an integral
 *                 image -- confirm against the macro definition).
 * px, py:         top-left corner of the window, in pixels.
 * xscale, yscale: cell width and height, in pixels.
 *
 * Returns the difference of the two normalised sums.
 * NOTE(review): when the scales are small enough that every rounded strip
 * collapses to zero width or height, area1/area2 are 0 and the divisions
 * below are unguarded.
 */
static float nv_face_haarlike_diagonal_filter(int type,
	const nv_matrix_t *sum,
	int px, int py,
	float xscale, float yscale)
{
	int i;
	float p;
	float p1 = 0.0f, p2 = 0.0f;
	int area1 = 0, area2 = 0;
	/* Right edge of the window; every strip that touches it shares this. */
	const int right = px + NV_ROUND_INT(8.0f * xscale);

	if (type == 1) {
		// |\|
		/* Upper-right half: row i covers cells [1 + i, 8). */
		for (i = 0; i < 7; ++i) {
			int ppx = px + NV_ROUND_INT((1.0f + i) * xscale);
			int ppy = py + NV_ROUND_INT(i * yscale);
			int eex = right;
			int eey = py + NV_ROUND_INT((i + 1) * yscale);

			p1 += NV_INTEGRAL_V(sum, ppx, ppy, eex, eey);
			area1 += (eex - ppx) * (eey - ppy);
		}
		/* Lower-left half: row i covers cells [0, i). */
		for (i = 1; i < 8; ++i) {
			int ppx = px;
			int ppy = py + NV_ROUND_INT(i * yscale);
			int eex = px + NV_ROUND_INT(i * xscale);
			int eey = py + NV_ROUND_INT((i + 1) * yscale);

			p2 += NV_INTEGRAL_V(sum, ppx, ppy, eex, eey);
			area2 += (eex - ppx) * (eey - ppy);
		}
	} else {
		// |/|
		/* Upper-left half: row i covers cells [0, 7 - i). */
		for (i = 0; i < 7; ++i) {
			int ppx = px;
			int ppy = py + NV_ROUND_INT(i * yscale);
			int eex = px + NV_ROUND_INT((7.0f - i) * xscale);
			int eey = py + NV_ROUND_INT((i + 1) * yscale);

			p1 += NV_INTEGRAL_V(sum, ppx, ppy, eex, eey);
			area1 += (eex - ppx) * (eey - ppy);
		}
		/* Lower-right half: row i covers cells [8 - i, 8). */
		for (i = 1; i < 8; ++i) {
			int ppx = px + NV_ROUND_INT((8.0f - i) * xscale);
			int ppy = py + NV_ROUND_INT(i * yscale);
			int eex = right;
			int eey = py + NV_ROUND_INT((i + 1) * yscale);

			p2 += NV_INTEGRAL_V(sum, ppx, ppy, eex, eey);
			area2 += (eex - ppx) * (eey - ppy);
		}
	}

	/* Same normalisation for both orientations: mean per pixel, over 255. */
	p = p1 / (area1 * 255.0f) - p2 / (area2 * 255.0f);

	return p;
}
/*
 * Fill row `feature_m` of `feature` with Haar-like responses computed over
 * the rectangle (x, y, width, height) of `sum`.
 *
 * The rectangle is treated as a 32x32 grid of cells. A window of 8x8 cells
 * is moved over it in steps of 2 cells, giving 12x12 window positions. Each
 * position owns 8 consecutive slots in the feature row:
 *
 *   +0 / +1  top half vs bottom half      (+0 when top > bottom, else +1)
 *   +2 / +3  left half vs right half      (+2 when left > right, else +3)
 *   +4 / +5  diagonal filter, type 1      (+4 when response > 0, else +5)
 *   +6 / +7  diagonal filter, type 2      (+6 when response > 0, else +7)
 *
 * Only one slot of each pair is written per window; the other keeps whatever
 * nv_vector_zero() left there. Each stored value is the absolute difference
 * of the two responses.
 *
 * normalize_type: post-processing applied to the finished row:
 *     NV_NORMALIZE_MAX   non-zero entries become (v - min) / (max - min),
 *                        where min is the smallest non-zero entry; skipped
 *                        unless max != 0 and max > min.
 *     NV_NORMALIZE_NORM  every entry is divided by the Euclidean norm of the
 *                        row; skipped when the squared norm is 0.
 *     anything else      row is left as computed.
 * feature:    destination matrix; the normalisation walks feature->n columns.
 * feature_m:  index of the row to fill.
 * sum:        read as NV_MAT3D_V(sum, row, col, 0) at rectangle corners
 *             (presumably an integral image -- confirm with the producer).
 * x, y:       top-left corner of the rectangle, in pixels.
 * width, height: size of the rectangle, in pixels.
 */
void nv_face_haarlike(nv_face_haarlike_normalize_e normalize_type,
	nv_matrix_t *feature, 
	int feature_m,
	const nv_matrix_t *sum,
	int x, int y, int width, int height)
{
	int row, col, n;
	float gain, hi, lo;
	/* Size of one grid cell in pixels. */
	const float cell_w = width / 32.0f;
	const float cell_h = height / 32.0f;
	/* Slots consumed by one row of windows: 12 windows * 8 slots. */
	const int row_stride = (32 - 8) / 2 * 8;
	/* Half a window (4 cells), in pixels. */
	const int half_h = NV_ROUND_INT(4.0f * cell_h);
	const int half_w = NV_ROUND_INT(4.0f * cell_w);

	nv_vector_zero(feature, feature_m);

	// level1
	for (row = 0; row < (32 - 8) / 2; ++row) {
		const int iy = row * 2;
		const int top = y + NV_ROUND_INT(cell_h * iy);
		const int bottom = top + NV_ROUND_INT(8.0f * cell_h);
		const float pty = (bottom - top) * 255.0f;

		for (col = 0; col < (32 - 8) / 2; ++col) {
			const int ix = col * 2;
			const int left = x + NV_ROUND_INT(cell_w * ix);
			const int right = left + NV_ROUND_INT(8.0f * cell_w);
			/* First of the 8 slots belonging to this window. */
			const int slot = row * row_stride + col * 8;
			float total, pos, neg, ptx;

			/* Sum over the whole window. */
			total = NV_MAT3D_V(sum, bottom, right, 0)
				- NV_MAT3D_V(sum, bottom, left, 0)
				- (NV_MAT3D_V(sum, top, right, 0) - NV_MAT3D_V(sum, top, left, 0));

			/*
			 * Filter 1: top half against bottom half.
			 *   [+]
			 *   [-]
			 */
			pos = NV_MAT3D_V(sum, top + half_h, right, 0)
				- NV_MAT3D_V(sum, top + half_h, left, 0)
				- (NV_MAT3D_V(sum, top, right, 0) - NV_MAT3D_V(sum, top, left, 0));
			neg = total - pos;
			ptx = (right - left) * 255.0f;
			pos /= half_h * ptx;
			neg /= (bottom - (top + half_h)) * ptx;
			if (pos > neg) {
				NV_MAT_V(feature, feature_m, slot + 0) = pos - neg;
			} else {
				NV_MAT_V(feature, feature_m, slot + 1) = neg - pos;
			}

			/*
			 * Filter 2: left half against right half.
			 *   [+][-]
			 */
			pos = NV_MAT3D_V(sum, bottom, left + half_w, 0)
				- NV_MAT3D_V(sum, bottom, left, 0)
				- (NV_MAT3D_V(sum, top, left + half_w, 0) - NV_MAT3D_V(sum, top, left, 0));
			neg = total - pos;
			pos /= half_w * pty;
			neg /= (right - (left + half_w)) * pty;
			if (pos > neg) {
				NV_MAT_V(feature, feature_m, slot + 2) = pos - neg;
			} else {
				NV_MAT_V(feature, feature_m, slot + 3) = neg - pos;
			}

			/* Filter 3: diagonal, type 1; the sign picks the slot. */
			pos = nv_face_haarlike_diagonal_filter(1, sum, left, top, cell_w, cell_h);
			if (pos > 0.0f) {
				NV_MAT_V(feature, feature_m, slot + 4) = pos;
			} else {
				NV_MAT_V(feature, feature_m, slot + 5) = -pos;
			}

			/* Filter 4: diagonal, type 2; the sign picks the slot. */
			pos = nv_face_haarlike_diagonal_filter(2, sum, left, top, cell_w, cell_h);
			if (pos > 0.0f) {
				NV_MAT_V(feature, feature_m, slot + 6) = pos;
			} else {
				NV_MAT_V(feature, feature_m, slot + 7) = -pos;
			}
		}
	}

	/* Normalisation. */
	switch (normalize_type) {
	case NV_NORMALIZE_MAX:
		/* Rescale so that the largest entry becomes 1.0. */
		hi = 0.0f;
		lo = FLT_MAX;
		for (n = 0; n < feature->n; ++n) {
			if (NV_MAT_V(feature, feature_m, n) > hi) {
				hi = NV_MAT_V(feature, feature_m, n);
			}
			/* Zero entries are ignored when looking for the minimum. */
			if (NV_MAT_V(feature, feature_m, n) != 0.0f
				&& NV_MAT_V(feature, feature_m, n) < lo)
			{
				lo = NV_MAT_V(feature, feature_m, n);
			}
		}
		/* Nothing to rescale when the row is empty or has no spread. */
		if (hi == 0.0f || !(hi > lo)) {
			break;
		}
		gain = 1.0f / (hi - lo);
		for (n = 0; n < feature->n; ++n) {
			if (NV_MAT_V(feature, feature_m, n) == 0.0f) {
				continue;
			}
			NV_MAT_V(feature, feature_m, n) = (NV_MAT_V(feature, feature_m, n) - lo) * gain;
		}
		break;
	case NV_NORMALIZE_NORM:
		/* Rescale so that the row has Euclidean norm 1.0. */
		gain = 0.0f;
		for (n = 0; n < feature->n; ++n) {
			gain += NV_MAT_V(feature, feature_m, n) * NV_MAT_V(feature, feature_m, n);
		}
		/* `gain` holds the squared norm here; an all-zero row is left alone. */
		if (gain == 0.0) {
			break;
		}
		gain = 1.0f / sqrtf(gain);
		for (n = 0; n < feature->n; ++n) {
			NV_MAT_V(feature, feature_m, n) *= gain;
		}
		break;
	case NV_NORMALIZE_NONE:
	default:
		break;
	}
}
Ejemplo n.º 5
0
void 
nv_lr_train(nv_lr_t *lr,
			const nv_matrix_t *data, const nv_matrix_t *label,
			nv_lr_param_t param)
{
	int m, n, i, j, k, l;
	long tm, tm_all = nv_clock();
	float oe = FLT_MAX, er = 1.0f, we;
	float sum_e = 0.0f;
	int epoch = 0;
	int pn = (data->m > 256) ? 128:1;
	int step = data->m / (pn);
	int threads = nv_omp_procs();
	nv_matrix_t *y = nv_matrix_alloc(lr->k, threads);
	nv_matrix_t *t = nv_matrix_alloc(lr->k, threads);
	nv_matrix_t *dw = nv_matrix_list_alloc(lr->n, lr->k, threads);
	nv_matrix_t *count = nv_matrix_alloc(lr->k, 1);
	nv_matrix_t *label_weight = nv_matrix_alloc(lr->k, 1);
	float count_max_log;
	
	nv_matrix_zero(count);
	nv_matrix_fill(label_weight, 1.0f);
	if (param.auto_balance) {
		/* クラスごとに数が違う場合に更新重みをスケーリングする */
		for (m = 0; m < data->m; ++m) {
			NV_MAT_V(count, 0, (int)NV_MAT_V(label, m, 0)) += 1.0f;
		}
		count_max_log = logf(3.0f + NV_MAT_V(count, 0, nv_vector_max_n(count, 0)));
		for (n = 0; n < count->n; ++n) {
			if (NV_MAT_V(count, 0, n) > 0.0f) {
				float count_log = logf(3.0f + NV_MAT_V(count, 0, n));
				NV_MAT_V(label_weight, 0, n) = 
					powf(count_max_log, NV_LR_CLASS_COUNT_PENALTY_EXP) 
					/ powf(count_log, NV_LR_CLASS_COUNT_PENALTY_EXP);
			} else {
				NV_MAT_V(label_weight, 0, n) = 1.0f;
			}
		}
	}
	do {
		we = 1.0f / er;
		tm = nv_clock();
		sum_e = 0.0f;

		for (m = 0; m < step; ++m) {
			nv_matrix_zero(dw);

#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic, 4) reduction(+:sum_e) num_threads(threads) 
#endif
			for (i = 0; i < pn; ++i) {
				int rand_m = NV_ROUND_INT((data->m - 1) * nv_rand());
				int thread_num = nv_omp_thread_id();
				int label_i = (int)NV_MAT_V(label, rand_m, 0);
				float weight = NV_MAT_V(label_weight, 0, label_i);
				float yp;

				nv_vector_zero(t, thread_num);
				NV_MAT_V(t, thread_num, label_i) = 1.0f;
				nv_lr_predict_vector(lr, y, thread_num, data, rand_m);
				yp = NV_MAT_V(y, thread_num, (int)NV_MAT_V(label, rand_m, 0));
				
				if (yp < 1.0 - NV_LR_MARGIN) {
					nv_lr_dw(lr, weight, dw, thread_num, data, rand_m, t, thread_num, y, thread_num);
					sum_e += nv_lr_error(t, thread_num, y, thread_num);
				}
			}

			for (l = 1; l < threads; ++l) {
				for (j = 0; j < dw->m; ++j) {
					for (i = 0; i < dw->n; ++i) {
						NV_MAT_LIST_V(dw, 0, j, i) += NV_MAT_LIST_V(dw, l, j, i);
					}
				}
			}
#ifdef _OPENMP
#pragma omp parallel for private(n)  num_threads(threads) if (lr->k > 32)
#endif
			for (k = 0; k < lr->k; ++k) {
				switch (param.reg_type) {
                case NV_LR_REG_NONE:
					for (n = 0; n < lr->n; ++n) {
						NV_MAT_V(lr->w, k, n) -= 
							we * param.grad_w * NV_MAT_LIST_V(dw, 0, k, n);
					}
					break;
				case NV_LR_REG_L1:
					// FOBOS L1
					for (n = 0; n < lr->n; ++n) {
						NV_MAT_V(lr->w, k, n) -= 
							we * param.grad_w * NV_MAT_LIST_V(dw, 0, k, n);
					}
					for (n = 0; n < lr->n; ++n) {
						float w_i = NV_MAT_V(lr->w, k, n);
						float lambda = we * param.reg_w * (1.0f / (1.0f + epoch));
						NV_MAT_V(lr->w, k, n) = nv_sign(w_i) * NV_MAX(0.0f, (fabsf(w_i) - lambda));
					}
					break;
				case NV_LR_REG_L2:
					for (n = 0; n < lr->n; ++n) {
						NV_MAT_V(lr->w, k, n) -= 
							we * (param.grad_w * (NV_MAT_LIST_V(dw, 0, k, n)
												  + param.reg_w * NV_MAT_V(lr->w, k, n)));
					}
					break;
				}
			}
		}
		if (nv_lr_progress_flag) {
			printf("nv_lr:%d: E: %E, %ldms\n",
				epoch, sum_e / (pn * step), nv_clock() - tm);
		}
		if (nv_lr_progress_flag > 1) {
			int *ok = nv_alloc_type(int, lr->k);
			int *ng = nv_alloc_type(int, lr->k);

			memset(ok, 0, sizeof(int) * lr->k);
			memset(ng, 0, sizeof(int) * lr->k);
			for (i = 0; i < data->m; ++i) {
				int predict = nv_lr_predict_label(lr, data, i);
				int teach = (int)NV_MAT_V(label, i, 0);
				if (predict == teach) {
					++ok[teach];
				} else {
					++ng[teach];
				}
			}
			for (i = 0; i < lr->k; ++i) {
				printf("%d: ok: %d, ng: %d, %f\n", i, ok[i], ng[i], (float)ok[i] / (float)(ok[i] + ng[i]));
			}
			nv_free(ok);
			nv_free(ng);
		}
		if (nv_lr_progress_flag) {
			fflush(stdout);
		}

		if (sum_e > oe) {
			er += 1.0f;
		}
		if (er >= 20.0f) {
			break;
		}
		
		if (sum_e < FLT_EPSILON) {
			break;
		}
		oe = sum_e;
	} while (param.max_epoch > ++epoch);