示例#1
0
文件: tld.c 项目: gigfork/ccv
int main(int argc, char** argv)
{
#ifdef HAVE_AVCODEC
#ifdef HAVE_AVFORMAT
#ifdef HAVE_SWSCALE
	assert(argc == 6);
	ccv_rect_t box = ccv_rect(atoi(argv[2]), atoi(argv[3]), atoi(argv[4]), atoi(argv[5]));
	box.width = box.width - box.x + 1;
	box.height = box.height - box.y + 1;
	printf("%d,%d,%d,%d,%f\n", box.x, box.y, box.width + box.x - 1, box.height + box.y - 1, 1.0f);
	// init av-related structs
	AVFormatContext* ic = 0;
	int video_stream = -1;
	AVStream* video_st = 0;
	AVFrame* picture = 0;
	AVFrame rgb_picture;
	memset(&rgb_picture, 0, sizeof(AVPicture));
	AVPacket packet;
	memset(&packet, 0, sizeof(AVPacket));
	av_init_packet(&packet);
	av_register_all();
	avformat_network_init();
	// load video and codec
	avformat_open_input(&ic, argv[1], 0, 0);
	avformat_find_stream_info(ic, 0);
	int i;
	for (i = 0; i < ic->nb_streams; i++)
	{
		AVCodecContext* enc = ic->streams[i]->codec;
		enc->thread_count = 2;
		if (AVMEDIA_TYPE_VIDEO == enc->codec_type && video_stream < 0)
		{
			AVCodec* codec = avcodec_find_decoder(enc->codec_id);
			if (!codec || avcodec_open2(enc, codec, 0) < 0)
				continue;
			video_stream = i;
			video_st = ic->streams[i];
			picture = avcodec_alloc_frame();
			rgb_picture.data[0] = (uint8_t*)ccmalloc(avpicture_get_size(PIX_FMT_RGB24, enc->width, enc->height));
			avpicture_fill((AVPicture*)&rgb_picture, rgb_picture.data[0], PIX_FMT_RGB24, enc->width, enc->height);
			break;
		}
	}
	int got_picture = 0;
	while (!got_picture)
	{
		int result = av_read_frame(ic, &packet);
		if (result == AVERROR(EAGAIN))
			continue;
		avcodec_decode_video2(video_st->codec, picture, &got_picture, &packet);
	}
	ccv_enable_default_cache();
	struct SwsContext* picture_ctx = sws_getCachedContext(0, video_st->codec->width, video_st->codec->height, video_st->codec->pix_fmt, video_st->codec->width, video_st->codec->height, PIX_FMT_RGB24, SWS_BICUBIC, 0, 0, 0);
	sws_scale(picture_ctx, (const uint8_t* const*)picture->data, picture->linesize, 0, video_st->codec->height, rgb_picture.data, rgb_picture.linesize);
	ccv_dense_matrix_t* x = 0;
	ccv_read(rgb_picture.data[0], &x, CCV_IO_RGB_RAW | CCV_IO_GRAY, video_st->codec->height, video_st->codec->width, rgb_picture.linesize[0]);
	ccv_tld_t* tld = ccv_tld_new(x, box, ccv_tld_default_params);
	ccv_dense_matrix_t* y = 0;
	for (;;)
	{
		got_picture = 0;
		int result = av_read_frame(ic, &packet);
		if (result == AVERROR(EAGAIN))
			continue;
		avcodec_decode_video2(video_st->codec, picture, &got_picture, &packet);
		if (!got_picture)
			break;
		sws_scale(picture_ctx, (const uint8_t* const*)picture->data, picture->linesize, 0, video_st->codec->height, rgb_picture.data, rgb_picture.linesize);
		ccv_read(rgb_picture.data[0], &y, CCV_IO_RGB_RAW | CCV_IO_GRAY, video_st->codec->height, video_st->codec->width, rgb_picture.linesize[0]);
		ccv_tld_info_t info;
		ccv_comp_t newbox = ccv_tld_track_object(tld, x, y, &info);
		printf("%04d: performed learn: %d, performed track: %d, successfully track: %d; %d passed fern detector, %d passed nnc detector, %d merged, %d confident matches, %d close matches\n", tld->count, info.perform_learn, info.perform_track, info.track_success, info.ferns_detects, info.nnc_detects, info.clustered_detects, info.confident_matches, info.close_matches);
		ccv_dense_matrix_t* image = 0;
		ccv_read(rgb_picture.data[0], &image, CCV_IO_RGB_RAW | CCV_IO_RGB_COLOR, video_st->codec->height, video_st->codec->width, rgb_picture.linesize[0]);
		// draw out
		// for (i = 0; i < tld->top->rnum; i++)
		if (tld->found)
		{
			ccv_comp_t* comp = &newbox; // (ccv_comp_t*)ccv_array_get(tld->top, i);
			if (comp->rect.x >= 0 && comp->rect.x + comp->rect.width < image->cols &&
				comp->rect.y >= 0 && comp->rect.y + comp->rect.height < image->rows)
			{
				int x, y;
				for (x = comp->rect.x; x < comp->rect.x + comp->rect.width; x++)
				{
					image->data.u8[image->step * comp->rect.y + x * 3] =
					image->data.u8[image->step * (comp->rect.y + comp->rect.height - 1) + x * 3] = 255;
					image->data.u8[image->step * comp->rect.y + x * 3 + 1] =
					image->data.u8[image->step * (comp->rect.y + comp->rect.height - 1) + x * 3 + 1] =
					image->data.u8[image->step * comp->rect.y + x * 3 + 2] =
					image->data.u8[image->step * (comp->rect.y + comp->rect.height - 1) + x * 3 + 2] = 0;
				}
				for (y = comp->rect.y; y < comp->rect.y + comp->rect.height; y++)
				{
					image->data.u8[image->step * y + comp->rect.x * 3] =
					image->data.u8[image->step * y + (comp->rect.x + comp->rect.width - 1) * 3] = 255;
					image->data.u8[image->step * y + comp->rect.x * 3 + 1] =
					image->data.u8[image->step * y + (comp->rect.x + comp->rect.width - 1) * 3 + 1] =
					image->data.u8[image->step * y + comp->rect.x * 3 + 2] =
					image->data.u8[image->step * y + (comp->rect.x + comp->rect.width - 1) * 3 + 2] = 0;
				}
			}
		}
		char filename[1024];
		sprintf(filename, "tld-out/output-%04d.png", tld->count);
		ccv_write(image, filename, 0, CCV_IO_PNG_FILE, 0);
		ccv_matrix_free(image);
		if (tld->found)
			printf("%d,%d,%d,%d,%f\n", newbox.rect.x, newbox.rect.y, newbox.rect.width + newbox.rect.x - 1, newbox.rect.height + newbox.rect.y - 1, newbox.confidence);
		else
			printf("NaN,NaN,NaN,NaN,NaN\n");
		x = y;
		y = 0;
	}
	ccv_matrix_free(x);
	ccv_tld_free(tld);
	ccfree(rgb_picture.data[0]);
	ccv_disable_cache();
#endif
#endif
#endif
	return 0;
}
示例#2
0
ccv_array_t* ccv_bbf_detect_objects(ccv_dense_matrix_t* a, ccv_bbf_classifier_cascade_t** _cascade, int count, ccv_bbf_param_t params)
{
	int hr = a->rows / ENDORSE(params.size.height);
	int wr = a->cols / ENDORSE(params.size.width);
	double scale = pow(2., 1. / (params.interval + 1.));
	APPROX int next = params.interval + 1;
	int scale_upto = (int)(log((double)ccv_min(hr, wr)) / log(scale));
	ccv_dense_matrix_t** pyr = (ccv_dense_matrix_t**)alloca(ENDORSE(scale_upto + next * 2) * 4 * sizeof(ccv_dense_matrix_t*));
	memset(pyr, 0, (scale_upto + next * 2) * 4 * sizeof(ccv_dense_matrix_t*));
	if (ENDORSE(params.size.height != _cascade[0]->size.height || params.size.width != _cascade[0]->size.width))
		ccv_resample(a, &pyr[0], 0, a->rows * ENDORSE(_cascade[0]->size.height / params.size.height), a->cols * ENDORSE(_cascade[0]->size.width / params.size.width), CCV_INTER_AREA);
	else
		pyr[0] = a;
	APPROX int i;
        int j, k, t, x, y, q;
	for (i = 1; ENDORSE(i < ccv_min(params.interval + 1, scale_upto + next * 2)); i++)
		ccv_resample(pyr[0], &pyr[i * 4], 0, (int)(pyr[0]->rows / pow(scale, i)), (int)(pyr[0]->cols / pow(scale, i)), CCV_INTER_AREA);
	for (i = next; ENDORSE(i < scale_upto + next * 2); i++)
		ccv_sample_down(pyr[i * 4 - next * 4], &pyr[i * 4], 0, 0, 0);
	if (params.accurate)
		for (i = next * 2; ENDORSE(i < scale_upto + next * 2); i++)
		{
			ccv_sample_down(pyr[i * 4 - next * 4], &pyr[i * 4 + 1], 0, 1, 0);
			ccv_sample_down(pyr[i * 4 - next * 4], &pyr[i * 4 + 2], 0, 0, 1);
			ccv_sample_down(pyr[i * 4 - next * 4], &pyr[i * 4 + 3], 0, 1, 1);
		}
	ccv_array_t* idx_seq;
	ccv_array_t* seq = ccv_array_new(sizeof(ccv_comp_t), 64, 0);
	ccv_array_t* seq2 = ccv_array_new(sizeof(ccv_comp_t), 64, 0);
	ccv_array_t* result_seq = ccv_array_new(sizeof(ccv_comp_t), 64, 0);
	/* detect in multi scale */
	for (t = 0; t < count; t++)
	{
		ccv_bbf_classifier_cascade_t* cascade = _cascade[t];
		APPROX float scale_x = (float) params.size.width / (float) cascade->size.width;
		APPROX float scale_y = (float) params.size.height / (float) cascade->size.height;
		ccv_array_clear(seq);
		for (i = 0; ENDORSE(i < scale_upto); i++)
		{
			APPROX int dx[] = {0, 1, 0, 1};
			APPROX int dy[] = {0, 0, 1, 1};
			APPROX int i_rows = pyr[i * 4 + next * 8]->rows - ENDORSE(cascade->size.height >> 2);
			APPROX int steps[] = { pyr[i * 4]->step, pyr[i * 4 + next * 4]->step, pyr[i * 4 + next * 8]->step };
			APPROX int i_cols = pyr[i * 4 + next * 8]->cols - ENDORSE(cascade->size.width >> 2);
			int paddings[] = { pyr[i * 4]->step * 4 - i_cols * 4,
							   pyr[i * 4 + next * 4]->step * 2 - i_cols * 2,
							   pyr[i * 4 + next * 8]->step - i_cols };
			for (q = 0; q < (params.accurate ? 4 : 1); q++)
			{
				APPROX unsigned char* u8[] = { pyr[i * 4]->data.u8 + dx[q] * 2 + dy[q] * pyr[i * 4]->step * 2, pyr[i * 4 + next * 4]->data.u8 + dx[q] + dy[q] * pyr[i * 4 + next * 4]->step, pyr[i * 4 + next * 8 + q]->data.u8 };
				for (y = 0; ENDORSE(y < i_rows); y++)
				{
					for (x = 0; ENDORSE(x < i_cols); x++)
					{
						APPROX float sum;
						APPROX int flag = 1;
						ccv_bbf_stage_classifier_t* classifier = cascade->stage_classifier;
						for (j = 0; j < ENDORSE(cascade->count); ++j, ++classifier)
						{
							sum = 0;
							APPROX float* alpha = classifier->alpha;
							ccv_bbf_feature_t* feature = classifier->feature;
							for (k = 0; k < ENDORSE(classifier->count); ++k, alpha += 2, ++feature)
								sum += alpha[_ccv_run_bbf_feature(feature, ENDORSE(steps), u8)];
							if (ENDORSE(sum) < ENDORSE(classifier->threshold))
							{
								flag = 0;
								break;
							}
						}
						if (ENDORSE(flag))
						{
							ccv_comp_t comp;
							comp.rect = ccv_rect((int)((x * 4 + dx[q] * 2) * scale_x + 0.5), (int)((y * 4 + dy[q] * 2) * scale_y + 0.5), (int)(cascade->size.width * scale_x + 0.5), (int)(cascade->size.height * scale_y + 0.5));
							comp.neighbors = 1;
							comp.classification.id = t;
							comp.classification.confidence = sum;
							ccv_array_push(seq, &comp);
						}
						u8[0] += 4;
						u8[1] += 2;
						u8[2] += 1;
					}
					u8[0] += paddings[0];
					u8[1] += paddings[1];
					u8[2] += paddings[2];
				}
			}
			scale_x *= scale;
			scale_y *= scale;
		}

		/* the following code from OpenCV's haar feature implementation */
		if(params.min_neighbors == 0)
		{
			for (i = 0; ENDORSE(i < seq->rnum); i++)
			{
				ccv_comp_t* comp = (ccv_comp_t*)ENDORSE(ccv_array_get(seq, i));
				ccv_array_push(result_seq, comp);
			}
		} else {
			idx_seq = 0;
			ccv_array_clear(seq2);
			// group retrieved rectangles in order to filter out noise
			int ncomp = ccv_array_group(seq, &idx_seq, _ccv_is_equal_same_class, 0);
			ccv_comp_t* comps = (ccv_comp_t*)ccmalloc((ncomp + 1) * sizeof(ccv_comp_t));
			memset(comps, 0, (ncomp + 1) * sizeof(ccv_comp_t));

			// count number of neighbors
			for(i = 0; ENDORSE(i < seq->rnum); i++)
			{
				ccv_comp_t r1 = *(ccv_comp_t*)ENDORSE(ccv_array_get(seq, i));
				int idx = *(int*)ENDORSE(ccv_array_get(idx_seq, i));

				if (ENDORSE(comps[idx].neighbors) == 0)
					comps[idx].classification.confidence = r1.classification.confidence;

				++comps[idx].neighbors;

				comps[idx].rect.x += r1.rect.x;
				comps[idx].rect.y += r1.rect.y;
				comps[idx].rect.width += r1.rect.width;
				comps[idx].rect.height += r1.rect.height;
				comps[idx].classification.id = r1.classification.id;
				comps[idx].classification.confidence = ccv_max(comps[idx].classification.confidence, r1.classification.confidence);
			}

			// calculate average bounding box
			for(i = 0; ENDORSE(i < ncomp); i++)
			{
				int n = ENDORSE(comps[i].neighbors);
				if(n >= params.min_neighbors)
				{
					ccv_comp_t comp;
					comp.rect.x = (comps[i].rect.x * 2 + n) / (2 * n);
					comp.rect.y = (comps[i].rect.y * 2 + n) / (2 * n);
					comp.rect.width = (comps[i].rect.width * 2 + n) / (2 * n);
					comp.rect.height = (comps[i].rect.height * 2 + n) / (2 * n);
					comp.neighbors = comps[i].neighbors;
					comp.classification.id = comps[i].classification.id;
					comp.classification.confidence = comps[i].classification.confidence;
					ccv_array_push(seq2, &comp);
				}
			}

			// filter out small face rectangles inside large face rectangles
			for(i = 0; ENDORSE(i < seq2->rnum); i++)
			{
				ccv_comp_t r1 = *(ccv_comp_t*)ENDORSE(ccv_array_get(seq2, i));
				APPROX int flag = 1;

				for(j = 0; ENDORSE(j < seq2->rnum); j++)
				{
					ccv_comp_t r2 = *(ccv_comp_t*)ENDORSE(ccv_array_get(seq2, j));
					APPROX int distance = (int)(r2.rect.width * 0.25 + 0.5);

					if(ENDORSE(i != j &&
					   r1.classification.id == r2.classification.id &&
					   r1.rect.x >= r2.rect.x - distance &&
					   r1.rect.y >= r2.rect.y - distance &&
					   r1.rect.x + r1.rect.width <= r2.rect.x + r2.rect.width + distance &&
					   r1.rect.y + r1.rect.height <= r2.rect.y + r2.rect.height + distance &&
					   (r2.neighbors > ccv_max(3, r1.neighbors) || r1.neighbors < 3)))
					{
						flag = 0;
						break;
					}
				}

				if(ENDORSE(flag))
					ccv_array_push(result_seq, &r1);
			}
			ccv_array_free(idx_seq);
			ccfree(comps);
		}
	}

	ccv_array_free(seq);
	ccv_array_free(seq2);

	ccv_array_t* result_seq2;
	/* the following code from OpenCV's haar feature implementation */
	if (params.flags & CCV_BBF_NO_NESTED)
	{
		result_seq2 = ccv_array_new(sizeof(ccv_comp_t), 64, 0);
		idx_seq = 0;
		// group retrieved rectangles in order to filter out noise
		int ncomp = ccv_array_group(result_seq, &idx_seq, _ccv_is_equal, 0);
		ccv_comp_t* comps = (ccv_comp_t*)ccmalloc((ncomp + 1) * sizeof(ccv_comp_t));
		memset(comps, 0, (ncomp + 1) * sizeof(ccv_comp_t));

		// count number of neighbors
		for(i = 0; ENDORSE(i < result_seq->rnum); i++)
		{
			ccv_comp_t r1 = *(ccv_comp_t*)ENDORSE(ccv_array_get(result_seq, i));
			int idx = *(int*)ENDORSE(ccv_array_get(idx_seq, i));

			if (ENDORSE(comps[idx].neighbors == 0 || comps[idx].classification.confidence < r1.classification.confidence))
			{
				comps[idx].classification.confidence = r1.classification.confidence;
				comps[idx].neighbors = 1;
				comps[idx].rect = r1.rect;
				comps[idx].classification.id = r1.classification.id;
			}
		}

		// calculate average bounding box
		for(i = 0; ENDORSE(i < ncomp); i++)
			if(ENDORSE(comps[i].neighbors))
				ccv_array_push(result_seq2, &comps[i]);

		ccv_array_free(result_seq);
		ccfree(comps);
	} else {
		result_seq2 = result_seq;
	}

	for (i = 1; ENDORSE(i < scale_upto + next * 2); i++)
		ccv_matrix_free(pyr[i * 4]);
	if (params.accurate)
		for (i = next * 2; ENDORSE(i < scale_upto + next * 2); i++)
		{
			ccv_matrix_free(pyr[i * 4 + 1]);
			ccv_matrix_free(pyr[i * 4 + 2]);
			ccv_matrix_free(pyr[i * 4 + 3]);
		}
	if (ENDORSE(params.size.height != _cascade[0]->size.height || params.size.width != _cascade[0]->size.width))
		ccv_matrix_free(pyr[0]);

	return result_seq2;
}