예제 #1
0
// Splits the sample indices [0, sample_num) of `data` into `batch_num` batches
// after shuffling them with std::random_shuffle.
//
// `batch_num` is declared outside this block. The sample count is whatever
// data.get_sample_num() reports.
//
// Every batch holds ceil(sample_num / batch_num) indices until the samples run
// out; the batch that hits the end gets the remainder and any batches after it
// are left empty (this happens when batch_num does not divide the samples
// "nicely", e.g. 5 samples over 4 batches -> sizes 2, 2, 1, 0).
//
// Returns one vector of sample indices per batch, batch_num entries in total.
vector<vector<int>> random_shuffer_dataset_splitter ::split_impl(const dataset& data) const
{
	vector<vector<int>> batch_ids(batch_num);
	// Nothing to fill, and the batch size below would divide by zero.
	if (batch_num == 0)
		return batch_ids;

	const int sample_num = data.get_sample_num();
	vector<int> shuffled(sample_num);
	for (int i = 0; i < sample_num; i++)
		shuffled[i] = i;

	// NOTE(review): std::random_shuffle is deprecated since C++14 and removed in
	// C++17; moving to std::shuffle needs <random> and an explicit engine.
	std::random_shuffle(shuffled.begin(), shuffled.end());

	// Integer ceiling: avoids the precision loss of a float division for large
	// sample counts.
	const int num_batches = batch_num;
	const int batch_size = sample_num / num_batches + (sample_num % num_batches != 0 ? 1 : 0);

	// Walk the shuffled indices with a running cursor so that no batch can
	// start or end beyond sample_num.
	int first = 0;
	for (int i = 0; i < num_batches; i++)
	{
		const int remaining = sample_num - first;
		const int last = (remaining < batch_size) ? sample_num : first + batch_size;

		batch_ids[i].assign(shuffled.begin() + first, shuffled.begin() + last);
		first = last;
	}

	return batch_ids;
}
예제 #2
0
// Splits the shuffled sample indices [0, sample_num) of `data` into
// ratio.size() groups whose sizes are proportional to the entries of `ratio`.
//
// `ratio` and `NumericType` are declared outside this block. The sample count
// is whatever data.get_sample_num() reports.
//
// Group i (except the last) receives floor(sample_num * ratio[i] / sum(ratio))
// indices, clamped to what is still available; the last group receives every
// remaining index so that all samples are assigned.
//
// Returns one vector of sample indices per ratio entry (empty result when
// `ratio` is empty).
vector<vector<int>> random_shuffer_ratio_splitter ::split_impl(const dataset& data) const
{
	vector<NumericType> percent(ratio);

	// The initial value fixes the accumulation type: a plain `0` would sum in
	// int and truncate fractional ratios such as 0.7 / 0.3 down to zero.
	const NumericType total = std::accumulate(ratio.begin(), ratio.end(), NumericType(0));

	// Normalise to fractions of the whole. A zero total is left un-normalised
	// to avoid dividing by zero; the clamping below keeps the split in range.
	if (total != 0)
	{
		for (std::size_t k = 0; k < percent.size(); k++)
			percent[k] = percent[k] / total;
	}

	vector<vector<int>> group_ids(percent.size());

	const int sample_num = data.get_sample_num();
	vector<int> shuffled;
	if (sample_num > 0)
		shuffled.reserve(sample_num);
	for (int i = 0; i < sample_num; i++)
		shuffled.push_back(i);

	// NOTE(review): std::random_shuffle is deprecated since C++14 and removed in
	// C++17; moving to std::shuffle needs <random> and an explicit engine.
	std::random_shuffle(shuffled.begin(), shuffled.end());

	vector<int>::iterator cur_begin = shuffled.begin();
	for (std::size_t g = 0; g < percent.size(); g++)
	{
		// Default: take everything that is left (always the case for the last group).
		vector<int>::iterator cur_end = shuffled.end();

		if (g + 1 != percent.size())
		{
			const double wanted = floor(sample_num * percent[g]);
			const double remaining = static_cast<double>(shuffled.end() - cur_begin);

			// Clamp in floating point before converting, so negative, NaN or
			// oversized requests never move the iterator out of range.
			if (!(wanted > 0))
				cur_end = cur_begin;
			else if (wanted < remaining)
				cur_end = cur_begin + static_cast<vector<int>::difference_type>(wanted);
		}

		group_ids[g].assign(cur_begin, cur_end);
		cur_begin = cur_end;
	}

	return group_ids;
}
예제 #3
0
// Splits the sample indices [0, sample_num) of `data` into `batch_num`
// consecutive batches, keeping the original order.
//
// `batch_num` is declared outside this block. The sample count is whatever
// data.get_sample_num() reports.
//
// Every batch holds ceil(sample_num / batch_num) indices until the samples run
// out; the batch that hits the end gets the remainder and any batches after it
// are left empty. A non-positive sample count yields batch_num empty batches.
//
// Returns exactly batch_num vectors of sample indices.
vector<vector<int>> ordered_dataset_splitter ::split_impl(const dataset& data) const
{
	vector<vector<int>> batch_ids(batch_num);
	// Nothing to fill, and the batch size below would divide by zero.
	if (batch_num == 0)
		return batch_ids;

	const int sample_num = data.get_sample_num();
	if (sample_num <= 0)
		return batch_ids;

	// Integer ceiling: avoids the precision loss of a float division for large
	// sample counts.
	const int num_batches = batch_num;
	const int batch_size = sample_num / num_batches + (sample_num % num_batches != 0 ? 1 : 0);

	// Walk the index range with a running cursor so that no batch can start or
	// end beyond sample_num.
	int first = 0;
	for (int i = 0; i < num_batches; i++)
	{
		const int remaining = sample_num - first;
		const int last = (remaining < batch_size) ? sample_num : first + batch_size;

		// Fill the pre-sized slot in place; appending here instead would leave
		// batch_num empty batches in front of the real ones.
		vector<int>& cur_batch = batch_ids[i];
		cur_batch.reserve(last - first);
		for (int id = first; id < last; id++)
			cur_batch.push_back(id);

		first = last;
	}

	return batch_ids;
}