/**
 * Splits the sample ids 0 .. sample_num-1 of `data` into batches after
 * shuffling them with std::random_shuffle.
 *
 * The number of batches is taken from `batch_num` (declared outside this
 * function). Every batch receives ceil(sample_num / batch_num) ids until the
 * ids run out; trailing batches are shortened, possibly down to empty, so
 * that no id is read past the end of the shuffled list.
 *
 * @param data  dataset whose get_sample_num() gives the number of ids.
 * @return one vector of ids per batch; every id appears in exactly one batch.
 *         An empty result is returned when batch_num is not positive.
 */
vector<vector<int>> random_shuffer_dataset_splitter::split_impl(const dataset& data) const
{
    const int num_batches = static_cast<int>(batch_num);
    if (num_batches <= 0)
        return vector<vector<int>>();

    vector<vector<int>> batch_ids(num_batches);

    int sample_num = data.get_sample_num();
    if (sample_num < 0)
        sample_num = 0;

    // Identity permutation 0 .. sample_num-1, then shuffled in place.
    vector<int> shuffled_ids(sample_num);
    for (int i = 0; i < sample_num; ++i)
        shuffled_ids[i] = i;

    // NOTE(review): std::random_shuffle was deprecated in C++14 and removed in
    // C++17. It is kept here so the source of randomness does not change;
    // migrate to std::shuffle when the project moves to a newer standard.
    std::random_shuffle(shuffled_ids.begin(), shuffled_ids.end());

    // Exact integer ceiling; the former float division loses precision for
    // large sample counts.
    const int batch_size = sample_num / num_batches + (sample_num % num_batches != 0 ? 1 : 0);

    int offset = 0;  // index of the first id that has not been handed out yet
    for (int i = 0; i < num_batches; ++i)
    {
        // Never take more ids than are left. Without this clamp the last
        // batch size could become negative (e.g. 5 samples in 4 batches).
        const int remaining = sample_num - offset;
        const int cur_batch_size = remaining < batch_size ? remaining : batch_size;

        batch_ids[i].assign(shuffled_ids.begin() + offset,
                            shuffled_ids.begin() + offset + cur_batch_size);
        offset += cur_batch_size;
    }

    return batch_ids;
}
/**
 * Splits the sample ids 0 .. sample_num-1 of `data` into groups whose sizes
 * follow the relative weights in `ratio` (declared outside this function),
 * after shuffling the ids with std::random_shuffle.
 *
 * Each weight is divided by the sum of all weights; group i receives
 * floor(sample_num * fraction_i) ids, and the last group receives whatever
 * is left so that every id is assigned exactly once.
 *
 * If the weights do not sum to a positive value, all fractions are treated
 * as zero, i.e. every id ends up in the last group.
 *
 * @param data  dataset whose get_sample_num() gives the number of ids.
 * @return one vector of ids per entry of `ratio`.
 */
vector<vector<int>> random_shuffer_ratio_splitter::split_impl(const dataset& data) const
{
    // Normalise the weights into fractions. The initial value must be a
    // NumericType: with a plain `0` std::accumulate sums in int and silently
    // truncates fractional weights (e.g. {0.7, 0.3} would sum to 0).
    vector<NumericType> percent(ratio);
    const NumericType total = std::accumulate(ratio.begin(), ratio.end(), NumericType(0));
    const int group_num = static_cast<int>(percent.size());
    for (int k = 0; k < group_num; ++k)
    {
        if (total > NumericType(0))
            percent[k] = percent[k] / total;
        else
            percent[k] = NumericType(0);  // avoid dividing by zero
    }

    vector<vector<int>> group_ids(percent.size());

    int sample_num = data.get_sample_num();
    if (sample_num < 0)
        sample_num = 0;

    // Identity permutation 0 .. sample_num-1, then shuffled in place.
    vector<int> shuffled_ids(sample_num);
    for (int i = 0; i < sample_num; ++i)
        shuffled_ids[i] = i;

    // NOTE(review): std::random_shuffle was deprecated in C++14 and removed in
    // C++17. It is kept here so the source of randomness does not change;
    // migrate to std::shuffle when the project moves to a newer standard.
    std::random_shuffle(shuffled_ids.begin(), shuffled_ids.end());

    int offset = 0;  // index of the first id that has not been handed out yet
    for (int i = 0; i < group_num; ++i)
    {
        const int remaining = sample_num - offset;

        // The last group absorbs everything that rounding left over.
        int cur_group_size = remaining;
        if (i != group_num - 1)
        {
            // Clamp to [0, remaining] before converting to int so that
            // rounding error, negative weights or NaN can never move the
            // range outside the shuffled id list.
            const double wanted = floor(sample_num * percent[i]);
            if (!(wanted > 0))
                cur_group_size = 0;
            else if (wanted < remaining)
                cur_group_size = static_cast<int>(wanted);
        }

        group_ids[i].assign(shuffled_ids.begin() + offset,
                            shuffled_ids.begin() + offset + cur_group_size);
        offset += cur_group_size;
    }

    return group_ids;
}
/**
 * Splits the sample ids 0 .. sample_num-1 of `data` into consecutive batches,
 * keeping the ids in ascending order.
 *
 * The number of batches is taken from `batch_num` (declared outside this
 * function). Every batch receives ceil(sample_num / batch_num) ids until the
 * ids run out; trailing batches are shortened, possibly down to empty, so
 * that no id >= sample_num is ever produced.
 *
 * @param data  dataset whose get_sample_num() gives the number of ids.
 * @return exactly one vector of ids per batch; every id appears in exactly
 *         one batch. An empty result is returned when batch_num is not
 *         positive.
 */
vector<vector<int>> ordered_dataset_splitter::split_impl(const dataset& data) const
{
    const int num_batches = static_cast<int>(batch_num);
    if (num_batches <= 0)
        return vector<vector<int>>();

    // The result is pre-sized, so batches are written into their slots below.
    // Appending with push_back here would leave num_batches empty vectors in
    // front of the real batches.
    vector<vector<int>> batch_ids(num_batches);

    int sample_num = data.get_sample_num();
    if (sample_num < 0)
        sample_num = 0;

    // Exact integer ceiling; the former float division loses precision for
    // large sample counts.
    const int batch_size = sample_num / num_batches + (sample_num % num_batches != 0 ? 1 : 0);

    int next_id = 0;  // first id that has not been handed out yet
    for (int i = 0; i < num_batches; ++i)
    {
        // Never take more ids than are left. Without this clamp the last
        // batch size could become negative (e.g. 5 samples in 4 batches).
        const int remaining = sample_num - next_id;
        const int cur_batch_size = remaining < batch_size ? remaining : batch_size;

        vector<int>& cur_batch_id = batch_ids[i];
        cur_batch_id.resize(cur_batch_size);
        for (int j = 0; j < cur_batch_size; ++j)
            cur_batch_id[j] = next_id + j;

        next_id += cur_batch_size;
    }

    return batch_ids;
}