void StatisticsVector<T>::histogram(std::vector<dof_id_type>& bin_members,
                                    unsigned int n_bins)
{
  // Purpose: sort this vector in place, split the range
  // [this->minimum(), this->maximum()] into n_bins equal-width bins, and
  // report how many entries land in each bin.
  //
  // bin_members : output; resized to n_bins and overwritten with the
  //               per-bin counts (any previous contents are discarded).
  // n_bins      : number of bins requested; must be greater than zero.
  //
  // Side effect: the entries of this vector are left sorted.

  // Must have at least 1 bin
  libmesh_assert (n_bins>0);

  const dof_id_type n = this->size();

  std::sort(this->begin(), this->end());

  // The StatisticsVector can hold both integer and float types.
  // We will define all the bins, etc. using Reals.
  Real min      = static_cast<Real>(this->minimum());
  Real max      = static_cast<Real>(this->maximum());
  Real bin_size = (max - min) / static_cast<Real>(n_bins);

  START_LOG ("histogram()", "StatisticsVector");

  // bin j covers the interval bounded by bin_bounds[j] and bin_bounds[j+1]
  std::vector<Real> bin_bounds(n_bins+1);
  for (unsigned int i=0; i<bin_bounds.size(); i++)
    bin_bounds[i] = min + i * bin_size;

  // Give the last bin boundary a little wiggle room: we don't want
  // it to be just barely less than the max, otherwise our bin test below
  // may fail.
  bin_bounds.back() += 1.e-6 * bin_size;

  // This vector will store the number of members each bin has.
  // assign() rather than resize(): resize() would keep whatever counts the
  // caller's vector already held, and the increments below would then be
  // added on top of that stale data.
  bin_members.assign(n_bins, static_cast<dof_id_type>(0));

  // A single cursor walks the sorted data exactly once.  It advances past
  // every entry that is either skipped or binned, so an entry can never be
  // counted in more than one bin -- in particular when all entries are equal
  // (bin_size == 0) and no entry ever exceeds a bin's upper bound.
  dof_id_type data_index = 0;
  for (unsigned int j=0; j<bin_members.size(); j++) // bin vector indexing
    {
      while (data_index < n) // data vector indexing
        {
          const Real current_val = static_cast<Real>( (*this)[data_index] );

          // There may be entries in the vector smaller than the value
          // reported by this->minimum().  (e.g. inactive elements in an
          // ErrorVector.)  We just skip entries like that.
          if ( current_val < min )
            {
              ++data_index;
              continue;
            }

          // Outside the current bin: leave the cursor on this entry so the
          // next bin starts its search here.
          if ( current_val > bin_bounds[j+1] )
            break;

          // Otherwise, increment current bin's count
          bin_members[j]++;
          ++data_index;
        }
    }

#ifdef DEBUG
  // Check the number of binned entries.  Entries skipped above (those
  // below the reported minimum) are not binned, so they will also
  // trigger this warning.
  const dof_id_type n_binned = std::accumulate(bin_members.begin(),
                                               bin_members.end(),
                                               static_cast<dof_id_type>(0),
                                               std::plus<dof_id_type>());

  if (n != n_binned)
    {
      libMesh::out << "Warning: The number of binned entries, n_binned="
                   << n_binned
                   << ", did not match the total number of entries, n="
                   << n << "." << std::endl;
      //libmesh_error();
    }
#endif

  STOP_LOG ("histogram()", "StatisticsVector");
}
void BinSorter<KeyType,IdxType>::binsort (const IdxType nbins,
                                          KeyType max,
                                          KeyType min)
{
  // Purpose: choose nbins+1 boundary iterators into `data` (stored in
  // bin_iters) so that the bins hold roughly equal shares of the global
  // data set.  Boundaries are picked by walking a histogram of the keys
  // that is 50 times finer than the requested number of bins.
  //
  // nbins : number of bins to create.
  // max   : upper key bound handed to the histogram (must exceed min).
  // min   : lower key bound handed to the histogram.

  libmesh_assert_less (min, max);

  // Parallel histogram of our data, used to create quasi-uniform bins.
  Parallel::Histogram<KeyType,IdxType> phist (this->comm(), data);
  phist.make_histogram (nbins*50, max, min);
  phist.build_histogram ();

  const std::vector<IdxType>& cell_counts = phist.get_histogram();

  // Total size of the data set, summed through this->comm().
  IdxType local_data_size  = libmesh_cast_int<IdxType>(data.size());
  IdxType global_data_size = libmesh_cast_int<IdxType>(local_data_size);

  this->comm().sum(global_data_size);

  // Every bin aims for the same share; the first `leftover` bins take
  // one extra entry each so the remainder is spread evenly.
  std::vector<IdxType> target_bin_size (nbins, global_data_size / nbins);

  const IdxType leftover = global_data_size % nbins;
  for (IdxType k=0; k<leftover; k++)
    ++target_bin_size[k];

  // Boundary values (as doubles) and the matching iterators into data.
  std::vector<double> bin_bounds (nbins+1);
  bin_iters.resize (nbins+1, data.begin());

  // Lower edge of the first bin.
  bin_iters[0]  = data.begin();
  bin_bounds[0] = Parallel::Utils::to_double(min);

  // Current position in the fine histogram.
  IdxType hist_cell = 0;

  // Running surplus (+) or deficit (-) relative to the target sizes of
  // the bins finished so far.  A deficit lets the next bin grow a bit
  // larger, keeping the overall average on target.
  int delta = 0;

  for (IdxType bin=0; bin<nbins; ++bin)
    {
      // Entries collected for this bin so far; should end up close to
      // target_bin_size[bin].
      int current_bin_size = 0;

      // Absorb histogram cells for as long as doing so keeps us within
      // the target size.
      for (;;)
        {
          const bool next_cell_fits =
            (current_bin_size + cell_counts[hist_cell] + delta) <= target_bin_size[bin];

          if (!next_cell_fits)
            break;

          // Never step past the final histogram cell.
          if ((hist_cell+1) == phist.n_bins())
            break;

          current_bin_size += cell_counts[hist_cell];
          hist_cell++;
        }

      delta += current_bin_size - target_bin_size[bin];

      // Upper edge of this bin: the histogram's bound at the current
      // cell, then the first data entry not less than that key.
      bin_bounds[bin+1] = phist.upper_bound (hist_cell);
      bin_iters[bin+1]  = std::lower_bound(bin_iters[bin], data.end(),
                                           Parallel::Utils::to_key_type<KeyType>(bin_bounds[bin+1]));
    }

  // Force the final boundary to the end of the data / the maximum key.
  bin_iters[nbins]  = data.end();
  bin_bounds[nbins] = Parallel::Utils::to_double(max);
}