void SiteCounts::add_sequence( std::string const& sites, CountsIntType weight ) { if( num_seqs_ >= std::numeric_limits< CountsIntType >::max() - weight ) { throw std::runtime_error( "Cannot add Sequence to SiteCounts as it might lead to an overflow in the counts." ); } if( sites.size() != counts_.rows() ) { throw std::runtime_error( "Cannot add Sequence to SiteCounts if it has different number of sites: Expected " + std::to_string( counts_.rows() ) + " sites, but sequence has " + std::to_string( sites.size() ) + " sites." ); } for( size_t site_idx = 0; site_idx < sites.size(); ++site_idx ) { // Get the index of the char. If not found, this char is not to be counted, so continue. auto char_idx = lookup_[ static_cast< size_t >( sites[ site_idx ] ) ]; if( char_idx == characters_.size() ) { continue; } // Increase the count at that index. counts_( site_idx, char_idx ) += weight; } // We finished a sequence. Add to the counter. num_seqs_ += weight; }
void DiscreteFrustum::interpolatedUndistort(double* z) const { int idx = index(*z); double start = bin_depth_ * idx; int idx1; if(*z - start < bin_depth_ / 2) idx1 = idx; else idx1 = idx + 1; int idx0 = idx1 - 1; if(idx0 < 0 || idx1 >= num_bins_ || counts_(idx0) < 50 || counts_(idx1) < 50) { undistort(z); return; } double z0 = (idx0 + 1) * bin_depth_ - bin_depth_ * 0.5; double coeff1 = (*z - z0) / bin_depth_; double coeff0 = 1.0 - coeff1; double mult = coeff0 * multipliers_.coeffRef(idx0) + coeff1 * multipliers_.coeffRef(idx1); *z *= mult; }
void DiscreteFrustum::addExample(double ground_truth, double measurement) { double mult = ground_truth / measurement; if(mult > MAX_MULT || mult < MIN_MULT) return; int idx = min(num_bins_ - 1, (int)floor(measurement / bin_depth_)); UASSERT(idx >= 0); total_numerators_(idx) += ground_truth * ground_truth; total_denominators_(idx) += ground_truth * measurement; ++counts_(idx); multipliers_(idx) = total_numerators_(idx) / total_denominators_(idx); }
void DiscreteFrustum::addExample(double ground_truth, double measurement) { boost::unique_lock<boost::shared_mutex> ul(shared_mutex_); double mult = ground_truth / measurement; if(mult > MAX_MULT || mult < MIN_MULT) return; int idx = min(num_bins_ - 1, (int)floor(measurement / bin_depth_)); assert(idx >= 0); total_numerators_(idx) += ground_truth * ground_truth; total_denominators_(idx) += ground_truth * measurement; ++counts_(idx); multipliers_(idx) = total_numerators_(idx) / total_denominators_(idx); }
/**
 * @brief Return the count stored for a given character at a given site.
 *
 * @param character  Character to query; it is translated to a column via `lookup_`.
 * @param site_index Index of the site; must be smaller than `length()`.
 * @return The entry of `counts_` for that site and character.
 *
 * @throws std::runtime_error if @p site_index is not smaller than `length()`, or if
 *         `lookup_` maps @p character to `characters_.size()`.
 */
SiteCounts::CountsIntType SiteCounts::count_of( char character, size_t site_index ) const
{
    // Reject sites beyond the end first.
    if( site_index >= length() ) {
        auto const message =
            "Invalid site index for retrieving count: " + std::to_string( site_index ) + "."
        ;
        throw std::runtime_error( message );
    }

    // An index equal to characters_.size() is treated as "invalid character".
    auto const column = lookup_[ character ];
    if( column == characters_.size() ) {
        auto const message =
            "Invalid character for retrieving count: '" + std::string( 1, character ) + "'."
        ;
        throw std::runtime_error( message );
    }

    return counts_( site_index, column );
}
/**
 * @brief Return the count stored at a given character index and site index.
 *
 * @param character_index Column of `counts_` to read; must be smaller than `counts_.cols()`.
 * @param site_index      Row of `counts_` to read; must be smaller than `counts_.rows()`.
 * @return The entry of `counts_` at ( @p site_index, @p character_index ).
 *
 * @throws std::runtime_error if @p site_index is not smaller than `counts_.rows()`, or if
 *         @p character_index is not smaller than `counts_.cols()`.
 */
SiteCounts::CountsIntType SiteCounts::count_at( size_t character_index, size_t site_index ) const
{
    if( site_index >= counts_.rows() ) {
        throw std::runtime_error(
            "Invalid site index for retrieving count: " + std::to_string( site_index ) + "."
        );
    }

    // Valid columns are 0 .. cols() - 1, so an index equal to cols() has to be rejected, too.
    if( character_index >= counts_.cols() ) {
        throw std::runtime_error(
            "Invalid character index for retrieving count: " +
            std::to_string( character_index ) + "."
        );
    }

    return counts_( site_index, character_index );
}
//! Processes a single weighted data point. void process(const uint_vector& values, T weight) { assert(values.size() == 2); counts_(values[0], values[1]) += weight; }