示例#1
0
/**
 * Kolmogorov-Smirnov style distance between two histograms.
 *
 * Both inputs are copied, the copies are converted with
 * convert_to_commulative(), and the two results are probed at evenly
 * spaced positions across the joint value range [min, max) of both
 * inputs. The number of probes is twice the larger histogram size.
 *
 * @param sample1 first histogram (not modified, a copy is converted)
 * @param sample2 second histogram (not modified, a copy is converted)
 * @return the largest absolute difference found between the two converted
 *         histograms over the probed positions; 0.0 when no position is
 *         probed (both sizes are zero).
 */
double ks_distance(const histogram<double>& sample1,const histogram<double>& sample2)
{
  //let's include the whole range 
  double _min=std::min(sample1.min(),sample2.min());
  double _max=std::max(sample1.max(),sample2.max());
  double _range=_max-_min;
  
  // work on copies so the caller's histograms stay untouched
  histogram<double> _s1(sample1);
  histogram<double> _s2(sample2);
  
  _s1.convert_to_commulative();
  _s2.convert_to_commulative();
  
  // oversample by a factor of two relative to the finer histogram
  int _size=std::max(_s1.size(),_s2.size())*2;
  double _bin=_range/_size;
  
  double dist=0.0;
  for(int i=0;i<_size;i++)
  {
    // Probe positions start at the joint minimum, not at 0: the range
    // being covered is [_min,_max). Computing the position from the index
    // also avoids accumulating rounding error from repeated additions.
    // NOTE(review): a position can lie outside one histogram's own
    // [min,max]; this relies on histogram::operator[] handling that
    // case sensibly - confirm against the histogram class.
    const double v=_min+i*_bin;
    double d=fabs(_s1[v]-_s2[v]);
    if(d>dist) dist=d;
  }
  return dist;
}
示例#2
0
/**
 * Kullback-Leibler style distance between two histograms.
 *
 * The joint value range of both histograms is split into as many equal
 * steps as the larger histogram has entries, and each histogram is read
 * at the centre of every step. A histogram contributes 0 at positions
 * outside its own [min, max] interval.
 *
 * @param sample1 histogram supplying the P term
 * @param sample2 histogram supplying the Q term
 * @return sum of P*log(P/Q) over all positions where both P and Q are
 *         strictly positive; 0.0 when no such position exists.
 */
double kl_distance(const histogram<double>& sample1,const histogram<double>& sample2)
{
  // joint range spanned by the two histograms
  const double lo=std::min(sample1.min(),sample2.min());
  const double hi=std::max(sample1.max(),sample2.max());
  const double span=hi-lo;

  const int steps=std::max(sample1.size(),sample2.size());
  const double step=span/steps;

  double total=0.0;
  double x=lo+step/2.0; // centre of the first step

  for(int k=0;k<steps;k++,x+=step)
  {
    // read each histogram only inside its own range, otherwise use 0
    const double p=(x>=sample1.min() && x<=sample1.max()) ? sample1[x] : 0.0;
    const double q=(x>=sample2.min() && x<=sample2.max()) ? sample2[x] : 0.0;

    // terms with a non-positive P or Q are skipped entirely
    //TODO: epsilon?
    if(p>0.0 && q>0.0)
      total+=p*log(p/q);
  }
  return total;
}
示例#3
0
  /* Add the contents of another histogram to this one, bucket by bucket,
   * and add its entry count to this histogram's entry count.
   * Silently does nothing when the histogram parameters ( min, max,
   * number of buckets ) of the two histograms are not identical.
   */
  void accumulate( const histogram& other )
  {
    // Guard clauses: all three parameters must match exactly.
    if ( min() != other.min() )
      return;
    if ( max() != other.max() )
      return;
    if ( data().size() != other.data().size() )
      return;

    const size_t bucket_count = _data.size();
    for ( size_t i = 0; i != bucket_count; ++i )
      _data[ i ] += other.data()[ i ];

    _num_entries += other.num_entries();
  }