void WeightingFunctionContextBM25_dpfc::addWeightingFeature( const std::string& name_, PostingIteratorInterface* itr_, float weight_, const TermStatistics& stats_) { try { if (boost::algorithm::iequals( name_, "match")) { double nofMatches = stats_.documentFrequency()>=0?stats_.documentFrequency():(GlobalCounter)itr_->documentFrequency(); double idf = 0.0; bool relevant = (m_nofCollectionDocuments * m_relevant_df_factor > nofMatches); if (m_nofCollectionDocuments > nofMatches * 2) { idf = logl( (m_nofCollectionDocuments - nofMatches + 0.5) / (nofMatches + 0.5)); } if (idf < 0.00001) { idf = 0.00001; } m_weight_featar.push_back( Feature( itr_, weight_, idf, relevant)); } else if (boost::algorithm::iequals( name_, "struct")) { m_struct_featar.push_back( Feature( itr_, weight_, 0.0, false)); } else if (boost::algorithm::iequals( name_, "title")) { if (m_title_itr) throw strus::runtime_error( _TXT( "duplicate '%s' weighting function feature parameter '%s'"), "BM25_dpfc", name_.c_str()); m_title_itr = itr_; } else { throw strus::runtime_error( _TXT( "unknown '%s' weighting function feature parameter '%s'"), "BM25_dpfc", name_.c_str()); } } CATCH_ERROR_ARG1_MAP( _TXT("error adding weighting feature to '%s' weighting: %s"), "BM25_dpfc", *m_errorhnd); }
/// Builds a feature from its posting iterator, its weight and its term statistics.
/// m_df is initialised from stats_.documentFrequency() when that value is
/// non-negative and from a quiet NaN otherwise; m_match starts out as false.
/// @param itr_ posting iterator of the feature
/// @param weight_ weight of the feature
/// @param stats_ term statistics providing the document frequency
Feature( PostingIteratorInterface* itr_, double weight_, const TermStatistics& stats_)
	: m_itr( itr_)
	, m_weight( weight_)
	, m_df( stats_.documentFrequency() < 0
		? std::numeric_limits<double>::quiet_NaN()
		: stats_.documentFrequency())
	, m_match( false)
{}