void WeightingFunctionContextBM25_dpfc::addWeightingFeature(
		const std::string& name_,
		PostingIteratorInterface* itr_,
		float weight_,
		const TermStatistics& stats_)
{
	try
	{
		if (boost::algorithm::iequals( name_, "match"))
		{
			double nofMatches = stats_.documentFrequency()>=0?stats_.documentFrequency():(GlobalCounter)itr_->documentFrequency();
			double idf = 0.0;
			bool relevant = (m_nofCollectionDocuments * m_relevant_df_factor > nofMatches);
	
			if (m_nofCollectionDocuments > nofMatches * 2)
			{
				idf = logl(
						(m_nofCollectionDocuments - nofMatches + 0.5)
						/ (nofMatches + 0.5));
			}
			if (idf < 0.00001)
			{
				idf = 0.00001;
			}
			m_weight_featar.push_back( Feature( itr_, weight_, idf, relevant));
		}
		else if (boost::algorithm::iequals( name_, "struct"))
		{
			m_struct_featar.push_back( Feature( itr_, weight_, 0.0, false));
		}
		else if (boost::algorithm::iequals( name_, "title"))
		{
			if (m_title_itr) throw strus::runtime_error( _TXT( "duplicate '%s' weighting function feature parameter '%s'"), "BM25_dpfc", name_.c_str());
			m_title_itr = itr_;
		}
		else
		{
			throw strus::runtime_error( _TXT( "unknown '%s' weighting function feature parameter '%s'"), "BM25_dpfc", name_.c_str());
		}
	}
	CATCH_ERROR_ARG1_MAP( _TXT("error adding weighting feature to '%s' weighting: %s"), "BM25_dpfc", *m_errorhnd);
}
Example #2
0
		/// \brief Constructor
		/// \param[in] itr_ posting iterator of the feature
		/// \param[in] weight_ weight of the feature
		/// \param[in] stats_ term statistics; a negative document frequency is stored as quiet NaN in m_df
		Feature( PostingIteratorInterface* itr_, double weight_, const TermStatistics& stats_)
			:m_itr(itr_)
			,m_weight(weight_)
			,m_df(stats_.documentFrequency() < 0
				? std::numeric_limits<double>::quiet_NaN()
				: stats_.documentFrequency())
			,m_match(false)
		{}