C++ (Cpp) termscorefunctionfactory_parse Examples

Programming Language: C++ (Cpp)

Method/Function: termscorefunctionfactory_parse

Examples at hotexamples.com: 2

C++ (Cpp) termscorefunctionfactory_parse - 2 examples found. These are the top rated real world C++ (Cpp) examples of termscorefunctionfactory_parse extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: TermScoreFunctionFactory.cpp Project: semanticpc/matIR

/**
 * Creates a term score function selected by a smoothing specification string.
 *
 * The specification is handed to termscorefunctionfactory_parse(), which
 * populates an indri::api::Parameters object; the "method" key of that object
 * picks the scorer:
 *   - "dirichlet" / "d" / "dir"            -> DirichletTermScoreFunction
 *        reads "mu" (default 2500) and "documentMu" (default -1.0)
 *   - "linear" / "jm" / "jelinek-mercer"   -> JelinekMercerTermScoreFunction
 *        reads "documentLambda" (default 0.0) and "collectionLambda",
 *        falling back to "lambda" (default 0.4 for both)
 *   - anything else, including no method   -> SimpleTermScoreFunction
 *
 * @param stringSpec           smoothing specification to parse
 * @param collectionFrequency  vector that is divided element-wise by contextSize
 *                             before being passed to the scorer
 * @param contextSize          divisor applied to collectionFrequency
 * @param documentOccurrences  not used by this function
 * @param documentCount        not used by this function
 * @return a scorer allocated with new; this function keeps no reference to it
 */
matIR::scoring::TermScoreFunction* matIR::scoring::TermScoreFunctionFactory::get(
    const std::string& stringSpec, arma::vec& collectionFrequency,
    double contextSize, int documentOccurrences, int documentCount ) {

    indri::api::Parameters parsedSpec;
    termscorefunctionfactory_parse( parsedSpec, stringSpec );
    const std::string smoothing = parsedSpec.get( "method", "" );

    // collection frequency scaled by the context size; computed up front for every method
    arma::vec collectionProb = collectionFrequency / contextSize;

    const bool wantsDirichlet =
        smoothing == "dirichlet" || smoothing == "d" || smoothing == "dir";
    if( wantsDirichlet ) {
        double mu = parsedSpec.get( "mu", 2500 );
        // a documentMu of -1.0 is the default: no doc-level smoothing
        double docmu = parsedSpec.get( "documentMu", -1.0 );
        return new matIR::scoring::DirichletTermScoreFunction( mu, collectionProb, docmu );
    }

    const bool wantsJelinekMercer =
        smoothing == "linear" || smoothing == "jm" || smoothing == "jelinek-mercer";
    if( wantsJelinekMercer ) {
        // jelinek-mercer -- accepts collectionLambda (or plain lambda) and documentLambda
        double documentLambda = parsedSpec.get( "documentLambda", 0.0 );
        double collectionLambda = parsedSpec.exists( "collectionLambda" )
            ? parsedSpec.get( "collectionLambda", 0.4 )
            : parsedSpec.get( "lambda", 0.4 );

        return new matIR::scoring::JelinekMercerTermScoreFunction( collectionProb, collectionLambda, documentLambda );
    }

    // unrecognized or missing method
    return new matIR::scoring::SimpleTermScoreFunction();
}

Example #2

Show file

File: TermScoreFunctionFactory.cpp Project: blaze3j/DocHunt

/**
 * Creates a term score function selected by a smoothing specification string.
 *
 * The specification is handed to termscorefunctionfactory_parse(), which
 * populates an indri::api::Parameters object; its "method" key (default
 * "dirichlet") picks the scorer:
 *   - "dirichlet" / "d" / "dir"           -> DirichletTermScoreFunction ("mu", "documentMu")
 *   - "linear" / "jm" / "jelinek-mercer"  -> JelinekMercerTermScoreFunction
 *                                            ("collectionLambda" or "lambda", "documentLambda")
 *   - "two" / "two-stage" / "twostage"    -> TwoStageTermScoreFunction ("mu", "lambda")
 *   - "tfidf"                             -> TFIDFTermScoreFunction ("k1", "b", "qtf" or "qtw")
 *   - "okapi"                             -> TFIDFTermScoreFunction with its sixth argument
 *                                            true and "k3" ("k1", "b", "qtf" or "qtw")
 *   - anything else                       -> DirichletTermScoreFunction with mu = 2500
 *
 * @param stringSpec           smoothing specification to parse
 * @param occurrences          occurrence count of the term; 0 triggers the
 *                             unseen-term estimate described below
 * @param contextSize          size used to normalize occurrences and, together
 *                             with documentCount, to compute the average
 *                             document length
 * @param documentOccurrences  used only in the idf of the tfidf/okapi methods
 * @param documentCount        used only by the tfidf/okapi methods
 * @return a scorer allocated with new; this function keeps no reference to it
 */
indri::query::TermScoreFunction* indri::query::TermScoreFunctionFactory::get( const std::string& stringSpec, double occurrences, double contextSize, int documentOccurrences, int documentCount ) {
  indri::api::Parameters spec;
  termscorefunctionfactory_parse( spec, stringSpec );
  std::string method = spec.get( "method", "dirichlet" );

  // A term with zero occurrences never happens in our collection, so we assume
  // it happens somewhat less often than 1./collectionSize.  1/(2*collectionSize)
  // was picked because it seemed most appropriate (from InferenceNetworkBuilder).
  // The value is computed with a plain conditional expression; the variable is
  // not written from inside its own initializer.
  double collectionFrequency = ( occurrences != 0 )
    ? ( occurrences / contextSize )
    : ( 1.0 / ( contextSize * 2. ) );

  if( method == "dirichlet" || method == "d" || method == "dir" ) {
    // dirichlet -- takes parameter "mu"
    // NOTE(review): the default here is an int literal; if Parameters::get is
    // overloaded on the default's type, "mu" is read as an integer -- confirm.
    double mu = spec.get( "mu", 2500 );
    double docmu = spec.get( "documentMu", -1.0 ); // default is no doc-level smoothing
    return new indri::query::DirichletTermScoreFunction( mu, collectionFrequency, docmu );
  } else if( method == "linear" || method == "jm" || method == "jelinek-mercer" ) {
    // jelinek-mercer -- can take parameters collectionLambda (or just lambda) and documentLambda
    double documentLambda = spec.get( "documentLambda", 0.0 );
    double collectionLambda;

    // an explicit collectionLambda wins over the shorter "lambda" spelling
    if( spec.exists( "collectionLambda" ) )
      collectionLambda = spec.get( "collectionLambda", 0.4 );
    else
      collectionLambda = spec.get( "lambda", 0.4 );

    return new indri::query::JelinekMercerTermScoreFunction( collectionFrequency, collectionLambda, documentLambda );
  } else if( method == "two" || method == "two-stage" || method == "twostage" ) {
    // twostage -- takes parameters mu and lambda
    double mu = spec.get( "mu", 2500 );
    double lambda = spec.get( "lambda", 0.4 );

    return new indri::query::TwoStageTermScoreFunction( mu, lambda, collectionFrequency );
  } else if ( method == "tfidf" ) {
    // tfidf -- takes parameters k1, b and either qtw (a double weight) or qtf (an int count)
    double k1 = spec.get( "k1", 1.2 );
    double b = spec.get( "b", 0.75 );
    int qtf = spec.get( "qtf", 1 );
    double idf = log( ( documentCount + 1 ) / ( documentOccurrences + 0.5 ) );
    double avgDocLength = contextSize / double(documentCount);
    if( spec.exists( "qtw" ) ) {
      // when present, qtw is passed instead of qtf
      double weight = spec.get( "qtw", 1.0 );
      return new indri::query::TFIDFTermScoreFunction( idf, avgDocLength, weight, k1, b, false );
    } else {
      return new indri::query::TFIDFTermScoreFunction( idf, avgDocLength, qtf, k1, b, false );
    }
  } else if ( method == "okapi" ) {
    // okapi -- same parameters as tfidf plus k3, with a different idf formula
    double k1 = spec.get( "k1", 1.2 );
    double b = spec.get( "b", 0.75 );
    // NOTE(review): int-literal default, same caveat as "mu" above -- confirm.
    double k3 = spec.get( "k3", 7 );
    int qtf = spec.get( "qtf", 1 );
    double idf = log( ( documentCount - documentOccurrences + 0.5 ) / ( documentOccurrences + 0.5 ) );
    double avgDocLength = contextSize / double(documentCount);
    if( spec.exists( "qtw" ) ) {
      // when present, qtw is passed instead of qtf
      double weight = spec.get( "qtw", 1.0 );
      return new indri::query::TFIDFTermScoreFunction( idf, avgDocLength, weight, k1, b, true, k3 );
    } else {
      return new indri::query::TFIDFTermScoreFunction( idf, avgDocLength, qtf, k1, b, true, k3 );
    }
  }

  // if nothing else worked, we'll use dirichlet with mu=2500
  return new indri::query::DirichletTermScoreFunction( 2500, collectionFrequency );
}