Example #1
0
double
loglinear_method::run(const snp_row &row1, const snp_row &row2, float *output)
{
    arma::mat count = joint_count( row1, row2, get_data( )->phenotype, m_weight );
    size_t num_samples = arma::accu( count );
    set_num_ok_samples( num_samples );
    if( arma::min( arma::min( count ) ) < METHOD_SMALLEST_CELL_SIZE_BINOMIAL )
    {
        return -9;
    }

    std::vector<log_double> likelihood( m_models.size( ), 0.0 );
    std::vector<double> bic( m_models.size( ), 0.0 );
    for(int i = 0; i < m_models.size( ); i++)
    {
        likelihood[ i ] = m_models[ i ]->prob( count );
        bic[ i ] = -2.0 * likelihood[ i ].log_value( ) + m_models[ i ]->df( ) * log( num_samples );
    }

    unsigned int best_model = std::distance( bic.begin( ), std::min_element( bic.begin( ) + 1, bic.end( ) ) );
    double LR = -2.0*(likelihood[ best_model ].log_value( ) - likelihood[ 0 ].log_value( ));

    try
    {
        double p_value = 1.0 - chi_square_cdf( LR, m_models[ 0 ]->df( ) - m_models[ best_model ]->df( ) );
        output[ 0 ] = p_value;
        return p_value;
    }
    catch(bad_domain_value &e)
    {
    }

    return -9;
}
Example #2
0
lexicalCache PhraseDictionaryMultiModelCounts::CacheLexicalStatistics( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const vector<lexicalTable*> &tables, bool is_input )
{
// collect the raw per-model lexical counts for every target word;
// the model weights are applied later, not here

  // the NULL word explains target words that have no source alignment
  Word nullWord;
  if (is_input) {
    nullWord.CreateFromString(Input, m_input, "NULL", false);
  } else {
    nullWord.CreateFromString(Output, m_output, "NULL", false);
  }

  lexicalCache cache;

  // every target word needs at least one explanation: its aligned
  // source words, or NULL when it is unaligned
  for (size_t targetPos = 0; targetPos < alignment.size(); targetPos++) {
    const set< size_t > &sourcePositions = alignment[ targetPos ];
    Word targetWord = phraseT.GetWord(targetPos);

    vector<lexicalPair> wordPairs;
    if (!sourcePositions.empty()) {
      // one (joint, marginal) count pair per aligned source word
      set< size_t >::const_iterator it;
      for (it = sourcePositions.begin(); it != sourcePositions.end(); ++it) {
        Word sourceWord = phraseS.GetWord(*it);
        vector<float> jointCounts (m_numModels);
        vector<float> marginalCounts (m_numModels);

        FillLexicalCountsJoint(sourceWord, targetWord, jointCounts, tables);
        FillLexicalCountsMarginal(sourceWord, marginalCounts, tables);

        wordPairs.push_back(make_pair(jointCounts, marginalCounts));
      }
    } else {
      // unaligned target word: explain it with NULL
      vector<float> jointCounts (m_numModels);
      vector<float> marginalCounts (m_numModels);

      FillLexicalCountsJoint(nullWord, targetWord, jointCounts, tables);
      FillLexicalCountsMarginal(nullWord, marginalCounts, tables);

      wordPairs.push_back(make_pair(jointCounts, marginalCounts));
    }
    cache.push_back(wordPairs);
  }
  return cache;
}
Example #3
0
// get lexical probability for single word alignment pair
double PhraseDictionaryMultiModelCounts::GetLexicalProbability( Word &wordS, Word &wordT, const vector<lexicalTable*> &tables, vector<float> &multimodelweights ) const
{
  // look up the per-model joint and marginal counts for the pair, then
  // let the configured combination function turn them into a probability
  vector<float> jointCounts (m_numModels);
  vector<float> marginalCounts (m_numModels);

  FillLexicalCountsJoint(wordS, wordT, jointCounts, tables);
  FillLexicalCountsMarginal(wordS, marginalCounts, tables);

  return m_combineFunction(jointCounts, marginalCounts, multimodelweights);
}
Example #4
0
double
stagewise_method::run(const snp_row &row1, const snp_row &row2, float *output)
{
    std::vector<log_double> likelihood( m_models.size( ), 0.0 );

    arma::mat count;
    float min_samples = 0.0;
    unsigned int sample_threshold = METHOD_SMALLEST_CELL_SIZE_BINOMIAL;
    if( m_model == "binomial" )
    {
        count = joint_count( row1, row2, get_data( )->phenotype, m_weight );
        set_num_ok_samples( (size_t) arma::accu( count ) );
        min_samples = arma::min( arma::min( count ) );
    }
    else if( m_model == "normal" )
    {
        count = joint_count_cont( row1, row2, get_data( )->phenotype, m_weight );
        set_num_ok_samples( (size_t) arma::accu( count.col( 1 ) ) );
        min_samples = arma::min( count.col( 1 ) );
        sample_threshold = METHOD_SMALLEST_CELL_SIZE_NORMAL;
    }
    
    if( min_samples < sample_threshold )
    {
        return -9;
    }
    
    for(int i = 0; i < m_models.size( ); i++)
    {
        likelihood[ i ] = m_models[ i ]->prob( count );
    }

    for(int i = 1; i < m_models.size( ); i++)
    {
        double LR = -2.0*(likelihood[ i ].log_value( ) - likelihood[ 0 ].log_value( ));

        try
        {
            output[ i - 1 ] = 1.0 - chi_square_cdf( LR, m_models[ 0 ]->df( ) - m_models[ i ]->df( ) );
        }
        catch(bad_domain_value &e)
        {
        }
    }

    return output[ 0 ];
}