double loglinear_method::run(const snp_row &row1, const snp_row &row2, float *output)
{
    /* Count the joint genotype-phenotype contingency table for this SNP pair. */
    arma::mat count = joint_count( row1, row2, get_data( )->phenotype, m_weight );
    size_t num_samples = arma::accu( count );
    set_num_ok_samples( num_samples );

    /* Refuse to fit if any cell is too sparse for the chi-square approximation to hold. */
    if( arma::min( arma::min( count ) ) < METHOD_SMALLEST_CELL_SIZE_BINOMIAL )
    {
        return -9;
    }

    std::vector<log_double> likelihood( m_models.size( ), 0.0 );
    std::vector<double> bic( m_models.size( ), 0.0 );
    for(size_t i = 0; i < m_models.size( ); i++)
    {
        likelihood[ i ] = m_models[ i ]->prob( count );
        bic[ i ] = -2.0 * likelihood[ i ].log_value( ) + m_models[ i ]->df( ) * log( num_samples );
    }

    /* Model 0 is the reference (full) model, so the BIC search starts at index 1. */
    unsigned int best_model = std::distance( bic.begin( ), std::min_element( bic.begin( ) + 1, bic.end( ) ) );

    /* Likelihood-ratio test of the BIC-best alternative against the full model. */
    double LR = -2.0 * ( likelihood[ best_model ].log_value( ) - likelihood[ 0 ].log_value( ) );
    try
    {
        double p_value = 1.0 - chi_square_cdf( LR, m_models[ 0 ]->df( ) - m_models[ best_model ]->df( ) );
        output[ 0 ] = p_value;
        return p_value;
    }
    catch(bad_domain_value &e)
    {
        /* Fall through and report the p-value as missing. */
    }

    return -9;
}
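/*
 * A minimal, self-contained sketch of the model-selection scheme used above,
 * assuming plain double log-likelihoods instead of log_double and taking the
 * chi-square CDF as a parameter (a stand-in for this project's chi_square_cdf).
 * Each model is scored with BIC = -2*logL + df*log(n); the best alternative
 * model (index > 0) is picked by minimum BIC and tested against the full model
 * (index 0) with a likelihood-ratio chi-square test. All names here are
 * illustrative and not part of this codebase.
 */
#include <vector>
#include <cmath>
#include <iterator>
#include <algorithm>

double select_model_and_test( const std::vector<double> &log_likelihood,
                              const std::vector<double> &df,
                              double num_samples,
                              double (*chi_square_cdf)( double, double ) )
{
    std::vector<double> bic( log_likelihood.size( ) );
    for(size_t i = 0; i < log_likelihood.size( ); i++)
    {
        bic[ i ] = -2.0 * log_likelihood[ i ] + df[ i ] * std::log( num_samples );
    }

    /* Skip the full model at index 0 when searching for the best alternative. */
    size_t best = std::distance( bic.begin( ), std::min_element( bic.begin( ) + 1, bic.end( ) ) );

    /* LR is asymptotically chi-square with df equal to the parameter difference. */
    double LR = -2.0 * ( log_likelihood[ best ] - log_likelihood[ 0 ] );
    return 1.0 - chi_square_cdf( LR, df[ 0 ] - df[ best ] );
}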
lexicalCache PhraseDictionaryMultiModelCounts::CacheLexicalStatistics( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const vector<lexicalTable*> &tables, bool is_input )
{
  //do all the necessary lexical table lookups and get counts, but don't apply weights yet

  Word null;
  if (is_input) {
    null.CreateFromString(Input, m_input, "NULL", false);
  } else {
    null.CreateFromString(Output, m_output, "NULL", false);
  }

  lexicalCache ret;

  // all target words have to be explained
  for(size_t ti=0; ti<alignment.size(); ti++) {
    const set< size_t > & srcIndices = alignment[ ti ];
    Word t_word = phraseT.GetWord(ti);

    vector<lexicalPair> ti_vector;
    if (srcIndices.empty()) {
      // explain unaligned word by NULL
      vector<float> joint_count (m_numModels);
      vector<float> marginals (m_numModels);

      FillLexicalCountsJoint(null, t_word, joint_count, tables);
      FillLexicalCountsMarginal(null, marginals, tables);

      ti_vector.push_back(make_pair(joint_count, marginals));
    } else {
      for (set< size_t >::const_iterator si(srcIndices.begin()); si != srcIndices.end(); ++si) {
        Word s_word = phraseS.GetWord(*si);

        vector<float> joint_count (m_numModels);
        vector<float> marginals (m_numModels);

        FillLexicalCountsJoint(s_word, t_word, joint_count, tables);
        FillLexicalCountsMarginal(s_word, marginals, tables);

        ti_vector.push_back(make_pair(joint_count, marginals));
      }
    }
    ret.push_back(ti_vector);
  }
  return ret;
}
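/*
 * Hedged sketch of how a cache built by CacheLexicalStatistics can be consumed:
 * for each target word, the model-combined probabilities of all of its
 * alignment points (or NULL) are averaged, and the per-word averages are
 * multiplied into a single lexical weight, following the standard lexical
 * weighting formula. The function name is hypothetical (it would need a
 * matching declaration in the class), and the direct call through
 * m_combineFunction assumes the same call form as GetLexicalProbability below.
 */
double PhraseDictionaryMultiModelCounts::ComputeLexicalWeightFromCache( lexicalCache &cache, vector<float> &weights ) const
{
  double lexScore = 1.0;
  for (size_t ti = 0; ti < cache.size(); ti++) {
    // every target word contributes one factor; cache[ti] is never empty
    // because unaligned words are explained by NULL in CacheLexicalStatistics
    double thisWordScore = 0.0;
    for (size_t j = 0; j < cache[ti].size(); j++) {
      thisWordScore += m_combineFunction(cache[ti][j].first, cache[ti][j].second, weights);
    }
    lexScore *= thisWordScore / cache[ti].size();
  }
  return lexScore;
}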
// get lexical probability for single word alignment pair
double PhraseDictionaryMultiModelCounts::GetLexicalProbability( Word &wordS, Word &wordT, const vector<lexicalTable*> &tables, vector<float> &multimodelweights ) const
{
  vector<float> joint_count (m_numModels);
  vector<float> marginals (m_numModels);

  FillLexicalCountsJoint(wordS, wordT, joint_count, tables);
  FillLexicalCountsMarginal(wordS, marginals, tables);

  // combine the per-model joint and marginal counts into a single probability
  double lexProb = m_combineFunction(joint_count, marginals, multimodelweights);

  return lexProb;
}
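/*
 * Illustrative example of one plausible combine function for the counts above:
 * linear interpolation of counts, where weighted joint counts are summed and
 * divided by the weighted marginal counts. This is only a sketch of the
 * general idea, not necessarily the m_combineFunction configured at runtime;
 * it assumes the file's usual "using namespace std".
 */
double LinearInterpolationOfCounts( vector<float> &joint_counts, vector<float> &marginals, vector<float> &multimodelweights )
{
  float joint = 0.0;
  float marginal = 0.0;
  for (size_t i = 0; i < joint_counts.size(); i++) {
    joint += multimodelweights[i] * joint_counts[i];
    marginal += multimodelweights[i] * marginals[i];
  }
  // guard against word pairs unseen in all models to avoid division by zero
  if (marginal <= 0.0) {
    return 0.0;
  }
  return joint / marginal;
}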
double stagewise_method::run(const snp_row &row1, const snp_row &row2, float *output)
{
    std::vector<log_double> likelihood( m_models.size( ), 0.0 );
    arma::mat count;
    float min_samples = 0.0;
    unsigned int sample_threshold = METHOD_SMALLEST_CELL_SIZE_BINOMIAL;
    if( m_model == "binomial" )
    {
        count = joint_count( row1, row2, get_data( )->phenotype, m_weight );
        set_num_ok_samples( (size_t) arma::accu( count ) );
        min_samples = arma::min( arma::min( count ) );
    }
    else if( m_model == "normal" )
    {
        /* For continuous phenotypes the second column holds the per-cell sample counts. */
        count = joint_count_cont( row1, row2, get_data( )->phenotype, m_weight );
        set_num_ok_samples( (size_t) arma::accu( count.col( 1 ) ) );
        min_samples = arma::min( count.col( 1 ) );
        sample_threshold = METHOD_SMALLEST_CELL_SIZE_NORMAL;
    }

    if( min_samples < sample_threshold )
    {
        return -9;
    }

    for(size_t i = 0; i < m_models.size( ); i++)
    {
        likelihood[ i ] = m_models[ i ]->prob( count );
    }

    /* Test each alternative model against the reference model at index 0. */
    for(size_t i = 1; i < m_models.size( ); i++)
    {
        double LR = -2.0 * ( likelihood[ i ].log_value( ) - likelihood[ 0 ].log_value( ) );
        try
        {
            output[ i - 1 ] = 1.0 - chi_square_cdf( LR, m_models[ 0 ]->df( ) - m_models[ i ]->df( ) );
        }
        catch(bad_domain_value &e)
        {
            /* Mark the p-value as missing instead of leaving it uninitialized. */
            output[ i - 1 ] = -9;
        }
    }

    return output[ 0 ];
}