void lm_env_stepwise::run(const snp_row &row, std::ostream &output) { arma::uvec missing = get_data( )->missing; bool valid; init_matrix_with_snp( row, missing, &valid ); normal model( "identity" ); glm_info null_info; glm_fit( m_null_matrix, get_data( )->phenotype, missing, model, null_info ); glm_info snp_info; glm_fit( m_snp_matrix, get_data( )->phenotype, missing, model, snp_info ); glm_info env_info; glm_fit( m_env_matrix, get_data( )->phenotype, missing, model, env_info ); glm_info add_info; glm_fit( m_add_matrix, get_data( )->phenotype, missing, model, add_info ); glm_info alt_info; glm_fit( m_alt_matrix, get_data( )->phenotype, missing, model, alt_info ); if( null_info.success && snp_info.success && env_info.success && add_info.success && alt_info.success && valid ) { try { double LR_null = -2 *( null_info.logl - alt_info.logl ); double p_null = 1.0 - chi_square_cdf( LR_null, m_alt_matrix.n_cols - m_null_matrix.n_cols ); double LR_snp = -2 *( snp_info.logl - alt_info.logl ); double p_snp = 1.0 - chi_square_cdf( LR_snp, m_alt_matrix.n_cols - m_snp_matrix.n_cols ); double LR_env = -2 *( env_info.logl - alt_info.logl ); double p_env = 1.0 - chi_square_cdf( LR_env, m_alt_matrix.n_cols - m_env_matrix.n_cols ); double LR_add = -2 *( add_info.logl - alt_info.logl ); double p_add = 1.0 - chi_square_cdf( LR_add, m_alt_matrix.n_cols - m_add_matrix.n_cols ); output << p_null << "\t" << p_snp << "\t" << p_env << "\t" << p_add << "\t"; } catch(bad_domain_value &e) { output << "NA\tNA\tNA\tNA\t"; } } else { output << "NA\tNA\tNA\tNA\t"; } }
double loglinear_method::run(const snp_row &row1, const snp_row &row2, float *output) { arma::mat count = joint_count( row1, row2, get_data( )->phenotype, m_weight ); size_t num_samples = arma::accu( count ); set_num_ok_samples( num_samples ); if( arma::min( arma::min( count ) ) < METHOD_SMALLEST_CELL_SIZE_BINOMIAL ) { return -9; } std::vector<log_double> likelihood( m_models.size( ), 0.0 ); std::vector<double> bic( m_models.size( ), 0.0 ); for(int i = 0; i < m_models.size( ); i++) { likelihood[ i ] = m_models[ i ]->prob( count ); bic[ i ] = -2.0 * likelihood[ i ].log_value( ) + m_models[ i ]->df( ) * log( num_samples ); } unsigned int best_model = std::distance( bic.begin( ), std::min_element( bic.begin( ) + 1, bic.end( ) ) ); double LR = -2.0*(likelihood[ best_model ].log_value( ) - likelihood[ 0 ].log_value( )); try { double p_value = 1.0 - chi_square_cdf( LR, m_models[ 0 ]->df( ) - m_models[ best_model ]->df( ) ); output[ 0 ] = p_value; return p_value; } catch(bad_domain_value &e) { } return -9; }
double boxcox_method::run(const snp_row &row1, const snp_row &row2, float *output) { arma::uvec missing = get_data( )->missing; m_model_matrix.update_matrix( row1, row2, missing ); set_num_ok_samples( missing.n_elem - sum( missing ) ); double max_logl = -DBL_MAX; int best_index = -1; for(int i = 0; i < m_model.size( ); i++) { glm_info null_info; glm_fit( m_model_matrix.get_null( ), m_fixed_pheno, missing, *m_model[ i ], null_info ); if( !null_info.success ) { continue; } if( null_info.logl > max_logl ) { max_logl = null_info.logl; best_index = i; } } if( best_index == -1 ) { return -9; } /* Fit alternative model and test against best null */ glm_info alt_info; glm_fit( m_model_matrix.get_alt( ), m_fixed_pheno, missing, *m_model[ best_index ], alt_info ); if( alt_info.success ) { try { double LR = -2 * ( max_logl - alt_info.logl ); double p = 1.0 - chi_square_cdf( LR, m_model_matrix.num_df( ) ); output[ 0 ] = m_lambda[ best_index ]; if( std::abs( m_lambda[ best_index ] ) < 1e-5 ) { output[ 0 ] = 0.0; } output[ 1 ] = LR; output[ 2 ] = p; return p; } catch(bad_domain_value &e) { } } return -9; }
double glm_method::run(const snp_row &row1, const snp_row &row2, float *output) { arma::uvec missing = get_data( )->missing; m_model_matrix.update_matrix( row1, row2, missing ); glm_info null_info; arma::vec b1 = glm_fit( m_model_matrix.get_null( ), get_data( )->phenotype, missing, m_model, null_info, get_data( )->fast_inversion ); glm_info alt_info; arma::vec b = glm_fit( m_model_matrix.get_alt( ), get_data( )->phenotype, missing, m_model, alt_info, get_data( )->fast_inversion ); set_num_ok_samples( missing.n_elem - sum( missing ) ); if( null_info.success && alt_info.success ) { double LR = -2 * ( null_info.logl - alt_info.logl ); try { output[ 0 ] = LR; output[ 1 ] = 1.0 - chi_square_cdf( LR, m_model_matrix.num_df( ) ); return output[ 1 ]; } catch(bad_domain_value &e) { } } return -9; }
// Computes a Cochran Q statistic over a collection of equally sized sets.
//
// Each entry of v1 is one set; position i across all sets forms one row.
// The function accumulates
//   - the sum of the row totals and the sum of the squared row totals, and
//   - the per-set totals (each value is truncated to size_t before being
//     added) together with the sum of their squares,
// and combines them into result.Q with result.df = (number of sets) - 1.
// result.p is 1 - chi_square_cdf(result.df, result.Q).
//
// v1[0] is read unconditionally, so v1 must not be empty; every set is
// indexed up to v1[0].size().
CochranQResult CochranQ(vector<RealSet> const & v1)
{
    double const numSets = v1.size();
    double const numRows = v1[0].size();

    vector<size_t> setTotals(numSets);
    double rowTotalSum = 0.0;
    double rowTotalSqSum = 0.0;

    for(size_t row = 0; row < numRows; row++){
        double rowTotal = 0.0;
        for(size_t set = 0; set < numSets; set++){
            rowTotal = rowTotal + v1[set].data(row);
            setTotals[set] = setTotals[set] + static_cast<size_t>(v1[set].data(row));
        }
        rowTotalSum = rowTotalSum + rowTotal;
        rowTotalSqSum = rowTotalSqSum + (rowTotal * rowTotal);
    }

    double setTotalSqSum = 0.0;
    for(size_t set = 0; set < setTotals.size(); set++){
        setTotalSqSum = setTotalSqSum + (setTotals[set] * setTotals[set]);
    }

    CochranQResult result;
    result.df = static_cast<int>(numSets - 1.0);
    result.Q = (result.df * ((numSets * setTotalSqSum) - (rowTotalSum * rowTotalSum))) / ((numSets * rowTotalSum) - rowTotalSqSum);
    result.p = 1.0 - chi_square_cdf(result.df, result.Q);

    return result;
}
/*
 * Scheme binding "cdf-chi-square".
 *
 * Checks that the first argument satisfies gh_exact_p and the second
 * gh_number_p, converts them to int and double, and returns
 * chi_square_cdf(df, x) wrapped as a Scheme double.
 */
SCM scm_cdf_chi_square(SCM sdf, SCM sx)
{
    SCM_ASSERT(gh_exact_p(sdf), sdf, SCM_ARG1, "cdf-chi-square");
    SCM_ASSERT(gh_number_p(sx), sx, SCM_ARG2, "cdf-chi-square");

    const int degrees = gh_scm2int(sdf);
    const double value = gh_scm2double(sx);

    return gh_double2scm(chi_square_cdf(degrees, value));
}
double stagewise_method::run(const snp_row &row1, const snp_row &row2, float *output) { std::vector<log_double> likelihood( m_models.size( ), 0.0 ); arma::mat count; float min_samples = 0.0; unsigned int sample_threshold = METHOD_SMALLEST_CELL_SIZE_BINOMIAL; if( m_model == "binomial" ) { count = joint_count( row1, row2, get_data( )->phenotype, m_weight ); set_num_ok_samples( (size_t) arma::accu( count ) ); min_samples = arma::min( arma::min( count ) ); } else if( m_model == "normal" ) { count = joint_count_cont( row1, row2, get_data( )->phenotype, m_weight ); set_num_ok_samples( (size_t) arma::accu( count.col( 1 ) ) ); min_samples = arma::min( count.col( 1 ) ); sample_threshold = METHOD_SMALLEST_CELL_SIZE_NORMAL; } if( min_samples < sample_threshold ) { return -9; } for(int i = 0; i < m_models.size( ); i++) { likelihood[ i ] = m_models[ i ]->prob( count ); } for(int i = 1; i < m_models.size( ); i++) { double LR = -2.0*(likelihood[ i ].log_value( ) - likelihood[ 0 ].log_value( )); try { output[ i - 1 ] = 1.0 - chi_square_cdf( LR, m_models[ 0 ]->df( ) - m_models[ i ]->df( ) ); } catch(bad_domain_value &e) { } } return output[ 0 ]; }
double wald_method::run(const snp_row &row1, const snp_row &row2, float *output) { arma::mat n0 = arma::zeros<arma::mat>( 3, 3 ); arma::mat n1 = arma::zeros<arma::mat>( 3, 3 ); for(int i = 0; i < row1.size( ); i++) { unsigned char snp1 = row1[ i ]; unsigned char snp2 = row2[ i ]; if( snp1 == 3 || snp2 == 3 || m_missing[ i ] == 1 ) { continue; } unsigned int pheno = m_pheno[ i ]; if( pheno == 0 ) { n0( snp1, snp2 ) += 1; } else if( pheno == 1 ) { n1( snp1, snp2 ) += 1; } } arma::mat eta( 3, 3 ); double num_samples = 0.0; for(int i = 0; i < 3; i++) { for(int j = 0; j < 3; j++) { if( n0( i, j ) < METHOD_SMALLEST_CELL_SIZE_BINOMIAL || n1( i, j ) < METHOD_SMALLEST_CELL_SIZE_BINOMIAL ) { continue; } eta( i, j ) = log( n1( i, j ) / n0( i, j ) ); num_samples += n1( i, j ) + n0( i, j ); } } /* Find valid parameters and estimate beta */ int num_valid = 0; arma::uvec valid( 4 ); m_beta = arma::zeros<arma::vec>( 4 ); int i_map[] = { 1, 1, 2, 2 }; int j_map[] = { 1, 2, 1, 2 }; for(int i = 0; i < 4; i++) { int c_i = i_map[ i ]; int c_j = j_map[ i ]; if( n0( 0, 0 ) >= METHOD_SMALLEST_CELL_SIZE_NORMAL && n0( 0, c_j ) >= METHOD_SMALLEST_CELL_SIZE_NORMAL && n0( c_i, 0 ) >= METHOD_SMALLEST_CELL_SIZE_NORMAL && n0( c_i, c_j) >= METHOD_SMALLEST_CELL_SIZE_NORMAL && n1( 0, 0 ) >= METHOD_SMALLEST_CELL_SIZE_NORMAL && n1( 0, c_j ) >= METHOD_SMALLEST_CELL_SIZE_NORMAL && n1( c_i, 0 ) >= METHOD_SMALLEST_CELL_SIZE_NORMAL && n1( c_i, c_j) >= METHOD_SMALLEST_CELL_SIZE_NORMAL ) { valid[ num_valid ] = i; m_beta[ num_valid ] = eta( 0, 0 ) - eta( 0, c_j ) - eta( c_i, 0 ) + eta( c_i, c_j ); num_valid++; } } set_num_ok_samples( (size_t)num_samples ); if( num_valid <= 0 ) { return -9; } valid.resize( num_valid ); m_beta.resize( num_valid ); /* Construct covariance matrix */ m_C = arma::zeros<arma::mat>( num_valid, num_valid ); for(int iv = 0; iv < num_valid; iv++) { int i = valid[ iv ]; int c_i = i_map[ i ]; int c_j = j_map[ i ]; for(int jv = 0; jv < num_valid; jv++) { int j = valid[ jv ]; int o_i = 
i_map[ j ]; int o_j = j_map[ j ]; int same_row = c_i == o_i; int same_col = c_j == o_j; int in_cell = i == j; m_C( iv, jv ) = 1.0 / n0( 0, 0 ) + same_col / n0( 0, c_j ) + same_row / n0( c_i, 0 ) + in_cell / n0( c_i, c_j ); m_C( iv, jv ) += 1.0 / n1( 0, 0 ) + same_col / n1( 0, c_j ) + same_row / n1( c_i, 0 ) + in_cell / n1( c_i, c_j ); } } arma::mat Cinv( num_valid, num_valid ); if( !inv( Cinv, m_C ) ) { return -9; } /* Test if b != 0 with Wald test */ double chi = dot( m_beta, Cinv * m_beta ); output[ 0 ] = chi; output[ 1 ] = 1.0 - chi_square_cdf( chi, num_valid ); output[ 2 ] = valid.n_elem; return output[ 1 ]; }