Beispiel #1
0
void lm_env_stepwise::run(const snp_row &row, std::ostream &output)
{
    arma::uvec missing = get_data( )->missing;

    bool valid;
    init_matrix_with_snp( row, missing, &valid );

    normal model( "identity" ); 
    glm_info null_info;
    glm_fit( m_null_matrix, get_data( )->phenotype, missing, model, null_info );
    
    glm_info snp_info;
    glm_fit( m_snp_matrix, get_data( )->phenotype, missing, model, snp_info );
    
    glm_info env_info;
    glm_fit( m_env_matrix, get_data( )->phenotype, missing, model, env_info );
    
    glm_info add_info;
    glm_fit( m_add_matrix, get_data( )->phenotype, missing, model, add_info );

    glm_info alt_info;
    glm_fit( m_alt_matrix, get_data( )->phenotype, missing, model, alt_info );

    if( null_info.success && snp_info.success && env_info.success && add_info.success && alt_info.success && valid )
    {
        try
        {
            double LR_null = -2 *( null_info.logl - alt_info.logl );
            double p_null = 1.0 - chi_square_cdf( LR_null, m_alt_matrix.n_cols - m_null_matrix.n_cols );

            double LR_snp = -2 *( snp_info.logl - alt_info.logl );
            double p_snp = 1.0 - chi_square_cdf( LR_snp, m_alt_matrix.n_cols - m_snp_matrix.n_cols );

            double LR_env = -2 *( env_info.logl - alt_info.logl );
            double p_env = 1.0 - chi_square_cdf( LR_env, m_alt_matrix.n_cols - m_env_matrix.n_cols );

            double LR_add = -2 *( add_info.logl - alt_info.logl );
            double p_add = 1.0 - chi_square_cdf( LR_add, m_alt_matrix.n_cols - m_add_matrix.n_cols );

            output << p_null << "\t" << p_snp << "\t" << p_env << "\t" << p_add << "\t";
        }
        catch(bad_domain_value &e)
        {
            output << "NA\tNA\tNA\tNA\t";
        }
    }
    else
    {
        output << "NA\tNA\tNA\tNA\t";
    }
}
Beispiel #2
0
double
loglinear_method::run(const snp_row &row1, const snp_row &row2, float *output)
{
    arma::mat count = joint_count( row1, row2, get_data( )->phenotype, m_weight );
    size_t num_samples = arma::accu( count );
    set_num_ok_samples( num_samples );
    if( arma::min( arma::min( count ) ) < METHOD_SMALLEST_CELL_SIZE_BINOMIAL )
    {
        return -9;
    }

    std::vector<log_double> likelihood( m_models.size( ), 0.0 );
    std::vector<double> bic( m_models.size( ), 0.0 );
    for(int i = 0; i < m_models.size( ); i++)
    {
        likelihood[ i ] = m_models[ i ]->prob( count );
        bic[ i ] = -2.0 * likelihood[ i ].log_value( ) + m_models[ i ]->df( ) * log( num_samples );
    }

    unsigned int best_model = std::distance( bic.begin( ), std::min_element( bic.begin( ) + 1, bic.end( ) ) );
    double LR = -2.0*(likelihood[ best_model ].log_value( ) - likelihood[ 0 ].log_value( ));

    try
    {
        double p_value = 1.0 - chi_square_cdf( LR, m_models[ 0 ]->df( ) - m_models[ best_model ]->df( ) );
        output[ 0 ] = p_value;
        return p_value;
    }
    catch(bad_domain_value &e)
    {
    }

    return -9;
}
Beispiel #3
0
double boxcox_method::run(const snp_row &row1, const snp_row &row2, float *output)
{
    arma::uvec missing = get_data( )->missing;
    m_model_matrix.update_matrix( row1, row2, missing );
    set_num_ok_samples( missing.n_elem - sum( missing ) );

    double max_logl = -DBL_MAX;
    int best_index = -1;
    
    for(int i = 0; i < m_model.size( ); i++)
    {
        glm_info null_info;
        glm_fit( m_model_matrix.get_null( ), m_fixed_pheno, missing, *m_model[ i ], null_info );

        if( !null_info.success )
        {
            continue;
        }

        if( null_info.logl > max_logl )
        {
            max_logl = null_info.logl;
            best_index = i;
        }
    }
    
    if( best_index == -1 )
    {
        return -9;
    }

    /* Fit alternative model and test against best null */
    glm_info alt_info;
    glm_fit( m_model_matrix.get_alt( ), m_fixed_pheno, missing, *m_model[ best_index ], alt_info );

    if( alt_info.success )
    {
        try
        {
            double LR = -2 * ( max_logl - alt_info.logl );
            double p = 1.0 - chi_square_cdf( LR, m_model_matrix.num_df( ) );

            output[ 0 ] = m_lambda[ best_index ];
            if( std::abs( m_lambda[ best_index ] ) < 1e-5 )
            {
                output[ 0 ] = 0.0;
            }

            output[ 1 ] = LR;
            output[ 2 ] = p;

            return p;
        }
        catch(bad_domain_value &e)
        {
        }
    }

    return -9;
}
Beispiel #4
0
double glm_method::run(const snp_row &row1, const snp_row &row2, float *output)
{ 
    arma::uvec missing = get_data( )->missing;

    m_model_matrix.update_matrix( row1, row2, missing );

    glm_info null_info;
    arma::vec b1 = glm_fit( m_model_matrix.get_null( ), get_data( )->phenotype, missing, m_model, null_info, get_data( )->fast_inversion );

    glm_info alt_info;
    arma::vec b = glm_fit( m_model_matrix.get_alt( ), get_data( )->phenotype, missing, m_model, alt_info, get_data( )->fast_inversion );

    set_num_ok_samples( missing.n_elem - sum( missing ) );

    if( null_info.success && alt_info.success )
    {
        double LR = -2 * ( null_info.logl - alt_info.logl );

        try
        {
            output[ 0 ] = LR;
            output[ 1 ] = 1.0 - chi_square_cdf( LR, m_model_matrix.num_df( ) );
            return output[ 1 ];
        }
        catch(bad_domain_value &e)
        {

        }
    }

    return -9;
}
Beispiel #5
0
/**
 * Computes the Q statistic, its degrees of freedom and the corresponding
 * p-value from a collection of equally indexed data sets.
 *
 * With k = v1.size(), G_j the (integer-truncated) total of set j, L_i the
 * total over all sets at position i and N the sum of all L_i:
 *
 *   df = k - 1
 *   Q  = df * (k * sum(G_j^2) - N^2) / (k * N - sum(L_i^2))
 *   p  = 1 - chi_square_cdf(df, Q)
 *
 * @param v1 The data sets. Must be non-empty, because the number of
 *           positions is taken from v1[0]. Every set is read at each of
 *           those positions.
 * @return A CochranQResult with df, Q and p filled in.
 *
 * NOTE(review): the denominator is not checked for zero - confirm how
 * chi_square_cdf reacts to a non-finite Q.
 */
CochranQResult CochranQ(vector<RealSet> const & v1) {
  const size_t setCount = v1.size();
  const size_t positionCount = v1[0].size();

  /* Per-set totals, accumulated as integers. */
  vector<size_t> setTotals(setCount);
  double grandTotal = 0.0;
  double sumSquaredPositionTotals = 0.0;

  for (size_t pos = 0; pos < positionCount; ++pos) {
    double positionTotal = 0.0;
    for (size_t s = 0; s < setCount; ++s) {
      positionTotal = positionTotal + v1[s].data(pos);
      setTotals[s] = setTotals[s] + (size_t)(v1[s].data(pos));
    }
    grandTotal = grandTotal + positionTotal;
    sumSquaredPositionTotals = sumSquaredPositionTotals + (positionTotal * positionTotal);
  }

  double sumSquaredSetTotals = 0.0;
  for (size_t s = 0; s < setTotals.size(); ++s) {
    sumSquaredSetTotals = sumSquaredSetTotals + (setTotals[s] * setTotals[s]);
  }

  CochranQResult result;
  result.df = (int)(setCount - 1.0);
  result.Q = (result.df * ((setCount * sumSquaredSetTotals) - (grandTotal * grandTotal)))
             / ((setCount * grandTotal) - sumSquaredPositionTotals);
  result.p = 1.0 - chi_square_cdf(result.df, result.Q);
  return result;
}
Beispiel #6
0
/*
 * Scheme binding "cdf-chi-square".
 *
 * sdf must satisfy gh_exact_p and sx must satisfy gh_number_p; otherwise
 * SCM_ASSERT reports the offending argument position. The arguments are
 * converted to int and double, passed to chi_square_cdf as (df, x), and
 * the result is returned as a Scheme double.
 */
SCM
scm_cdf_chi_square(SCM sdf, SCM sx)
{
  SCM_ASSERT(gh_exact_p(sdf), sdf, SCM_ARG1, "cdf-chi-square");
  SCM_ASSERT(gh_number_p(sx), sx, SCM_ARG2, "cdf-chi-square");

  const int degrees = gh_scm2int(sdf);
  const double value = gh_scm2double(sx);

  return gh_double2scm(chi_square_cdf(degrees, value));
}
Beispiel #7
0
double
stagewise_method::run(const snp_row &row1, const snp_row &row2, float *output)
{
    std::vector<log_double> likelihood( m_models.size( ), 0.0 );

    arma::mat count;
    float min_samples = 0.0;
    unsigned int sample_threshold = METHOD_SMALLEST_CELL_SIZE_BINOMIAL;
    if( m_model == "binomial" )
    {
        count = joint_count( row1, row2, get_data( )->phenotype, m_weight );
        set_num_ok_samples( (size_t) arma::accu( count ) );
        min_samples = arma::min( arma::min( count ) );
    }
    else if( m_model == "normal" )
    {
        count = joint_count_cont( row1, row2, get_data( )->phenotype, m_weight );
        set_num_ok_samples( (size_t) arma::accu( count.col( 1 ) ) );
        min_samples = arma::min( count.col( 1 ) );
        sample_threshold = METHOD_SMALLEST_CELL_SIZE_NORMAL;
    }
    
    if( min_samples < sample_threshold )
    {
        return -9;
    }
    
    for(int i = 0; i < m_models.size( ); i++)
    {
        likelihood[ i ] = m_models[ i ]->prob( count );
    }

    for(int i = 1; i < m_models.size( ); i++)
    {
        double LR = -2.0*(likelihood[ i ].log_value( ) - likelihood[ 0 ].log_value( ));

        try
        {
            output[ i - 1 ] = 1.0 - chi_square_cdf( LR, m_models[ 0 ]->df( ) - m_models[ i ]->df( ) );
        }
        catch(bad_domain_value &e)
        {
        }
    }

    return output[ 0 ];
}
Beispiel #8
0
double
wald_method::run(const snp_row &row1, const snp_row &row2, float *output)
{
    arma::mat n0 = arma::zeros<arma::mat>( 3, 3 );
    arma::mat n1 = arma::zeros<arma::mat>( 3, 3 );
    for(int i = 0; i < row1.size( ); i++)
    {
        unsigned char snp1 = row1[ i ];
        unsigned char snp2 = row2[ i ];
        if( snp1 == 3 || snp2 == 3 || m_missing[ i ] == 1 )
        {
            continue;
        }

        unsigned int pheno = m_pheno[ i ];
        if( pheno == 0 )
        {
            n0( snp1, snp2 ) += 1;
        }
        else if( pheno == 1 )
        {
            n1( snp1, snp2 ) += 1;
        }
    }

    arma::mat eta( 3, 3 );
    double num_samples = 0.0;
    for(int i = 0; i < 3; i++)
    {
        for(int j = 0; j < 3; j++)
        {
            if( n0( i, j ) < METHOD_SMALLEST_CELL_SIZE_BINOMIAL || n1( i, j ) < METHOD_SMALLEST_CELL_SIZE_BINOMIAL )
            {
                continue;
            }

            eta( i, j ) = log( n1( i, j ) / n0( i, j ) );
            num_samples += n1( i, j ) + n0( i, j );
        }
    }

    /* Find valid parameters and estimate beta */
    int num_valid = 0;
    arma::uvec valid( 4 );
    m_beta = arma::zeros<arma::vec>( 4 );
    int i_map[] = { 1, 1, 2, 2 };
    int j_map[] = { 1, 2, 1, 2 };
    for(int i = 0; i < 4; i++)
    {
        int c_i = i_map[ i ];
        int c_j = j_map[ i ];
        if( n0( 0, 0 ) >= METHOD_SMALLEST_CELL_SIZE_NORMAL &&
            n0( 0, c_j ) >= METHOD_SMALLEST_CELL_SIZE_NORMAL &&
            n0( c_i, 0 ) >= METHOD_SMALLEST_CELL_SIZE_NORMAL &&
            n0( c_i, c_j) >= METHOD_SMALLEST_CELL_SIZE_NORMAL &&
            n1( 0, 0 ) >= METHOD_SMALLEST_CELL_SIZE_NORMAL &&
            n1( 0, c_j ) >= METHOD_SMALLEST_CELL_SIZE_NORMAL &&
            n1( c_i, 0 ) >= METHOD_SMALLEST_CELL_SIZE_NORMAL &&
            n1( c_i, c_j) >= METHOD_SMALLEST_CELL_SIZE_NORMAL )
        {
            valid[ num_valid ] = i;
            m_beta[ num_valid ] = eta( 0, 0 ) - eta( 0, c_j ) - eta( c_i, 0 ) + eta( c_i, c_j );
            num_valid++;
        }
    }
    set_num_ok_samples( (size_t)num_samples );
    if( num_valid <= 0 )
    {
        return -9;
    }
    
    valid.resize( num_valid );
    m_beta.resize( num_valid );

    /* Construct covariance matrix */
    m_C = arma::zeros<arma::mat>( num_valid, num_valid );
    for(int iv = 0; iv < num_valid; iv++)
    {
        int i = valid[ iv ];
        int c_i = i_map[ i ];
        int c_j = j_map[ i ];

        for(int jv = 0; jv < num_valid; jv++)
        {
            int j = valid[ jv ];
            int o_i = i_map[ j ];
            int o_j = j_map[ j ];

            int same_row = c_i == o_i;
            int same_col = c_j == o_j;
            int in_cell = i == j;

            m_C( iv, jv ) = 1.0 / n0( 0, 0 ) + same_col / n0( 0, c_j ) + same_row / n0( c_i, 0 ) + in_cell / n0( c_i, c_j );
            m_C( iv, jv ) += 1.0 / n1( 0, 0 ) + same_col / n1( 0, c_j ) + same_row / n1( c_i, 0 ) + in_cell / n1( c_i, c_j );
        }
    }

    arma::mat Cinv( num_valid, num_valid );
    if( !inv( Cinv, m_C ) )
    {
        return -9;
    }
    
    /* Test if b != 0 with Wald test */
    double chi = dot( m_beta, Cinv * m_beta );
    output[ 0 ] = chi;
    output[ 1 ] = 1.0 - chi_square_cdf( chi, num_valid );
    output[ 2 ] = valid.n_elem;

    return output[ 1 ];
}