예제 #1
0
/**
 * Entry point: parses the command line, sets up the prior parameters and
 * runs either besiq_method (when --additive is given) or besiq_fine_method
 * (otherwise) on the parsed pairs.
 *
 * Exactly two positional arguments are required; otherwise the help text is
 * printed and the process exits with status 1.
 *
 * @param argc Number of command line arguments.
 * @param argv Command line arguments.
 *
 * @return 0 on success.
 */
int
main(int argc, char *argv[])
{
    OptionParser parser = create_common_options( USAGE, DESCRIPTION, false );
    
    OptionGroup group = OptionGroup( parser, "Options for bayes", "These options will change the behaviour of bayes and fine." );
    group.add_option( "-n", "--num-interactions" ).type( "int" ).help( "The number of interactions to correct for, this is used in the model prior (default: all)." );
    group.add_option( "-s", "--num-single" ).type( "int" ).help( "The number of snps to consider when correcting (default: proportional to square of the number of interactions)." );
    group.add_option( "-t", "--single-prior" ).type( "float" ).help( "The probability that a single snp is associated (default: %default)." ).set_default( 0.0 );
    group.add_option( "-i", "--mc-iterations" ).type( "int" ).help( "The number of monte carlo iterations to use in the fine method (default: %default)." ).set_default( 4000000 );
    group.add_option( "-a", "--beta-prior-param1" ).type( "float" ).help( "First shape parameter of beta prior (default: %default)." ).set_default( 2.0 );
    group.add_option( "-b", "--beta-prior-param2" ).type( "float" ).help( "Second shape parameter of beta prior (default: %default)." ).set_default( 2.0 );
    group.add_option( "-e", "--estimate-prior-params" ).action( "store_true" ).help( "Estimate prior parameters from data by permuting phenotype (default: off)." );
    parser.add_option( "--additive" ).action( "store_true" ).help( "Use an additive model (slow)." );
    parser.add_option_group( group );

    Values options = parser.parse_args( argc, argv );
    if( parser.args( ).size( ) != 2 )
    {
        parser.print_help( );
        exit( 1 );
    }

    shared_ptr<common_options> parsed_data = parse_common_options( options, parser.args( ) );

    /* Read prior parameters, the command line values are replaced by
     * estimates from the data when --estimate-prior-params is set. */
    arma::vec alpha = arma::ones<arma::vec>( 2 );
    alpha[ 0 ] = (float) options.get( "beta_prior_param1" );
    alpha[ 1 ] = (float) options.get( "beta_prior_param2" );
    if( options.is_set( "estimate_prior_params" ) )
    {
        alpha = estimate_prior_parameters( parsed_data->genotypes, parsed_data->data->phenotype, parsed_data->data->missing, 5000 );
    }

    /* Pass the correction settings on to the method data */
    parsed_data->data->single_prior = (float) options.get( "single_prior" );
    parsed_data->data->num_single = (unsigned int) options.get( "num_single" );
    parsed_data->data->num_interactions = (unsigned int) options.get( "num_interactions" );

    /* Allocate exactly one method object. Both branches below assign m, so
     * allocating one up front would only leak it. */
    method_type *m = NULL;
    if( options.is_set( "additive" ) )
    {
        m = new besiq_method( parsed_data->data, alpha );
    }
    else
    {
        m = new besiq_fine_method( parsed_data->data, (int) options.get( "mc_iterations" ), alpha );
    }
    
    run_method( *m, parsed_data->genotypes, *parsed_data->pairs, *parsed_data->result_file );

    delete m;

    return 0;
}
예제 #2
0
int
main(int argc, char *argv[])
{
    OptionParser parser = create_options( );
    
    Values options = parser.parse_args( argc, argv );
    if( parser.args( ).size( ) < 2 )
    {
        parser.print_help( );
        exit( 1 );
    }
    
    std::vector<plink_file_ptr> plink_files = open_plink_file( parser.args( ) );
    std::vector<genotype_matrix_ptr> genotypes = create_genotype_matrices( plink_files );
    
    /** 
     * Create pair iterator 
     */
    size_t split = (size_t) options.get( "split" );
    size_t num_splits = (size_t) options.get( "num_splits" );
    if( split > num_splits || split == 0 || num_splits == 0 )
    {
        std::cerr << "besiq: error: Num splits and split must be > 0, and split <= num_splits." << std::endl;
        exit( 1 );
    }
   
    std::vector<std::string> loci =  plink_files[ 0 ]->get_locus_names( );
    pairfile *pairs = open_pair_file( parser.args( )[ 0 ].c_str( ), loci );
    if( pairs == NULL || !pairs->open( split, num_splits ) )
    {
        std::cerr << "besiq: error: Could not open pair file." << std::endl;
        exit( 1 );
    }
    logp_grid grid( plink_files[ 0 ]->get_loci( ), 7000, 500000 );

    double threshold = (double) options.get( "threshold" );

    /**
     * Set up method
     */
    std::vector<wald_method *> methods;
    for(int i = 0; i < plink_files.size( ); i++)
    {
        method_data_ptr data( new method_data( ) );
        data->missing = zeros<uvec>( plink_files[ i ]->get_samples( ).size( ) );
        data->phenotype = create_phenotype_vector( plink_files[ i ]->get_samples( ), data->missing );

        methods.push_back( new wald_method( data ) );
    }

    /**
     * Open results.
     */
    resultfile *result = NULL;
    if( options.is_set( "out" ) )
    {
        result = new bresultfile( options[ "out" ], loci );
    }
    else
    {
        std::ios_base::sync_with_stdio( false );
        result = new tresultfile( "-", "w" );
    }
    if( result == NULL || !result->open( ) )
    {
        std::cerr << "besiq: error: Can not open result file." << std::endl;
        exit( 1 );
    }
    std::vector<std::string> header;
    header.push_back( "W" );
    header.push_back( "P" );
    header.push_back( "N" );
    result->set_header( header );

    /**
     * Run analysis
     */
    std::pair<std::string, std::string> pair;
    float *output = new float[ methods[ 0 ]->init( ).size( ) ];
    float meta_output[ header.size( ) ];
    while( pairs->read( pair ) )
    {
        /* Compute betas and covariances */
        std::vector<arma::vec> betas( methods.size( ), arma::zeros<arma::vec>( 4 ) );
        std::vector<arma::mat> weights( methods.size( ), arma::zeros<arma::mat>( 4, 4 ) );
        size_t N = 0;
        bool all_valid = true;
        for(int i = 0; i < methods.size( ); i++)
        {
            snp_row const *row1 = genotypes[ i ]->get_row( pair.first );
            snp_row const *row2 = genotypes[ i ]->get_row( pair.second );

            methods[ i ]->run( *row1, *row2, output );

            betas[ i ] = methods[ i ]->get_last_beta( );

            arma::mat C = methods[ i ]->get_last_C( );
            arma::mat Cinv;

            if( C.n_cols != 4 || C.n_rows != 4 || !arma::inv( Cinv, C ) )
            {
                all_valid = false;
            }
            
            weights[ i ] = Cinv;

            N += methods[ i ]->num_ok_samples( *row1, *row2 );
        }

        if( !all_valid )
        {
            continue;
        }

        /* Computed weighted average of betas (fixed effect assumption) */
        arma::mat Csum = arma::zeros<arma::mat>( 4, 4 );
        for(int i = 0; i < weights.size( ); i++)
        {
            Csum += weights[ i ];
        }
        arma::mat Csum_inv;
        if( !arma::inv( Csum_inv, Csum ) )
        {
            continue;
        }

        arma::vec final_beta = arma::zeros<arma::vec>( 4 );
        arma::mat final_C = Csum_inv;
        for(int i = 0; i < weights.size( ); i++)
        {
            final_beta += weights[ i ] * betas[ i ];
        }
        final_beta = Csum_inv * final_beta;

        arma::mat final_C_inv;
        if( !arma::inv( final_C_inv, final_C ) )
        {
            continue;
        }

        double chi = dot( final_beta, final_C_inv * final_beta );
        double final_p = 1.0 - chi_square_cdf( chi, 4 );
        
        grid.add_pvalue( pair.first, pair.second, final_p );

        if( threshold != -9 && final_p > threshold )
        {
            continue;
        }

        meta_output[ 0 ] = chi;
        meta_output[ 1 ] = final_p;
        meta_output[ 2 ] = N;
        
        result->write( pair, meta_output );
    }

    result->close( );

    if( options.is_set( "grid" ) )
    {
        std::ofstream grid_file( options[ "grid" ] );
        grid.write_grid( grid_file );
        grid_file.close( );
    }
    /* Delete allocated stuff */
    
    return 0;
}