Exemple #1
0
/*Expands the coarse BLAST hits of a single query.
 *
 *iterations: vector of coarse BLAST iterations; each element is read as an
 *            xmlNode *.
 *index:      position in iterations of the query whose hits are expanded.
 *db:         database used for the search; its coarse_db and com_db members
 *            are handed to cb_coarse_expand.
 *
 *Returns a newly created vector holding every element produced by the calls
 *to cb_coarse_expand (the re-created original sequence sections) for the
 *hits of the selected iteration.  The hits parsed from the iteration, and
 *their hsps vectors, are freed before returning; the expansions themselves
 *are handed over to the returned vector.
 */
struct DSVector *expand_blast_hits(struct DSVector *iterations, int index,
                                   struct cb_database_r *db){
    struct DSVector *expanded_hits = ds_vector_create();
    struct DSVector *hits =
        get_blast_hits((xmlNode *)ds_vector_get(iterations, index));
    int hit_idx, hsp_idx, seq_idx;

    for (hit_idx = 0; hit_idx < hits->size; hit_idx++) {
        struct hit *coarse_hit = (struct hit *)ds_vector_get(hits, hit_idx);
        struct DSVector *hsp_list = coarse_hit->hsps;

        for (hsp_idx = 0; hsp_idx < hsp_list->size; hsp_idx++) {
            struct hsp *pair =
                (struct hsp *)ds_vector_get(hsp_list, hsp_idx);
            /*hit_from/hit_to are shifted down by one before expansion
              (presumably 1-based BLAST coordinates -- TODO confirm).*/
            int32_t range_start = pair->hit_from - 1;
            int32_t range_end   = pair->hit_to - 1;
            int32_t seq_id      = coarse_hit->accession;
            /*NOTE(review): the trailing 50 is forwarded verbatim; its
              meaning is defined by cb_coarse_expand.*/
            struct DSVector *expansions =
                cb_coarse_expand(db->coarse_db, db->com_db, seq_id,
                                 range_start, range_end, 50);

            /*Move the expansions into the result vector, then drop only
              the temporary container.*/
            for (seq_idx = 0; seq_idx < expansions->size; seq_idx++) {
                ds_vector_append(expanded_hits,
                                 ds_vector_get(expansions, seq_idx));
            }
            ds_vector_free_no_data(expansions);
        }

        /*This hit is fully processed: release its hsps and the hit itself.*/
        ds_vector_free(coarse_hit->hsps);
        free(coarse_hit);
    }

    /*Every hit was freed above, so only the container remains.*/
    ds_vector_free_no_data(hits);

    return expanded_hits;
}
/*Free handler for a vector object: runs the standard zend object destructor
 *on the embedded std member, then frees the wrapped vector.
 *
 *object is cast directly to php_ds_vector_t *.
 */
static void php_ds_vector_free_object(zend_object *object)
{
    php_ds_vector_t *intern = (php_ds_vector_t *) object;

    /*Tear down the zend side first, then the vector it wrapped.*/
    zend_object_std_dtor(&intern->std);
    ds_vector_free(intern->vector);
}
Exemple #3
0
/*Entry point of the search tool.
 *
 *Expects at least two positional arguments: the database directory and the
 *query FASTA file.  The steps are:
 *  1. Parse the command line and load the database.
 *  2. Run coarse BLAST (blast_coarse).
 *  3. Read all query sequences from the FASTA file.
 *  4. Parse CaBLAST_temp_blast_results.xml and expand the hits of every
 *     iteration with expand_blast_hits.
 *  5. Write the expanded sequences (write_fine_fasta) and run fine BLAST
 *     (blast_fine).
 *  6. Release all resources and, unless no_cleanup is set, delete the
 *     temporary coarse BLAST results file.
 *
 *Returns 0 on success.  Exits with status 1 on a usage error, when a file
 *cannot be opened, or when the coarse BLAST XML cannot be parsed.
 */
int main(int argc, char **argv){
    FILE *query_file = NULL, *test_hits_file = NULL;
    struct cb_database_r *db = NULL;
    struct opt_config *conf;
    struct opt_args *args;
    struct DSVector *iterations = NULL, *expanded_hits = NULL, *queries = NULL,
                    *oseqs = ds_vector_create();
    struct fasta_seq *query = NULL;
    xmlDoc *doc = NULL;
    xmlNode *root = NULL;
    uint64_t dbsize = 0;
    int i = 0, j = 0;

    conf = load_search_args();
    args = opt_config_parse(conf, argc, argv);

    if (args->nargs < 2) {
        fprintf(stderr, 
                "Usage: %s [flags] database-dir fasta-file "
                "[ --blast_args BLASTN_ARGUMENTS ]\n", argv[0]);
        opt_config_print_usage(conf);
        exit(1);
    }

    /*Delete any results file left by a previous run.  Failure (typically
      because no such file exists) is deliberately ignored.*/
    remove("CaBLAST_results.xml");

    if (!search_flags.hide_progress)
        fprintf(stderr, "Loading database data\n\n");


    db = cb_database_r_init(args->args[0],
                            (search_flags.load_coarse_db ||
                             search_flags.load_coarse_residues),
                            (search_flags.load_coarse_db ||
                             search_flags.load_coarse_links),
                            search_flags.load_compressed_db,
                            search_flags.link_block_size);
    /*NOTE(review): the 8 is presumably the byte width of the stored size --
      confirm against read_int_from_file.*/
    dbsize = read_int_from_file(8, db->coarse_db->db->file_params);

    if (!search_flags.hide_progress)
        fprintf(stderr, "Running coarse BLAST\n\n");
    blast_coarse(args, dbsize);

    if (NULL == (query_file = fopen(args->args[1], "r"))) {
        fprintf(stderr, "fopen: 'fopen %s' failed: %s\n",
                args->args[1], strerror(errno));
        exit(1);
    }

    /*Read every query sequence until fasta_read_next returns NULL.*/
    queries = ds_vector_create();
    query = fasta_read_next(query_file, "");
    while (query) {
        ds_vector_append(queries, (void *)query);
        query = fasta_read_next(query_file, "");
    }

    fclose(query_file);

    if (!search_flags.hide_progress)
        fprintf(stderr, "Processing coarse BLAST hits for fine BLAST\n\n");
    if (search_flags.show_hit_info) {
        if (NULL == (test_hits_file = fopen("CaBLAST_hits.txt", "w"))) {
            fprintf(stderr, "fopen: 'fopen %s' failed: %s\n",
                    "CaBLAST_hits.txt", strerror(errno));
            exit(1);
        }
    }

    //Parse the XML file generated from coarse BLAST and get its iterations.
    doc = xmlReadFile("CaBLAST_temp_blast_results.xml", NULL, 0);
    if (doc == NULL) {
        fprintf(stderr, "Could not parse CaBLAST_temp_blast_results.xml\n");
        //A failed parse is an error, so report it through the exit status.
        return 1;
    }
    root = xmlDocGetRootElement(doc);
    iterations = get_blast_iterations(root);

    for (i = 0; i < iterations->size; i++) {
        if (!search_flags.hide_progress) {
            /*Pad the counter so the bar stays in the same column.  The digit
              count is taken from i+1, the number actually printed; using i
              would evaluate log10(0) on the first iteration.*/
            int32_t digits_full = floor(log10((double)iterations->size)),
                    digits_i    = floor(log10((double)(i+1))), spaces;
            char *bar = progress_bar(i, iterations->size);
            spaces = digits_full - digits_i;
            fprintf(stderr, "\r");
            fprintf(stderr, "iteration: %d/%d", i+1, iterations->size);
            for (j = 0; j < spaces; j++)
                putc(' ', stderr);
            fprintf(stderr, " %s ", bar);
            free(bar);
        }

        /*Expand any BLAST hits we got from the current query sequence during
          coarse BLAST.*/
        expanded_hits = expand_blast_hits(iterations, i, db);

        /*Collect the expansions of all iterations in oseqs; only the
          per-iteration container is released here.*/
        for (j = 0; j < expanded_hits->size; j++)
            ds_vector_append(oseqs, ds_vector_get(expanded_hits, j));
        ds_vector_free_no_data(expanded_hits);
    }

    write_fine_fasta(oseqs);

    for (i = 0; i < oseqs->size; i++)
        cb_hit_expansion_free(
          (struct cb_hit_expansion *)ds_vector_get(oseqs, i));
    ds_vector_free_no_data(oseqs);

    blast_fine(args, dbsize);

    if (!search_flags.hide_progress)
        fprintf(stderr, "\n"); //Make a newline after the progress bar

    for (i = 0; i < queries->size; i++)
        fasta_free_seq((struct fasta_seq *)ds_vector_get(queries, i));
    ds_vector_free_no_data(queries);

    if (search_flags.show_hit_info)
        fclose(test_hits_file);

    /*Free the iteration vector.  Its elements are read as xmlNode * by
      expand_blast_hits, which also frees every hit and hsps vector it parses
      from them, so they must not be treated as hit vectors and freed again
      here.  The nodes themselves presumably belong to doc's tree and are
      released by xmlFreeDoc below.*/
    ds_vector_free_no_data(iterations);

    cb_database_r_free(db);
    xmlFreeDoc(doc);

    /*Free the coarse BLAST results file if the --no-cleanup flag is not being
      used.*/
    if (!search_flags.no_cleanup)
        remove("CaBLAST_temp_blast_results.xml");

    opt_args_free(args);
    opt_config_free(conf);

    return 0;
}