Exemplo n.º 1
0
static boolean continue_traversing(pathStep * current_step,
                                   pathStep * next_step,
                                   pathStep * reverse_step, Path * temp_path,
                                   dBGraph * db_graph)
{
	pathStep first;
	
	boolean cont;
    cont = current_step->label != Undefined;
    //cont = cont && db_node_has_precisely_one_edge_all_colours(current_step->node, current_step->orientation, &n);
    path_get_step_at_index(0, &first, temp_path);
    int n_fwd, n_rev;
    /* We don't do these checks for the first node - in case it's a Y node */
    if(temp_path->length > 1) {
        if (db_node_check_for_any_flag(next_step->node, next_step->orientation == forward? VISITED_FORWARD:VISITED_REVERSE)) {
            cont = false;
        }
        
        /* Check for a cycle - as this is a perfect path, we only need to check the first node. If we come
           back in at one of the other nodes, then it will result in two edges in one orientation */
        
        if (path_step_equals_without_label(&first, current_step)) {
            path_add_stop_reason(LAST, PATH_FLAG_IS_CYCLE, temp_path);
            cont = false;
        }
        /* Now check for more than one edge in either direction */
        n_fwd = db_node_edges_count_all_colours(current_step->node, current_step->orientation);
        n_rev = db_node_edges_count_all_colours(current_step->node, opposite_orientation(current_step->orientation));
        
        if (n_fwd == 0) {
            path_add_stop_reason(LAST, PATH_FLAG_STOP_BLUNT_END, temp_path);
            cont = false;
        }
        if (n_fwd > 1) {
            path_add_stop_reason(LAST, PATH_FLAG_DIVERGING_PATHS, temp_path);
            cont = false;
        }
        if (n_rev > 1) {
            path_add_stop_reason(LAST, PATH_FLAG_CONVERGING_PATHS, temp_path);
            cont = false;
        }
        if (!path_has_space(temp_path)) {
            path_add_stop_reason(LAST, PATH_FLAG_LONGER_THAN_BUFFER, temp_path);
            cont = false;
        }      
    }
                                                  
    
    
	return cont;
}
Exemplo n.º 2
0
/*----------------------------------------------------------------------*
 * Function:                                                            *
 * Purpose:                                                             *
 * Params:                                                              *
 * Returns:                                                             *
 *----------------------------------------------------------------------*/
static boolean coverage_walk_continue_traversing(pathStep * current_step,
                                                 pathStep * next_step,
                                                 pathStep * reverse_step,
                                                 Path * temp_path,
                                                 dBGraph * db_graph)
{
	pathStep first;
	
	boolean cont;
    cont = current_step->label != Undefined;
    
    /* We don't do these checks for the first node - in case it's a Y node */
    if(temp_path->length > 1) {
        /* Check for a cycle - as this is a perfect path, we only need to check the first node. If we come
           back in at one of the other nodes, then it will result in two edges in one orientation */
        path_get_step_at_index(0, &first, temp_path);
        if (path_step_equals_without_label(&first, current_step)) {
            //char seq[1024];
            //binary_kmer_to_seq(&(current_step->node->kmer), db_graph->kmer_size, seq);
            //log_printf("  Stopped for cycle at %s\n", seq);
            path_add_stop_reason(LAST, PATH_FLAG_IS_CYCLE, temp_path);
            cont = false;
        }
        
        /* Check for visited flag */
        if (db_node_check_for_any_flag(next_step->node, next_step->orientation == forward? VISITED_FORWARD:VISITED_REVERSE)) {
            cont = false;
        }
        
        /* Now check for one or more edges moving forward */
        if (db_node_edges_count_all_colours(current_step->node, current_step->orientation) == 0) {
            //char seq[1024];
            //binary_kmer_to_seq(&(current_step->node->kmer), db_graph->kmer_size, seq);
            //log_printf("  Stopped for blunt end at %s\n", seq);
            path_add_stop_reason(LAST, PATH_FLAG_STOP_BLUNT_END, temp_path);
            cont = false;
        }
        
        /* Check path has space */
        if (!path_has_space(temp_path)) {
            //char seq[1024];
            //binary_kmer_to_seq(&(current_step->node->kmer), db_graph->kmer_size, seq);
            //log_printf("  Stopped for longer than buffer at %s\n", seq);
            path_add_stop_reason(LAST, PATH_FLAG_LONGER_THAN_BUFFER, temp_path);
            cont = false;
        }
    }
    
	return cont;
}
Exemplo n.º 3
0
static boolean continue_traversing(pathStep * current_step,
								   pathStep * next_step,
								   pathStep * reverse_step, Path * temp_path,
								   dBGraph * db_graph)
{
	pathStep first;
	
	boolean cont = true;
    cont = current_step->label != Undefined;
    //cont = cont && db_node_has_precisely_one_edge(current_step->node, current_step->orientation, &n);
    path_get_step_at_index(0, &first, temp_path);
    int n_fwd, n_rev;
    /* We don't do these checks for the first node - in case it's a Y node */
    if(temp_path->length > 1) {
        if (db_node_check_for_any_flag(next_step->node, next_step->orientation == forward? VISITED_FORWARD:VISITED_REVERSE)) {
            cont = false;
        }
        
        if (path_step_equals_without_label(&first, current_step) || path_has_in_step(next_step, temp_path)) {
            path_add_stop_reason(LAST, PATH_FLAG_IS_CYCLE, temp_path);
            cont = false;
        }
        /* Now check for more than one edge in either direction */
        n_fwd = db_node_edges_count_all_colours(current_step->node, current_step->orientation);
        n_rev = db_node_edges_count_all_colours(current_step->node, opposite_orientation(current_step->orientation));
        
        if (n_fwd == 0) {
            path_add_stop_reason(LAST, PATH_FLAG_STOP_BLUNT_END, temp_path);
            cont = false;
        }
       
    }
    
    if(path_get_length(temp_path) >= path_get_limit(temp_path)){
        cont = false;
        path_add_stop_reason(LAST, PATH_FLAG_LONGER_THAN_BUFFER, temp_path);
    }
    
    if(temp_path->in_nodes_count > db_graph->max_double_y_complexity && temp_path->out_nodes_count > db_graph->max_double_y_complexity){
        cont = false;
        path_add_stop_reason(LAST, PATH_TOO_COMPEX, temp_path);
    }
    
    
	return cont;
}
Exemplo n.º 4
0
void metacortex_find_subgraphs(dBGraph* graph, char* consensus_contigs_filename, int min_subgraph_kmers)
{
    SubGraphInfo* sub_graphs;
    FILE* fp;
    Path *path_fwd = path_new(MAX_EXPLORE_PATH_LENGTH, graph->kmer_size);
    Path *path_rev = path_new(MAX_EXPLORE_PATH_LENGTH, graph->kmer_size);
    Path *final_path = path_new(MAX_EXPLORE_PATH_LENGTH, graph->kmer_size);
    char seq[256];
    char analysis_filename[strlen(consensus_contigs_filename) + 10];
    long int total_nodes = 0;
    int n_seeds = 0;
    int i;
    
    sprintf(analysis_filename, "%s.analysis", consensus_contigs_filename);
    log_and_screen_printf("Running metacortex subgraph analysis...\n");
    log_and_screen_printf("          Contig file: %s\n", consensus_contigs_filename);
    log_and_screen_printf("        Analysis file: %s\n", analysis_filename);
    log_and_screen_printf("Minimum subgraph size: %i\n", min_subgraph_kmers);
    
    /* Initialise temporaray path array buffers */
    path_array_initialise_buffers(graph->kmer_size);
    
    /* Create a list of subgraphs */
    log_and_screen_printf("Allocating %d Mb to store subgraph information (max %d seeds)...\n", ((MAX_SEEDS * sizeof(SubGraphInfo)) / 1024) / 1024, MAX_SEEDS);
    sub_graphs = calloc(MAX_SEEDS, sizeof(SubGraphInfo));
    if (!sub_graphs) {
        log_and_screen_printf("ERROR: Can't get memory for subgraphs\n");
        exit(-1);
    }

    /* Open the analysis file */
    fp = fopen(analysis_filename, "w");
    if (!fp) {
        log_and_screen_printf("ERROR: Can't open analysis file.\n");
        exit(-1);
    }
        
    /* For each node, if it's not pruned or visited, try and grow a graph */
    void explore_node(dBNode * node) {
        if (node == NULL) {
            log_and_screen_printf("Error: NULL node passed to explore_node.\n");
            exit(-1);
        }
        
        if (db_node_check_for_any_flag(node, PRUNED | VISITED) == false) {
            int nodes_in_graph;
            
            /* Grow graph from this node, returning the 'best' (highest coverage) node to store as seed point */
            nodes_in_graph = grow_graph_from_node(node, &(sub_graphs[n_seeds].seed_node), graph);
            total_nodes += nodes_in_graph;
            
            if (sub_graphs[n_seeds].seed_node == NULL) {
                printf("ERROR: Seed node is NULL, nodes in graph is %d\n", nodes_in_graph);
            } else {
                /* Write data to analysis file */
                binary_kmer_to_seq(&(node->kmer), graph->kmer_size, seq);            
                fprintf(fp, "%i\t%i\t%ld\t%s\t", n_seeds, nodes_in_graph, total_nodes, seq);
                binary_kmer_to_seq(&(sub_graphs[n_seeds].seed_node->kmer), graph->kmer_size, seq);
                fprintf(fp, "%s\n", seq);

                /* Store nodes in this subgraph */
                sub_graphs[n_seeds].graph_size = nodes_in_graph;
                n_seeds++;
                
                /* Check we've not run out of seed storage - in future, this should dynamically allocate */
                if (n_seeds == MAX_SEEDS) {
                    log_and_screen_printf("Error: MAX_SEEDS exceeded. Quitting.\n");
                    exit(-1);
                }
            }
        }
    }
    
    /* Traverse each node... */
    log_and_screen_printf("Finding subgraphs...\n");
    hash_table_traverse(&explore_node, graph);
    log_and_screen_printf("Finished. Total: %ld\n", total_nodes);
    fclose(fp);    
    
    /* Open consensus contigs file */
    fp = fopen(consensus_contigs_filename, "w");
    if (!fp) {
        log_and_screen_printf("ERROR: Can't open contig file.\n");
        exit(-1);
    }
    
    /* Now go through all the seed points and generate the consensus contigs by walking forward and backward from the seed */
    db_graph_reset_flags(graph);    
    log_and_screen_printf("Outputting contigs...\n");
	log_progress_bar(0);
	long long one_percent = n_seeds/100;
    int percent;
    
    if (one_percent < 1) {
        one_percent = 1;
    }
    
    for (i=0; i<n_seeds; i++) {
        if (i % one_percent == 0) {
            percent = (100 * i) / n_seeds;
            log_progress_bar(percent);
        } 
        
        //log_printf("Graph %i\n", i);           
        if (sub_graphs[i].graph_size >= min_subgraph_kmers) {            
            binary_kmer_to_seq(&(sub_graphs[i].seed_node->kmer), graph->kmer_size, seq);
            coverage_walk_get_path(sub_graphs[i].seed_node, forward, NULL, graph, path_fwd);
            coverage_walk_get_path(sub_graphs[i].seed_node, reverse, NULL, graph, path_rev);
            path_reverse(path_fwd, final_path);
            path_append(final_path, path_rev);
            final_path->id = i;
            path_to_fasta(final_path, fp);
            //log_printf("  Seed %s\tFwd path length %i\tRev path length %i\tFinal path length %i\n", seq, path_fwd->length, path_rev->length, final_path->length);
            path_reset(path_fwd);
            perfect_path_get_path(sub_graphs[i].seed_node, forward, &db_node_action_do_nothing, graph, path_fwd);
            //log_printf("\t\tPerfect path fwd length %i\n", path_fwd->length);
            path_reset(path_rev);
            path_reset(final_path);
        } else {
            log_printf("  Number of nodes (%i} too small. Not outputting contig.\n", sub_graphs[i].graph_size);
        }
        
    }
	log_progress_bar(100);
	printf("\n");
    log_and_screen_printf("Finished contig output.\n");    
    fclose(fp);
    
    free(sub_graphs);
}