Ejemplo n.º 1
0
/*
 * Decide whether a perfect-path walk may advance beyond current_step.
 *
 * The walk can only continue if current_step carries a label other than
 * Undefined. Once the path holds more than one step, further stop
 * conditions are evaluated and, where applicable, recorded on temp_path
 * as LAST stop reasons:
 *   - next_step already flagged as visited in its orientation (no reason recorded)
 *   - current_step equals the first step of the path  -> PATH_FLAG_IS_CYCLE
 *   - no edge leaving current_step                    -> PATH_FLAG_STOP_BLUNT_END
 *   - more than one edge leaving current_step         -> PATH_FLAG_DIVERGING_PATHS
 *   - more than one edge in the opposite orientation  -> PATH_FLAG_CONVERGING_PATHS
 *   - temp_path has no space left                     -> PATH_FLAG_LONGER_THAN_BUFFER
 *
 * reverse_step and db_graph are not used by this implementation.
 *
 * Returns true when the walk should continue, false otherwise.
 */
static boolean continue_traversing(pathStep * current_step,
                                   pathStep * next_step,
                                   pathStep * reverse_step, Path * temp_path,
                                   dBGraph * db_graph)
{
    pathStep start_step;
    boolean keep_going = (current_step->label != Undefined);

    path_get_step_at_index(0, &start_step, temp_path);

    /* The first node is exempt from the remaining checks - it may be a Y node. */
    if (temp_path->length <= 1) {
        return keep_going;
    }

    /* Never walk into a node already visited in this orientation. */
    if (db_node_check_for_any_flag(next_step->node,
                                   next_step->orientation == forward ? VISITED_FORWARD : VISITED_REVERSE)) {
        keep_going = false;
    }

    /* Cycle detection: on a perfect path only the first node needs checking,
       because re-entering at any other node would show up as two edges in
       one orientation. */
    if (path_step_equals_without_label(&start_step, current_step)) {
        path_add_stop_reason(LAST, PATH_FLAG_IS_CYCLE, temp_path);
        keep_going = false;
    }

    /* Edge counts in the walking direction and against it. */
    int out_edges = db_node_edges_count_all_colours(current_step->node, current_step->orientation);
    int in_edges = db_node_edges_count_all_colours(current_step->node,
                                                   opposite_orientation(current_step->orientation));

    if (out_edges == 0) {
        path_add_stop_reason(LAST, PATH_FLAG_STOP_BLUNT_END, temp_path);
        keep_going = false;
    } else if (out_edges > 1) {
        path_add_stop_reason(LAST, PATH_FLAG_DIVERGING_PATHS, temp_path);
        keep_going = false;
    }

    if (in_edges > 1) {
        path_add_stop_reason(LAST, PATH_FLAG_CONVERGING_PATHS, temp_path);
        keep_going = false;
    }

    if (!path_has_space(temp_path)) {
        path_add_stop_reason(LAST, PATH_FLAG_LONGER_THAN_BUFFER, temp_path);
        keep_going = false;
    }

    return keep_going;
}
Ejemplo n.º 2
0
/*
 * Post-process a finished perfect path.
 *
 * If the path is a cycle and its last step equals its first step, the
 * duplicated last step is replaced by a copy whose label is Undefined.
 * Afterwards the edge counts (over all colours) at the first and last
 * steps are inspected and the matching stop reasons are recorded on p
 * for the FIRST and LAST ends of the path.
 */
void perfect_path_base_callback(Path * p)
{
	pathStep tail, head;

	path_get_last_step(&tail, p);
	path_get_step_at_index(0, &head, p);

	/* Remove the repeated node in a cycle. */
	if (path_is_cycle(p) && path_step_equals(&tail, &head)) {
		tail.label = Undefined;
		path_remove_last(p);
		path_add_node(&tail, p);
	}

	/* Edge counts at the first step: along its orientation and against it. */
	const int head_along = db_node_edges_count_all_colours(head.node, head.orientation);
	const int head_against = db_node_edges_count_all_colours(head.node, opposite_orientation(head.orientation));

	/* Edge counts at the last step: along its orientation and against it. */
	const int tail_along = db_node_edges_count_all_colours(tail.node, tail.orientation);
	const int tail_against = db_node_edges_count_all_colours(tail.node, opposite_orientation(tail.orientation));

	/* Stop reasons for the first end of the path. */
	if (head_along == 0) {
		path_add_stop_reason(FIRST, PATH_FLAG_STOP_BLUNT_END, p);
	} else if (head_along > 1) {
		path_add_stop_reason(FIRST, PATH_FLAG_DIVERGING_PATHS, p);
	}

	if (head_against > 1) {
		path_add_stop_reason(FIRST, PATH_FLAG_CONVERGING_PATHS, p);

		/* Several edges against the first step and several edges along
		   the last step: flag the path as a double Y. */
		if (tail_along > 1) {
			path_add_stop_reason(FIRST, PATH_FLAG_IS_DOUBLE_Y, p);
		}
	}

	/* Stop reasons for the last end of the path. */
	if (tail_against > 1) {
		path_add_stop_reason(LAST, PATH_FLAG_CONVERGING_PATHS, p);
	}

	if (tail_along > 1) {
		path_add_stop_reason(LAST, PATH_FLAG_DIVERGING_PATHS, p);
	}
}
Ejemplo n.º 3
0
/*
 * Decide whether the walk may advance beyond current_step.
 *
 * The walk can only continue if current_step carries a label other than
 * Undefined. Once the path holds more than one step, these conditions
 * also stop it:
 *   - next_step already flagged as visited in its orientation (no reason recorded)
 *   - current_step equals the first step, or next_step is reported by
 *     path_has_in_step() for this path          -> PATH_FLAG_IS_CYCLE
 *   - no edge leaving current_step              -> PATH_FLAG_STOP_BLUNT_END
 *
 * Regardless of path length, the walk also stops when:
 *   - the path length has reached its limit     -> PATH_FLAG_LONGER_THAN_BUFFER
 *   - both in_nodes_count and out_nodes_count exceed
 *     db_graph->max_double_y_complexity         -> PATH_TOO_COMPEX
 *
 * Unlike a strict perfect-path check, diverging and converging edges do
 * not stop this walk. reverse_step is not used.
 *
 * Returns true when the walk should continue, false otherwise.
 */
static boolean continue_traversing(pathStep * current_step,
								   pathStep * next_step,
								   pathStep * reverse_step, Path * temp_path,
								   dBGraph * db_graph)
{
	pathStep first;
	boolean cont = current_step->label != Undefined;

	path_get_step_at_index(0, &first, temp_path);

	/* We don't do these checks for the first node - in case it's a Y node */
	if (temp_path->length > 1) {
		/* Never walk into a node already visited in this orientation. */
		if (db_node_check_for_any_flag(next_step->node, next_step->orientation == forward ? VISITED_FORWARD : VISITED_REVERSE)) {
			cont = false;
		}

		/* Cycle: back at the first step, or about to re-enter the path. */
		if (path_step_equals_without_label(&first, current_step) || path_has_in_step(next_step, temp_path)) {
			path_add_stop_reason(LAST, PATH_FLAG_IS_CYCLE, temp_path);
			cont = false;
		}

		/* Only the forward edge count matters here: zero means a blunt end. */
		if (db_node_edges_count_all_colours(current_step->node, current_step->orientation) == 0) {
			path_add_stop_reason(LAST, PATH_FLAG_STOP_BLUNT_END, temp_path);
			cont = false;
		}
	}

	/* The path buffer is full. */
	if (path_get_length(temp_path) >= path_get_limit(temp_path)) {
		cont = false;
		path_add_stop_reason(LAST, PATH_FLAG_LONGER_THAN_BUFFER, temp_path);
	}

	/* Too many branching nodes on both sides of the path. */
	if (temp_path->in_nodes_count > db_graph->max_double_y_complexity && temp_path->out_nodes_count > db_graph->max_double_y_complexity) {
		cont = false;
		path_add_stop_reason(LAST, PATH_TOO_COMPEX, temp_path);
	}

	return cont;
}
Ejemplo n.º 4
0
/*----------------------------------------------------------------------*
 * Function: coverage_walk_continue_traversing
 * Purpose:  Decide whether the coverage walk may advance beyond
 *           current_step, recording the reason on temp_path when it
 *           stops because of a cycle, a blunt end or a full buffer.
 * Params:   current_step - step the walk is currently at
 *           next_step    - step the walk would move to
 *           reverse_step - not used by this function
 *           temp_path    - path built so far; receives stop reasons
 *           db_graph     - not used by this function
 * Returns:  true if the walk should continue, false otherwise
 *----------------------------------------------------------------------*/
static boolean coverage_walk_continue_traversing(pathStep * current_step,
                                                 pathStep * next_step,
                                                 pathStep * reverse_step,
                                                 Path * temp_path,
                                                 dBGraph * db_graph)
{
    pathStep origin;
    boolean proceed = (current_step->label != Undefined);

    /* The first node is exempt from the remaining checks - it may be a Y node. */
    if (temp_path->length <= 1) {
        return proceed;
    }

    /* Cycle detection: only the first node of the path is compared. */
    path_get_step_at_index(0, &origin, temp_path);
    if (path_step_equals_without_label(&origin, current_step)) {
        path_add_stop_reason(LAST, PATH_FLAG_IS_CYCLE, temp_path);
        proceed = false;
    }

    /* Never walk into a node already visited in this orientation. */
    if (db_node_check_for_any_flag(next_step->node,
                                   next_step->orientation == forward ? VISITED_FORWARD : VISITED_REVERSE)) {
        proceed = false;
    }

    /* At least one edge must lead onwards from the current step. */
    if (db_node_edges_count_all_colours(current_step->node, current_step->orientation) == 0) {
        path_add_stop_reason(LAST, PATH_FLAG_STOP_BLUNT_END, temp_path);
        proceed = false;
    }

    /* The path buffer must still have room. */
    if (!path_has_space(temp_path)) {
        path_add_stop_reason(LAST, PATH_FLAG_LONGER_THAN_BUFFER, temp_path);
        proceed = false;
    }

    return proceed;
}
Ejemplo n.º 5
0
/*----------------------------------------------------------------------*
 * Function: coverage_walk_get_next_step
 * Purpose:  Advance one step through the graph and choose the label to
 *           follow from the new step using
 *           coverage_walk_get_best_label().
 * Params:   current_step - step to advance from
 *           next_step    - output; filled in with the new step
 *                          (must not be NULL)
 *           reverse_step - passed through to db_graph_get_next_step()
 *           db_graph     - graph being walked
 * Returns:  next_step. Its label is Undefined when the new step has no
 *           edge in its orientation.
 *----------------------------------------------------------------------*/
static pathStep *coverage_walk_get_next_step(pathStep * current_step, pathStep * next_step, pathStep * reverse_step, dBGraph * db_graph)
{
    /* Check the output step before it is handed on or written to. */
    assert(next_step != NULL);

    db_graph_get_next_step(current_step, next_step, reverse_step, db_graph);

    /* Default to "no label"; only overridden when there is an edge to follow. */
    next_step->label = Undefined;

    if (db_node_edges_count_all_colours(next_step->node, next_step->orientation) >= 1) {
        next_step->label = coverage_walk_get_best_label(next_step->node, next_step->orientation, db_graph);
    }

    return next_step;
}
Ejemplo n.º 6
0
/*----------------------------------------------------------------------*
 * Function: grow_graph_from_node
 * Purpose:  Starting at start_node, repeatedly walk perfect paths out
 *           of every queued node in both orientations, flag each node
 *           reached as visited, and remember the best node seen.
 *           A node is "best" if it has the highest coverage over all
 *           colours; ties are broken in favour of fewer edges.
 * Params:   start_node - node to start growing from
 *           best_node  - output; receives the best node found, or
 *                        start_node if no candidate was found
 *           graph      - graph being explored
 * Returns:  number of nodes newly flagged as visited by this call
 * Notes:    Exits the process if the queue or a path cannot be
 *           allocated, if the queue overflows, or if a next node cannot
 *           be retrieved. start_node itself is flagged and counted but
 *           is not evaluated as a best-node candidate.
 *----------------------------------------------------------------------*/
int grow_graph_from_node(dBNode* start_node, dBNode** best_node, dBGraph* graph)
{
    Queue* nodes_to_walk;
    dBNode* node;
    int orientation;
    int depth;
    int current_graph_size = 0;
    int best_coverage = 0;
    int best_edges = 0;

    *best_node = NULL;

    // Nucleotide iterator, used to walk all possible paths from a node.
    // This is a nested function: it reads node, orientation and depth from
    // the enclosing call, which the main loop below sets before each use.
    void walk_if_exists(Nucleotide n) {
        // If there is an edge in any colour for this nucleotide...
        if (db_node_edge_exist_any_colour(node, n, orientation)) {

            // Get first node along this edge and check we've not already visited it...
            Orientation next_orientation;
            Nucleotide reverse_nucleotide;
            dBNode * next_node;
            next_node = db_graph_get_next_node(node, orientation, &next_orientation, n, &reverse_nucleotide, graph);
            if (!next_node) {
                log_and_screen_printf("Error: Something went wrong with db_graph_get_next_node\n");
                exit(-1);
            }

            // If not already visited the first node, walk it...
            if (!db_node_check_flag_visited(next_node)) {
                pathStep first_step;
                Path * new_path;
                dBNode* end_node;
                int i = 0;

                // Get path
                first_step.node = node;
                first_step.orientation = orientation;
                first_step.label = n;
                new_path = path_new(MAX_EXPLORE_NODES, graph->kmer_size);
                if (!new_path) {
                    log_and_screen_printf("ERROR: Not enough memory to allocate new path.\n");
                    exit(-1);
                }

                db_graph_get_perfect_path_with_first_edge_all_colours(&first_step, &db_node_action_do_nothing, new_path, graph);

                // Add end node to list of nodes to visit. Guard against an
                // empty path so the last-element index is never negative.
                if (new_path->length > 0) {
                    end_node = new_path->nodes[new_path->length-1];
                    if (!db_node_check_flag_visited(end_node)) {
                        if (!db_node_is_blunt_end_all_colours(end_node, new_path->orientations[new_path->length-1])) {
                            if (queue_push_node(nodes_to_walk, end_node, depth+1) == NULL) {
                                log_and_screen_printf("Queue too large. Ending.\n");
                                exit(1);
                            }
                        }
                    }
                }

                // Now go through all nodes, look for best and mark all as visited
                for (i=0; i<new_path->length; i++) {
                    if (!db_node_check_flag_visited(new_path->nodes[i])) {
                        int this_coverage = element_get_coverage_all_colours(new_path->nodes[i]);
                        int this_edges = db_node_edges_count_all_colours(new_path->nodes[i], forward) + db_node_edges_count_all_colours(new_path->nodes[i], reverse);

                        // Test the node stored through the out-parameter, not
                        // the out-parameter itself: the first candidate is
                        // always accepted, later ones must beat the current best.
                        if ((*best_node == NULL) ||
                            (this_coverage > best_coverage) ||
                            ((this_coverage == best_coverage) && (this_edges < best_edges)))
                        {
                            best_coverage = this_coverage;
                            best_edges = this_edges;
                            *best_node = new_path->nodes[i];
                        }

                        db_node_action_set_flag_visited(new_path->nodes[i]);
                        current_graph_size++;
                    }
                }

                // Clean up
                path_destroy(new_path);
            }
        }
    }

    // Start a queue of nodes to walk
    nodes_to_walk = queue_new(METACORTEX_QUEUE_SIZE);
    if (!nodes_to_walk) {
        log_and_screen_printf("Couldn't get memory for node queue.\n");
        exit(-1);
    }

    // Add start node to list of nodes to visit
    if (queue_push_node(nodes_to_walk, start_node, 0) == NULL) {
        log_and_screen_printf("Queue too large. Ending.\n");
        exit(-1);
    }

    if (!db_node_check_flag_visited(start_node)) {
        db_node_action_set_flag_visited(start_node);
        current_graph_size++;
    }

    // Now keep visiting nodes and walking paths
    while (nodes_to_walk->number_of_items > 0) {
        // Take next node from the queue
        node = queue_pop_node(nodes_to_walk, &depth);

        // Look at all paths out from here, in both orientations
        orientation = forward;
        nucleotide_iterator(&walk_if_exists);
        orientation = reverse;
        nucleotide_iterator(&walk_if_exists);
    }

    queue_free(nodes_to_walk);

    // If we didn't find a best node, presumably this is a singleton?
    if (*best_node == NULL) {
        printf("Note: didn't find a best node, setting to start node\n");
        *best_node = start_node;
    }

    return current_graph_size;
}