Example #1
0
/*
 * Choose the step from which a walk should start.
 *
 * A step that already has a defined orientation is returned unchanged.
 * Otherwise a working copy of the step is oriented so that its node has
 * precisely one edge (all colours): reverse is tried first, then the opposite
 * orientation. If neither qualifies, NULL is returned.
 *
 * If the node is a blunt end in the orientation opposite to the chosen one,
 * the working copy is written back into first_step. Otherwise a generic walk
 * is run from the working copy over a buffer path, with store_last registered
 * as a path callback that is handed first_step as its argument.
 *
 * Params:  first_step - candidate starting step; may be overwritten.
 *          db_graph   - graph in which the walk takes place.
 * Returns: first_step, or NULL when the node has no orientation with
 *          precisely one edge.
 */
static pathStep *get_first_step(pathStep * first_step, dBGraph * db_graph)
{
	pathStep probe;
	Nucleotide edge_label;
	WalkingFunctions walker;
	Path *scratch_path;

	path_step_assign(&probe, first_step);

	/* A caller-supplied orientation is respected as-is. */
	if (probe.orientation != undefined) {
		return first_step;
	}

	/*
	 * Begin in reverse: when the walk is later reversed it proceeds in the
	 * forward direction, which "naturally" appends bases to the end of the
	 * path.
	 */
	probe.orientation = reverse;

	/*
	 * Keep the current orientation if it has exactly one edge; otherwise fall
	 * back to the opposite one. Either successful check also fills in
	 * edge_label.
	 */
	if (!db_node_has_precisely_one_edge_all_colours(probe.node, probe.orientation, &edge_label)) {
		if (!db_node_has_precisely_one_edge_all_colours(probe.node, opposite_orientation(probe.orientation), &edge_label)) {
			return NULL;
		}
		probe.orientation = opposite_orientation(probe.orientation);
	}
	probe.label = edge_label;

	/* Blunt in the opposite orientation: no better starting step exists. */
	if (db_node_is_blunt_end_all_colours(first_step->node, opposite_orientation(probe.orientation))) {
		path_step_assign(first_step, &probe);
		return first_step;
	}

	/* Otherwise walk a perfect path to look for a better starting step. */
	scratch_path = path_get_buffer_path();
	scratch_path->id = -2;

	/*
	 * NOTE(review): this appears to recompute the label already stored in
	 * probe.label above -- confirm the call has no other effect before
	 * removing it.
	 */
	db_node_has_precisely_one_edge_all_colours(probe.node, probe.orientation, &probe.label);

	/* Callback lists start out empty. */
	walker.node_callbacks.used = 0;
	walker.step_actions.used = 0;
	walker.path_callbacks.used = 0;

	/* Decisions that drive the walk. */
	walker.get_starting_step = &get_first_step_identity;
	walker.get_next_step = &get_next_step;
	walker.continue_traversing = &continue_traversing;
	walker.continue_backwards = &clean_path;

	/* Per-step and output hooks are all no-ops. */
	walker.pre_step_action = &path_step_do_nothing;
	walker.step_action = &path_step_do_nothing;
	walker.post_step_action = &path_step_do_nothing;
	walker.output_callback = &path_do_nothing;

	/* store_last is given first_step as its argument. */
	db_graph_add_path_callback_with_args(&walker, &store_last, (void *) first_step);

	if (DEBUG) {
		const char *orientation_name = (tmp_orientation_is_forward(probe.orientation)) ? "forward" : "reverse";
		printf("[get_first_step]Orientation: %s\n", orientation_name);
		printf("[get_first_step] (%d):", path_get_edges_count(scratch_path));
		path_step_print(first_step, db_graph->kmer_size, stdout);
		printf("\n");
	}

	db_graph_generic_walk(&probe, scratch_path, &walker, db_graph);

	path_free_buffer_path(scratch_path);
	return first_step;
}
Example #2
0
/*----------------------------------------------------------------------*
 * Function: grow_graph_from_node                                       *
 * Purpose:  Explore the graph reachable from start_node. Nodes are     *
 *           taken from a work queue; for each one, every existing edge *
 *           (both orientations, any colour) is followed along its      *
 *           perfect path. Unvisited path nodes are flagged visited and *
 *           counted, and the best of them is remembered: highest       *
 *           coverage, ties broken by fewest edges.                     *
 * Params:   start_node - node to start exploring from.                 *
 *           best_node  - out: best node found, or start_node if none.  *
 *           graph      - graph being explored.                         *
 * Returns:  Number of nodes newly flagged as visited by this call.     *
 * Notes:    Calls exit() on allocation failure, queue overflow or a    *
 *           failed next-node lookup. walk_if_exists is a GCC nested    *
 *           function (compiler extension).                             *
 *----------------------------------------------------------------------*/
int grow_graph_from_node(dBNode* start_node, dBNode** best_node, dBGraph* graph)
{
    Queue* nodes_to_walk;
    dBNode* node;
    int orientation;
    int depth;
    int current_graph_size = 0;
    int best_coverage = 0;
    int best_edges = 0;

    *best_node = NULL;

    // Nucleotide iterator callback, used to walk all possible paths from
    // the current node. It reads node, orientation and depth from the
    // enclosing function, which sets them before each nucleotide_iterator call.
    void walk_if_exists(Nucleotide n) {
        // If there is an edge in any colour for this nucleotide...
        if (db_node_edge_exist_any_colour(node, n, orientation)) {

            // Get first node along this edge and check we've not already visited it...
            Orientation next_orientation;
            Nucleotide reverse_nucleotide;
            dBNode * next_node;
            next_node = db_graph_get_next_node(node, orientation, &next_orientation, n, &reverse_nucleotide, graph);
            if (!next_node) {
                log_and_screen_printf("Error: Something went wrong with db_graph_get_next_node\n");
                exit(-1);
            }

            // If not already visited the first node, walk it...
            if (!db_node_check_flag_visited(next_node)) {
                pathStep first_step;
                Path * new_path;
                int i = 0;

                // Get path
                first_step.node = node;
                first_step.orientation = orientation;
                first_step.label = n;
                new_path = path_new(MAX_EXPLORE_NODES, graph->kmer_size);
                if (!new_path) {
                    log_and_screen_printf("ERROR: Not enough memory to allocate new path.\n");
                    exit(-1);
                }

                db_graph_get_perfect_path_with_first_edge_all_colours(&first_step, &db_node_action_do_nothing, new_path, graph);

                // Add end node to list of nodes to visit. Guard against an
                // empty path so nodes[length-1] is never indexed with -1.
                if (new_path->length > 0) {
                    dBNode* end_node = new_path->nodes[new_path->length-1];
                    if (!db_node_check_flag_visited(end_node)) {
                        if (!db_node_is_blunt_end_all_colours(end_node, new_path->orientations[new_path->length-1])) {
                            if (queue_push_node(nodes_to_walk, end_node, depth+1) == NULL) {
                                log_and_screen_printf("Queue too large. Ending.\n");
                                exit(1);
                            }
                        }
                    }
                }

                // Now go through all nodes, look for best and mark all as visited
                for (i=0; i<new_path->length; i++) {
                    if (!db_node_check_flag_visited(new_path->nodes[i])) {
                        int this_coverage = element_get_coverage_all_colours(new_path->nodes[i]);
                        int this_edges = db_node_edges_count_all_colours(new_path->nodes[i], forward) + db_node_edges_count_all_colours(new_path->nodes[i], reverse);

                        // Test the pointee, not the out-parameter itself:
                        // the first unvisited node always becomes the initial
                        // best, even when its coverage is zero.
                        if ((*best_node == NULL) ||
                            (this_coverage > best_coverage) ||
                            ((this_coverage == best_coverage) && (this_edges < best_edges)))
                        {
                            best_coverage = this_coverage;
                            best_edges = this_edges;
                            *best_node = new_path->nodes[i];
                        }

                        db_node_action_set_flag_visited(new_path->nodes[i]);
                        current_graph_size++;
                    }
                }

                // Clean up
                path_destroy(new_path);
            }
        }
    }

    // Start a queue of nodes to walk
    nodes_to_walk = queue_new(METACORTEX_QUEUE_SIZE);
    if (!nodes_to_walk) {
        log_and_screen_printf("Couldn't get memory for node queue.\n");
        exit(-1);
    }

    // Add start node to list of nodes to visit
    if (queue_push_node(nodes_to_walk, start_node, 0) == NULL) {
        log_and_screen_printf("Queue too large. Ending.\n");
        exit(-1);
    }

    // The start node counts towards the size only if it was not already visited
    if (!db_node_check_flag_visited(start_node)) {
        db_node_action_set_flag_visited(start_node);
        current_graph_size++;
    }

    // Now keep visiting nodes and walking paths
    while (nodes_to_walk->number_of_items > 0) {
        // Take top node from list
        node = queue_pop_node(nodes_to_walk, &depth);

        // Look at all paths out from here, in both orientations
        orientation = forward;
        nucleotide_iterator(&walk_if_exists);
        orientation = reverse;
        nucleotide_iterator(&walk_if_exists);
    }

    queue_free(nodes_to_walk);

    // If we didn't find a best node, presumably this is a singleton?
    if (*best_node == NULL) {
        printf("Note: didn't find a best node, setting to start node\n");
        *best_node = start_node;
    }

    return current_graph_size;
}