/*
 * Choose the step a walk should start from.
 *
 * first_step: in/out. If its orientation is anything other than `undefined`
 *             it is returned untouched.
 * db_graph:   graph handed to the walker; its kmer_size is also used for the
 *             debug print.
 *
 * Returns first_step, or NULL when the node does not have precisely one edge
 * (over all colours) in either orientation.
 */
static pathStep *get_first_step(pathStep * first_step, dBGraph * db_graph)
{
	WalkingFunctions wf;
	pathStep tmp_step;
	Nucleotide n;
	Path *temp_path;

	path_step_assign(&tmp_step, first_step);

	/* A step that already carries an orientation is used as given. */
	if (tmp_step.orientation != undefined) {
		return first_step;
	}

	/*
	 * Start in reverse: when the walk is reversed later it then runs in the
	 * forward direction, which "naturally" appends the bases to the end of
	 * the path.
	 */
	tmp_step.orientation = reverse;

	/*
	 * Keep the current orientation if it has exactly one edge; otherwise try
	 * the opposite one. Either successful check also stores the edge label
	 * in n.
	 */
	if (!db_node_has_precisely_one_edge_all_colours(tmp_step.node, tmp_step.orientation, &n)) {
		if (!db_node_has_precisely_one_edge_all_colours(tmp_step.node, opposite_orientation(tmp_step.orientation), &n)) {
			return NULL;
		}
		tmp_step.orientation = opposite_orientation(tmp_step.orientation);
	}
	tmp_step.label = n;

	/* Blunt in the opposite orientation: no better starting step exists. */
	if (db_node_is_blunt_end_all_colours(first_step->node, opposite_orientation(tmp_step.orientation))) {
		path_step_assign(first_step, &tmp_step);
		return first_step;
	}

	/* Otherwise do a perfect path walk to try and get a better start step. */
	temp_path = path_get_buffer_path();
	temp_path->id = -2;
	db_node_has_precisely_one_edge_all_colours(tmp_step.node, tmp_step.orientation, &tmp_step.label);

	/* Callback lists start out empty ... */
	wf.node_callbacks.used = 0;
	wf.step_actions.used = 0;
	wf.path_callbacks.used = 0;

	/* ... per-step hooks do nothing ... */
	wf.pre_step_action = &path_step_do_nothing;
	wf.step_action = &path_step_do_nothing;
	wf.post_step_action = &path_step_do_nothing;
	wf.output_callback = &path_do_nothing;

	/* ... and traversal is driven by the helpers defined in this file. */
	wf.get_starting_step = &get_first_step_identity;
	wf.get_next_step = &get_next_step;
	wf.continue_traversing = &continue_traversing;
	wf.continue_backwards = &clean_path;

	/*
	 * store_last is given first_step as its argument.
	 * NOTE(review): presumably it records the last step of the walk into
	 * first_step -- confirm against store_last.
	 */
	db_graph_add_path_callback_with_args(&wf, &store_last, (void *) first_step);

	if (DEBUG) {
		const char *orientation_name = (tmp_step.orientation == forward) ? "forward" : "reverse";

		printf("[get_first_step]Orientation: %s\n", orientation_name);
		printf("[get_first_step] (%d):", path_get_edges_count(temp_path));
		path_step_print(first_step, db_graph->kmer_size, stdout);
		printf("\n");
	}

	db_graph_generic_walk(&tmp_step, temp_path, &wf, db_graph);
	path_free_buffer_path(temp_path);

	return first_step;
}
/*----------------------------------------------------------------------*
 * Function: grow_graph_from_node
 * Purpose:  Explore the graph outwards from start_node. Nodes are taken
 *           from a work queue; for each one, every nucleotide edge (in
 *           any colour) is followed in both orientations along its
 *           perfect path. Every node met for the first time is flagged
 *           as visited and counted, and the best candidate node seen is
 *           remembered.
 * Params:   start_node - node the exploration starts from
 *           best_node  - out: unvisited path node with the highest
 *                        all-colour coverage; on equal coverage the one
 *                        with fewer edges (forward + reverse) wins.
 *                        Set to start_node if no candidate was found.
 *           graph      - graph being explored
 * Returns:  Number of nodes this call flagged as visited (start_node
 *           included when it was not already flagged).
 * Notes:    Calls exit() on allocation failure, on queue overflow or if
 *           db_graph_get_next_node returns no node.
 *----------------------------------------------------------------------*/
int grow_graph_from_node(dBNode* start_node, dBNode** best_node, dBGraph* graph)
{
    Queue* nodes_to_walk;
    dBNode* node;
    int orientation;
    int depth;
    int current_graph_size = 0;
    int best_coverage = 0;
    int best_edges = 0;

    *best_node = NULL;

    // Nucleotide iterator callback, used to walk all possible paths from a
    // node. This is a GCC nested function: it reads node, orientation and
    // depth from the enclosing frame because nucleotide_iterator's callback
    // receives only the nucleotide.
    void walk_if_exists(Nucleotide n) {
        Orientation next_orientation;
        Nucleotide reverse_nucleotide;
        dBNode* next_node;
        pathStep first_step;
        Path* new_path;
        int i;

        // Nothing to do unless some colour has an edge for this nucleotide
        if (!db_node_edge_exist_any_colour(node, n, orientation)) {
            return;
        }

        // Get first node along this edge...
        next_node = db_graph_get_next_node(node, orientation, &next_orientation, n, &reverse_nucleotide, graph);
        if (!next_node) {
            log_and_screen_printf("Error: Something went wrong with db_graph_get_next_node\n");
            exit(-1);
        }

        // ...and only walk it if we have not been there before
        if (db_node_check_flag_visited(next_node)) {
            return;
        }

        // Get the perfect path that starts with this edge
        first_step.node = node;
        first_step.orientation = orientation;
        first_step.label = n;

        new_path = path_new(MAX_EXPLORE_NODES, graph->kmer_size);
        if (!new_path) {
            log_and_screen_printf("ERROR: Not enough memory to allocate new path.\n");
            exit(-1);
        }

        db_graph_get_perfect_path_with_first_edge_all_colours(&first_step, &db_node_action_do_nothing, new_path, graph);

        // Add end node to list of nodes to visit. Guard against an empty
        // path so that nodes[length-1] is never read out of bounds.
        if (new_path->length > 0) {
            dBNode* end_node = new_path->nodes[new_path->length - 1];

            if (!db_node_check_flag_visited(end_node) &&
                !db_node_is_blunt_end_all_colours(end_node, new_path->orientations[new_path->length - 1])) {
                if (queue_push_node(nodes_to_walk, end_node, depth + 1) == NULL) {
                    log_and_screen_printf("Queue too large. Ending.\n");
                    exit(1);
                }
            }
        }

        // Now go through all nodes, look for best and mark all as visited
        for (i = 0; i < new_path->length; i++) {
            dBNode* candidate = new_path->nodes[i];

            if (!db_node_check_flag_visited(candidate)) {
                int this_coverage = element_get_coverage_all_colours(candidate);
                int this_edges = db_node_edges_count_all_colours(candidate, forward)
                               + db_node_edges_count_all_colours(candidate, reverse);

                // Test the pointee, not the out-parameter itself: the first
                // candidate must always be accepted, even with coverage 0.
                if ((*best_node == NULL) ||
                    (this_coverage > best_coverage) ||
                    ((this_coverage == best_coverage) && (this_edges < best_edges))) {
                    best_coverage = this_coverage;
                    best_edges = this_edges;
                    *best_node = candidate;
                }

                db_node_action_set_flag_visited(candidate);
                current_graph_size++;
            }
        }

        // Clean up
        path_destroy(new_path);
    }

    // Start a queue of nodes to walk
    nodes_to_walk = queue_new(METACORTEX_QUEUE_SIZE);
    if (!nodes_to_walk) {
        log_and_screen_printf("Couldn't get memory for node queue.\n");
        exit(-1);
    }

    // Add start node to list of nodes to visit
    if (queue_push_node(nodes_to_walk, start_node, 0) == NULL) {
        log_and_screen_printf("Queue too large. Ending.\n");
        exit(-1);
    }

    // The start node is flagged up front, so it is counted here and is
    // never a candidate for best_node inside walk_if_exists.
    if (!db_node_check_flag_visited(start_node)) {
        db_node_action_set_flag_visited(start_node);
        current_graph_size++;
    }

    // Now keep visiting nodes and walking paths
    while (nodes_to_walk->number_of_items > 0) {
        // Take top node from list
        node = queue_pop_node(nodes_to_walk, &depth);

        // Look at all paths out from here
        orientation = forward;
        nucleotide_iterator(&walk_if_exists);
        orientation = reverse;
        nucleotide_iterator(&walk_if_exists);
    }

    queue_free(nodes_to_walk);

    // If we didn't find a start node, presumably this is a singleton?
    if (*best_node == NULL) {
        printf("Note: didn't find a best node, setting to start node\n");
        *best_node = start_node;
    }

    return current_graph_size;
}