Beispiel #1
0
/// Build a PathIndex for the path named `path_name` in the XG `index`.
///
/// When `extract_sequence` is true the (path, index) constructor is used;
/// otherwise the path-only constructor is used. A path whose rank is 0 is
/// treated as absent and trips the assertion.
PathIndex::PathIndex(const xg::XG& index, const string& path_name, bool extract_sequence) {
    // The named path has to exist in the index.
    assert(index.path_rank(path_name) != 0);

    const auto& named_path = index.path(path_name);

    // Constructor dispatch hack: build a temporary with the overload we want
    // and assign it over ourselves.
    if (!extract_sequence) {
        *this = PathIndex(named_path);
        return;
    }
    *this = PathIndex(named_path, index);
}
Beispiel #2
0
// Re-point this rectangle's search state at the final entry of `extension`
// (when `extension` is non-empty) and shift both ends of its range down by the
// supplied deltas. The range shift is applied whether or not `extension` is
// empty.
void rectangle::simple_extend(thread_t& extension, xg::XG& graph, int delta_start = 0, int delta_end = 0) {
  if(extension.size() > 0) {
    const xg::XG::ThreadMapping& last_visit = extension.back();
    // A side is numbered as twice the node's rank, plus one when the node is
    // visited in reverse.
    state.current_side = static_cast<int64_t>(graph.id_to_rank(last_visit.node_id) * 2 + last_visit.is_reverse);
  }
  state.range_end -= delta_end;
  state.range_start -= delta_start;
}
Beispiel #3
0
// Report whether the graph contains the edge used to step from the old node
// visit to the new node visit.
//
// The candidate edges are the ones the index reports for the new node: those
// on its end when it is visited in reverse, those on its start otherwise.
//
// Returns true as soon as a candidate is equivalent to the edge being
// followed. Otherwise writes a diagnostic naming both node IDs to cerr and
// returns false.
bool check_for_edges(int64_t old_node_id, bool old_node_is_reverse, int64_t new_node_id,
          bool new_node_is_reverse, xg::XG& index) {
  // What edge are we following
  Edge edge_taken = make_edge(old_node_id, old_node_is_reverse, new_node_id, new_node_is_reverse);

  vector<Edge> edges = new_node_is_reverse ? index.edges_on_end(new_node_id) : index.edges_on_start(new_node_id);

  // Look at every edge in order and stop at the first match.
  for(auto& edge : edges) {
    if(edges_equivalent(edge, edge_taken)) {
      return true;
    }
  }

  // The space after "between" keeps the first node ID from being glued to the
  // preceding word in the message.
  cerr << "did not find edge between " << old_node_id << " and " << new_node_id << endl;
  return false;
}
Beispiel #4
0
// Fill in the I value of the rectangles in every cross-section after the
// first. Rectangles of the previous cross-section whose I is at least
// big_cutoff are stepped forward individually first, and binaryI is then run
// on the gaps between them.
//
// Within a cross-section that is recomputed here, each rectangle's I is its J
// minus the J of the rectangle after it, and the last rectangle's I equals its
// J.
//
// graph: index used to look up edges and passed through to the rectangle and
//        binaryI helpers.
void haplo_d::seeded_log_calculate_Is(xg::XG& graph) {
  // Things which were calculated in the constructor:
  // -- A's
  // -- J for the top continuing and any new rectangle
  // -- I for any new rectangle
  vector<Edge> edges_out;
  vector<Edge> edges_in;
  for(int b = 1; b < cs.size(); b++) {
    vector<rectangle>& prevAs = cs[b-1].S;
    vector<rectangle>& currAs = cs[b].S;
    // Edges on the last node of the previous cross-section (start side when it
    // is reversed, end side otherwise) and on this cross-section's node (end
    // side when it is reversed, start side otherwise).
    edges_out = cs[b-1].get_last_node().is_reverse ? graph.edges_on_start(cs[b-1].get_last_node().node_id) : graph.edges_on_end(cs[b-1].get_last_node().node_id);
    edges_in = cs[b].get_node().is_reverse ? graph.edges_on_end(cs[b].get_node().node_id) : graph.edges_on_start(cs[b].get_node().node_id);
    // make sure that there is at least one rectangle here; prevAs must not be
    // indexed before this check
    if(prevAs.size() == 0) {
      cerr << "[vg haplo error] no consistent haplotypes at node " << cs[b-1].get_node().node_id << endl;
    } else if(prevAs.size() == 1) {
      currAs.back().I = currAs.back().J;
      // currAs has size at most 2
      if(currAs.size() == 2) {
        currAs[0].I = currAs[0].J - currAs[1].J;
      }
    } else if(prevAs.size() >= 2) {
      // We're going to have to extend, so let's grab the next node
      XG::ThreadMapping next_node = cs[b].get_node();
      // Let's also grab the nodes which we'll skip over between this and the last node
      thread_t extension = cs[b-1].bridge;
      extension.push_back(next_node);
      // if J = 0 for a rectangle, then J must be 0 for all older rectangles
      if(currAs.back().J == 0) {
        currAs.pop_back();
      } else {
        int deltaJ = prevAs[0].J - currAs[prevAs[0].next].J;
        if(deltaJ == 0) {
          // The top rectangle kept its J, so carry every older rectangle over
          // with the same range shift that the top rectangle underwent.
          currAs[prevAs[0].next].I = prevAs[0].I;
          int delta_start = prevAs[0].state.range_start - currAs[prevAs[0].next].state.range_start;
          int delta_end = prevAs[0].state.range_end - currAs[prevAs[0].next].state.range_end;
          for(int a = 1; a < prevAs.size(); a++) {
            rectangle new_rect = prevAs[a];
            new_rect.simple_extend(extension, graph, delta_start, delta_end);
            new_rect.prev = a;
            currAs.push_back(new_rect);
            prevAs[a].next = currAs.size()-1;
          }
        } else {
          // Indices (other than 0) of previous rectangles whose I reaches the
          // cutoff.
          vector<int> previously_big;
          const int big_cutoff = 400;
          for(int i = 1; i < prevAs.size(); i++) {
            if(prevAs[i].I >= big_cutoff) {
              previously_big.push_back(i);
            }
          }
          // Step each big rectangle forward, recording its new J and how much
          // J it lost. Collection stops after the first one whose J reaches 0,
          // so these vectors may be shorter than previously_big.
          vector<rectangle> big_rectangles;
          vector<int> big_deltas;
          vector<int> big_Js;
          for(int i = 0; i < previously_big.size(); i++) {
            big_rectangles.push_back(prevAs[previously_big[i]]);
            int Jbig = big_rectangles.back().get_next_J(extension,graph, edges_in, edges_out);
            big_Js.push_back(Jbig);
            big_deltas.push_back(prevAs[previously_big[i]].J - Jbig);
            if(Jbig == 0) {
              break;
            }
          }
          if(big_Js.size() > 0) {
            int Aabove = 0;
            int Jabove = currAs[prevAs[0].next].J;
            int dJabove = prevAs[0].J - currAs[prevAs[0].next].J;
            // This loop breaks at the first big rectangle whose J is 0, which
            // is also the last entry collected above, so big_Js[i] and
            // big_deltas[i] stay in range.
            for(int i = 0; i < previously_big.size(); i++) {
              if(big_Js[i] == Jabove) {
                // all rectangles between are actually empty
                prevAs[currAs.back().prev].next = -1;
                currAs.pop_back();
              } else {
                binaryI(graph, extension, b, Aabove, previously_big[i], dJabove, big_deltas[i], Jabove, big_Js[i], 0, edges_in, edges_out);
              }
              Aabove = previously_big[i];
              Jabove = big_Js[i];
              dJabove = big_deltas[i];
              if(big_Js[i] == 0) {
                // Don't build smaller rectangles
                // Don't add this rectangle
                break;
              } else {
                big_rectangles[i].prev = previously_big[i];
                currAs.push_back(big_rectangles[i]);
                prevAs[previously_big[i]].next = currAs.size()-1;
              }
            }
            if(big_Js.back() != 0) {
              binaryI(graph, extension, b, previously_big.back(), prevAs.size(), big_deltas.back(), 0, big_Js.back(), 0, 0, edges_in, edges_out);
            }
          } else {
            binaryI(graph, extension, b, 0, prevAs.size(), deltaJ, 0, currAs[prevAs[0].next].J, 0, 0, edges_in, edges_out);
          }
          // Written as a + 1 < size() so that an empty currAs cannot wrap the
          // unsigned bound around.
          for(size_t a = 0; a + 1 < currAs.size(); a++) {
            currAs[a].I = currAs[a].J - currAs[a+1].J;
          }
          if(!currAs.empty()) {
            currAs.back().I = currAs.back().J;
          }
        }
      }
    }
  }
}
Beispiel #5
0
// Fill in the I value of the rectangles in every cross-section after the
// first, using binaryI whenever the top rectangle's J changed between the
// previous cross-section and this one.
//
// Within a cross-section that is recomputed here, each rectangle's I is its J
// minus the J of the rectangle after it, and the last rectangle's I equals its
// J.
//
// graph: index used to look up edges and passed through to the rectangle and
//        binaryI helpers.
void haplo_d::log_calculate_Is(xg::XG& graph) {
  // Things which were calculated in the constructor:
  // -- A's
  // -- J for the top continuing and any new rectangle
  // -- I for any new rectangle
  vector<Edge> edges_out;
  vector<Edge> edges_in;
  for(int b = 1; b < cs.size(); b++) {
    // Edges on the last node of the previous cross-section (start side when it
    // is reversed, end side otherwise) and on this cross-section's node (end
    // side when it is reversed, start side otherwise).
    edges_out = cs[b-1].get_last_node().is_reverse ? graph.edges_on_start(cs[b-1].get_last_node().node_id) : graph.edges_on_end(cs[b-1].get_last_node().node_id);
    edges_in = cs[b].get_node().is_reverse ? graph.edges_on_end(cs[b].get_node().node_id) : graph.edges_on_start(cs[b].get_node().node_id);
    vector<rectangle>& prevAs = cs[b-1].S;
    vector<rectangle>& currAs = cs[b].S;
    // make sure that there is at least one rectangle here; prevAs must not be
    // indexed before this check
    if(prevAs.size() == 0) {
      cerr << "[vg haplo error] no consistent haplotypes at node " << cs[b-1].get_node().node_id << endl;
    } else if(prevAs.size() == 1) {
      currAs.back().I = currAs.back().J;
      // currAs has size at most 2
      if(currAs.size() == 2) {
        currAs[0].I = currAs[0].J - currAs[1].J;
      }
    } else if(prevAs.size() >= 2) {
      // We're going to have to extend, so let's grab the next node
      XG::ThreadMapping next_node = cs[b].get_node();
      // Let's also grab the nodes which we'll skip over between this and the last node
      thread_t extension = cs[b-1].bridge;
      extension.push_back(next_node);
      // if J = 0 for a rectangle, then J must be 0 for all older rectangles
      if(currAs.back().J == 0) {
        currAs.pop_back();
      } else {
        int deltaJ = prevAs[0].J - currAs[prevAs[0].next].J;
        if(deltaJ == 0) {
          // The top rectangle kept its J, so carry every older rectangle over
          // with the same range shift that the top rectangle underwent.
          currAs[prevAs[0].next].I = prevAs[0].I;
          int delta_start = prevAs[0].state.range_start - currAs[prevAs[0].next].state.range_start;
          int delta_end = prevAs[0].state.range_end - currAs[prevAs[0].next].state.range_end;
          for(int a = 1; a < prevAs.size(); a++) {
            rectangle new_rect = prevAs[a];
            new_rect.simple_extend(extension, graph, delta_start, delta_end);
            new_rect.prev = a;
            currAs.push_back(new_rect);
            prevAs[a].next = currAs.size()-1;
          }
        } else {
          // binaryI(XG&, thread_t, b, atop, abott,    dJtop, dJbott, Jtop,               Jbott, indent level)
          binaryI(graph, extension, b, 0, prevAs.size(), deltaJ, 0, currAs[prevAs[0].next].J, 0, 0, edges_in, edges_out);
          // Written as a + 1 < size() so that an empty currAs cannot wrap the
          // unsigned bound around.
          for(size_t a = 0; a + 1 < currAs.size(); a++) {
            currAs[a].I = currAs[a].J - currAs[a+1].J;
          }
          if(!currAs.empty()) {
            currAs.back().I = currAs.back().J;
          }
        }
      }
    }
  }
}
Beispiel #6
0
// Re-point this rectangle's search state at `next_node` and shift both ends of
// its range down by the supplied deltas.
void rectangle::simple_extend(xg::XG::ThreadMapping next_node, xg::XG& graph, int delta_start = 0, int delta_end = 0) {
  // A side is numbered as twice the node's rank, plus one when the node is
  // visited in reverse.
  state.current_side = static_cast<int64_t>(graph.id_to_rank(next_node.node_id) * 2 + next_node.is_reverse);
  state.range_end -= delta_end;
  state.range_start -= delta_start;
}
Beispiel #7
0
// Build a haplo_d from the top rectangle of `left` at cross-section `left_cut`
// and the strip of `right` that starts at cross-section `right_join`.
//
// Returns a default-constructed haplo_d when `right` reports no joining node
// at `right_join`. Otherwise returns the haplo_d assembled below.
//
// NOTE(review): left.cs[left_cut].S is assumed to be non-empty; that is not
// checked here.
haplo_d recombine_arms(haplo_d& left, haplo_d& right, int left_cut, int right_join, xg::XG& graph) {
  haplo_d to_return;
  if(!right.has_joining_node(right_join)) {
    return to_return;
  }

  vector<rectangle*> boundary = right.trace_strip(right_join, 0, -1);
  rectangle rect = left.cs[left_cut].S[0];
  thread_t extension = left.cs[left_cut].bridge;
  int lastJ = rect.J;
  // Change in J at each step, and the J values themselves while they stay
  // positive. boundaryDeltas receives an entry for the terminating step too,
  // so it may be one longer than boundaryJs.
  vector<int> boundaryDeltas;
  vector<int> boundaryJs;
  for(int i = 0; i < boundary.size(); i++) {
    to_return.cs.push_back(right.cs[right_join + i].cs_shell());
    extension.push_back(to_return.cs[i].get_node());
    int new_J = rect.get_next_J(extension,graph);
    boundaryDeltas.push_back(new_J - lastJ);
    if(new_J <= 0) {
      break;
    }
    boundaryJs.push_back(new_J);
    if(boundary[i]->J - new_J > 0) {
      // The boundary rectangle has a larger J than the continuing strip, so
      // add a rectangle for the difference ahead of the continuing one.
      rectangle joiners;
      if(i > 0) {
        joiners.prev = 0;
        to_return.cs[i-1].S[0].next = 0;
      } else {
        joiners.prev = -1;
      }
      joiners.J = boundary[i]->J;
      joiners.I = boundary[i]->J - new_J;
      to_return.cs[i].S.push_back(joiners);
    }
    // new_J is known to be positive here, so the continuing rectangle is
    // always added.
    rectangle continuing;
    continuing.J = new_J;
    if(i > 0) {
      continuing.prev = to_return.cs[i-1].S.size() - 1;
      to_return.cs[i-1].S.back().next = to_return.cs[i].S.size();
    }
    to_return.cs[i].S.push_back(continuing);
    lastJ = new_J;
    extension = left.cs[left_cut].bridge;
  }
  // TODO: build first column from left.cs[left_cut].S
  vector<Edge> edges_out;
  vector<Edge> edges_in;
  for(int i = 1; i < boundaryJs.size(); i++) {
    edges_out = to_return.cs[i-1].get_last_node().is_reverse ? graph.edges_on_start(to_return.cs[i-1].get_last_node().node_id) : graph.edges_on_end(to_return.cs[i-1].get_last_node().node_id);
    edges_in = to_return.cs[i].get_node().is_reverse ? graph.edges_on_end(to_return.cs[i].get_node().node_id) : graph.edges_on_start(to_return.cs[i].get_node().node_id);
    to_return.binaryI(graph, extension, i, to_return.cs[i].S.back().prev, to_return.cs[i-1].S.size(), boundaryDeltas[i], 0, boundaryJs[i], 0, 0, edges_in, edges_out);
    // Each I is the drop in J to the next rectangle; the last I equals its J.
    // Written as a + 1 < size() so that an empty S cannot wrap the unsigned
    // bound around.
    for(size_t a = 0; a + 1 < to_return.cs[i].S.size(); a++) {
      to_return.cs[i].S[a].I = to_return.cs[i].S[a].J - to_return.cs[i].S[a+1].J;
    }
    if(!to_return.cs[i].S.empty()) {
      to_return.cs[i].S.back().I = to_return.cs[i].S.back().J;
    }
  }
  // Without this return, control would fall off the end of a value-returning
  // function.
  return to_return;
}
Beispiel #8
0
//TODO: translate this
// Walk thread `t` over the node indices interval.first..interval.second
// (inclusive), appending a cross-section to `cs` at every node where threads
// leave or join and bridging over the nodes where nothing changes. Widths are
// accumulated per cross-section and added to tot_width at the end.
//
// t:        the thread being walked.
// interval: first and last index into `t` to process.
// prevAs:   cross-section preceding the interval; its first rectangle seeds
//           the strip that is stepped forward at interval.first.
// graph:    index used for node lengths, node heights and rectangle extension.
//
// NOTE(review): reads t[i-1] for every i in the interval, so interval.first is
// assumed to be at least 1; also assumes `cs` and `prevAs.S` are non-empty on
// entry — confirm against callers.
void haplo_d::initialize_skeleton(thread_t& t, pair<int,int> interval, cross_section& prevAs, xg::XG& graph) {
  rectangle rect;
  int new_height = 0;
  int last_height = prevAs.S[0].J;
  // Both flags are tested on every iteration, so they must start out cleared.
  bool add_rectangle = false;
  bool add_A = false;
  //TODO: fix this
  int width = 0;
  for(int i = interval.first; i <= interval.second; i++) {
    // Count the number of base pairs since the last entry or exit node
    width += graph.node_length(t[i-1].node_id);
    new_height = graph.node_height(t[i]);
    if(cs.back().S.size() != 0) {
      // Pick the strip to step forward: the top rectangle of the preceding
      // cross-section on the first node, and of the latest cross-section
      // afterwards.
      if(i == interval.first) {
        rect = prevAs.S[0];
      } else {
        rect = cs.back().S[0];
      }
      rect.J = rect.get_next_J(t[i],graph); // step this strip forward
      // Did any threads leave?
      if(last_height > rect.J) {
        add_A = true;
      }
      // Are there any threads here which didn't come from the previous node?
      if(rect.J < new_height) {
        add_rectangle = true;
        add_A = true;
      }
      // This is an entry or exit node, add a cross-section to the vector of
      // cross-sections (which corresponds to the "A" set in the theory doc)
      if(add_A) {
        cs.back().width = width;
        width = 0;
        cs.push_back(cross_section(new_height,i,t[i]));
      } else {
        // This isn't a node where anything leaves or joins, let's skip over it
        cs.back().bridge.push_back(t[i]);
        for (size_t a = 0; a < cs.back().S.size(); a++) {
          cs.back().S[a].extend(t[i],graph);
        }
      }
      // This is an entry node; we also need a new rectangle corresponding to the
      // new strip. We need to do this *before* we populate since cross_sections
      // arrange rectangles newest -> oldest
      // NB that add_rectangle implies add_A
      if(add_rectangle) {
        rectangle new_rect;
        new_rect.extend(t[i],graph);
        new_rect.J = new_height;
        cs.back().height = new_rect.J;
        cs.back().S.push_back(new_rect);
        cs.back().S.back().I = new_rect.J - rect.J;
      }
      if(add_A) {
        int b = cs.size()-1;
        if(rect.J > 0) {
          cs[b].S.push_back(rect);
          cs[b].S.back().prev = 0;
          cs[b-1].S[0].next = cs[b].S.size()-1;
        }
      }
      last_height = new_height;
      add_A = false;
      add_rectangle = false;
    } else {
      // The latest cross-section holds no rectangles: close it off and start
      // a fresh one at this node.
      cs.back().width = width;
      width = 0;
      cs.push_back(cross_section(new_height,i,t[i]));
      if(new_height > 0) {
        rectangle new_rect;
        new_rect.extend(t[i],graph);
        new_rect.J = new_height;
        cs.back().height = new_rect.J;
        cs.back().S.push_back(new_rect);
        cs.back().S.back().I = new_rect.J - rect.J;
      }
    }
  }
  if(cs.size() == 1) {
    cs.back().width = width;
  }
  cs.back().width += graph.node_length(t.back().node_id) - 1;
  for(int i = 0; i < cs.size(); i++) {
    tot_width += cs[i].width;
  }
}
Beispiel #9
0
/// Build a PathIndex by walking `path` through the XG `index`.
///
/// Fills by_id (first start offset and orientation per node), by_start (node
/// side per start offset), node_occurrences (every visit per node),
/// last_node_length and sequence. Characters other than A, T, C, G and N at
/// the very start of the path are dropped, with a warning, before offsets are
/// computed.
PathIndex::PathIndex(const Path& path, const xg::XG& index) {
    // Collects the bases of the path as they are visited.
    std::stringstream seq_stream;

    // Offset along the path at which the next node visit begins.
    size_t path_base = 0;

    // Rank of the most recent first-time visit. Ranks must always go up.
    int64_t last_rank = -1;

    // The only characters tolerated at the front of the path.
    auto is_accepted_base = [](char c) {
        return c == 'A' || c == 'T' || c == 'C' || c == 'G' || c == 'N';
    };

    for (size_t i = 0; i < path.mapping_size(); i++) {
        auto& mapping = path.mapping(i);
        const auto node_id = mapping.position().node_id();
        const bool backward = mapping.position().is_reverse();

        if (by_id.count(node_id) == 0) {
            // First visit to this node: remember where and how it was entered.
            by_id[node_id] = std::make_pair(path_base, backward);
#ifdef debug
            #pragma omp critical (cerr)
            std::cerr << "Node " << mapping.position().node_id() << " rank " << mapping.rank()
                << " starts at base " << path_base << " with "
                << index.node_sequence(mapping.position().node_id()) << std::endl;
#endif

            // Ranks either increase strictly along the path or are all left
            // at zero (unset).
            assert(mapping.rank() > last_rank || (mapping.rank() == 0 && last_rank == 0));
            last_rank = mapping.rank();
        }

        // Record the node side that begins at this offset of the path...
        by_start[path_base] = NodeSide(node_id, backward);

        // ...and file that occurrence under the node's ID.
        node_occurrences[node_id].push_back(by_start.find(path_base));

        // Fetch the bases of the node.
        std::string node_sequence = index.node_sequence(node_id);

        // While nothing has been emitted yet, strip unacceptable characters
        // (like "X") from the front so that they do not count towards path
        // positions.
        // TODO: this is a hack to deal with the debruijn-brca1-k63 graph,
        // which leads with an X.
        while (path_base == 0 && !node_sequence.empty() && !is_accepted_base(node_sequence[0])) {
            #pragma omp critical (cerr)
            std::cerr << "Warning: dropping invalid leading character "
                << node_sequence[0] << " from node " << node_id
                << std::endl;

            node_sequence.erase(0, 1);
        }

        // Emit the node in the orientation in which the path visits it.
        seq_stream << (backward ? reverse_complement(node_sequence) : node_sequence);

        // Advance by the whole node (minus any dropped leading characters);
        // the path is assumed to cover it entirely.
        path_base += node_sequence.size();

        // TODO: handle leading bogus characters in calls on the first node.
    }

    // There is no following mapping from which to derive the final node's
    // length, so look it up directly; an empty path gets a length of 0.
    last_node_length = 0;
    if (path.mapping_size() > 0) {
        last_node_length = index.node_length(path.mapping(path.mapping_size() - 1).position().node_id());
    }

    // Freeze the collected bases into the reference sequence.
    sequence = seq_stream.str();

#ifdef debug
    // Announce progress.
    #pragma omp critical (cerr)
    std::cerr << "Traced " << path_base << " bp path." << std::endl;

    if (sequence.size() < 100) {
        #pragma omp critical (cerr)
        std::cerr << "Sequence: " << sequence << std::endl;
    }
#endif

}