// Construct the extension string corresponding to the path std::string SGWalk::getString(SGWalkType type) const { std::string out; if(type == SGWT_START_TO_END || type == SGWT_INTERNAL) { out.append(m_pStartVertex->getSeq().toString()); } // Determine if the string should go to the end of the last vertex // in the path size_t stop = m_edges.size(); // The first edge is always in correct frame of reference // so the comp is EC_SAME. This variable tracks where the // string that is being added is different from the starting sequence // and needs to be flipped EdgeComp currComp = EC_SAME; // If the walk direction is antisense, we reverse every component and then // reverse the entire string to generate the final string bool reverseAll = !m_edges.empty() && m_edges[0]->getDir() == ED_ANTISENSE; if(reverseAll) out = reverse(out); for(size_t i = 0; i < stop; ++i) { Edge* pYZ = m_edges[i]; // Append in the extension string std::string edge_str = pYZ->getLabel(); assert(edge_str.size() != 0); if(currComp == EC_REVERSE) edge_str = reverseComplement(edge_str); if(reverseAll) edge_str = reverse(edge_str); // Calculate the next comp, between X and Z EdgeComp ecYZ = pYZ->getComp(); EdgeComp ecXZ; if(ecYZ == EC_SAME) ecXZ = currComp; else ecXZ = !currComp; out.append(edge_str); currComp = ecXZ; } // If we want the internal portion of the string (which does not contain the endpoints // perform the truncation now. This needs to be done before the reversal. if(type == SGWT_INTERNAL) { Edge* pFirstEdge = getFirstEdge(); Edge* pLastEdge = getLastEdge(); if(pFirstEdge == NULL || pLastEdge == NULL) { out.clear(); } else { Vertex* pStart = m_pStartVertex; Vertex* pLast = getLastVertex(); int start = pStart->getSeqLen() - pFirstEdge->getMatchLength(); int end = out.size() - (pLast->getSeqLen() - pLastEdge->getMatchLength()); if(end <= start) out.clear(); else { std::string ss = out.substr(start, end - start); out = ss; } } } if(out.empty()) std::cout << "No output for walk: " << pathSignature() << "\n"; if(reverseAll) out = reverse(out); // truncate return out; }
// Construct the extension string corresponding to the path std::string SGWalk::getString(SGWalkType type, SGWalkVertexPlacementVector* pPlacementVector) const { std::string out; // Append the full length of the starting vertex to the walk if(type == SGWT_START_TO_END || type == SGWT_INTERNAL) { out.append(m_pStartVertex->getSeq().toString()); // Add the first record to the placement vector if required if(pPlacementVector != NULL) { SGWalkVertexPlacement firstPlace; firstPlace.pVertex = m_pStartVertex; firstPlace.position = 0; // 0-based coordinates firstPlace.isRC = false; pPlacementVector->push_back(firstPlace); } } // Determine if the string should go to the end of the last vertex // in the path size_t stop = m_edges.size(); // The first edge is always in correct frame of reference // so the comp is EC_SAME. This variable tracks where the // string that is being added is different from the starting sequence // and needs to be flipped EdgeComp currComp = EC_SAME; // If the walk direction is antisense, we reverse every component and then // reverse the entire string to generate the final string bool reverseAll = !m_edges.empty() && m_edges[0]->getDir() == ED_ANTISENSE; if(reverseAll) out = reverse(out); for(size_t i = 0; i < stop; ++i) { Edge* pYZ = m_edges[i]; // Append in the extension string std::string edge_str = pYZ->getLabel(); assert(edge_str.size() != 0); // Determine whether this node is reverse complement wrt the string // we are building if(currComp == EC_REVERSE) edge_str = reverseComplement(edge_str); if(reverseAll) edge_str = reverse(edge_str); // Calculate the next comp, between X and Z EdgeComp ecYZ = pYZ->getComp(); EdgeComp ecXZ; if(ecYZ == EC_SAME) ecXZ = currComp; else ecXZ = !currComp; out.append(edge_str); // Add this record into the placement vector if(pPlacementVector != NULL) { SGWalkVertexPlacement placement; placement.pVertex = pYZ->getEnd(); placement.isRC = ecXZ == EC_REVERSE; placement.position = out.size() - pYZ->getEnd()->getSeqLen(); pPlacementVector->push_back(placement); } currComp = ecXZ; } // If we want the internal portion of the string (which does not contain the endpoints // perform the truncation now. This needs to be done before the reversal. if(type == SGWT_INTERNAL) { if(pPlacementVector != NULL) { std::cerr << "Error: Vertex placement not supported for SGWT_INTERNAL walk types\n"; exit(EXIT_FAILURE); } Edge* pFirstEdge = getFirstEdge(); Edge* pLastEdge = getLastEdge(); if(pFirstEdge == NULL || pLastEdge == NULL) { out.clear(); } else { Vertex* pStart = m_pStartVertex; Vertex* pLast = getLastVertex(); int start = pStart->getSeqLen() - pFirstEdge->getMatchLength(); int end = out.size() - (pLast->getSeqLen() - pLastEdge->getMatchLength()); if(end <= start) out.clear(); else { std::string ss = out.substr(start, end - start); out = ss; } } } if(out.empty()) std::cout << "No output for walk: " << pathSignature() << "\n"; if(reverseAll) { out = reverse(out); // Reverse the placement vector too, including flipping the alignment coordinates if(pPlacementVector != NULL) { std::reverse(pPlacementVector->begin(), pPlacementVector->end()); for(size_t i = 0; i < pPlacementVector->size(); ++i) { SGWalkVertexPlacement& item = pPlacementVector->at(i); item.position = out.size() - item.position - item.pVertex->getSeqLen(); } } } return out; }