// Simulate single-end sequencing from a fragment. void SequencingSimulator::simulateSingleEnd(TRead & seq, TQualities & quals, SequencingSimulationInfo & info, TFragment const & frag, MethylationLevels const * levels) { bool isForward; if (seqOptions->strands == SequencingOptions::BOTH) isForward = (pickRandomNumber(rng, seqan::Pdf<seqan::Uniform<int> >(0, 1)) == 1); else isForward = (seqOptions->strands == SequencingOptions::FORWARD); if (!seqOptions->bsSeqOptions.bsSimEnabled) { _simulateSingleEnd(seq, quals, info, frag, isForward); } else { SEQAN_ASSERT(levels); bool bsForward = isForward; // Re-pick strandedness of the BS-treated fragment. if (seqOptions->bsSeqOptions.bsProtocol != BSSeqOptions::DIRECTIONAL) bsForward = (pickRandomNumber(methRng, seqan::Pdf<seqan::Uniform<int> >(0, 1)) == 1); _simulateBSTreatment(methFrag, frag, *levels, !bsForward); _simulateSingleEnd(seq, quals, info, infix(methFrag, 0, length(methFrag)), isForward); } }
std::pair<int, int> PositionMap::toSmallVarInterval(int svBeginPos, int svEndPos) const { SEQAN_ASSERT(!overlapsWithBreakpoint(svBeginPos, svEndPos)); GenomicInterval gi = getGenomicInterval(svBeginPos); if (gi.kind == GenomicInterval::INSERTED) { // novel sequence, cannot be projected return std::make_pair(-1, -1); } if (gi.kind != GenomicInterval::INVERTED) { // forward return std::make_pair(gi.smallVarBeginPos + (svBeginPos - gi.svBeginPos), gi.smallVarBeginPos + (svEndPos - gi.svBeginPos)); } else { // reverse return std::make_pair(gi.smallVarBeginPos + (gi.svEndPos - svBeginPos), gi.smallVarBeginPos + (gi.svEndPos - svEndPos)); } return std::make_pair(-1, -1); // cannot reach here }
void VcfMaterializer::_appendToVariants(Variants & variants, seqan::VcfRecord const & vcfRecord) { // Compute maximal length of alternative. unsigned altLength = 0; seqan::StringSet<seqan::CharString> alts; strSplit(alts, vcfRecord.alt, seqan::EqualsChar<','>()); for (unsigned i = 0; i < length(alts); ++i) altLength = std::max(altLength, (unsigned)length(alts[i])); if (contains(vcfRecord.info, "SVTYPE")) // Structural Variant { StructuralVariantRecord svRecord; svRecord.rId = vcfRecord.rID; svRecord.pos = vcfRecord.beginPos + 1; // given with shift of -1 svRecord.haplotype = 0; SEQAN_ASSERT_EQ(length(alts), 1u); if (contains(vcfRecord.info, "SVTYPE=INS")) // Insertion { svRecord.kind = StructuralVariantRecord::INDEL; svRecord.size = getSVLen(vcfRecord.info); svRecord.seq = suffix(vcfRecord.alt, 1); } else if (contains(vcfRecord.info, "SVTYPE=DEL")) // Deletion { svRecord.kind = StructuralVariantRecord::INDEL; svRecord.size = getSVLen(vcfRecord.info); } else if (contains(vcfRecord.info, "SVTYPE=INV")) // Inversion { svRecord.kind = StructuralVariantRecord::INVERSION; svRecord.size = getSVLen(vcfRecord.info); } else if (contains(vcfRecord.info, "SVTYPE=DUP")) // Duplication { svRecord.kind = StructuralVariantRecord::DUPLICATION; svRecord.size = getSVLen(vcfRecord.info); std::pair<seqan::CharString, int> pos = getTargetPos(vcfRecord.info); unsigned idx = 0; if (!getIdByName(idx, contigNamesCache(context(vcfFileIn)), pos.first)) SEQAN_FAIL("Unknown sequence %s", toCString(pos.first)); svRecord.targetRId = idx; svRecord.targetPos = pos.second - 1; } else if (contains(vcfRecord.info, "SVTYPE=BND")) // Breakend (Must be Translocation) { SEQAN_FAIL("Unexpected 'SVTYPE=BND' at this place!"); } else { SEQAN_FAIL("ERROR: Unknown SVTYPE!\n"); } // Split the target variants. SEQAN_ASSERT_NOT(empty(vcfRecord.genotypeInfos)); seqan::DirectionIterator<seqan::CharString const, seqan::Input>::Type inputIter = directionIterator(vcfRecord.genotypeInfos[0], seqan::Input()); seqan::CharString buffer; svRecord.haplotype = 0; for (; !atEnd(inputIter); ++inputIter) if ((*inputIter == '|' || *inputIter == '/')) { if (!empty(buffer)) { unsigned idx = std::min(seqan::lexicalCast<unsigned>(buffer), 1u); if (idx != 0u) // if not == ref appendValue(variants.svRecords, svRecord); } svRecord.haplotype++; clear(buffer); } else { appendValue(buffer, *inputIter); } if (!empty(buffer)) { unsigned idx = std::min(seqan::lexicalCast<unsigned>(buffer), 1u); if (idx != 0u) // if not == ref appendValue(variants.svRecords, svRecord); } } else if (length(vcfRecord.ref) == 1u && altLength == 1u) // SNP { SnpRecord snpRecord; snpRecord.rId = vcfRecord.rID; snpRecord.pos = vcfRecord.beginPos; // Split the alternatives. seqan::StringSet<seqan::CharString> alternatives; strSplit(alternatives, vcfRecord.alt, seqan::EqualsChar<','>()); // Split the target variants. SEQAN_ASSERT_NOT(empty(vcfRecord.genotypeInfos)); seqan::DirectionIterator<seqan::CharString const, seqan::Input>::Type inputIter = directionIterator(vcfRecord.genotypeInfos[0], seqan::Input()); seqan::CharString buffer; snpRecord.haplotype = 0; for (; !atEnd(inputIter); ++inputIter) if ((*inputIter == '|' || *inputIter == '/')) { if (!empty(buffer)) { unsigned idx = std::min(seqan::lexicalCast<unsigned>(buffer), (unsigned)length(alternatives)); if (idx != 0u) // if not == ref { SEQAN_ASSERT_NOT(empty(alternatives[idx - 1])); snpRecord.to = alternatives[idx - 1][0]; appendValue(variants.snps, snpRecord); } } snpRecord.haplotype++; clear(buffer); } else { appendValue(buffer, *inputIter); } if (!empty(buffer)) { unsigned idx = std::min(seqan::lexicalCast<unsigned>(buffer), (unsigned)length(alternatives)); if (idx != 0u) // if not == ref { SEQAN_ASSERT_NOT(empty(alternatives[idx - 1])); snpRecord.to = alternatives[idx - 1][0]; appendValue(variants.snps, snpRecord); } } } else // Small Indel { SmallIndelRecord smallIndel; smallIndel.rId = vcfRecord.rID; smallIndel.pos = vcfRecord.beginPos + 1; SEQAN_ASSERT_NOT(contains(vcfRecord.alt, ",")); // only one alternative SEQAN_ASSERT((length(vcfRecord.alt) == 1u) != (length(vcfRecord.ref) == 1u)); // XOR smallIndel.haplotype = 0; if (length(vcfRecord.ref) == 1u) // insertion { smallIndel.seq = suffix(vcfRecord.alt, 1); smallIndel.size = length(smallIndel.seq); } else // deletion { smallIndel.size = -(int)(length(vcfRecord.ref) - 1); } // Split the target variants. SEQAN_ASSERT_NOT(empty(vcfRecord.genotypeInfos)); seqan::DirectionIterator<seqan::CharString const, seqan::Input>::Type inputIter = directionIterator(vcfRecord.genotypeInfos[0], seqan::Input()); seqan::CharString buffer; smallIndel.haplotype = 0; for (; !atEnd(inputIter); ++inputIter) if ((*inputIter == '|' || *inputIter == '/')) { if (!empty(buffer)) { unsigned idx = std::min(seqan::lexicalCast<unsigned>(buffer), 1u); if (idx != 0u) // if not == ref appendValue(variants.smallIndels, smallIndel); } smallIndel.haplotype++; clear(buffer); } else { appendValue(buffer, *inputIter); } if (!empty(buffer)) { unsigned idx = std::min(seqan::lexicalCast<unsigned>(buffer), 1u); if (idx != 0u) // if not == ref appendValue(variants.smallIndels, smallIndel); } } }