Ejemplo n.º 1
0
// Simulate single-end sequencing from a fragment.
void SequencingSimulator::simulateSingleEnd(TRead & seq, TQualities & quals, SequencingSimulationInfo & info,
                                            TFragment const & frag,
                                            MethylationLevels const * levels)
{
    bool isForward;
    if (seqOptions->strands == SequencingOptions::BOTH)
        isForward = (pickRandomNumber(rng, seqan::Pdf<seqan::Uniform<int> >(0, 1)) == 1);
    else
        isForward = (seqOptions->strands == SequencingOptions::FORWARD);

    if (!seqOptions->bsSeqOptions.bsSimEnabled)
    {
        _simulateSingleEnd(seq, quals, info, frag, isForward);
    }
    else
    {
        SEQAN_ASSERT(levels);
        bool bsForward = isForward;
        // Re-pick strandedness of the BS-treated fragment.
        if (seqOptions->bsSeqOptions.bsProtocol != BSSeqOptions::DIRECTIONAL)
            bsForward = (pickRandomNumber(methRng, seqan::Pdf<seqan::Uniform<int> >(0, 1)) == 1);
        _simulateBSTreatment(methFrag, frag, *levels, !bsForward);
        _simulateSingleEnd(seq, quals, info, infix(methFrag, 0, length(methFrag)), isForward);
    }
}
Ejemplo n.º 2
0
std::pair<int, int> PositionMap::toSmallVarInterval(int svBeginPos, int svEndPos) const
{
    SEQAN_ASSERT(!overlapsWithBreakpoint(svBeginPos, svEndPos));
    GenomicInterval gi = getGenomicInterval(svBeginPos);
    if (gi.kind == GenomicInterval::INSERTED)
    {
        // novel sequence, cannot be projected
        return std::make_pair(-1, -1);
    }
    if (gi.kind != GenomicInterval::INVERTED)
    {
        // forward
        return std::make_pair(gi.smallVarBeginPos + (svBeginPos - gi.svBeginPos),
                              gi.smallVarBeginPos + (svEndPos - gi.svBeginPos));
    }
    else
    {
        // reverse
        return std::make_pair(gi.smallVarBeginPos + (gi.svEndPos - svBeginPos),
                              gi.smallVarBeginPos + (gi.svEndPos - svEndPos));
    }

    return std::make_pair(-1, -1);  // cannot reach here
}
Ejemplo n.º 3
0
void VcfMaterializer::_appendToVariants(Variants & variants, seqan::VcfRecord const & vcfRecord)
{
    // Compute maximal length of alternative.
    unsigned altLength = 0;
    seqan::StringSet<seqan::CharString> alts;
    strSplit(alts, vcfRecord.alt, seqan::EqualsChar<','>());
    for (unsigned i = 0; i < length(alts); ++i)
        altLength = std::max(altLength, (unsigned)length(alts[i]));

    if (contains(vcfRecord.info, "SVTYPE"))  // Structural Variant
    {
        StructuralVariantRecord svRecord;
        svRecord.rId = vcfRecord.rID;
        svRecord.pos = vcfRecord.beginPos + 1;  // given with shift of -1
        svRecord.haplotype = 0;

        SEQAN_ASSERT_EQ(length(alts), 1u);

        if (contains(vcfRecord.info, "SVTYPE=INS"))  // Insertion
        {
            svRecord.kind = StructuralVariantRecord::INDEL;
            svRecord.size = getSVLen(vcfRecord.info);
            svRecord.seq = suffix(vcfRecord.alt, 1);
        }
        else if (contains(vcfRecord.info, "SVTYPE=DEL"))  // Deletion
        {
            svRecord.kind = StructuralVariantRecord::INDEL;
            svRecord.size = getSVLen(vcfRecord.info);
        }
        else if (contains(vcfRecord.info, "SVTYPE=INV"))  // Inversion
        {
            svRecord.kind = StructuralVariantRecord::INVERSION;
            svRecord.size = getSVLen(vcfRecord.info);
        }
        else if (contains(vcfRecord.info, "SVTYPE=DUP"))  // Duplication
        {
            svRecord.kind = StructuralVariantRecord::DUPLICATION;
            svRecord.size = getSVLen(vcfRecord.info);
            std::pair<seqan::CharString, int> pos = getTargetPos(vcfRecord.info);
            unsigned idx = 0;
            if (!getIdByName(idx, contigNamesCache(context(vcfFileIn)), pos.first))
                SEQAN_FAIL("Unknown sequence %s", toCString(pos.first));
            svRecord.targetRId = idx;
            svRecord.targetPos = pos.second - 1;
        }
        else if (contains(vcfRecord.info, "SVTYPE=BND"))  // Breakend (Must be Translocation)
        {
            SEQAN_FAIL("Unexpected 'SVTYPE=BND' at this place!");
        }
        else
        {
            SEQAN_FAIL("ERROR: Unknown SVTYPE!\n");
        }

        // Split the target variants.
        SEQAN_ASSERT_NOT(empty(vcfRecord.genotypeInfos));
        seqan::DirectionIterator<seqan::CharString const, seqan::Input>::Type inputIter =
                directionIterator(vcfRecord.genotypeInfos[0], seqan::Input());
        seqan::CharString buffer;
        svRecord.haplotype = 0;
        for (; !atEnd(inputIter); ++inputIter)
            if ((*inputIter == '|' || *inputIter == '/'))
            {
                if (!empty(buffer))
                {
                    unsigned idx = std::min(seqan::lexicalCast<unsigned>(buffer), 1u);
                    if (idx != 0u)  // if not == ref
                        appendValue(variants.svRecords, svRecord);
                }
                svRecord.haplotype++;
                clear(buffer);
            }
            else
            {
                appendValue(buffer, *inputIter);
            }
        if (!empty(buffer))
        {
            unsigned idx = std::min(seqan::lexicalCast<unsigned>(buffer), 1u);
            if (idx != 0u)  // if not == ref
                appendValue(variants.svRecords, svRecord);
        }
    }
    else if (length(vcfRecord.ref) == 1u && altLength == 1u)  // SNP
    {
        SnpRecord snpRecord;
        snpRecord.rId = vcfRecord.rID;
        snpRecord.pos = vcfRecord.beginPos;

        // Split the alternatives.
        seqan::StringSet<seqan::CharString> alternatives;
        strSplit(alternatives, vcfRecord.alt, seqan::EqualsChar<','>());

        // Split the target variants.
        SEQAN_ASSERT_NOT(empty(vcfRecord.genotypeInfos));
        seqan::DirectionIterator<seqan::CharString const, seqan::Input>::Type inputIter =
                directionIterator(vcfRecord.genotypeInfos[0], seqan::Input());
        seqan::CharString buffer;
        snpRecord.haplotype = 0;
        for (; !atEnd(inputIter); ++inputIter)
            if ((*inputIter == '|' || *inputIter == '/'))
            {
                if (!empty(buffer))
                {
                    unsigned idx = std::min(seqan::lexicalCast<unsigned>(buffer),
                                            (unsigned)length(alternatives));
                    if (idx != 0u)  // if not == ref
                    {
                        SEQAN_ASSERT_NOT(empty(alternatives[idx - 1]));
                        snpRecord.to = alternatives[idx - 1][0];
                        appendValue(variants.snps, snpRecord);
                    }
                }
                snpRecord.haplotype++;
                clear(buffer);
            }
            else
            {
                appendValue(buffer, *inputIter);
            }
        if (!empty(buffer))
        {
            unsigned idx = std::min(seqan::lexicalCast<unsigned>(buffer),
                                    (unsigned)length(alternatives));
            if (idx != 0u)  // if not == ref
            {
                SEQAN_ASSERT_NOT(empty(alternatives[idx - 1]));
                snpRecord.to = alternatives[idx - 1][0];
                appendValue(variants.snps, snpRecord);
            }
        }
    }
    else  // Small Indel
    {
        SmallIndelRecord smallIndel;
        smallIndel.rId = vcfRecord.rID;
        smallIndel.pos = vcfRecord.beginPos + 1;

        SEQAN_ASSERT_NOT(contains(vcfRecord.alt, ","));  // only one alternative
        SEQAN_ASSERT((length(vcfRecord.alt) == 1u) != (length(vcfRecord.ref) == 1u));  // XOR

        smallIndel.haplotype = 0;
        if (length(vcfRecord.ref) == 1u)  // insertion
        {
            smallIndel.seq = suffix(vcfRecord.alt, 1);
            smallIndel.size = length(smallIndel.seq);
        }
        else  // deletion
        {
            smallIndel.size = -(int)(length(vcfRecord.ref) - 1);
        }

        // Split the target variants.
        SEQAN_ASSERT_NOT(empty(vcfRecord.genotypeInfos));
        seqan::DirectionIterator<seqan::CharString const, seqan::Input>::Type inputIter =
                directionIterator(vcfRecord.genotypeInfos[0], seqan::Input());
        seqan::CharString buffer;
        smallIndel.haplotype = 0;
        for (; !atEnd(inputIter); ++inputIter)
            if ((*inputIter == '|' || *inputIter == '/'))
            {
                if (!empty(buffer))
                {
                    unsigned idx = std::min(seqan::lexicalCast<unsigned>(buffer), 1u);
                    if (idx != 0u)  // if not == ref
                        appendValue(variants.smallIndels, smallIndel);
                }
                smallIndel.haplotype++;
                clear(buffer);
            }
            else
            {
                appendValue(buffer, *inputIter);
            }
        if (!empty(buffer))
        {
            unsigned idx = std::min(seqan::lexicalCast<unsigned>(buffer), 1u);
            if (idx != 0u)  // if not == ref
                appendValue(variants.smallIndels, smallIndel);
        }
    }
}