示例#1
0
// Diagnosing the quality encoding: ASCII code range covered by each known FASTQ quality format
//   SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS.....................................................
//   ..........................XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX......................
//   ...............................IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII......................
//   .................................JJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJ......................
//   LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL....................................................
//   !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
//   |                         |    |        |                              |                     |
//  33                        59   64       73                            104                   126 <- minValue/maxValue are raw ASCII codes on this scale
// S 0........................26...31.......40
// X                          -5....0........9.............................40
// I                                0........9.............................40
// J                                   3.....9.............................40
// L 0.2......................26...31........41
//
//  S - Sanger        Phred+33,  raw reads typically (0, 40)
//  X - Solexa        Solexa+64, raw reads typically (-5, 40)
//  I - Illumina 1.3+ Phred+64,  raw reads typically (0, 40)
//  J - Illumina 1.5+ Phred+64,  raw reads typically (3, 40) with 0=unused, 1=unused, 2=Read Segment Quality Control Indicator (bold)
//  L - Illumina 1.8+ Phred+33,  raw reads typically (0, 41)
// Guesses the quality encoding of the input FASTQ file from the range of raw
// quality character codes seen in its leading reads.
//
// Reads are taken from settings.inputUrl. For every read that has at least one
// quality character per base, the smallest and largest character code are
// tracked; both are then handed to DNAQuality::detectTypeByMinMaxQualityValues().
// Reads with fewer quality characters than bases are skipped and do not count
// towards the sampling limit.
//
// DNAQualityType_Sanger is the fallback value passed to CHECK when the iterator
// reports an error or the task is cancelled/failed (CHECK is presumably an
// early-return macro -- confirm against its definition).
DNAQualityType FastqQualityTrimTask::detectQualityType(){
    // The encoding is decided from at most this many usable reads.
    const int maxReadsToCheck = 1000;

    // Bounds start at the opposite ends of the printable range (33 = '!', 126 = '~'),
    // so the first inspected code replaces both of them. If no usable read is found
    // these inverted bounds are passed on unchanged.
    int maxValue = 33;
    int minValue = 126;
    FASTQIterator iter_qual(settings.inputUrl, stateInfo);
    CHECK(!stateInfo.hasError(), DNAQualityType_Sanger);

    int counter = 0;
    while (iter_qual.hasNext()) {
        CHECK(!stateInfo.isCoR(), DNAQualityType_Sanger);

        // ">=" rather than ">": the previous "> 1000" test let a 1001st read through.
        if (counter >= maxReadsToCheck) {
            break;
        }

        DNASequence dna = iter_qual.next();
        int seqLen = dna.length();
        if (seqLen > dna.quality.qualCodes.length()) {
            // Not enough quality characters for this read: ignore it.
            continue;
        }

        for (int pos = 0; pos < seqLen; pos++) {
            const int code = static_cast<int>(dna.quality.qualCodes.at(pos));
            maxValue = qMax(code, maxValue);
            minValue = qMin(code, minValue);
        }
        counter++;
    }
    return DNAQuality::detectTypeByMinMaxQualityValues(minValue, maxValue);
}
void QualityTrimTask::runStep(){
    int ncount = 0;
    int ycount = 0;

    QScopedPointer<IOAdapter> io  (IOAdapterUtils::open(settings.outDir + settings.outName, stateInfo, IOAdapterMode_Append));

    int quality = settings.customParameters.value(QUALITY_ID, 20).toInt();
    int minLen = settings.customParameters.value(LEN_ID, 0).toInt();
    bool bothEnds = settings.customParameters.value(BOTH_ID, false).toInt();

    FASTQIterator iter(settings.inputUrl);
    while(iter.hasNext()){
        if(stateInfo.isCoR()){
            return;
        }
        DNASequence dna = iter.next();
        QString comment = DNAInfo::getFastqComment(dna.info);
        int seqLen = dna.length();
        if(seqLen > dna.quality.qualCodes.length()){
            ncount++;
            continue;
        }else{
            int endPosition = seqLen-1;
            for (; endPosition>=0; endPosition--){
                if(dna.quality.getValue(endPosition) >= quality){
                    break;
                }
            }
            int beginPosition = 0;
            if (bothEnds) {
                for (; beginPosition<=endPosition; beginPosition++) {
                    if (dna.quality.getValue(beginPosition) >= quality) {
                        break;
                    }
                }
            }
            if(endPosition>=beginPosition && endPosition-beginPosition+1 >= minLen){
                DNASequence trimmed(dna.getName(), dna.seq.left(endPosition+1).mid(beginPosition), dna.alphabet);
                trimmed.quality = dna.quality;
                trimmed.quality.qualCodes = trimmed.quality.qualCodes.left(endPosition+1).mid(beginPosition);
                FastqFormat::writeEntry(trimmed.getName(), trimmed, io.data(), "Writing error", stateInfo, false);
                ycount++;
            }else{
                ncount++;
                continue;
            }
        }
    }

    algoLog.info(QString("Discarded by trimmer %1").arg(ncount));
    algoLog.info(QString("Accepted by trimmer %1").arg(ycount));
    algoLog.info(QString("Total by trimmer %1").arg(ncount + ycount));
}
示例#3
0
// Checks whether the annotation called annName covers seq well enough to count
// as "identical".
//
// seq      - sequence the annotation is compared against (only its length is used)
// annData  - annotations to search; only the FIRST one named annName is examined
// annName  - annotation name to look for
// identity - threshold, in percent, applied to the length ratio and, for BLAST
//            annotations, to the BLAST identity qualifier
//
// The matching annotation is evaluated only if its location is a single region:
//   * length ratio = region length * 100 / sequence length;
//   * if the ratio reaches the threshold and the annotation has no BLAST_IDENT
//     qualifier, the result is marked identical (IDENTICAL_YES);
//   * if it has one, the parsed identity must also reach the threshold; the
//     identity string then becomes IDENTICAL_YES followed by "\identity" and
//     "\gaps" ("\0" when there is no BLAST_GAPS qualifier).
// When the result is not identical but a length ratio was computed, "\ratio" is
// appended to the result's identity string.
GeneByGeneCompareResult GeneByGeneComparator::compareGeneAnnotation(const DNASequence& seq, const QList<SharedAnnotationData> &annData,
    const QString& annName, float identity)
{
    GeneByGeneCompareResult result;

    // -1 means "no length ratio computed".
    float maxIdentity = -1.0F;
    foreach (const SharedAnnotationData &adata, annData) {
        if (adata->name == annName) {
            U2Location location = adata->location;
            if (location->isSingleRegion()) {
                int reglen = location->regions.first().length;
                // Multiply in float: "reglen * 100" in int can overflow for very long regions.
                const float lenRatio = reglen * 100.0f / static_cast<float>(seq.length());
                maxIdentity = qMax(maxIdentity, lenRatio);
                if (lenRatio >= identity) { //check length ratio
                    QString ident = adata->findFirstQualifierValue(BLAST_IDENT);
                    if (!ident.isEmpty()) {
                        //create BLAST string  YES\identity\gaps
                        // -1 is treated as the "could not parse" value of parseBlastQual()
                        // (presumably its failure sentinel -- confirm against its definition).
                        const float blastIdent = parseBlastQual(ident);
                        if (blastIdent != -1.0f && blastIdent >= identity) {
                            result.identical = true;
                            result.identityString = GeneByGeneCompareResult::IDENTICAL_YES;
                            result.identityString.append(QString("\\%1").arg(blastIdent));
                            QString gaps = adata->findFirstQualifierValue(BLAST_GAPS);
                            if (!gaps.isEmpty()) {
                                const float blastGaps = parseBlastQual(gaps);
                                // Same sentinel as for the identity above; the former test
                                // against 1.0f dropped a real 1% gap value and let -1 through.
                                if (blastGaps != -1.0f) {
                                    result.identityString.append(QString("\\%1").arg(blastGaps));
                                }
                            } else {
                                result.identityString.append(QString("\\0"));
                            }
                        }
                    } else { //not a blast annotation
                        result.identical = true;
                        result.identityString = GeneByGeneCompareResult::IDENTICAL_YES;
                    }
                }
            }
            // Only the first annotation with the requested name is considered.
            break;
        }
    }

    if (!result.identical && maxIdentity != -1.0f) {
        result.identityString.append(QString("\\%1").arg(maxIdentity));
    }

    return result;
}