// diagnose // SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS..................................................... // ..........................XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX...................... // ...............................IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII...................... // .................................JJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJ...................... // LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL.................................................... // !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ // | | | | | | // 33 59 64 73 104 126 <- maxValue is value from here // S 0........................26...31.......40 // X -5....0........9.............................40 // I 0........9.............................40 // J 3.....9.............................40 // L 0.2......................26...31........41 // // S - Sanger Phred+33, raw reads typically (0, 40) // X - Solexa Solexa+64, raw reads typically (-5, 40) // I - Illumina 1.3+ Phred+64, raw reads typically (0, 40) // J - Illumina 1.5+ Phred+64, raw reads typically (3, 40) with 0=unused, 1=unused, 2=Read Segment Quality Control Indicator (bold) // L - Illumina 1.8+ Phred+33, raw reads typically (0, 41) DNAQualityType FastqQualityTrimTask::detectQualityType(){ int maxValue = 33; int minValue = 126; FASTQIterator iter_qual(settings.inputUrl, stateInfo); CHECK(!stateInfo.hasError(), DNAQualityType_Sanger); int counter = 0; while (iter_qual.hasNext()) { CHECK(!stateInfo.isCoR(), DNAQualityType_Sanger); if (counter > 1000) { // check only first 1000 reads in file break; } DNASequence dna = iter_qual.next(); int seqLen = dna.length(); if (seqLen > dna.quality.qualCodes.length()) { continue; } else { for (int pos = 0; pos <= seqLen - 1; pos++) { maxValue = qMax(static_cast<int>(dna.quality.qualCodes.at(pos)), maxValue); minValue = qMin(static_cast<int>(dna.quality.qualCodes.at(pos)), minValue); } } counter++; } return 
DNAQuality::detectTypeByMinMaxQualityValues(minValue, maxValue); }
void QualityTrimTask::runStep(){ int ncount = 0; int ycount = 0; QScopedPointer<IOAdapter> io (IOAdapterUtils::open(settings.outDir + settings.outName, stateInfo, IOAdapterMode_Append)); int quality = settings.customParameters.value(QUALITY_ID, 20).toInt(); int minLen = settings.customParameters.value(LEN_ID, 0).toInt(); bool bothEnds = settings.customParameters.value(BOTH_ID, false).toInt(); FASTQIterator iter(settings.inputUrl); while(iter.hasNext()){ if(stateInfo.isCoR()){ return; } DNASequence dna = iter.next(); QString comment = DNAInfo::getFastqComment(dna.info); int seqLen = dna.length(); if(seqLen > dna.quality.qualCodes.length()){ ncount++; continue; }else{ int endPosition = seqLen-1; for (; endPosition>=0; endPosition--){ if(dna.quality.getValue(endPosition) >= quality){ break; } } int beginPosition = 0; if (bothEnds) { for (; beginPosition<=endPosition; beginPosition++) { if (dna.quality.getValue(beginPosition) >= quality) { break; } } } if(endPosition>=beginPosition && endPosition-beginPosition+1 >= minLen){ DNASequence trimmed(dna.getName(), dna.seq.left(endPosition+1).mid(beginPosition), dna.alphabet); trimmed.quality = dna.quality; trimmed.quality.qualCodes = trimmed.quality.qualCodes.left(endPosition+1).mid(beginPosition); FastqFormat::writeEntry(trimmed.getName(), trimmed, io.data(), "Writing error", stateInfo, false); ycount++; }else{ ncount++; continue; } } } algoLog.info(QString("Discarded by trimmer %1").arg(ncount)); algoLog.info(QString("Accepted by trimmer %1").arg(ycount)); algoLog.info(QString("Total by trimmer %1").arg(ncount + ycount)); }
// Decides whether the annotation named annName marks the gene as present in seq.
//
// Only the FIRST annotation in annData whose name equals annName is examined;
// the loop stops after it whether or not it qualifies. The annotation must be
// a single region. Its length, as a percentage of the sequence length, must
// reach 'identity'. If it carries a BLAST_IDENT qualifier, the parsed identity
// must reach 'identity' too; without that qualifier the length test alone is
// enough.
//
// On success result.identical is set and result.identityString becomes
// IDENTICAL_YES, followed for BLAST annotations by "\<identity>" and
// "\<gaps>" ("\0" when there is no BLAST_GAPS qualifier). Otherwise, if a
// length ratio was computed, "\<ratio>" is appended to the identity string
// the result was constructed with.
//
// seq       - sequence the annotations belong to
// annData   - annotations to search
// annName   - annotation name to look for
// identity  - threshold, in percent, for both length ratio and BLAST identity
GeneByGeneCompareResult GeneByGeneComparator::compareGeneAnnotation(const DNASequence& seq, const QList<SharedAnnotationData> &annData, const QString& annName, float identity) {
    GeneByGeneCompareResult result;
    float maxIdentity = -1.0F;  // -1 means "no length ratio computed"

    foreach (const SharedAnnotationData &adata, annData) {
        if (adata->name != annName) {
            continue;
        }

        U2Location location = adata->location;
        if (location->isSingleRegion()) {
            const int reglen = location->regions.first().length;
            // Multiply in floating point: "reglen * 100" as int overflows for long regions.
            const float lenRatio = reglen * 100.0f / static_cast<float>(seq.length());
            maxIdentity = qMax(maxIdentity, lenRatio);

            if (lenRatio >= identity) {  // check length ratio
                const QString ident = adata->findFirstQualifierValue(BLAST_IDENT);
                if (ident.isEmpty()) {
                    // not a blast annotation
                    result.identical = true;
                    result.identityString = GeneByGeneCompareResult::IDENTICAL_YES;
                } else {
                    // create BLAST string YES/identity/gaps
                    // NOTE(review): -1 is treated as parseBlastQual()'s "could not parse"
                    // marker, as the original identity check did - confirm in its definition.
                    const float blastIdent = parseBlastQual(ident);
                    if (blastIdent != -1.0f && blastIdent >= identity) {
                        result.identical = true;
                        result.identityString = GeneByGeneCompareResult::IDENTICAL_YES;
                        result.identityString.append(QString("\\%1").arg(blastIdent));

                        const QString gaps = adata->findFirstQualifierValue(BLAST_GAPS);
                        if (gaps.isEmpty()) {
                            result.identityString.append(QString("\\0"));
                        } else {
                            const float blastGaps = parseBlastQual(gaps);
                            // Was compared with 1.0f, which printed "\-1" for unparsable
                            // values and dropped a genuine value of 1.
                            if (blastGaps != -1.0f) {
                                result.identityString.append(QString("\\%1").arg(blastGaps));
                            }
                        }
                    }
                }
            }
        }
        // Only the first annotation with a matching name is considered.
        break;
    }

    if (!result.identical && maxIdentity != -1.0f) {
        result.identityString.append(QString("\\%1").arg(maxIdentity));
    }
    return result;
}