// Builds a fastq record from a sequence and its quality scores.
//   s - sequence supplying the name, comment and unaligned bases
//   q - quality scores; its name must equal s.getName()
//   f - format label, stored as-is in `format`
// On a name mismatch an error is logged, m->control_pressed is set, and the
// record fields (name, comment, sequence, scores, scoreString) are not filled.
FastqRead::FastqRead(Sequence s, QualityScores q, string f) {
	try {
		m = MothurOut::getInstance();
		format = f;

		//fill convert table - goes from solexa to sanger. Used fq_all2std.pl as a reference.
		//one entry per solexa value in [-64, 64]; both tables receive the same converted score
		for (int solexa = -64; solexa < 65; solexa++) {
			int sanger = (int)(33 + 10*log(1+pow(10,(solexa/10.0)))/log(10)+0.499);
			convertTable.push_back((char) sanger);
			convertBackTable.push_back(sanger);
		}

		if (s.getName() == q.getName()) {
			name = s.getName();
			comment = s.getComment();
			sequence = s.getUnaligned();
			scores = q.getScores();
			scoreString = convertQual(scores);
		}
		else {
			m->mothurOut("[ERROR]: sequence name does not match quality score name. Cannot construct fastq object.\n");
			m->control_pressed = true;
		}
	}
	catch(exception& e) {
		m->errorOut(e, "FastqRead", "FastqRead");
		exit(1);
	}
}
/********************************************************************/ TrimOligos::~TrimOligos() {} //*******************************************************************/ int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){ try { string rawSequence = seq.getUnaligned(); int success = bdiffs + 1; //guilty until proven innocent //can you find the barcode for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){ string oligo = it->first; if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out break; } if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){ group = it->second; seq.setUnaligned(rawSequence.substr(oligo.length())); if(qual.getName() != ""){ qual.trimQScores(oligo.length(), -1); } success = 0; break; } } //if you found the barcode or if you don't want to allow for diffs if ((bdiffs == 0) || (success == 0)) { return success; } else { //try aligning and see if you can find it int maxLength = 0; Alignment* alignment; if (barcodes.size() > 0) { map<string,int>::iterator it=barcodes.begin(); for(it;it!=barcodes.end();it++){ if(it->first.length() > maxLength){ maxLength = it->first.length(); } } alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1)); }else{ alignment = NULL; } //can you find the barcode int minDiff = 1e6; int minCount = 1; int minGroup = -1; int minPos = 0; for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){ string oligo = it->first; // int length = oligo.length(); if(rawSequence.length() < maxLength){ //let's just assume that the barcodes are the same length success = bdiffs + 10; break; } //use needleman to align first barcode.length()+numdiffs of sequence to each barcode alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs)); oligo = alignment->getSeqAAln(); string temp = alignment->getSeqBAln(); int alnLength = 
oligo.length(); for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } } oligo = oligo.substr(0,alnLength); temp = temp.substr(0,alnLength); int numDiff = countDiffs(oligo, temp); if(numDiff < minDiff){ minDiff = numDiff; minCount = 1; minGroup = it->second; minPos = 0; for(int i=0;i<alnLength;i++){ if(temp[i] != '-'){ minPos++; } } } else if(numDiff == minDiff){ minCount++; } } if(minDiff > bdiffs) { success = minDiff; } //no good matches else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes else{ //use the best match group = minGroup; seq.setUnaligned(rawSequence.substr(minPos)); if(qual.getName() != ""){ qual.trimQScores(minPos, -1); } success = minDiff; } if (alignment != NULL) { delete alignment; } } return success; } catch(exception& e) { m->errorOut(e, "TrimOligos", "stripBarcode"); exit(1); } } //*******************************************************************/ int TrimOligos::stripBarcode(Sequence& seq, int& group){ try { string rawSequence = seq.getUnaligned(); int success = bdiffs + 1; //guilty until proven innocent //can you find the barcode for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){ string oligo = it->first; if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out break; } if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){ group = it->second; seq.setUnaligned(rawSequence.substr(oligo.length())); success = 0; break; } } //if you found the barcode or if you don't want to allow for diffs if ((bdiffs == 0) || (success == 0)) { return success; } else { //try aligning and see if you can find it int maxLength = 0; Alignment* alignment; if (barcodes.size() > 0) { map<string,int>::iterator it=barcodes.begin(); for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){ if(it->first.length() > 
maxLength){ maxLength = it->first.length(); } } alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1)); }else{ alignment = NULL; } //can you find the barcode int minDiff = 1e6; int minCount = 1; int minGroup = -1; int minPos = 0; for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){ string oligo = it->first; // int length = oligo.length(); if(rawSequence.length() < maxLength){ //let's just assume that the barcodes are the same length success = bdiffs + 10; break; } //use needleman to align first barcode.length()+numdiffs of sequence to each barcode alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs)); oligo = alignment->getSeqAAln(); string temp = alignment->getSeqBAln(); int alnLength = oligo.length(); for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } } oligo = oligo.substr(0,alnLength); temp = temp.substr(0,alnLength); int numDiff = countDiffs(oligo, temp); if(numDiff < minDiff){ minDiff = numDiff; minCount = 1; minGroup = it->second; minPos = 0; for(int i=0;i<alnLength;i++){ if(temp[i] != '-'){ minPos++; } } } else if(numDiff == minDiff){ minCount++; } } if(minDiff > bdiffs) { success = minDiff; } //no good matches else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes else{ //use the best match group = minGroup; seq.setUnaligned(rawSequence.substr(minPos)); success = minDiff; } if (alignment != NULL) { delete alignment; } } return success; } catch(exception& e) { m->errorOut(e, "TrimOligos", "stripBarcode"); exit(1); } } //********************************************************************/ int TrimOligos::stripForward(Sequence& seq, int& group){ try { string rawSequence = seq.getUnaligned(); int success = pdiffs + 1; //guilty until proven innocent //can you find the primer for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){ string oligo = it->first; if(rawSequence.length() < oligo.length()){ //let's 
just assume that the primers are the same length success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out break; } if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){ group = it->second; seq.setUnaligned(rawSequence.substr(oligo.length())); success = 0; break; } } //if you found the barcode or if you don't want to allow for diffs if ((pdiffs == 0) || (success == 0)) { return success; } else { //try aligning and see if you can find it int maxLength = 0; Alignment* alignment; if (primers.size() > 0) { map<string,int>::iterator it=primers.begin(); for(it;it!=primers.end();it++){ if(it->first.length() > maxLength){ maxLength = it->first.length(); } } alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1)); }else{ alignment = NULL; } //can you find the barcode int minDiff = 1e6; int minCount = 1; int minGroup = -1; int minPos = 0; for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){ string oligo = it->first; // int length = oligo.length(); if(rawSequence.length() < maxLength){ success = pdiffs + 100; break; } //use needleman to align first barcode.length()+numdiffs of sequence to each barcode alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs)); oligo = alignment->getSeqAAln(); string temp = alignment->getSeqBAln(); int alnLength = oligo.length(); for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } } oligo = oligo.substr(0,alnLength); temp = temp.substr(0,alnLength); int numDiff = countDiffs(oligo, temp); if(numDiff < minDiff){ minDiff = numDiff; minCount = 1; minGroup = it->second; minPos = 0; for(int i=0;i<alnLength;i++){ if(temp[i] != '-'){ minPos++; } } } else if(numDiff == minDiff){ minCount++; } } if(minDiff > pdiffs) { success = minDiff; } //no good matches else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers else{ //use the best match group = minGroup; 
seq.setUnaligned(rawSequence.substr(minPos)); success = minDiff; } if (alignment != NULL) { delete alignment; } } return success; } catch(exception& e) { m->errorOut(e, "TrimOligos", "stripForward"); exit(1); } } //*******************************************************************/ int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group){ try { string rawSequence = seq.getUnaligned(); int success = pdiffs + 1; //guilty until proven innocent //can you find the primer for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){ string oligo = it->first; if(rawSequence.length() < oligo.length()){ //let's just assume that the primers are the same length success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out break; } if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){ group = it->second; seq.setUnaligned(rawSequence.substr(oligo.length())); if(qual.getName() != ""){ qual.trimQScores(oligo.length(), -1); } success = 0; break; } } //if you found the barcode or if you don't want to allow for diffs if ((pdiffs == 0) || (success == 0)) { return success; } else { //try aligning and see if you can find it int maxLength = 0; Alignment* alignment; if (primers.size() > 0) { map<string,int>::iterator it=primers.begin(); for(it;it!=primers.end();it++){ if(it->first.length() > maxLength){ maxLength = it->first.length(); } } alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1)); }else{ alignment = NULL; } //can you find the barcode int minDiff = 1e6; int minCount = 1; int minGroup = -1; int minPos = 0; for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){ string oligo = it->first; // int length = oligo.length(); if(rawSequence.length() < maxLength){ success = pdiffs + 100; break; } //use needleman to align first barcode.length()+numdiffs of sequence to each barcode alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs)); oligo = alignment->getSeqAAln(); string 
temp = alignment->getSeqBAln(); int alnLength = oligo.length(); for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } } oligo = oligo.substr(0,alnLength); temp = temp.substr(0,alnLength); int numDiff = countDiffs(oligo, temp); if(numDiff < minDiff){ minDiff = numDiff; minCount = 1; minGroup = it->second; minPos = 0; for(int i=0;i<alnLength;i++){ if(temp[i] != '-'){ minPos++; } } } else if(numDiff == minDiff){ minCount++; } } if(minDiff > pdiffs) { success = minDiff; } //no good matches else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers else{ //use the best match group = minGroup; seq.setUnaligned(rawSequence.substr(minPos)); if(qual.getName() != ""){ qual.trimQScores(minPos, -1); } success = minDiff; } if (alignment != NULL) { delete alignment; } } return success; } catch(exception& e) { m->errorOut(e, "TrimOligos", "stripForward"); exit(1); } } //******************************************************************/ bool TrimOligos::stripReverse(Sequence& seq, QualityScores& qual){ try { string rawSequence = seq.getUnaligned(); bool success = 0; //guilty until proven innocent for(int i=0;i<revPrimer.size();i++){ string oligo = revPrimer[i]; if(rawSequence.length() < oligo.length()){ success = 0; break; } if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){ seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length())); if(qual.getName() != ""){ qual.trimQScores(-1, rawSequence.length()-oligo.length()); } success = 1; break; } } return success; } catch(exception& e) { m->errorOut(e, "TrimOligos", "stripReverse"); exit(1); } }
int ReverseSeqsCommand::execute(){ try{ if (abort == true) { if (calledHelp) { return 0; } return 2; } string fastaReverseFileName; if(fastaFileName != ""){ ifstream inFASTA; m->openInputFile(fastaFileName, inFASTA); ofstream outFASTA; string tempOutputDir = outputDir; if (outputDir == "") { tempOutputDir += m->hasPath(fastaFileName); } //if user entered a file with a path then preserve it map<string, string> variables; variables["[filename]"] = tempOutputDir + m->getRootName(m->getSimpleName(fastaFileName)); variables["[extension]"] = m->getExtension(fastaFileName); fastaReverseFileName = getOutputFileName("fasta", variables); m->openOutputFile(fastaReverseFileName, outFASTA); while(!inFASTA.eof()){ if (m->control_pressed) { inFASTA.close(); outFASTA.close(); m->mothurRemove(fastaReverseFileName); return 0; } Sequence currSeq(inFASTA); m->gobble(inFASTA); if (currSeq.getName() != "") { currSeq.reverseComplement(); currSeq.printSequence(outFASTA); } } inFASTA.close(); outFASTA.close(); outputNames.push_back(fastaReverseFileName); outputTypes["fasta"].push_back(fastaReverseFileName); } string qualReverseFileName; if(qualFileName != ""){ QualityScores currQual; ifstream inQual; m->openInputFile(qualFileName, inQual); ofstream outQual; string tempOutputDir = outputDir; if (outputDir == "") { tempOutputDir += m->hasPath(qualFileName); } //if user entered a file with a path then preserve it map<string, string> variables; variables["[filename]"] = tempOutputDir + m->getRootName(m->getSimpleName(qualFileName)); variables["[extension]"] = m->getExtension(qualFileName); string qualReverseFileName = getOutputFileName("qfile", variables); m->openOutputFile(qualReverseFileName, outQual); while(!inQual.eof()){ if (m->control_pressed) { inQual.close(); outQual.close(); m->mothurRemove(qualReverseFileName); return 0; } currQual = QualityScores(inQual); m->gobble(inQual); currQual.flipQScores(); currQual.printQScores(outQual); } inQual.close(); outQual.close(); 
outputNames.push_back(qualReverseFileName); outputTypes["qfile"].push_back(qualReverseFileName); } if (m->control_pressed) { m->mothurRemove(qualReverseFileName); m->mothurRemove(fastaReverseFileName); return 0; } //set fasta file as new current fastafile string current = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); } } itTypes = outputTypes.find("qfile"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); } } m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for(int i=0;i<outputNames.size();i++){ m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } return 0; } catch(exception& e) { m->errorOut(e, "ReverseSeqsCommand", "execute"); exit(1); } }
/********************************************************************/ TrimOligos::~TrimOligos() {} //*******************************************************************/ int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){ try { string rawSequence = seq.getUnaligned(); int success = bdiffs + 1; //guilty until proven innocent //can you find the barcode for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){ string oligo = it->first; if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out break; } if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){ group = it->second; seq.setUnaligned(rawSequence.substr(oligo.length())); if(qual.getName() != ""){ qual.trimQScores(oligo.length(), -1); } success = 0; break; } } //if you found the barcode or if you don't want to allow for diffs if ((bdiffs == 0) || (success == 0)) { return success; } else { //try aligning and see if you can find it int maxLength = 0; Alignment* alignment; if (barcodes.size() > 0) { map<string,int>::iterator it=barcodes.begin(); for(it;it!=barcodes.end();it++){ if(it->first.length() > maxLength){ maxLength = it->first.length(); } } alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1)); }else{ alignment = NULL; } //can you find the barcode int minDiff = 1e6; int minCount = 1; int minGroup = -1; int minPos = 0; for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){ string oligo = it->first; // int length = oligo.length(); if(rawSequence.length() < maxLength){ //let's just assume that the barcodes are the same length success = bdiffs + 10; break; } //use needleman to align first barcode.length()+numdiffs of sequence to each barcode alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs)); oligo = alignment->getSeqAAln(); string temp = alignment->getSeqBAln(); int alnLength = 
oligo.length(); for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } } oligo = oligo.substr(0,alnLength); temp = temp.substr(0,alnLength); int numDiff = countDiffs(oligo, temp); if(numDiff < minDiff){ minDiff = numDiff; minCount = 1; minGroup = it->second; minPos = 0; for(int i=0;i<alnLength;i++){ if(temp[i] != '-'){ minPos++; } } } else if(numDiff == minDiff){ minCount++; } } if(minDiff > bdiffs) { success = minDiff; } //no good matches else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes else{ //use the best match group = minGroup; seq.setUnaligned(rawSequence.substr(minPos)); if(qual.getName() != ""){ qual.trimQScores(minPos, -1); } success = minDiff; } if (alignment != NULL) { delete alignment; } } return success; } catch(exception& e) { m->errorOut(e, "TrimOligos", "stripBarcode"); exit(1); } } //*******************************************************************/ int TrimOligos::stripBarcode(Sequence& seq, int& group){ try { string rawSequence = seq.getUnaligned(); int success = bdiffs + 1; //guilty until proven innocent //can you find the barcode for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){ string oligo = it->first; if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out break; } if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){ group = it->second; seq.setUnaligned(rawSequence.substr(oligo.length())); success = 0; break; } } //if you found the barcode or if you don't want to allow for diffs if ((bdiffs == 0) || (success == 0)) { return success; } else { //try aligning and see if you can find it int maxLength = 0; Alignment* alignment; if (barcodes.size() > 0) { map<string,int>::iterator it=barcodes.begin(); for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){ if(it->first.length() > 
maxLength){ maxLength = it->first.length(); } } alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1)); }else{ alignment = NULL; } //can you find the barcode int minDiff = 1e6; int minCount = 1; int minGroup = -1; int minPos = 0; for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){ string oligo = it->first; // int length = oligo.length(); if(rawSequence.length() < maxLength){ //let's just assume that the barcodes are the same length success = bdiffs + 10; break; } //use needleman to align first barcode.length()+numdiffs of sequence to each barcode alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs)); oligo = alignment->getSeqAAln(); string temp = alignment->getSeqBAln(); int alnLength = oligo.length(); for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } } oligo = oligo.substr(0,alnLength); temp = temp.substr(0,alnLength); int numDiff = countDiffs(oligo, temp); if(numDiff < minDiff){ minDiff = numDiff; minCount = 1; minGroup = it->second; minPos = 0; for(int i=0;i<alnLength;i++){ if(temp[i] != '-'){ minPos++; } } } else if(numDiff == minDiff){ minCount++; } } if(minDiff > bdiffs) { success = minDiff; } //no good matches else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes else{ //use the best match group = minGroup; seq.setUnaligned(rawSequence.substr(minPos)); success = minDiff; } if (alignment != NULL) { delete alignment; } } return success; } catch(exception& e) { m->errorOut(e, "TrimOligos", "stripBarcode"); exit(1); } } //********************************************************************/ int TrimOligos::stripForward(Sequence& seq, int& group){ try { string rawSequence = seq.getUnaligned(); int success = pdiffs + 1; //guilty until proven innocent //can you find the primer for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){ string oligo = it->first; if(rawSequence.length() < oligo.length()){ //let's 
just assume that the primers are the same length success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out break; } if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){ group = it->second; seq.setUnaligned(rawSequence.substr(oligo.length())); success = 0; break; } } //if you found the barcode or if you don't want to allow for diffs if ((pdiffs == 0) || (success == 0)) { return success; } else { //try aligning and see if you can find it int maxLength = 0; Alignment* alignment; if (primers.size() > 0) { map<string,int>::iterator it=primers.begin(); for(it;it!=primers.end();it++){ if(it->first.length() > maxLength){ maxLength = it->first.length(); } } alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1)); }else{ alignment = NULL; } //can you find the barcode int minDiff = 1e6; int minCount = 1; int minGroup = -1; int minPos = 0; for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){ string oligo = it->first; // int length = oligo.length(); if(rawSequence.length() < maxLength){ success = pdiffs + 100; break; } //use needleman to align first barcode.length()+numdiffs of sequence to each barcode alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs)); oligo = alignment->getSeqAAln(); string temp = alignment->getSeqBAln(); int alnLength = oligo.length(); for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } } oligo = oligo.substr(0,alnLength); temp = temp.substr(0,alnLength); int numDiff = countDiffs(oligo, temp); if(numDiff < minDiff){ minDiff = numDiff; minCount = 1; minGroup = it->second; minPos = 0; for(int i=0;i<alnLength;i++){ if(temp[i] != '-'){ minPos++; } } } else if(numDiff == minDiff){ minCount++; } } if(minDiff > pdiffs) { success = minDiff; } //no good matches else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers else{ //use the best match group = minGroup; 
seq.setUnaligned(rawSequence.substr(minPos)); success = minDiff; } if (alignment != NULL) { delete alignment; } } return success; } catch(exception& e) { m->errorOut(e, "TrimOligos", "stripForward"); exit(1); } } //*******************************************************************/ int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group){ try { string rawSequence = seq.getUnaligned(); int success = pdiffs + 1; //guilty until proven innocent //can you find the primer for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){ string oligo = it->first; if(rawSequence.length() < oligo.length()){ //let's just assume that the primers are the same length success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out break; } if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){ group = it->second; seq.setUnaligned(rawSequence.substr(oligo.length())); if(qual.getName() != ""){ qual.trimQScores(oligo.length(), -1); } success = 0; break; } } //if you found the barcode or if you don't want to allow for diffs if ((pdiffs == 0) || (success == 0)) { return success; } else { //try aligning and see if you can find it int maxLength = 0; Alignment* alignment; if (primers.size() > 0) { map<string,int>::iterator it=primers.begin(); for(it;it!=primers.end();it++){ if(it->first.length() > maxLength){ maxLength = it->first.length(); } } alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1)); }else{ alignment = NULL; } //can you find the barcode int minDiff = 1e6; int minCount = 1; int minGroup = -1; int minPos = 0; for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){ string oligo = it->first; // int length = oligo.length(); if(rawSequence.length() < maxLength){ success = pdiffs + 100; break; } //use needleman to align first barcode.length()+numdiffs of sequence to each barcode alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs)); oligo = alignment->getSeqAAln(); string 
temp = alignment->getSeqBAln(); int alnLength = oligo.length(); for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } } oligo = oligo.substr(0,alnLength); temp = temp.substr(0,alnLength); int numDiff = countDiffs(oligo, temp); if(numDiff < minDiff){ minDiff = numDiff; minCount = 1; minGroup = it->second; minPos = 0; for(int i=0;i<alnLength;i++){ if(temp[i] != '-'){ minPos++; } } } else if(numDiff == minDiff){ minCount++; } } if(minDiff > pdiffs) { success = minDiff; } //no good matches else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers else{ //use the best match group = minGroup; seq.setUnaligned(rawSequence.substr(minPos)); if(qual.getName() != ""){ qual.trimQScores(minPos, -1); } success = minDiff; } if (alignment != NULL) { delete alignment; } } return success; } catch(exception& e) { m->errorOut(e, "TrimOligos", "stripForward"); exit(1); } }