void TFastaFile::readRecords(const string& aFastaFilename, vector<Engine::SequenceManager::Sequence>& aFastaSequences, bool removeTabCharacterInDescription) { string info; string seq; aFastaSequences.clear(); TFastaFile fastaFile(aFastaFilename); while(fastaFile.readNextRecord(info, seq, removeTabCharacterInDescription)) { aFastaSequences.push_back(Engine::SequenceManager::Sequence(info, seq)); } fastaFile.closeFastaFile(); }
int seqNoise::writeOutput(string fastaFileName, string namesFileName, string uMapFileName, vector<int> finalTau, vector<int> centroids, vector<int> otuData, vector<string> sequences, vector<string> uniqueNames, vector<string> redundantNames, vector<int> seqFreq, vector<double>& distances){ try { int numOTUs = finalTau.size(); int numSeqs = uniqueNames.size(); ofstream fastaFile(fastaFileName.c_str()); ofstream namesFile(namesFileName.c_str()); ofstream uMapFile(uMapFileName.c_str()); vector<int> maxSequenceAbund(numOTUs, 0); vector<int> maxSequenceIndex(numOTUs, 0); for(int i=0;i<numSeqs;i++){ if (m->control_pressed) { return 0; } if(maxSequenceAbund[otuData[i]] < seqFreq[i]){ maxSequenceAbund[otuData[i]] = seqFreq[i]; maxSequenceIndex[otuData[i]] = i; } } int count = 1; for(int i=0;i<numOTUs;i++){ if (m->control_pressed) { return 0; } if(finalTau[i] > 0){ if(maxSequenceIndex[i] != centroids[i] && distances[maxSequenceIndex[i]*numSeqs + centroids[i]] == 0){ // cout << uniqueNames[centroids[i]] << '\t' << uniqueNames[maxSequenceIndex[i]] << '\t' << count << endl; centroids[i] = maxSequenceIndex[i]; } int index = centroids[i]; fastaFile << '>' << uniqueNames[index] << endl << sequences[index] << endl; namesFile << uniqueNames[index] << '\t'; string refSeq = sequences[index]; string redundantSeqs = redundantNames[index];; vector<freqData> frequencyData; for(int j=0;j<numSeqs;j++){ if(otuData[j] == i && j != index){ frequencyData.push_back(freqData(j, seqFreq[j])); } } sort(frequencyData.rbegin(), frequencyData.rend()); string refDegap = degapSeq(refSeq); vector<int> rUnalign = convertSeq(refDegap); uMapFile << "ideal_seq_" << count << '\t' << finalTau[i] << endl; uMapFile << uniqueNames[index] << '\t' << seqFreq[index] << "\t0\t" << refDegap << endl; for(int j=0;j<frequencyData.size();j++){ if (m->control_pressed) { return 0; } redundantSeqs += ',' + redundantNames[frequencyData[j].index]; uMapFile << uniqueNames[frequencyData[j].index] << '\t' << seqFreq[frequencyData[j].index] << '\t'; string querySeq = sequences[frequencyData[j].index]; string queryDegap = degapSeq(querySeq); vector<int> qUnalign = convertSeq(queryDegap); int udiffs = countDiffs(qUnalign, rUnalign); uMapFile << udiffs << '\t' << queryDegap << endl; } uMapFile << endl; namesFile << redundantSeqs << endl; count++; } } fastaFile.close(); namesFile.close(); uMapFile.close(); return 0; } catch(exception& e) { m->errorOut(e, "seqNoise", "writeOutput"); exit(1); } }