Esempio n. 1
0
		/**
		 * Reads all records of a FASTA file into a vector of sequences.
		 *
		 * The output vector is emptied first, so on return it holds only the
		 * records read by this call, in the order readNextRecord() produced them.
		 *
		 * @param aFastaFilename  Path handed to the TFastaFile constructor.
		 * @param aFastaSequences Output container; cleared, then filled with one
		 *                        Sequence(description, sequence) per record.
		 * @param removeTabCharacterInDescription Forwarded unchanged to
		 *                        readNextRecord() (presumably strips tab characters
		 *                        from the description line - confirm there).
		 */
		void TFastaFile::readRecords(const string& aFastaFilename, vector<Engine::SequenceManager::Sequence>& aFastaSequences, bool removeTabCharacterInDescription)
		{
			aFastaSequences.clear();

			TFastaFile reader(aFastaFilename);
			string description;
			string residues;

			// Pull records until the reader reports that no further record was read.
			for (;;)
			{
				if (!reader.readNextRecord(description, residues, removeTabCharacterInDescription))
				{
					break;
				}
				aFastaSequences.push_back(Engine::SequenceManager::Sequence(description, residues));
			}

			reader.closeFastaFile();
		}
Esempio n. 2
0
int seqNoise::writeOutput(string fastaFileName, string namesFileName, string uMapFileName, vector<int> finalTau, vector<int> centroids, vector<int> otuData, vector<string> sequences, vector<string> uniqueNames, vector<string> redundantNames, vector<int> seqFreq, vector<double>& distances){
	try {
		int numOTUs = finalTau.size();
		int numSeqs = uniqueNames.size();
		
		ofstream fastaFile(fastaFileName.c_str());
		ofstream namesFile(namesFileName.c_str());
		ofstream uMapFile(uMapFileName.c_str());
		
		vector<int> maxSequenceAbund(numOTUs, 0);
		vector<int> maxSequenceIndex(numOTUs, 0);
		
		for(int i=0;i<numSeqs;i++){
			if (m->control_pressed) { return 0; }
			if(maxSequenceAbund[otuData[i]] < seqFreq[i]){
				maxSequenceAbund[otuData[i]] = seqFreq[i];
				maxSequenceIndex[otuData[i]] = i;
			}
		}
		
		int count = 1;
		
		for(int i=0;i<numOTUs;i++){
			if (m->control_pressed) { return 0; }
			
			if(finalTau[i] > 0){
				
				if(maxSequenceIndex[i] != centroids[i] && distances[maxSequenceIndex[i]*numSeqs + centroids[i]] == 0){
					//				cout << uniqueNames[centroids[i]] << '\t' << uniqueNames[maxSequenceIndex[i]] << '\t' << count << endl;
					centroids[i] = maxSequenceIndex[i];
				}
				
				int index = centroids[i];
				
				fastaFile << '>' << uniqueNames[index] << endl << sequences[index] << endl;
				namesFile << uniqueNames[index] << '\t';
				
				string refSeq = sequences[index];
				string redundantSeqs = redundantNames[index];;
				
				
				vector<freqData> frequencyData;
				
				for(int j=0;j<numSeqs;j++){
					if(otuData[j] == i && j != index){
						frequencyData.push_back(freqData(j, seqFreq[j]));
					}
				}
				sort(frequencyData.rbegin(), frequencyData.rend());
				
				string refDegap = degapSeq(refSeq);
				vector<int> rUnalign = convertSeq(refDegap);
				
				uMapFile << "ideal_seq_" << count << '\t' << finalTau[i] << endl;
				uMapFile << uniqueNames[index] << '\t' << seqFreq[index] << "\t0\t" << refDegap << endl;
				
				
				for(int j=0;j<frequencyData.size();j++){
					if (m->control_pressed) { return 0; }
					redundantSeqs += ',' + redundantNames[frequencyData[j].index];
					
					uMapFile << uniqueNames[frequencyData[j].index] << '\t' << seqFreq[frequencyData[j].index] << '\t';
					
					string querySeq = sequences[frequencyData[j].index];
					
					string queryDegap = degapSeq(querySeq);
					vector<int> qUnalign = convertSeq(queryDegap);
					
					int udiffs = countDiffs(qUnalign, rUnalign);
					uMapFile << udiffs << '\t' << queryDegap << endl;
					
				}					
				
				uMapFile << endl;
				namesFile << redundantSeqs << endl;
				count++;
				
			}
		}
		fastaFile.close();
		namesFile.close();
		uMapFile.close();
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "seqNoise", "writeOutput");
		exit(1);
	}
}