Ejemplo n.º 1
0
/**
 * Load up to maxToLoad color-space reads from the already opened stream m_f.
 *
 * Lines starting with '#' are comments and are skipped. A line starting with
 * '>' is a header; the line that follows it holds the colors. Every character
 * equal to one of the DOUBLE_ENCODING_*_COLOR values is rewritten as the
 * matching nucleotide letter, and the read is built from the text that starts
 * two characters into that line. Any other line is ignored.
 *
 * Loading stops when maxToLoad reads were appended, when m_loaded reaches
 * m_size, or when the stream has no more lines.
 *
 * @param maxToLoad maximum number of reads to append during this call
 * @param reads container receiving the reads
 * @param seqMyAllocator allocator handed to Read::constructor
 */
void ColorSpaceLoader::load(int maxToLoad,ArrayOfReads*reads,MyAllocator*seqMyAllocator){
	// The capacity given to fgets is derived from the array itself so that the
	// two can never disagree (a mismatch lets fgets write past the buffer).
	char bufferForLine[4096];
	const int bufferSize=static_cast<int>(sizeof(bufferForLine));
	int loadedSequences=0;

	while(m_loaded<m_size&& loadedSequences<maxToLoad){
		// A NULL result means end of file or a read error: the buffer content
		// is not usable and looping again would never make progress.
		if(fgets(bufferForLine,bufferSize,m_f)==NULL){
			break;
		}
		if(bufferForLine[0]=='#'){
			continue;// skip csfasta comment
		}
		if(bufferForLine[0]!='>'){
			continue;// neither a comment nor a header
		}

		// the header is followed by the line holding the colors
		if(fgets(bufferForLine,bufferSize,m_f)==NULL){
			break;
		}

		// measure the line once instead of on every iteration
		const size_t length=strlen(bufferForLine);
		for(size_t j=0;j<length;j++){
			if(bufferForLine[j]==DOUBLE_ENCODING_A_COLOR){
				bufferForLine[j]='A';
			}else if(bufferForLine[j]==DOUBLE_ENCODING_T_COLOR){
				bufferForLine[j]='T';
			}else if(bufferForLine[j]==DOUBLE_ENCODING_C_COLOR){
				bufferForLine[j]='C';
			}else if(bufferForLine[j]==DOUBLE_ENCODING_G_COLOR){
				bufferForLine[j]='G';
			}
		}

		// remove the leading T & first color; on a line shorter than two
		// characters point at the terminator instead of past it
		char*sequence=bufferForLine+(length<2?length:2);

		Read t;
		t.constructor(sequence,seqMyAllocator,true);
		reads->push_back(&t);
		loadedSequences++;
		m_loaded++;
	}

	// the stream is only released once every expected read was loaded
	if(m_loaded==m_size){
		fclose(m_f);
	}
}
Ejemplo n.º 2
0
void FastqLoader::loadWithPeriod(int maxToLoad,ArrayOfReads*reads,MyAllocator*seqMyAllocator,int period){
	char buffer[RAY_MAXIMUM_READ_LENGTH];
	int rotatingVariable=0;
	int loadedSequences=0;

	while(loadedSequences<maxToLoad && NULL!=fgets(buffer,RAY_MAXIMUM_READ_LENGTH,m_f)){
		if(rotatingVariable==1){
			Read t;
			t.constructor(buffer,seqMyAllocator,true);
			reads->push_back(&t);
		}
		rotatingVariable++;

		// a period is reached for each read.
		if(rotatingVariable==period){
			rotatingVariable=0;
			loadedSequences++;
			m_loaded++;
		}
	}

	if(m_loaded==m_size){
		fclose(m_f);
	}
}
Ejemplo n.º 3
0
/**
 * Load every sequence of a FASTA file into reads.
 *
 * The file is consumed token by token. A token starting with '>' opens a new
 * record and the rest of its line is discarded; every other token is appended
 * to the sequence of the current record. A record is stored when the next
 * header is met, and the trailing record is stored unconditionally after the
 * loop, so an input without any sequence data still yields one empty read.
 *
 * @param file path of the FASTA file
 * @param reads container receiving the reads
 * @param seqMyAllocator allocator handed to Read::constructor
 * @return EXIT_SUCCESS, or EXIT_FAILURE when the file cannot be opened
 */
int FastaLoader::load(string file,ArrayOfReads*reads,MyAllocator*seqMyAllocator){
	ifstream f(file.c_str());

	// A stream that failed to open never reports eof(); bail out instead of
	// waiting for an end of file that cannot happen.
	if(!f.is_open()){
		return EXIT_FAILURE;
	}

	string id;
	ostringstream sequence;
	string buffer;
	string restOfHeader;

	// Testing the extraction itself ends the loop on end of file and on any
	// stream error; a successful extraction never yields an empty token.
	while(f>>buffer){
		if(buffer[0]=='>'){
			// drop the remainder of the header line, whatever its length
			getline(f,restOfHeader);

			if(id!=""){
				string sequenceStr=sequence.str();

				Read t;
				t.constructor(sequenceStr.c_str(),seqMyAllocator,true);
				reads->push_back(&t);
			}
			id=buffer;
			sequence.str("");
		}else{
			sequence<< buffer;
		}
	}

	// store the record that was still being accumulated
	string sequenceStr=sequence.str();
	Read t;
	t.constructor(sequenceStr.c_str(),seqMyAllocator,true);
	reads->push_back(&t);

	f.close();
	return EXIT_SUCCESS;
}
Ejemplo n.º 4
0
/**
 * Load up to maxToLoad color-space reads from the already opened stream m_f.
 *
 * Lines starting with '#' are comments and are skipped. A line starting with
 * '>' is a header; the line that follows it holds the colors. Every character
 * equal to one of the DOUBLE_ENCODING_*_COLOR values is replaced by the
 * matching SYMBOL_* value, and the read is built from the text that starts
 * two characters into that line. Any other line is ignored.
 *
 * Loading stops when maxToLoad reads were appended, when m_loaded reaches
 * m_size, or when the stream has no more lines.
 *
 * @param maxToLoad maximum number of reads to append during this call
 * @param reads container receiving the reads
 * @param seqMyAllocator allocator handed to Read::constructor
 */
void ColorSpaceLoader::load(int maxToLoad,ArrayOfReads*reads,MyAllocator*seqMyAllocator){
	char bufferForLine[RAY_MAXIMUM_READ_LENGTH];
	int loadedSequences=0;

	while(m_loaded<m_size&& loadedSequences<maxToLoad){
		// NULL means end of file or a read error. Retrying can never succeed,
		// so leave the loop instead of spinning on it.
		if(NULL==fgets(bufferForLine,RAY_MAXIMUM_READ_LENGTH,m_f)){
			break;
		}

		if(bufferForLine[0]=='#'){
			continue;// skip csfasta comment
		}
		if(bufferForLine[0]!='>'){
			continue;// neither a comment nor a header
		}

		// The header is followed by the line holding the colors. This is
		// checked at run time so that a truncated file is also caught in
		// builds where assertions are compiled out.
		if(NULL==fgets(bufferForLine,RAY_MAXIMUM_READ_LENGTH,m_f)){
			break;
		}

		// measure the line once instead of on every iteration
		const size_t length=strlen(bufferForLine);
		for(size_t j=0;j<length;j++){
			if(bufferForLine[j]==DOUBLE_ENCODING_A_COLOR){
				bufferForLine[j]=SYMBOL_A;
			}else if(bufferForLine[j]==DOUBLE_ENCODING_T_COLOR){
				bufferForLine[j]=SYMBOL_T;
			}else if(bufferForLine[j]==DOUBLE_ENCODING_C_COLOR){
				bufferForLine[j]=SYMBOL_C;
			}else if(bufferForLine[j]==DOUBLE_ENCODING_G_COLOR){
				bufferForLine[j]=SYMBOL_G;
			}
		}

		// remove the leading T & first color; on a line shorter than two
		// characters point at the terminator instead of past it
		char*sequence=bufferForLine+(length<2?length:2);

		Read t;
		t.constructor(sequence,seqMyAllocator,true);
		reads->push_back(&t);
		loadedSequences++;
		m_loaded++;
	}

	// the stream is only released once every expected read was loaded
	if(m_loaded==m_size){
		fclose(m_f);
	}
}
Ejemplo n.º 5
0
// a very simple and compact fastq.gz reader
void FastqGzLoader::load(int maxToLoad,ArrayOfReads*reads,MyAllocator*seqMyAllocator,int period) {
    char buffer[4096];
    int rotatingVariable=0;
    int loadedSequences=0;

    while(loadedSequences<maxToLoad && Z_NULL!=gzgets(m_f,buffer,4096)) {
        if(rotatingVariable==1) {
            Read t;
            t.constructor(buffer,seqMyAllocator,true);
            reads->push_back(&t);
        }
        rotatingVariable++;

        // a period is reached for each read.
        if(rotatingVariable==period) {
            rotatingVariable=0;
            loadedSequences++;
            m_loaded++;
        }
    }
    if(m_loaded==m_size) {
        gzclose(m_f);
    }
}
Ejemplo n.º 6
0
/**
 * Copy the read at index m_distribution_sequence_id of m_loader into m_myReads
 * and, when the current file carries paired reads, record where its mate is.
 *
 * The mate's global identifier is derived from m_distribution_currentSequenceId:
 *  - left file:                 current + m_loader.size()
 *  - right file:                current - m_loader.size()
 *  - interleaved, even index:   current + 1
 *  - interleaved, odd index:    current - 1
 * That global identifier is then translated with
 * m_parameters->getRankFromGlobalId() and m_parameters->getIdFromGlobalId()
 * and stored in the paired-read record of the read that was just appended.
 *
 * When none of the four cases applies, no pairing information is written.
 */
void SequencesLoader::registerSequence(){
	// progress report, emitted whenever the current number of stored reads is
	// a multiple of NUMBER_OF_SEQUENCES_PERIOD (this includes zero)
	if(m_myReads->size()% NUMBER_OF_SEQUENCES_PERIOD ==0){
		LargeCount amount=m_myReads->size();
		cout<<"Rank "<<m_rank<<" has "<<amount<<" sequence reads"<<endl;

		if(m_parameters->showMemoryUsage()){
			showMemoryUsage(m_rank);
		}
	}

	#ifdef ASSERT
	assert(m_distribution_sequence_id<m_loader.size());
	#endif

	// extract the sequence text of the source read into a local buffer
	// NOTE(review): presumably getSeq() writes at most RAY_MAXIMUM_READ_LENGTH
	// characters including the terminator -- confirm against Read::getSeq
	Read*theRead=m_loader.at(m_distribution_sequence_id);
	char read[RAY_MAXIMUM_READ_LENGTH];
	theRead->getSeq(read,m_parameters->getColorSpaceMode(),false);

	//cout<<"DEBUG2 Read="<<m_distribution_sequence_id<<" color="<<m_parameters->getColorSpaceMode()<<" Seq= "<<read<<endl;

	// rebuild the read with m_persistentAllocator and append it; every branch
	// below refers to it as the element at index m_myReads->size()-1
	Read myRead;
	myRead.constructor(read,&(*m_persistentAllocator),true);
	m_myReads->push_back(&myRead);

	// left file: the mate is m_loader.size() global identifiers further
	if(m_LOADER_isLeftFile){
		ReadHandle leftSequenceGlobalId=m_distribution_currentSequenceId;
		LargeIndex leftSequenceIdOnRank=m_myReads->size()-1;

		#ifdef ASSERT
		assert(m_loader.size()!=0);
		#endif

		ReadHandle rightSequenceGlobalId=leftSequenceGlobalId+m_loader.size();

		#ifdef ASSERT
		assert(leftSequenceGlobalId<rightSequenceGlobalId);
		assert(leftSequenceGlobalId>=0);
		assert(leftSequenceGlobalId<m_totalNumberOfSequences);
		assert(rightSequenceGlobalId>=0);
		assert(rightSequenceGlobalId<m_totalNumberOfSequences);
		#endif

		int rightSequenceRank=m_parameters->getRankFromGlobalId(rightSequenceGlobalId);

		#ifdef ASSERT
		// print the state before asserting so that a failure can be diagnosed
		if(rightSequenceRank>=m_size){
			cout<<"m_distribution_currentSequenceId="<<m_distribution_currentSequenceId<<" m_distribution_sequence_id="<<m_distribution_sequence_id<<" LoaderSize="<<m_loader.size()<<" Rank="<<rightSequenceRank<<" Size="<<m_size<<endl;
			assert(rightSequenceRank<m_size);
		}
		#endif

		LargeIndex rightSequenceIdOnRank=m_parameters->getIdFromGlobalId(rightSequenceGlobalId);

		int library=m_parameters->getLibrary(m_distribution_file_id);

		(*m_myReads)[leftSequenceIdOnRank]->setLeftType();
		(*m_myReads)[leftSequenceIdOnRank]->getPairedRead()->constructor(rightSequenceRank,rightSequenceIdOnRank,library);
	// right file: the mate is m_loader.size() global identifiers earlier
	}else if(m_LOADER_isRightFile){

		#ifdef ASSERT
		assert(m_loader.size()!=0);
		#endif

		ReadHandle rightSequenceGlobalId=(m_distribution_currentSequenceId);
		LargeIndex rightSequenceIdOnRank=m_myReads->size()-1;
		ReadHandle leftSequenceGlobalId=rightSequenceGlobalId-m_loader.size();

		#ifdef ASSERT
		assert(leftSequenceGlobalId>=0);

		// print the state before asserting so that a failure can be diagnosed
		if(leftSequenceGlobalId>=m_totalNumberOfSequences){
			cout<<"Error: invalid ReadHandle object, leftSequenceGlobalId: "<<leftSequenceGlobalId;
			cout<<" m_totalNumberOfSequences: "<<m_totalNumberOfSequences;
			cout<<" rightSequenceGlobalId: "<<rightSequenceGlobalId<<endl;
			cout<<" m_distribution_currentSequenceId "<<m_distribution_currentSequenceId;
			cout<<" m_loader.size: "<<m_loader.size();
			cout<<" rightSequenceIdOnRank: "<<rightSequenceIdOnRank<<" m_myReads->size: "<<m_myReads->size();
			cout<<endl;
		}

		assert(leftSequenceGlobalId<m_totalNumberOfSequences);
		assert(rightSequenceGlobalId>=0);
		assert(rightSequenceGlobalId<m_totalNumberOfSequences);
		#endif

		Rank leftSequenceRank=m_parameters->getRankFromGlobalId(leftSequenceGlobalId);
		#ifdef ASSERT
		if(leftSequenceRank>=m_size){
			cout<<"Global="<<leftSequenceGlobalId<<" rank="<<leftSequenceRank<<endl;
		}
		assert(leftSequenceRank<m_size);
		#endif
		LargeIndex leftSequenceIdOnRank=m_parameters->getIdFromGlobalId(leftSequenceGlobalId);
		int library=m_parameters->getLibrary(m_distribution_file_id);

		(*m_myReads)[rightSequenceIdOnRank]->setRightType();
		(*m_myReads)[rightSequenceIdOnRank]->getPairedRead()->constructor(leftSequenceRank,leftSequenceIdOnRank,library);
	// left sequence in interleaved file: even index, the mate is the next read
	}else if(m_isInterleavedFile && ((m_distribution_sequence_id)%2)==0){
		ReadHandle rightSequenceGlobalId=(m_distribution_currentSequenceId)+1;

		#ifdef ASSERT
		assert(rightSequenceGlobalId>=0);
		assert(rightSequenceGlobalId<m_totalNumberOfSequences);
		#endif

		Rank rightSequenceRank=m_parameters->getRankFromGlobalId(rightSequenceGlobalId);
		LargeIndex rightSequenceIdOnRank=m_parameters->getIdFromGlobalId(rightSequenceGlobalId);

		LargeIndex leftSequenceIdOnRank=m_myReads->size()-1;

		int library=m_parameters->getLibrary(m_distribution_file_id);

		(*m_myReads)[leftSequenceIdOnRank]->setLeftType();
		(*m_myReads)[leftSequenceIdOnRank]->getPairedRead()->constructor(rightSequenceRank,rightSequenceIdOnRank,library);

	// right sequence in interleaved file: odd index, the mate is the previous read
	}else if(m_isInterleavedFile &&((m_distribution_sequence_id)%2)==1){
		ReadHandle rightSequenceGlobalId=(m_distribution_currentSequenceId);
		LargeIndex rightSequenceIdOnRank=m_myReads->size()-1;
		ReadHandle leftSequenceGlobalId=rightSequenceGlobalId-1;

		#ifdef ASSERT
		assert(leftSequenceGlobalId>=0);
		assert(leftSequenceGlobalId<m_totalNumberOfSequences);
		assert(rightSequenceGlobalId>=0);
		assert(rightSequenceGlobalId<m_totalNumberOfSequences);
		#endif

		Rank leftSequenceRank=m_parameters->getRankFromGlobalId(leftSequenceGlobalId);
		LargeIndex leftSequenceIdOnRank=m_parameters->getIdFromGlobalId(leftSequenceGlobalId);
		int library=m_parameters->getLibrary(m_distribution_file_id);

		(*m_myReads)[rightSequenceIdOnRank]->setRightType();
		(*m_myReads)[rightSequenceIdOnRank]->getPairedRead()->constructor(leftSequenceRank,leftSequenceIdOnRank,library);
	}
}