void ColorSpaceLoader::load(int maxToLoad,ArrayOfReads*reads,MyAllocator*seqMyAllocator){ char bufferForLine[1024]; int loadedSequences=0; while(m_loaded<m_size&& loadedSequences<maxToLoad){ fgets(bufferForLine,4096,m_f); if(bufferForLine[0]=='#'){ continue;// skip csfasta comment } // read two lines if(bufferForLine[0]=='>'){ fgets(bufferForLine,4096,m_f); for(int j=0;j<(int)strlen(bufferForLine);j++){ if(bufferForLine[j]==DOUBLE_ENCODING_A_COLOR){ bufferForLine[j]='A'; }else if(bufferForLine[j]==DOUBLE_ENCODING_T_COLOR){ bufferForLine[j]='T'; }else if(bufferForLine[j]==DOUBLE_ENCODING_C_COLOR){ bufferForLine[j]='C'; }else if(bufferForLine[j]==DOUBLE_ENCODING_G_COLOR){ bufferForLine[j]='G'; } } Read t; // remove the leading T & first color t.constructor(bufferForLine+2,seqMyAllocator,true); reads->push_back(&t); loadedSequences++; m_loaded++; } } if(m_loaded==m_size){ fclose(m_f); } }
void FastqLoader::loadWithPeriod(int maxToLoad,ArrayOfReads*reads,MyAllocator*seqMyAllocator,int period){ char buffer[RAY_MAXIMUM_READ_LENGTH]; int rotatingVariable=0; int loadedSequences=0; while(loadedSequences<maxToLoad && NULL!=fgets(buffer,RAY_MAXIMUM_READ_LENGTH,m_f)){ if(rotatingVariable==1){ Read t; t.constructor(buffer,seqMyAllocator,true); reads->push_back(&t); } rotatingVariable++; // a period is reached for each read. if(rotatingVariable==period){ rotatingVariable=0; loadedSequences++; m_loaded++; } } if(m_loaded==m_size){ fclose(m_f); } }
/**
 * Load the sequences of a fasta file into reads.
 *
 * The file is read token by token. A token starting with '>' begins a new
 * entry; the remainder of that header line is discarded. Every other token
 * is appended to the sequence of the current entry.
 *
 * As before, the sequence accumulated when the end of the file is reached is
 * always stored, even if it is empty.
 *
 * @param file path of the fasta file
 * @param reads destination container
 * @param seqMyAllocator allocator handed to Read::constructor
 * @return EXIT_SUCCESS, or EXIT_FAILURE if the file can not be opened
 */
int FastaLoader::load(string file,ArrayOfReads*reads,MyAllocator*seqMyAllocator){
	ifstream f(file.c_str());

	// A stream that failed to open never reports eof(); looping on
	// !f.eof() would spin forever, so bail out explicitly.
	if(!f){
		return EXIT_FAILURE;
	}

	string id;
	ostringstream sequence;
	string buffer;
	string restOfHeaderLine;

	// Looping on the extraction itself ends on end of file and on any
	// stream failure; a successful extraction never yields an empty token.
	while(f>>buffer){
		if(buffer[0]=='>'){
			// Discard the rest of the header line whatever its length.
			// A fixed-size getline sets failbit on long headers.
			getline(f,restOfHeaderLine);

			// store the entry that ends here, if there is one
			if(id!=""){
				string sequenceStr=sequence.str();
				Read t;
				t.constructor(sequenceStr.c_str(),seqMyAllocator,true);
				reads->push_back(&t);
			}

			id=buffer;
			sequence.str("");
		}else{
			sequence<< buffer;
		}
	}

	// store the last entry
	string sequenceStr=sequence.str();
	Read t;
	t.constructor(sequenceStr.c_str(),seqMyAllocator,true);
	reads->push_back(&t);

	f.close();
	return EXIT_SUCCESS;
}
void ColorSpaceLoader::load(int maxToLoad,ArrayOfReads*reads,MyAllocator*seqMyAllocator){ char bufferForLine[RAY_MAXIMUM_READ_LENGTH]; int loadedSequences=0; while(m_loaded<m_size&& loadedSequences<maxToLoad){ if(NULL==fgets(bufferForLine,RAY_MAXIMUM_READ_LENGTH,m_f)) continue; if(bufferForLine[0]=='#'){ continue;// skip csfasta comment } // read two lines if(bufferForLine[0]=='>'){ char*returnValue=fgets(bufferForLine,RAY_MAXIMUM_READ_LENGTH,m_f); assert(returnValue != NULL); for(int j=0;j<(int)strlen(bufferForLine);j++){ if(bufferForLine[j]==DOUBLE_ENCODING_A_COLOR){ bufferForLine[j]=SYMBOL_A; }else if(bufferForLine[j]==DOUBLE_ENCODING_T_COLOR){ bufferForLine[j]=SYMBOL_T; }else if(bufferForLine[j]==DOUBLE_ENCODING_C_COLOR){ bufferForLine[j]=SYMBOL_C; }else if(bufferForLine[j]==DOUBLE_ENCODING_G_COLOR){ bufferForLine[j]=SYMBOL_G; } } Read t; // remove the leading T & first color t.constructor(bufferForLine+2,seqMyAllocator,true); reads->push_back(&t); loadedSequences++; m_loaded++; } } if(m_loaded==m_size){ fclose(m_f); } }
// a very simple and compact fastq.gz reader void FastqGzLoader::load(int maxToLoad,ArrayOfReads*reads,MyAllocator*seqMyAllocator,int period) { char buffer[4096]; int rotatingVariable=0; int loadedSequences=0; while(loadedSequences<maxToLoad && Z_NULL!=gzgets(m_f,buffer,4096)) { if(rotatingVariable==1) { Read t; t.constructor(buffer,seqMyAllocator,true); reads->push_back(&t); } rotatingVariable++; // a period is reached for each read. if(rotatingVariable==period) { rotatingVariable=0; loadedSequences++; m_loaded++; } } if(m_loaded==m_size) { gzclose(m_f); } }
void SequencesLoader::registerSequence(){ if(m_myReads->size()% NUMBER_OF_SEQUENCES_PERIOD ==0){ LargeCount amount=m_myReads->size(); cout<<"Rank "<<m_rank<<" has "<<amount<<" sequence reads"<<endl; if(m_parameters->showMemoryUsage()){ showMemoryUsage(m_rank); } } #ifdef ASSERT assert(m_distribution_sequence_id<m_loader.size()); #endif Read*theRead=m_loader.at(m_distribution_sequence_id); char read[RAY_MAXIMUM_READ_LENGTH]; theRead->getSeq(read,m_parameters->getColorSpaceMode(),false); //cout<<"DEBUG2 Read="<<m_distribution_sequence_id<<" color="<<m_parameters->getColorSpaceMode()<<" Seq= "<<read<<endl; Read myRead; myRead.constructor(read,&(*m_persistentAllocator),true); m_myReads->push_back(&myRead); if(m_LOADER_isLeftFile){ ReadHandle leftSequenceGlobalId=m_distribution_currentSequenceId; LargeIndex leftSequenceIdOnRank=m_myReads->size()-1; #ifdef ASSERT assert(m_loader.size()!=0); #endif ReadHandle rightSequenceGlobalId=leftSequenceGlobalId+m_loader.size(); #ifdef ASSERT assert(leftSequenceGlobalId<rightSequenceGlobalId); assert(leftSequenceGlobalId>=0); assert(leftSequenceGlobalId<m_totalNumberOfSequences); assert(rightSequenceGlobalId>=0); assert(rightSequenceGlobalId<m_totalNumberOfSequences); #endif int rightSequenceRank=m_parameters->getRankFromGlobalId(rightSequenceGlobalId); #ifdef ASSERT if(rightSequenceRank>=m_size){ cout<<"m_distribution_currentSequenceId="<<m_distribution_currentSequenceId<<" m_distribution_sequence_id="<<m_distribution_sequence_id<<" LoaderSize="<<m_loader.size()<<" Rank="<<rightSequenceRank<<" Size="<<m_size<<endl; assert(rightSequenceRank<m_size); } #endif LargeIndex rightSequenceIdOnRank=m_parameters->getIdFromGlobalId(rightSequenceGlobalId); int library=m_parameters->getLibrary(m_distribution_file_id); (*m_myReads)[leftSequenceIdOnRank]->setLeftType(); (*m_myReads)[leftSequenceIdOnRank]->getPairedRead()->constructor(rightSequenceRank,rightSequenceIdOnRank,library); }else if(m_LOADER_isRightFile){ #ifdef ASSERT 
assert(m_loader.size()!=0); #endif ReadHandle rightSequenceGlobalId=(m_distribution_currentSequenceId); LargeIndex rightSequenceIdOnRank=m_myReads->size()-1; ReadHandle leftSequenceGlobalId=rightSequenceGlobalId-m_loader.size(); #ifdef ASSERT assert(leftSequenceGlobalId>=0); if(leftSequenceGlobalId>=m_totalNumberOfSequences){ cout<<"Error: invalid ReadHandle object, leftSequenceGlobalId: "<<leftSequenceGlobalId; cout<<" m_totalNumberOfSequences: "<<m_totalNumberOfSequences; cout<<" rightSequenceGlobalId: "<<rightSequenceGlobalId<<endl; cout<<" m_distribution_currentSequenceId "<<m_distribution_currentSequenceId; cout<<" m_loader.size: "<<m_loader.size(); cout<<" rightSequenceIdOnRank: "<<rightSequenceIdOnRank<<" m_myReads->size: "<<m_myReads->size(); cout<<endl; } assert(leftSequenceGlobalId<m_totalNumberOfSequences); assert(rightSequenceGlobalId>=0); assert(rightSequenceGlobalId<m_totalNumberOfSequences); #endif Rank leftSequenceRank=m_parameters->getRankFromGlobalId(leftSequenceGlobalId); #ifdef ASSERT if(leftSequenceRank>=m_size){ cout<<"Global="<<leftSequenceGlobalId<<" rank="<<leftSequenceRank<<endl; } assert(leftSequenceRank<m_size); #endif LargeIndex leftSequenceIdOnRank=m_parameters->getIdFromGlobalId(leftSequenceGlobalId); int library=m_parameters->getLibrary(m_distribution_file_id); (*m_myReads)[rightSequenceIdOnRank]->setRightType(); (*m_myReads)[rightSequenceIdOnRank]->getPairedRead()->constructor(leftSequenceRank,leftSequenceIdOnRank,library); // left sequence in interleaved file }else if(m_isInterleavedFile && ((m_distribution_sequence_id)%2)==0){ ReadHandle rightSequenceGlobalId=(m_distribution_currentSequenceId)+1; #ifdef ASSERT assert(rightSequenceGlobalId>=0); assert(rightSequenceGlobalId<m_totalNumberOfSequences); #endif Rank rightSequenceRank=m_parameters->getRankFromGlobalId(rightSequenceGlobalId); LargeIndex rightSequenceIdOnRank=m_parameters->getIdFromGlobalId(rightSequenceGlobalId); LargeIndex leftSequenceIdOnRank=m_myReads->size()-1; int 
library=m_parameters->getLibrary(m_distribution_file_id); (*m_myReads)[leftSequenceIdOnRank]->setLeftType(); (*m_myReads)[leftSequenceIdOnRank]->getPairedRead()->constructor(rightSequenceRank,rightSequenceIdOnRank,library); // only the right sequence. }else if(m_isInterleavedFile &&((m_distribution_sequence_id)%2)==1){ ReadHandle rightSequenceGlobalId=(m_distribution_currentSequenceId); LargeIndex rightSequenceIdOnRank=m_myReads->size()-1; ReadHandle leftSequenceGlobalId=rightSequenceGlobalId-1; #ifdef ASSERT assert(leftSequenceGlobalId>=0); assert(leftSequenceGlobalId<m_totalNumberOfSequences); assert(rightSequenceGlobalId>=0); assert(rightSequenceGlobalId<m_totalNumberOfSequences); #endif Rank leftSequenceRank=m_parameters->getRankFromGlobalId(leftSequenceGlobalId); LargeIndex leftSequenceIdOnRank=m_parameters->getIdFromGlobalId(leftSequenceGlobalId); int library=m_parameters->getLibrary(m_distribution_file_id); (*m_myReads)[rightSequenceIdOnRank]->setRightType(); (*m_myReads)[rightSequenceIdOnRank]->getPairedRead()->constructor(leftSequenceRank,leftSequenceIdOnRank,library); } }