Ejemplo n.º 1
0
// Merge two readsFiles together
void mergeReadFiles(const std::string& readsFile1, const std::string& readsFile2, const std::string& outPrefix)
{
    // If the outfile is the empty string, append the reads in readsFile2 into readsFile1
    // otherwise cat the files together
    std::ostream* pWriter;
    if(outPrefix.empty())
    {
        pWriter = createWriter(readsFile1, std::ios_base::out | std::ios_base::app);
    }
    else
    {
        pWriter = createWriter(makeFilename(outPrefix, ".fa"));

        // Copy reads1 to the outfile
        SeqReader reader(readsFile1);
        SeqRecord record;
        while(reader.get(record))
            record.write(*pWriter);
    }

    // Copy reads2 to writer
    SeqReader reader(readsFile2);
    SeqRecord record;
    while(reader.get(record))
        record.write(*pWriter);
    delete pWriter;
}
Ejemplo n.º 2
0
// Creates an image writer from a keyword list.
// The writer type is read from TYPE_KW under the given prefix. Returns NULL
// when that type is empty or when createWriter(type) produces no writer.
// Otherwise the writer's state is loaded from the keyword list before it is
// returned; the result of loadState() is not checked.
ossimImageFileWriter*
ossimImageWriterFactory::createWriter(const ossimKeywordlist& kwl,
                                      const char *prefix)const
{
   ossimString writerType = kwl.find(prefix, ossimKeywordNames::TYPE_KW);

   ossimImageFileWriter* writer = (ossimImageFileWriter*)NULL;
   if(writerType != "")
   {
      writer = createWriter(writerType);
   }
   if(!writer)
   {
      return writer;
   }

   if(!writer->hasImageType(writerType))
   {
      // The type string is not one of the writer's image types:
      // load the keyword list unchanged.
      writer->loadState(kwl, prefix);
      return writer;
   }

   // The type string doubles as an image type: load from a copy of the
   // keyword list that also carries it under IMAGE_TYPE_KW.
   ossimKeywordlist typedKwl(kwl);
   typedKwl.add(prefix,
                ossimKeywordNames::IMAGE_TYPE_KW,
                writerType,
                true);
   writer->loadState(typedKwl, prefix);

   return writer;
}
Ejemplo n.º 3
0
// Converts `document` into a DOM document and serializes it to `target`
// through an output stream created for `encoding`.
// Returns true when the write call completes; returns false when the
// conversion fails or when any exception is caught (the exception is logged).
bool exportToTarget(const OS_NAMESPACE_NAME::XMLDocument &document, XMLFormatTarget *target, const OS_NAMESPACE_NAME::String &encoding)
{
    try
    {
        AutoPtr<DOMDocument> domDocument = getImplementation()->createDocument();
        if(convertDocument(document, domDocument))
        {
            AutoPtr<DOMLSSerializer> serializer(createWriter());
            AutoPtr<DOMLSOutput> output(createOutputStream(encoding));
            output->setByteStream(target);

            serializer->write(domDocument, output);

            return true;
        }
        // Conversion failed: fall through to the common failure return
    }
    catch(const XMLException &xmlError)
    {
        OS_LOG_ERROR(xmlError.getMessage());
    }
    catch(const DOMException &domError)
    {
        OS_LOG_ERROR(domError.getMessage());
    }
    catch(...)
    {
        OS_LOG_ERROR(OS_ERR_UNKNOWN(xml));
    }

    return false;
}
Ejemplo n.º 4
0
// Converts `document` into a DOM document and serializes its document
// element into `str`.
// Returns true on success. Returns false when the conversion fails, when the
// serializer yields no buffer, or when any exception is caught (logged).
bool exportToString(const OS_NAMESPACE_NAME::XMLDocument &document, OS_NAMESPACE_NAME::String &str)
{
    try
    {
        AutoPtr<DOMDocument> doc = getImplementation()->createDocument();
        if(convertDocument(document, doc) == false)
            return false;

        AutoPtr<DOMLSSerializer> writer(createWriter());

        XMLCh *xmlString = writer->writeToString(doc->getDocumentElement());
        if(xmlString == 0)
        {
            // A null buffer means the serialization failed; never assign it to str
            OS_LOG_ERROR(OS_ERR_UNKNOWN(xml));
            return false;
        }

        try
        {
            str = xmlString;
        }
        catch(...)
        {
            // Do not leak the serializer-allocated buffer if the copy fails;
            // the outer handlers log the error and return false
            XMLString::release(&xmlString);
            throw;
        }
        XMLString::release(&xmlString);

        return true;
    }
    catch(const XMLException &e)
    {
        OS_LOG_ERROR(e.getMessage());
    }
    catch(const DOMException &e)
    {
        OS_LOG_ERROR(e.getMessage());
    }
    catch(...)
    {
        OS_LOG_ERROR(OS_ERR_UNKNOWN(xml));
    }

    return false;
}
Ejemplo n.º 5
0
// Restores one directory node from a zip set reader.
// - With GRP_NEW_PLACE set in `option`, the caller-supplied pWriter is
//   pointed at the path returned by getSourcePath() and used for the restore.
// - Otherwise pWriter must be null; a temporary writer is created for the
//   restore and destroyed afterwards, also when restoreFile() throws.
// Throws GmException (with an empty message) when no temporary writer can be
// created.
void restoreDirectory (GmZipFileReader * pSetReader
							, const wxString & szDestPath
							, GmWriter * pWriter
							, GmRestOp option
							, GmExecUnitBase * pExecUnit
							, GmTempEvent * pEvent
							, GmUifSourceEntry * pEntry
							, GmDirectoryNode<GmLeafNode> * pDirectory
							, ubyte4 uiTotalSets
							, const wxString & prefixPath)
{
	if ((option & GRP_NEW_PLACE) == GRP_NEW_PLACE) {
		assert (pWriter);
		wxString path = getSourcePath (pWriter, pEntry, szDestPath);
		pWriter->SetDestPath (path);

		pSetReader->restoreFile (pWriter, pDirectory, prefixPath, uiTotalSets, pEvent);
	}
	else {  // !GRP_NEW_PLACE
		assert (pWriter == 0);
		GmWriter * pWriter2 = createWriter (pSetReader, pEntry, option, pExecUnit, 0);

		if (pWriter2 == 0) {
			// NOTE(review): the exception carries an empty message; consider adding one.
			wxString msg;
			throw GmException (msg);
		}

		// pEvent is not forwarded in this branch; a null event is passed instead.
		try {
			pSetReader->restoreFile (pWriter2, pDirectory, prefixPath, uiTotalSets, 0);
		}
		catch (...) {
			// Release the temporary writer before propagating the failure.
			delete pWriter2;
			throw;
		}
		delete pWriter2;
	}

	return;
}
Ejemplo n.º 6
0
void PopulationIndex::mergeIndexFiles(const std::string& file1, const std::string& file2, const std::string& outfile)
{
    std::ostream* writer = createWriter(outfile);
    
    // Copy the first index to the output unmodified but track the number of elements read
    size_t num_file_1 = 0;
    std::istream* reader = createReader(file1);
    std::string line;
    while(getline(*reader, line))
    {
        // Copy
        *writer << line << "\n";
        
        // Parse
        PopulationMember member = str2member(line);
        num_file_1 += (member.end - member.start + 1);
    }    
    delete reader;

    // Copy the second index, offsetting by the number of reads in file1
    reader = createReader(file2);
    while(getline(*reader, line))
    {
        PopulationMember member = str2member(line);
        member.start += num_file_1;
        member.end += num_file_1;

        // Copy
        *writer << member.start << "\t" << member.end << "\t" << member.name << "\n";
    } 
    delete reader;
    delete writer;
}
Ejemplo n.º 7
0
/**
 * Configures verbosegc according to the parameters passed.
 * All writers are disabled first. A writer of the parsed type that is already
 * in the chain is reconfigured; otherwise a new one is created and added to
 * the chain. The selected writer is then marked active.
 * @param omrVM The VM used to construct the local environment.
 * @param filename The name of the file or output stream to log to.
 * @param fileCount The number of files to log to.
 * @param iterations The number of gc cycles to log to each file.
 * @return true on success, false if a new writer could not be created
 */
bool
MM_VerboseManager::configureVerboseGC(OMR_VM *omrVM, char *filename, uintptr_t fileCount, uintptr_t iterations)
{
	MM_EnvironmentBase env(omrVM);

	disableWriters();

	WriterType type = parseWriterType(&env, filename, fileCount, iterations);
	MM_VerboseWriter *writer = findWriterInChain(type);

	if (NULL == writer) {
		/* No writer of this type in the chain yet: create and register one */
		writer = createWriter(&env, type, filename, fileCount, iterations);
		if (NULL == writer) {
			return false;
		}
		_writerChain->addWriter(writer);
	} else {
		/* Reuse the writer already in the chain with the new settings */
		writer->reconfigure(&env, filename, fileCount, iterations);
	}

	writer->isActive(true);

	return true;
}
Ejemplo n.º 8
0
// Save the SA to disc
void SampledSuffixArray::writeSSA(std::string filename)
{
    std::ostream* pWriter = createWriter(filename, std::ios::out | std::ios::binary);
    
    // Write a magic number
    SSA_WRITE(SSA_MAGIC_NUMBER)

    // Write sample rate
    SSA_WRITE(m_sampleRate)

    // Write number of lexicographic index entries
    size_t n = m_saLexoIndex.size();
    SSA_WRITE(n)

    // Write lexo index
    SSA_WRITE_N(m_saLexoIndex.front(), sizeof(SSA_INT_TYPE) * n)
    
    // Write number of samples
    n = m_saSamples.size();
    SSA_WRITE(n)

    // Write samples
    SSA_WRITE_N(m_saSamples.front(), sizeof(SAElem) * n)

    delete pWriter;
}
Ejemplo n.º 9
0
bool SaveLoad_v4::GameHandler::saveScreenProps(int slot, const byte *props) {
	if (!createWriter(slot))
		return false;

	SavePartMem mem(256000);

	if (!mem.readFrom(props, 0, 256000))
		return false;

	return _writer->writePart(2, &mem);
}
Ejemplo n.º 10
0
// Returns true if the writer created for `url` can be cast to
// PartialOsmMapWriter. The writer is created only for this check.
bool OsmMapWriterFactory::hasPartialWriter(QString url)
{
  shared_ptr<OsmMapWriter> candidate = createWriter(url);
  shared_ptr<PartialOsmMapWriter> partial = dynamic_pointer_cast<PartialOsmMapWriter>(candidate);

  return partial ? true : false;
}
Ejemplo n.º 11
0
// Returns true if the writer created for `url` can be cast to
// ElementOutputStream. The writer is created only for this check.
bool OsmMapWriterFactory::hasElementOutputStream(QString url)
{
  shared_ptr<OsmMapWriter> candidate = createWriter(url);
  shared_ptr<ElementOutputStream> elementStream = dynamic_pointer_cast<ElementOutputStream>(candidate);

  return elementStream ? true : false;
}
Ejemplo n.º 12
0
// Reacts to pipeline engine state changes:
// - RUNNING: a writer is created.
// - FINISHED: the table is written out once more (if a writer exists) and,
//   when myClearTableOnPipeFinish is set, cleared.
// Any other state is ignored.
void DataTableSupport::notify(const ::pipelib::event::PipeEngineStateChanged<EngineAccess> & evt)
{
  if(evt.getNewState() == ::pipelib::PipelineState::RUNNING)
  {
    createWriter();
    return;
  }

  if(!(evt.getNewState() == ::pipelib::PipelineState::FINISHED))
    return;

  // Pipeline finished: flush the final state of the data table
  if(myWriter.get())
    myWriter->write();

  if(myClearTableOnPipeFinish)
    myTable.clear();
}
Ejemplo n.º 13
0
/*
 * Serializes `term` (of type `topType`) into a newly malloc'ed byte array.
 *
 * On success the array is returned and *length receives its size in bytes;
 * the caller is responsible for freeing the array.
 * If the allocation fails, NULL is returned and *length is set to 0.
 * The temporary writer is destroyed in both cases.
 */
char *A2Pserialize(ATerm term, A2PType topType, int *length){
	A2PWriter writer = createWriter();
	ByteBuffer buffer = writer->buffer;
	char *result;
	int bufferSize;
	
	doSerialize(writer, topType, term);
	
	bufferSize = getCurrentByteBufferSize(buffer);
	result = (char*) malloc(bufferSize);
	if(result != NULL){
		memcpy(result, buffer->buffer, bufferSize);
		*length = bufferSize;
	}else{
		/* Allocation failed: report an empty result instead of copying into NULL. */
		*length = 0;
	}
	
	destroyWriter(writer);
	
	return result;
}
Ejemplo n.º 14
0
// Creates a writer from a keyword list.
// The writer type is read from TYPE_KW under `prefix`. A writer is returned
// only if one can be created for that type and its loadState() succeeds;
// otherwise the result is null.
ossimImageFileWriter* ossimOpjWriterFactory::createWriter(
   const ossimKeywordlist& kwl, const char *prefix)const
{
   ossimRefPtr<ossimImageFileWriter> result = 0;

   const char* typeName = kwl.find(prefix, ossimKeywordNames::TYPE_KW);
   if (typeName)
   {
      result = createWriter(ossimString(typeName));
   }

   // Drop a writer that cannot load its state from the keyword list
   if (result.valid() && !result->loadState(kwl, prefix))
   {
      result = 0;
   }

   return result.release();
}
Ejemplo n.º 15
0
// Changes the output filename.
// Nothing happens if the name is unchanged. If an engine is attached and
// currently RUNNING, the existing writer (if any) writes once more and a new
// writer is created.
void DataTableSupport::setFilename(const fs::path & filename)
{
  if(myFilename == filename)
    return;

  myFilename = filename;

  const bool engineRunning =
    myEngine && myEngine->getState() == ::pipelib::PipelineState::RUNNING;
  if(!engineRunning)
    return;

  // Let the current writer (created before the name change) write once more
  if(myWriter.get())
    myWriter->write();

  // Start the new writer
  createWriter();
}
Ejemplo n.º 16
0
// Flushes this node and, recursively, all of its children.
// - With cached points: an existing data file is moved to a temporary path,
//   a new data file is written containing the old points followed by the
//   cached ones, the temporary file is removed and the cache is released.
// - With an empty cache, accepted points in the grid and no add() since the
//   last flush: the grid is replaced by a fresh SparseGrid.
void PotreeWriterNode::flush(){

	if(cache.size() > 0){
		// move data file aside to temporary directory for reading
		string filepath = path + "/data/" + name + potreeWriter->getExtension();
		string temppath = path +"/temp/prepend" + potreeWriter->getExtension();
		if(fs::exists(filepath)){
			fs::rename(fs::path(filepath), fs::path(temppath));
		}

		// rewrite the data file: previously stored points first ...
		PointWriter *writer = createWriter(filepath, scale);
		if(fs::exists(temppath)){
			PointReader *reader = createReader(temppath);
			while(reader->readNextPoint()){
				writer->write(reader->getPoint());
			}
			reader->close();
			delete reader;
			fs::remove(temppath);
		}

		// ... then the cached points (unsigned index matches cache.size())
		for(size_t i = 0; i < cache.size(); i++){
			writer->write(cache[i]);
		}
		writer->close();
		delete writer;

		// assign a fresh vector rather than clear() so the storage is replaced
		cache = vector<Point>();
	}else if(grid->numAccepted > 0 && addCalledSinceLastFlush == false){
		// the cache is empty in this branch
		delete grid;
		grid = new SparseGrid(aabb, spacing);
	}
	
	addCalledSinceLastFlush = false;

	for(int i = 0; i < 8; i++){
		if(children[i] != NULL){
			children[i]->flush();
		}
	}
}
Ejemplo n.º 17
0
// Handles a save request; what is saved is selected by `offset`:
//   offset < 500   global properties are copied into _props
//   offset == 500  the save index (1200 bytes) is copied into _index
//   otherwise      the whole variable block is written to a save slot
// A size of 0 requests the whole variable block starting at variable 0.
// Returns false on any validation or write failure.
bool SaveLoad_v4::GameHandler::save(int16 dataVar, int32 size, int32 offset) {
	const uint32 varSize = SaveHandler::getVarSize(_vm);
	if (varSize == 0)
		return false;

	if (size == 0) {
		// Indicator to save all variables
		dataVar = 0;
		size = varSize;
	}

	if (offset < 500) {
		// Global properties
		debugC(3, kDebugSaveLoad, "Saving global properties");

		if ((size + offset) > 500) {
			warning("Wrong global properties list size (%d, %d)", size, offset);
			return false;
		}

		_vm->_inter->_variables->copyTo(dataVar, _props + offset, size);
		return true;
	}

	if (offset == 500) {
		// Save index
		if (size != 1200) {
			warning("Requested index has wrong size (%d)", size);
			return false;
		}

		// Keep the index in our buffer; it is needed later for the slot description
		_vm->_inter->_variables->copyTo(dataVar, _index, 1200);
		_hasIndex = true;
		return true;
	}

	// Save slot, whole variable block
	const uint32 slot = _slotFile->getSlot(offset);
	const int slotRem = _slotFile->getSlotRemainder(offset);

	debugC(2, kDebugSaveLoad, "Saving to slot %d", slot);

	const bool validRequest = (slot < kSlotCount) && (slotRem == 0) &&
	                          (dataVar == 0) && (((uint32) size) == varSize);
	if (!validRequest) {
		warning("Invalid saving procedure (%d, %d, %d, %d, %d)",
				dataVar, size, offset, slot, slotRem);
		return false;
	}

	// An index is needed for the save slot description
	if (!_hasIndex) {
		warning("No index written yet");
		return false;
	}

	// The index is consumed by this save, even if a later step fails
	_hasIndex = false;

	if (!createWriter(slot))
		return false;

	SavePartInfo info(kSlotNameLength, (uint32) _vm->getGameType(), 0,
			_vm->getEndianness(), varSize);
	SavePartVars vars(_vm, varSize);

	// The description comes from this slot's entry in the index
	info.setDesc(_index + (slot * kSlotNameLength), kSlotNameLength);
	// Capture all variables
	if (!vars.readFrom(0, 0, varSize))
		return false;

	if (!_writer->writePart(0, &info))
		return false;
	if (!_writer->writePart(1, &vars))
		return false;

	_lastSlot = slot;
	return true;
}
Ejemplo n.º 18
0
// Generic factory entry point: forwards to createWriter(typeName) and returns
// the writer as an ossimObject (null if no writer was created).
ossimObject* ossimImageWriterFactory::createObject(const ossimString& typeName)const
{
   ossimImageFileWriter* writer = createWriter(typeName);
   return writer;
}
Ejemplo n.º 19
0
// Generic factory entry point: forwards to createWriter(kwl, prefix) and
// returns the writer as an ossimObject (null if no writer was created).
ossimObject* ossimImageWriterFactory::createObject(const ossimKeywordlist& kwl,
                                                   const char *prefix)const
{
   ossimImageFileWriter* writer = createWriter(kwl, prefix);
   return writer;
}
Ejemplo n.º 20
0
// Parses the hits files produced by duplicate detection and splits the reads
// into two FASTA files: <out_prefix>.fa for reads that are kept and
// <out_prefix>.dups.fa for reads that are substrings of, or contained in,
// another read. The hits files are read round-robin in blocks of
// SequenceProcessFramework::BUFFER_SIZE lines and are unlinked afterwards.
// An empty hitsFilenames list produces two empty output files.
// Returns the name of the duplicates file.
std::string parseDupHits(const StringVector& hitsFilenames, const std::string& out_prefix)
{
    // Load the suffix array index and the reverse suffix array index
    // Note these are not the full suffix arrays
    SuffixArray* pFwdSAI = new SuffixArray(opt::prefix + SAI_EXT);
    SuffixArray* pRevSAI = new SuffixArray(opt::prefix + RSAI_EXT);

    // Load the read table to look up the lengths of the reads and their ids.
    // When rmduping a set of reads, the ReadInfoTable can actually be larger than the
    // BWT if the names of the reads are very long. Previously, when two reads
    // are duplicated, the read with the lexographically lower read name was chosen
    // to be kept. To save memory here, we break ties using the index in the ReadInfoTable
    // instead. This allows us to avoid loading the read names.
    ReadInfoTable* pRIT = new ReadInfoTable(opt::readsFile, pFwdSAI->getNumStrings(), RIO_NUMERICID);

    std::string outFile = out_prefix + ".fa";
    std::string dupFile = out_prefix + ".dups.fa";
    std::ostream* pWriter = createWriter(outFile);
    std::ostream* pDupWriter = createWriter(dupFile);

    size_t substringRemoved = 0;
    size_t identicalRemoved = 0;
    size_t kept = 0;
    size_t buffer_size = SequenceProcessFramework::BUFFER_SIZE;

    // The reads must be output in their original ordering.
    // The hits are in the blocks of buffer_size items. We read
    // buffer_size items from the first hits file, then buffer_size
    // from the second and so on until all the hits have been processed.
    size_t num_files = hitsFilenames.size();
    std::vector<std::istream*> reader_vec(num_files, 0);

    for(size_t i = 0; i < num_files; ++i)
    {
        std::cout << "Opening " << hitsFilenames[i] << "\n";
        reader_vec[i] = createReader(hitsFilenames[i]);
    }

    // With no hits files there is nothing to read; skipping the loop avoids
    // indexing an empty reader vector and taking a modulo by zero.
    bool done = (num_files == 0);
    size_t currReaderIdx = 0;
    size_t numRead = 0;
    size_t numReadersDone = 0;
    std::string line;

    while(!done)
    {
        // Parse a line from the current file. The stream state is tested
        // explicitly because a stream does not convert implicitly to bool
        // in copy-initialization from C++11 on.
        bool valid = !getline(*reader_vec[currReaderIdx], line).fail();
        ++numRead;
        // Deal with switching the active reader and the end of files
        if(!valid || numRead == buffer_size)
        {
            // Switch the reader
            currReaderIdx = (currReaderIdx + 1) % num_files;
            numRead = 0;

            // Break once all the readers are invalid
            // NOTE(review): a reader is counted every time a read on it
            // fails; this presumably relies on the files running out in
            // round-robin order - confirm against the producer.
            if(!valid)
            {
                ++numReadersDone;
                if(numReadersDone == num_files)
                {
                    done = true;
                    break;
                }
            }
        }

        // Parse the data
        if(valid)
        {
            std::string id;
            std::string sequence;
            std::string hitsStr;
            size_t readIdx;
            size_t numCopies;
            bool isSubstring;

            // Line layout: <id> <sequence> <rest of line = hits string>
            std::stringstream parser(line);
            parser >> id;
            parser >> sequence;
            getline(parser, hitsStr);

            OverlapVector ov;
            OverlapCommon::parseHitsString(hitsStr, pRIT, pRIT, pFwdSAI, pRevSAI, true, readIdx, numCopies, ov, isSubstring);
            
            bool isContained = false;
            if(isSubstring)
            {
                ++substringRemoved;
                isContained = true;
            }
            else
            {
                for(OverlapVector::iterator iter = ov.begin(); iter != ov.end(); ++iter)
                {
                    if(iter->isContainment() && iter->getContainedIdx() == 0)
                    {
                        // This read is contained by some other read
                        ++identicalRemoved;
                        isContained = true;
                        break;
                    }
                }
            }

            SeqItem item = {id, sequence};
            std::stringstream meta;
            meta << id << " NumDuplicates=" << numCopies;

            if(isContained)
            {
                // The read's index in the sequence data base
                // is needed when removing it from the FM-index.
                // In the output fasta, the index is appended to the read's id
                // as ",seqrank=<index>".
                std::stringstream newID;
                newID << item.id << ",seqrank=" << readIdx;
                item.id = newID.str();

                // Write some metadata with the fasta record
                item.write(*pDupWriter, meta.str());
            }
            else
            {
                ++kept;
                // Write the read
                item.write(*pWriter, meta.str());
            }
        }
    }

    // Close the readers and remove the now-consumed hits files
    for(size_t i = 0; i < num_files; ++i)
    {
        delete reader_vec[i];
        unlink(hitsFilenames[i].c_str());
    }

    
    printf("[%s] Removed %zu substring reads\n", PROGRAM_IDENT, substringRemoved);
    printf("[%s] Removed %zu identical reads\n", PROGRAM_IDENT, identicalRemoved);
    printf("[%s] Kept %zu reads\n", PROGRAM_IDENT, kept);

    // Delete allocated data
    delete pFwdSAI;
    delete pRevSAI;
    delete pRIT;
    delete pWriter;
    delete pDupWriter;

    return dupFile;
}
Ejemplo n.º 21
0
//
// Main
//
// Entry point of the filter step:
//  1. parses the options and loads the forward and reverse BWT,
//  2. runs the QC process over opt::readsFile (serially or with
//     opt::numThreads workers), writing through pWriter / pDiscardWriter,
//  3. rebuilds the BWT, reverse BWT and both lexicographic index files for
//     opt::outFile with ropebwt2.
// Always returns 0.
int filterMain(int argc, char** argv)
{
    parseFilterOptions(argc, argv);
    Timer* pTimer = new Timer(PROGRAM_IDENT);


    BWT* pBWT = new BWT(opt::prefix + BWT_EXT, opt::sampleRate);
    BWT* pRBWT = new BWT(opt::prefix + RBWT_EXT, opt::sampleRate);
    //pBWT->printInfo();

    // Both writers are handed to the post-processor but remain owned (and
    // are deleted) by this function
    std::ostream* pWriter = createWriter(opt::outFile);
    std::ostream* pDiscardWriter = createWriter(opt::discardFile);
    QCPostProcess* pPostProcessor = new QCPostProcess(pWriter, pDiscardWriter);

    // If performing duplicate check, create a bitvector to record
    // which reads are duplicates
    BitVector* pSharedBV = NULL;
    if(opt::dupCheck)
        pSharedBV = new BitVector(pBWT->getNumStrings());

    // Set up QC parameters
    QCParameters params;
    params.pBWT = pBWT;
    params.pRevBWT = pRBWT;
    params.pSharedBV = pSharedBV;

    params.checkDuplicates = opt::dupCheck;
    params.substringOnly = opt::substringOnly;
    params.checkKmer = opt::kmerCheck;
    params.checkHPRuns = opt::hpCheck;
    params.checkDegenerate = opt::lowComplexityCheck;

    params.verbose = opt::verbose;

    params.kmerLength = opt::kmerLength;
    params.kmerThreshold = opt::kmerThreshold;

    // The homopolymer check parameters are fixed here, not taken from the options
    params.hpKmerLength = 51;
    params.hpHardAcceptCount = 10;
    params.hpMinProportion = 0.1f;
    params.hpMinLength = 6;

    if(opt::numThreads <= 1)
    {
        // Serial mode
        QCProcess processor(params);
        PROCESS_FILTER_SERIAL(opt::readsFile, &processor, pPostProcessor);
    }
    else
    {
        // Parallel mode: one QCProcess per thread, all sharing params
        std::vector<QCProcess*> processorVector;
        for(int i = 0; i < opt::numThreads; ++i)
        {
            QCProcess* pProcessor = new QCProcess(params);
            processorVector.push_back(pProcessor);
        }

        PROCESS_FILTER_PARALLEL(opt::readsFile, processorVector, pPostProcessor);

        for(int i = 0; i < opt::numThreads; ++i)
            delete processorVector[i];
    }

    // The post-processor is deleted before the writers it was given
    delete pPostProcessor;
    delete pWriter;
    delete pDiscardWriter;

    // The indices of the input are released here; pBWT and pRBWT are
    // reassigned below with the indices rebuilt for the filtered output
    delete pBWT;
    delete pRBWT;

    if(pSharedBV != NULL)
        delete pSharedBV;

    std::cout << "RE-building index for " << opt::outFile << " in memory using ropebwt2\n";
    std::string prefix=stripFilename(opt::outFile);
        //BWT *pBWT, *pRBWT;
		// The two single blocks build the forward and the reverse BWT; each
		// one assigns a different pointer (pBWT / pRBWT).
		// NOTE(review): both blocks pass opt::numThreads to runRopebwt2 and
		// may run at the same time - confirm this is intended.
		#pragma omp parallel
		{
			#pragma omp single nowait
			{	
			    std::string bwt_filename = prefix + BWT_EXT;
				BWTCA::runRopebwt2(opt::outFile, bwt_filename, opt::numThreads, false);
				std::cout << "\t done bwt construction, generating .sai file\n";
				pBWT = new BWT(bwt_filename);
			}
			#pragma omp single nowait
			{	
				std::string rbwt_filename = prefix + RBWT_EXT;
				BWTCA::runRopebwt2(opt::outFile, rbwt_filename, opt::numThreads, true);
				std::cout << "\t done rbwt construction, generating .rsai file\n";
				pRBWT = new BWT(rbwt_filename);
			}
		}
        // Build and write the lexicographic index of the forward BWT, then release it
        std::string sai_filename = prefix + SAI_EXT;
		SampledSuffixArray ssa;
        ssa.buildLexicoIndex(pBWT, opt::numThreads);
        ssa.writeLexicoIndex(sai_filename);
        delete pBWT;

        // Same for the reverse BWT
        std::string rsai_filename = prefix + RSAI_EXT;
        SampledSuffixArray rssa;
        rssa.buildLexicoIndex(pRBWT, opt::numThreads);
        rssa.writeLexicoIndex(rsai_filename);
        delete pRBWT;

    // Cleanup
    delete pTimer;

    return 0;
}
Ejemplo n.º 22
0
//
// Main
//
int FMindexWalkMain(int argc, char** argv)
{
    parseFMWalkOptions(argc, argv);

    // Set the error correction parameters
    FMIndexWalkParameters ecParams;
	BWT *pBWT, *pRBWT;
	SampledSuffixArray* pSSA;

    // Load indices
	#pragma omp parallel
	{
		#pragma omp single nowait
		{	//Initialization of large BWT takes some time, pass the disk to next job
			std::cout << std::endl << "Loading BWT: " << opt::prefix + BWT_EXT << "\n";
			pBWT = new BWT(opt::prefix + BWT_EXT, opt::sampleRate);
		}
		#pragma omp single nowait
		{
			std::cout << "Loading RBWT: " << opt::prefix + RBWT_EXT << "\n";
			pRBWT = new BWT(opt::prefix + RBWT_EXT, opt::sampleRate);
		}
		#pragma omp single nowait
		{
			std::cout << "Loading Sampled Suffix Array: " << opt::prefix + SAI_EXT << "\n";
			pSSA = new SampledSuffixArray(opt::prefix + SAI_EXT, SSA_FT_SAI);
		}
	}

    BWTIndexSet indexSet;
    indexSet.pBWT = pBWT;
    indexSet.pRBWT = pRBWT;
    indexSet.pSSA = pSSA;
    ecParams.indices = indexSet;

	// Sample 100000 kmer counts into KmerDistribution from reverse BWT 
	// Don't sample from forward BWT as Illumina reads are bad at the 3' end
	ecParams.kd = BWTAlgorithms::sampleKmerCounts(opt::minOverlap, 100000, pRBWT);
	ecParams.kd.computeKDAttributes();
	// const size_t RepeatKmerFreq = ecParams.kd.getCutoffForProportion(0.95); 
	std::cout << "Median kmer frequency: " <<ecParams.kd.getMedian() << "\t Std: " <<  ecParams.kd.getSdv() 
					<<"\t 95% kmer frequency: " << ecParams.kd.getCutoffForProportion(0.95)
					<< "\t Repeat frequency cutoff: " << ecParams.kd.getRepeatKmerCutoff() << "\n";
	
    // Open outfiles and start a timer
    std::ostream* pWriter = createWriter(opt::outFile);
    std::ostream* pDiscardWriter = (!opt::discardFile.empty() ? createWriter(opt::discardFile) : NULL);
    Timer* pTimer = new Timer(PROGRAM_IDENT);

    ecParams.algorithm = opt::algorithm;
    ecParams.kmerLength = opt::kmerLength;
    ecParams.printOverlaps = opt::verbose > 0;
	ecParams.maxLeaves = opt::maxLeaves;
	ecParams.maxInsertSize = opt::maxInsertSize;
    ecParams.minOverlap = opt::minOverlap;
    ecParams.maxOverlap = opt::maxOverlap;
	
    // Setup post-processor
    FMIndexWalkPostProcess postProcessor(pWriter, pDiscardWriter, ecParams);

    std::cout << "Merge paired end reads into long reads for " << opt::readsFile << " using \n" 
				<< "min overlap=" <<  ecParams.minOverlap << "\t"
				<< "max overlap=" <<  ecParams.maxOverlap << "\t"
				<< "max leaves=" << opt::maxLeaves << "\t"
				<< "max Insert size=" << opt::maxInsertSize << "\t"
				<< "kmer size=" << opt::kmerLength << "\n\n";

    if(opt::numThreads <= 1)
    {
        // Serial mode
        FMIndexWalkProcess processor(ecParams);

		if (ecParams.algorithm == FMW_HYBRID || ecParams.algorithm == FMW_MERGE)
        SequenceProcessFramework::processSequencesSerial<SequenceWorkItemPair,
                                                         FMIndexWalkResult,
                                                         FMIndexWalkProcess,
                                                         FMIndexWalkPostProcess>(opt::readsFile, &processor, &postProcessor);

		else
        SequenceProcessFramework::processSequencesSerial<SequenceWorkItem,
                                                         FMIndexWalkResult,
                                                         FMIndexWalkProcess,
                                                         FMIndexWalkPostProcess>(opt::readsFile, &processor, &postProcessor);
    }
    else
    {
        // Parallel mode
        std::vector<FMIndexWalkProcess*> processorVector;
        for(int i = 0; i < opt::numThreads; ++i)
        {
            FMIndexWalkProcess* pProcessor = new FMIndexWalkProcess(ecParams);
            processorVector.push_back(pProcessor);
        }

		if (ecParams.algorithm == FMW_HYBRID || ecParams.algorithm == FMW_MERGE)
        SequenceProcessFramework::processSequencesParallel<SequenceWorkItemPair,
                                                           FMIndexWalkResult,
                                                           FMIndexWalkProcess,
                                                           FMIndexWalkPostProcess>(opt::readsFile, processorVector, &postProcessor);

		else
        SequenceProcessFramework::processSequencesParallel<SequenceWorkItem,
                                                           FMIndexWalkResult,
                                                           FMIndexWalkProcess,
                                                           FMIndexWalkPostProcess>(opt::readsFile, processorVector, &postProcessor);

        for(int i = 0; i < opt::numThreads; ++i)
        {
            delete processorVector[i];
        }
    }

    delete pBWT;
    if(pRBWT != NULL)
        delete pRBWT;

    if(pSSA != NULL)
        delete pSSA;

    delete pTimer;

    delete pWriter;
    if(pDiscardWriter != NULL)
        delete pDiscardWriter;
	
    return 0;
}
Ejemplo n.º 23
0
// Opens the writer for `filename`; the result is stored in m_pWriter.
ScaffoldWriterVisitor::ScaffoldWriterVisitor(const std::string& filename)
    : m_pWriter(createWriter(filename))
{
}
Ejemplo n.º 24
0
// Handles a save request; what is saved is selected by `offset`:
//   offset < kPropsSize               properties are copied into _props
//   offset < kPropsSize + kIndexSize  the save index is copied into _index
//   otherwise                         the whole variable block is written to a slot
// A size of 0 requests the whole variable block starting at variable 0.
// Returns false on any validation or write failure.
bool SaveLoad_v6::GameHandler::save(int16 dataVar, int32 size, int32 offset) {
	const uint32 varSize = SaveHandler::getVarSize(_vm);
	if (varSize == 0)
		return false;

	if (size == 0) {
		// Indicator to save all variables
		dataVar = 0;
		size = varSize;
	}

	const uint32 uOffset = (uint32) offset;

	if (uOffset < kPropsSize) {
		// Properties
		if (((uint32) (offset + size)) > kPropsSize) {
			warning("Wrong index size (%d, %d)", size, offset);
			return false;
		}

		_vm->_inter->_variables->copyTo(dataVar, _props + offset, size);

		refreshProps();

		// If that screen doesn't save any extra temp saves, write a dummy
		const bool needsDummyExtra =
			_writer && (size == 40) && (offset == 0) && !_hasExtra;
		if (needsDummyExtra) {
			SavePartMem  mem(1);
			SavePartVars vars(_vm, varSize);

			uint8 extraSaveNumber = 0;
			if (!mem.readFrom(&extraSaveNumber, 0, 1))
				return false;
			if (!vars.readFrom(0, 0, varSize))
				return false;

			if (!_writer->writePart(2, &mem))
				return false;
			if (!_writer->writePart(3, &vars))
				return false;
		}

		return true;
	}

	if (uOffset < kPropsSize + kIndexSize) {
		// Save index
		if (((uint32) size) != kIndexSize) {
			warning("Wrong index size (%d, %d)", size, offset);
			return false;
		}

		// Just copy the index into our buffer
		_vm->_inter->_variables->copyTo(dataVar, _index, kIndexSize);
		return true;
	}

	// Save slot, whole variable block
	_hasExtra = false;

	const uint32 slot = _slotFile->getSlot(offset);
	const int slotRem = _slotFile->getSlotRemainder(offset);

	debugC(2, kDebugSaveLoad, "Saving to slot %d", slot);

	const bool validRequest = (slot < kSlotCount) && (slotRem == 0) &&
	                          (dataVar == 0) && (((uint32) size) == varSize);
	if (!validRequest) {
		warning("Invalid saving procedure (%d, %d, %d, %d, %d)",
				dataVar, size, offset, slot, slotRem);
		return false;
	}

	if (!createWriter(slot))
		return false;

	SavePartInfo info(kSlotNameLength, (uint32) _vm->getGameType(), 0,
			_vm->getEndianness(), varSize);
	SavePartVars vars(_vm, varSize);

	// The description comes from this slot's entry in the index
	info.setDesc(_index + (slot * kSlotNameLength), kSlotNameLength);
	// Capture all variables
	if (!vars.readFrom(0, 0, varSize))
		return false;

	if (!_writer->writePart(0, &info))
		return false;
	if (!_writer->writePart(1, &vars))
		return false;

	if (!_spriteHandler->get(_writer, 4))
		return false;

	return true;
}
Ejemplo n.º 25
0
void
Gui::renderSelectedNode()
{
    NodeGraph* graph = getLastSelectedGraph();

    if (!graph) {
        return;
    }

    NodesGuiList selectedNodes = graph->getSelectedNodes();

    if ( selectedNodes.empty() ) {
        Dialogs::warningDialog( tr("Render").toStdString(), tr("You must select a node to render first!").toStdString() );

        return;
    }
    std::list<AppInstance::RenderWork> workList;
    bool useStats = getApp()->isRenderStatsActionChecked();
    for (NodesGuiList::const_iterator it = selectedNodes.begin();
         it != selectedNodes.end(); ++it) {
        NodePtr internalNode = (*it)->getNode();
        if (!internalNode) {
            continue;
        }
        EffectInstPtr effect = internalNode->getEffectInstance();
        if (!effect) {
            continue;
        }
        if ( effect->isWriter() ) {
            if ( !effect->areKnobsFrozen() ) {
                //if ((*it)->getNode()->is)
                ///if the node is a writer, just use it to render!
                AppInstance::RenderWork w;
                w.writer = dynamic_cast<OutputEffectInstance*>( effect.get() );
                assert(w.writer);
                w.firstFrame = INT_MIN;
                w.lastFrame = INT_MAX;
                w.frameStep = INT_MIN;
                w.useRenderStats = useStats;
                workList.push_back(w);
            }
        } else {
            if (selectedNodes.size() == 1) {
                ///create a node and connect it to the node and use it to render
#ifndef NATRON_ENABLE_IO_META_NODES
                NodePtr writer = createWriter();
#else
                NodeGraph* graph = selectedNodes.front()->getDagGui();
                NodePtr writer = getApp()->createWriter( "", eCreateNodeReasonInternal, graph->getGroup() );
#endif
                if (writer) {
                    AppInstance::RenderWork w;
                    w.writer = dynamic_cast<OutputEffectInstance*>( writer->getEffectInstance().get() );
                    assert(w.writer);
                    w.firstFrame = INT_MIN;
                    w.lastFrame = INT_MAX;
                    w.frameStep = INT_MIN;
                    w.useRenderStats = useStats;
                    workList.push_back(w);
                }
            }
        }
    }
    _imp->_appInstance->startWritersRendering(false, workList);
} // Gui::renderSelectedNode
Ejemplo n.º 26
0
//
// Main
//
int filterMain(int argc, char** argv)
{
    parseFilterOptions(argc, argv);
    Timer* pTimer = new Timer(PROGRAM_IDENT);


    BWT* pBWT = new BWT(opt::prefix + BWT_EXT, opt::sampleRate);
    BWT* pRBWT = new BWT(opt::prefix + RBWT_EXT, opt::sampleRate);
    pBWT->printInfo();

    std::ostream* pWriter = createWriter(opt::outFile);
    std::ostream* pDiscardWriter = createWriter(opt::discardFile);
    QCPostProcess* pPostProcessor = new QCPostProcess(pWriter, pDiscardWriter);

    // If performing duplicate check, create a bitvector to record
    // which reads are duplicates
    BitVector* pSharedBV = NULL;
    if(opt::dupCheck)
        pSharedBV = new BitVector(pBWT->getNumStrings());

    // Set up QC parameters
    QCParameters params;
    params.pBWT = pBWT;
    params.pRevBWT = pRBWT;
    params.pSharedBV = pSharedBV;

    params.checkDuplicates = opt::dupCheck;
    params.substringOnly = opt::substringOnly;
    params.checkKmer = opt::kmerCheck;
    params.kmerBothStrand = opt::kmerBothStrand;
    params.checkHPRuns = opt::hpCheck;
    params.checkDegenerate = opt::lowComplexityCheck;

    params.verbose = opt::verbose;

    params.kmerLength = opt::kmerLength;
    params.kmerThreshold = opt::kmerThreshold;

    params.hpKmerLength = 51;
    params.hpHardAcceptCount = 10;
    params.hpMinProportion = 0.1f;
    params.hpMinLength = 6;

    if(opt::numThreads <= 1)
    {
        // Serial mode
        QCProcess processor(params);
        PROCESS_FILTER_SERIAL(opt::readsFile, &processor, pPostProcessor);
    }
    else
    {
        // Parallel mode
        std::vector<QCProcess*> processorVector;
        for(int i = 0; i < opt::numThreads; ++i)
        {
            QCProcess* pProcessor = new QCProcess(params);
            processorVector.push_back(pProcessor);
        }

        PROCESS_FILTER_PARALLEL(opt::readsFile, processorVector, pPostProcessor);

        for(int i = 0; i < opt::numThreads; ++i)
            delete processorVector[i];
    }

    delete pPostProcessor;
    delete pWriter;
    delete pDiscardWriter;

    delete pBWT;
    delete pRBWT;

    if(pSharedBV != NULL)
        delete pSharedBV;

    // Rebuild the FM-index without the discarded reads
    std::string out_prefix = stripFilename(opt::outFile);
    removeReadsFromIndices(opt::prefix, opt::discardFile, out_prefix, BWT_EXT, SAI_EXT, false, opt::numThreads);
    removeReadsFromIndices(opt::prefix, opt::discardFile, out_prefix, RBWT_EXT, RSAI_EXT, true, opt::numThreads);

    // Cleanup
    delete pTimer;
    if(opt::numThreads > 1)
        pthread_exit(NULL);

    return 0;
}
Ejemplo n.º 27
0
// Cluster the reads of opt::readsFile and write the result to opt::outFile.
//
// The work is done in two phases:
//  1. The cluster (or, when opt::extendFile is set, the extend) framework is
//     run serially or with opt::numThreads processor objects. Its
//     post-processor is given a writer on the intermediate file
//     opt::outFile + ".preclusters".
//  2. The intermediate file is read back line by line. Each record carries
//     an index range [lowIdx, highIdx]; the range is expanded into one
//     tab-separated output line per read, using the suffix array and the
//     read info table to look up read names. The intermediate file is
//     unlinked afterwards.
//
// Intermediate records that cannot be parsed are reported on stderr and
// skipped rather than being processed with indeterminate field values.
void cluster()
{
    BWT* pBWT = new BWT(opt::prefix + BWT_EXT);
    BWT* pRBWT = new BWT(opt::prefix + RBWT_EXT);
    OverlapAlgorithm* pOverlapper = new OverlapAlgorithm(pBWT, pRBWT,opt::errorRate, opt::seedLength, opt::seedStride, true);

    // Switch the overlapper to its exact modes when the error rate is below 0.001
    pOverlapper->setExactModeOverlap(opt::errorRate < 0.001f);
    pOverlapper->setExactModeIrreducible(opt::errorRate < 0.001f);

    // Shared between the post-processor and the cluster parameters
    BitVector markedReads(pBWT->getNumStrings());

    std::string preclustersFile = opt::outFile + ".preclusters";
    std::ostream* pPreWriter = createWriter(preclustersFile);
    ClusterPostProcess postProcessor(pPreWriter, opt::minSize, &markedReads);
    
    // Set the cluster parameters
    ClusterParameters parameters;
    parameters.pOverlapper = pOverlapper;
    parameters.minOverlap = opt::minOverlap;
    parameters.maxClusterSize = opt::maxSize;
    parameters.maxIterations = opt::maxIterations;
    parameters.pMarkedReads = &markedReads;

    // Read the limit kmer sequences, if provided
    std::set<std::string>* pLimitKmers = NULL;

    if(!opt::limitFile.empty())
    {
        // Read in the limit sequences
        pLimitKmers = new std::set<std::string>;
        readLimitKmers(pLimitKmers);
        parameters.pLimitKmers = pLimitKmers;
        parameters.limitK = opt::limitKmer;
    }
    else
    {
        parameters.pLimitKmers = NULL;
        parameters.limitK = 0;
    }

    // Make pre-clusters from the reads
    if(opt::numThreads <= 1)
    {
        printf("[%s] starting serial-mode read clustering\n", PROGRAM_IDENT);
        ClusterProcess processor(parameters);
        
        // If the extend file is empty, build new clusters
        if(opt::extendFile.empty())
        {
            PROCESS_CLUSTER_SERIAL(opt::readsFile, &processor, &postProcessor);
        }
        else
        {
            // Process a set of preexisting clusters
            ClusterReader clusterReader(opt::extendFile);
            PROCESS_EXTEND_SERIAL(clusterReader, &processor, &postProcessor);
        }
    }
    else
    {
        printf("[%s] starting parallel-mode read clustering computation with %d threads\n", PROGRAM_IDENT, opt::numThreads);
        
        // One processor object per requested thread
        std::vector<ClusterProcess*> processorVector;
        for(int i = 0; i < opt::numThreads; ++i)
        {
            ClusterProcess* pProcessor = new ClusterProcess(parameters);
            processorVector.push_back(pProcessor);
        }
        
        if(opt::extendFile.empty())
        {
            PROCESS_CLUSTER_PARALLEL(opt::readsFile, processorVector, &postProcessor);
        }
        else
        {
            ClusterReader clusterReader(opt::extendFile);
            PROCESS_EXTEND_PARALLEL(clusterReader, processorVector, &postProcessor);
        }
        
        for(size_t i = 0; i < processorVector.size(); ++i)
        {
            delete processorVector[i];
            processorVector[i] = NULL;
        }
    }

    // The intermediate writer is released before the file is read back below
    delete pPreWriter;
    delete pBWT;
    delete pRBWT;
    delete pOverlapper;

    // Deallocate limit kmers (deleting a null pointer is a no-op)
    delete pLimitKmers;

    // Open the preclusters file and convert them to read names
    SuffixArray* pFwdSAI = new SuffixArray(opt::prefix + SAI_EXT);
    ReadInfoTable* pRIT = new ReadInfoTable(opt::readsFile, pFwdSAI->getNumStrings());

    size_t seedIdx = 0;
    std::istream* pPreReader = createReader(preclustersFile);
    std::ostream* pClusterWriter = createWriter(opt::outFile);
    std::string line;
    while(getline(*pPreReader,line))
    {
        std::stringstream parser(line);
        std::string clusterName;
        std::string readSequence;
        size_t clusterSize = 0;
        int64_t lowIdx = 0;
        int64_t highIdx = 0;

        // A failed extraction leaves the remaining fields unassigned, so the
        // record must not be used unless every field was read successfully
        if(!(parser >> clusterName >> clusterSize >> readSequence >> lowIdx >> highIdx))
        {
            fprintf(stderr, "[%s] warning: skipping malformed precluster record: %s\n", PROGRAM_IDENT, line.c_str());
            continue;
        }

        if(lowIdx > highIdx)
        {
            // This is an extra read that is not present in the FM-index
            // Output a record with a fake read ID
            *pClusterWriter << clusterName << "\t" << clusterSize << "\tseed-" << seedIdx++ << "\t" << readSequence << "\n";
        }
        else
        {
            // Emit one line for every read in the inclusive index range
            for(int64_t i = lowIdx; i <= highIdx; ++i)
            {
                const ReadInfo& targetInfo = pRIT->getReadInfo(pFwdSAI->get(i).getID());
                *pClusterWriter << clusterName << "\t" << clusterSize << "\t" << targetInfo.id << "\t" << readSequence << "\n";
            }
        }
    }

    // The intermediate file is no longer needed once it has been converted
    unlink(preclustersFile.c_str());

    delete pFwdSAI;
    delete pRIT;
    delete pPreReader;
    delete pClusterWriter;
}
Ejemplo n.º 28
0
//
// Main
//
int overlapLongMain(int argc, char** argv)
{
    parseOverlapLongOptions(argc, argv);

    // Open output file
    std::ostream* pASQGWriter = createWriter(opt::outFile);

    // Build and write the ASQG header
    ASQG::HeaderRecord headerRecord;
    headerRecord.setOverlapTag(opt::minOverlap);
    headerRecord.setErrorRateTag(opt::errorRate);
    headerRecord.setInputFileTag(opt::readsFile);
    headerRecord.setTransitiveTag(true);
    headerRecord.write(*pASQGWriter);

    // Determine which index files to use. If a target file was provided,
    // use the index of the target reads
    std::string indexPrefix;
    if(!opt::targetFile.empty())
        indexPrefix = stripFilename(opt::targetFile);
    else
        indexPrefix = stripFilename(opt::readsFile);

    BWT* pBWT = new BWT(indexPrefix + BWT_EXT, opt::sampleRate);
    SampledSuffixArray* pSSA = new SampledSuffixArray(indexPrefix + SAI_EXT, SSA_FT_SAI);
    
    Timer* pTimer = new Timer(PROGRAM_IDENT);
    pBWT->printInfo();

    // Read the sequence file and write vertex records for each
    // Also store the read names in a vector of strings
    ReadTable reads;
    
    SeqReader* pReader = new SeqReader(opt::readsFile, SRF_NO_VALIDATION);
    SeqRecord record;
    while(pReader->get(record))
    {
        reads.addRead(record.toSeqItem());
        ASQG::VertexRecord vr(record.id, record.seq.toString());
        vr.write(*pASQGWriter);

        if(reads.getCount() % 100000 == 0)
            printf("Read %zu sequences\n", reads.getCount());
    }

    delete pReader;
    pReader = NULL;

    BWTIndexSet index;
    index.pBWT = pBWT;
    index.pSSA = pSSA;
    index.pReadTable = &reads;

    // Make a prefix for the temporary hits files
    size_t n_reads = reads.getCount();

    omp_set_num_threads(opt::numThreads);

#pragma omp parallel for
    for(size_t read_idx = 0; read_idx < n_reads; ++read_idx)
    {
        const SeqItem& curr_read = reads.getRead(read_idx);

        printf("read %s %zubp\n", curr_read.id.c_str(), curr_read.seq.length());
        SequenceOverlapPairVector sopv = 
            KmerOverlaps::retrieveMatches(curr_read.seq.toString(),
                                          opt::seedLength,
                                          opt::minOverlap,
                                          1 - opt::errorRate,
                                          100,
                                          index);

        printf("Found %zu matches\n", sopv.size());
        for(size_t i = 0; i < sopv.size(); ++i)
        {
            std::string match_id = reads.getRead(sopv[i].match_idx).id;

            // We only want to output each edge once so skip this overlap
            // if the matched read has a lexicographically lower ID
            if(curr_read.id > match_id)
                continue;

            std::string ao = ascii_overlap(sopv[i].sequence[0], sopv[i].sequence[1], sopv[i].overlap, 50);
            printf("\t%s\t[%d %d] ID=%s OL=%d PI:%.2lf C=%s\n", ao.c_str(),
                                                                sopv[i].overlap.match[0].start,
                                                                sopv[i].overlap.match[0].end,
                                                                match_id.c_str(),
                                                                sopv[i].overlap.getOverlapLength(),
                                                                sopv[i].overlap.getPercentIdentity(),
                                                                sopv[i].overlap.cigar.c_str());

            // Convert to ASQG
            SeqCoord sc1(sopv[i].overlap.match[0].start, sopv[i].overlap.match[0].end, sopv[i].overlap.length[0]);
            SeqCoord sc2(sopv[i].overlap.match[1].start, sopv[i].overlap.match[1].end, sopv[i].overlap.length[1]);
            
            // KmerOverlaps returns the coordinates of the overlap after flipping the reads
            // to ensure the strand matches. The ASQG file wants the coordinate of the original
            // sequencing strand. Flip here if necessary
            if(sopv[i].is_reversed)
                sc2.flip();

            // Convert the SequenceOverlap the ASQG's overlap format
            Overlap ovr(curr_read.id, sc1, match_id,  sc2, sopv[i].is_reversed, -1);

            ASQG::EdgeRecord er(ovr);
            er.setCigarTag(sopv[i].overlap.cigar);
            er.setPercentIdentityTag(sopv[i].overlap.getPercentIdentity());

#pragma omp critical
            {
                er.write(*pASQGWriter);
            }
        }
    }

    // Cleanup
    delete pReader;
    delete pBWT; 
    delete pSSA;
    
    delete pASQGWriter;
    delete pTimer;
    if(opt::numThreads > 1)
        pthread_exit(NULL);

    return 0;
}
Ejemplo n.º 29
0
//
// Main
//
int preprocessMain(int argc, char** argv)
{
    Timer* pTimer = new Timer("sga preprocess");
    parsePreprocessOptions(argc, argv);

    std::cerr << "Parameters:\n";
    std::cerr << "QualTrim: " << opt::qualityTrim << "\n";

    if(opt::qualityFilter >= 0)
        std::cerr << "QualFilter: at most " << opt::qualityFilter << " low quality bases\n";
    else
        std::cerr << "QualFilter: no filtering\n";

    std::cerr << "HardClip: " << opt::hardClip << "\n";
    std::cerr << "Min length: " << opt::minLength << "\n";
    std::cerr << "Sample freq: " << opt::sampleFreq << "\n";
    std::cerr << "PE Mode: " << opt::peMode << "\n";
    std::cerr << "Quality scaling: " << opt::qualityScale << "\n";
    std::cerr << "MinGC: " << opt::minGC << "\n";
    std::cerr << "MaxGC: " << opt::maxGC << "\n";
    std::cerr << "Outfile: " << (opt::outFile.empty() ? "stdout" : opt::outFile) << "\n";
    std::cerr << "Orphan file: " << (opt::orphanFile.empty() ? "none" : opt::orphanFile) << "\n";
    if(opt::bDiscardAmbiguous)
        std::cerr << "Discarding sequences with ambiguous bases\n";
    if(opt::bDustFilter)
        std::cerr << "Dust threshold: " << opt::dustThreshold << "\n";
    if(!opt::suffix.empty())
        std::cerr << "Suffix: " << opt::suffix << "\n";

    if(opt::adapterF.length() && opt::adapterR.length())
    {
        std::cerr << "Adapter sequence fwd: " << opt::adapterF << "\n";
        std::cerr << "Adapter sequence rev: " << opt::adapterR << "\n";
    }

    // Seed the RNG
    srand(time(NULL));

    std::ostream* pWriter;
    if(opt::outFile.empty())
    {
        pWriter = &std::cout;
    }
    else
    {
        std::ostream* pFile = createWriter(opt::outFile);
        pWriter = pFile;
    }

    // Create a filehandle to write orphaned reads to, if necessary
    std::ostream* pOrphanWriter = NULL;
    if(!opt::orphanFile.empty())
        pOrphanWriter = createWriter(opt::orphanFile);

    if(opt::peMode == 0)
    {
        // Treat files as SE data
        while(optind < argc)
        {
            std::string filename = argv[optind++];
            std::cerr << "Processing " << filename << "\n\n";
            SeqReader reader(filename, SRF_NO_VALIDATION);
            SeqRecord record;

            while(reader.get(record))
            {
                bool passed = processRead(record);
                if(passed && samplePass())
                {
                    if(!opt::suffix.empty())
                        record.id.append(opt::suffix);

                    record.write(*pWriter);
                    ++s_numReadsKept;
                    s_numBasesKept += record.seq.length();
                }
            }
        }
    }
    else
    {
        assert(opt::peMode == 1 || opt::peMode == 2);
        int numFiles = argc - optind;
        if(opt::peMode == 1 && numFiles % 2 == 1)
        {
            std::cerr << "Error: An even number of files must be given for pe-mode 1\n";
            exit(EXIT_FAILURE);
        }

        while(optind < argc)
        {
            SeqReader* pReader1;
            SeqReader* pReader2;

            if(opt::peMode == 1)
            {
                // Read from separate files
                std::string filename1 = argv[optind++];
                std::string filename2 = argv[optind++];

                pReader1 = new SeqReader(filename1, SRF_NO_VALIDATION);
                pReader2 = new SeqReader(filename2, SRF_NO_VALIDATION);

                std::cerr << "Processing pe files " << filename1 << ", " << filename2 << "\n";

            }
            else
            {
                // Read from a single file
                std::string filename = argv[optind++];
                pReader1 = new SeqReader(filename, SRF_NO_VALIDATION);
                pReader2 = pReader1;
                std::cerr << "Processing interleaved pe file " << filename << "\n";
            }

            SeqRecord record1;
            SeqRecord record2;
            while(pReader1->get(record1) && pReader2->get(record2))
            {
                // If the names of the records are the same, append a /1 and /2 to them
                if(record1.id == record2.id)
                {
                    if(!opt::suffix.empty())
                    {
                        record1.id.append(opt::suffix);
                        record2.id.append(opt::suffix);
                    }

                    record1.id.append("/1");
                    record2.id.append("/2");
                }

                // Ensure the read names are sensible
                std::string expectedID2 = getPairID(record1.id);
                std::string expectedID1 = getPairID(record2.id);

                if(expectedID1 != record1.id || expectedID2 != record2.id)
                {
                    std::cerr << "Warning: Pair IDs do not match (expected format /1,/2 or /A,/B)\n";
                    std::cerr << "Read1 ID: " << record1.id << "\n";
                    std::cerr << "Read2 ID: " << record2.id << "\n";
                    s_numInvalidPE += 2;
                }

                bool passed1 = processRead(record1);
                bool passed2 = processRead(record2);

                if(!samplePass())
                    continue;

                if(passed1 && passed2)
                {
                    record1.write(*pWriter);
                    record2.write(*pWriter);
                    s_numReadsKept += 2;
                    s_numBasesKept += record1.seq.length();
                    s_numBasesKept += record2.seq.length();
                }
                else if(passed1 && pOrphanWriter != NULL)
                {
                    record1.write(*pOrphanWriter);
                }
                else if(passed2 && pOrphanWriter != NULL)
                {
                    record2.write(*pOrphanWriter);
                }
            }

            if(pReader2 != pReader1)
            {
                // only delete reader2 if it is a distinct pointer
                delete pReader2;
                pReader2 = NULL;
            }
            delete pReader1;
            pReader1 = NULL;

        }

    }

    if(pWriter != &std::cout)
        delete pWriter;
    if(pOrphanWriter != NULL)
        delete pOrphanWriter;

    std::cerr << "\nPreprocess stats:\n";
    std::cerr << "Reads parsed:\t" << s_numReadsRead << "\n";
    std::cerr << "Reads kept:\t" << s_numReadsKept << " (" << (double)s_numReadsKept / (double)s_numReadsRead << ")\n";
    std::cerr << "Reads failed primer screen:\t" << s_numReadsPrimer << " (" << (double)s_numReadsPrimer / (double)s_numReadsRead << ")\n";
    std::cerr << "Bases parsed:\t" << s_numBasesRead << "\n";
    std::cerr << "Bases kept:\t" << s_numBasesKept << " (" << (double)s_numBasesKept / (double)s_numBasesRead << ")\n";
    std::cerr << "Number of incorrectly paired reads that were discarded: " << s_numInvalidPE << "\n";
    if(opt::bDustFilter)
        std::cerr << "Number of reads failed dust filter: " << s_numFailedDust << "\n";
    delete pTimer;
    return 0;
}
Ejemplo n.º 30
0
//
// SGPairedPathResolveVisitor
//
// Construct the visitor and open its output writer on a fixed file name in
// the current working directory.
SGPairedPathResolveVisitor::SGPairedPathResolveVisitor()
{
    const char* const fragmentsPath = "fragments.fa";
    m_pWriter = createWriter(fragmentsPath);
}