C++ (Cpp) ReadSet示例，ReadSet C++ (Cpp)示例

示例#1

0

显示文件

文件： SQLiteObjectStorage.cpp 项目： MikeSofaer/sirikata

/**
 * Merges the storage key of every read in `other` into the read at the same
 * index of `ss`, appending new reads to `ss` when `other` has more entries.
 *
 * Reads that exist only in `ss` (indices at or beyond other.reads_size())
 * are left untouched.
 *
 * @param ss     destination; must provide reads_size(), mutable_reads(int)
 *               and add_reads()
 * @param other  source; must provide reads_size() and reads(int)
 */
template <typename StorageSet,typename ReadSet> void mergeKeysFromStorageSet(StorageSet &ss, const ReadSet&other) {
    const int len = other.reads_size();
    const int sslen = ss.reads_size();
    // Only indices present in BOTH sets can be merged in place; indexing
    // `other` up to ss.reads_size() would read past its end when `ss` is
    // the longer of the two.
    const int shared = (sslen < len) ? sslen : len;
    int i = 0;
    for (; i < shared; ++i) {
        mergeStorageKey(ss.mutable_reads(i), other.reads(i));
    }
    // Grow `ss` by one slot per remaining source read, then merge into it.
    for (; i < len; ++i) {
        ss.add_reads();
        mergeStorageKey(ss.mutable_reads(i), other.reads(i));
    }
}

示例#2

0

显示文件

文件： CassandraStorage.cpp 项目： pathorn/sirikata

void CassandraStorage::executeRangeRead(const Bucket& bucket, SliceRange& range, CommitCallback cb, const String& timestamp) {
	ReadSet* rs = new ReadSet;
    bool success = true;
    try {
    	*rs = mDB->db()->getColumnsValues(bucket.rawHexData(),CF_NAME, timestamp, range);
    }
    catch(...){
       	success = false;
    }
    if (rs->size()==0)
    	success = false;

    mContext->mainStrand->post(std::tr1::bind(&CassandraStorage::completeRange, this, cb, success, rs));
}

示例#3

0

显示文件

文件： StorageTestBase.hpp 项目： SinSiXX/sirikata

    /**
     * Asserts that a storage read produced the expected status and, when both
     * the expected and actual status are SUCCESS, the expected key/value pairs.
     *
     * @param expected_result  status the operation should have returned
     * @param expected         key/value pairs that should have been read
     * @param result           status actually returned
     * @param rs               values actually read; may be NULL, in which case
     *                         `expected` must be empty
     */
    void checkReadValuesImpl(Result expected_result, ReadSet expected, Result result, ReadSet* rs) {
        TS_ASSERT_EQUALS(expected_result, result);
        // Values are only compared for reads that were expected to, and did, succeed.
        if ((result != OH::Storage::SUCCESS) || (expected_result != OH::Storage::SUCCESS)) return;

        if (!rs) {
            TS_ASSERT_EQUALS(expected.size(), 0);
            return;
        }

        TS_ASSERT_EQUALS(expected.size(), rs->size());
        for(ReadSet::iterator it = expected.begin(); it != expected.end(); ++it) {
            const String& key = it->first;
            const String& value = it->second;
            // Look the key up once and compare through the iterator:
            // (*rs)[key] would insert an empty entry into the result set
            // under test whenever the key is missing.
            ReadSet::iterator found = rs->find(key);
            TS_ASSERT(found != rs->end());
            if (found != rs->end()) {
                TS_ASSERT_EQUALS(found->second, value);
            }
        }
    }

示例#4

0

显示文件

文件： DistributedNucleatingAssembler.cpp 项目： JGI-Bioinformatics/Kmernator

/**
 * Attempts to extend every contig with Cap3, using the reads pooled for it.
 *
 * For contig i, contigReadSet[i] is the pool handed to Cap3. A contig is only
 * submitted when its pool holds more than minimumCoverage reads. Contigs that
 * became longer are appended to changedContigs (as the new read); all others
 * are appended unchanged to finalContigs.
 *
 * The loop runs under OpenMP, so the order of appended contigs and of log
 * lines is not deterministic.
 *
 * @return a log with one entry per contig describing the outcome
 */
std::string extendContigsWithCap3(const ReadSet & contigs,
		ReadSet::ReadSetVector &contigReadSet, ReadSet & changedContigs,
		ReadSet & finalContigs, ReadSet::ReadSetSizeType minimumCoverage) {
	std::stringstream extendLog;

	int poolsWithoutMinimumCoverage = 0;

	// initialize per-thread Cap3 instances
	Cap3 cap3[omp_get_max_threads()];

	// The skip counter is combined through a reduction so that concurrent
	// increments are not lost.
	#pragma omp parallel for reduction(+:poolsWithoutMinimumCoverage)
	for (long i = 0; i < (long) contigs.getSize(); i++) {
		const Read &oldRead = contigs.getRead(i);
		Read newRead = oldRead;
		SequenceLengthType oldLen = oldRead.getLength(), newLen = 0;

		ReadSet::ReadSetSizeType poolSize = contigReadSet[i].getSize();

		double extTime = MPI_Wtime();
		if (poolSize > minimumCoverage) {
			LOG_VERBOSE_OPTIONAL(2, true, "Extending " << oldRead.getName() << " with " << poolSize << " pool of reads");
			newRead = cap3[omp_get_thread_num()].extendContig(oldRead, contigReadSet[i]);
			newLen = newRead.getLength();
		} else {
			poolsWithoutMinimumCoverage++;
		}
		extTime = MPI_Wtime() - extTime;
		long deltaLen = (long)newLen - (long)oldLen;
		const bool extended = deltaLen > 0;

		// Format this contig's log entry privately; the shared stream is only
		// touched inside the critical section below.
		std::stringstream entry;
		if (extended) {
			entry << std::endl << "Cap3 Extended " << oldRead.getName() << " "
					<< deltaLen << " bases to " << newRead.getLength() << ": "
					<< newRead.getName() << " with " << poolSize
					<< " reads in the pool, in " << extTime << " sec";
		} else {
			entry << std::endl << "Did not extend " << oldRead.getName() << " with " << poolSize << " reads in the pool, in " << extTime << " sec";
		}

		// Named critical section: serializes updates of the shared log and
		// output sets without sharing a lock with any unnamed critical
		// section that callees might use.
		#pragma omp critical (extendContigsWithCap3_results)
		{
			extendLog << entry.str();
			if (extended) {
				changedContigs.append(newRead);
			} else {
				finalContigs.append(oldRead);
			}
		}
	}

	LOG_VERBOSE_OPTIONAL(2, true, "Extended " << contigs.getSize() - poolsWithoutMinimumCoverage << " contigs out of " << contigs.getSize());

	return extendLog.str();
}

示例#5

0

显示文件

/// Removes from \p Reads every instruction that \p ByInst may clobber.
///
/// A load stays in the set only when alias analysis reports that \p ByInst
/// cannot write to the loaded address; every entry that is not a load is
/// removed unconditionally.
static void removeWrittenTo(AliasAnalysis *AA, ReadSet &Reads,
                            SILInstruction *ByInst) {

  // We can ignore retains, cond_fails, and dealloc_stacks.
  const bool Ignorable =
      isa<StrongRetainInst>(ByInst) || isa<RetainValueInst>(ByInst) ||
      isa<CondFailInst>(ByInst) || isa<DeallocStackInst>(ByInst);
  if (Ignorable)
    return;

  // Walk a copy of the set because entries are erased from Reads in the loop.
  SmallVector<SILInstruction *, 8> Snapshot(Reads.begin(), Reads.end());
  for (auto Candidate : Snapshot) {
    auto *Load = dyn_cast<LoadInst>(Candidate);
    const bool MayClobber =
        !Load || AA->mayWriteToMemory(ByInst, Load->getOperand());
    if (!MayClobber)
      continue;

    DEBUG(llvm::dbgs() << "  mayWriteTo\n" << *ByInst << " to " << *Candidate << "\n");
    Reads.erase(Candidate);
  }
}

示例#6

0

显示文件

文件： Depot.cpp 项目： mariokostelac/ra

// Loads `length` overlaps starting at `begin` into `dst` and replaces the
// read_a_/read_b_ fields of every loaded overlap with the matching entries
// of `reads`.
//
// `reads` must be non-empty, and every id found in a loaded overlap must be
// a valid index into it; both conditions are checked with ASSERT.
void Depot::load_overlaps(OverlapSet& dst, uint32_t begin, uint32_t length,
    const ReadSet& reads) {

    ASSERT(reads.size() != 0, "Depot", "Empty read set!");

    load(dst, begin, length, overlap_data_, overlap_index_);

    for (auto& it: dst) {

        // After load(), read_a_ holds a value that is reinterpreted as an
        // index into `reads`; it is swapped for the entry stored there.
        // NOTE(review): presumably the stored form keeps read ids in these
        // fields -- confirm against the serialization code.
        auto id = (uint64_t) it->read_a_;
        ASSERT(id < reads.size(), "Depot", "Missing read %lu!", id);
        it->read_a_ = reads[id];

        // Same resolution for the second read of the overlap.
        id = (uint64_t) it->read_b_;
        ASSERT(id < reads.size(), "Depot", "Missing read %lu!", id);
        it->read_b_ = reads[id];
    }
}

示例#7

0

显示文件

/// Scans every instruction of the loop described by \p CurrSummary, recording
/// instructions with side effects in the summary's MayWrites list and
/// collecting into \p SafeReads the loads and read-only applies that none of
/// the recorded writes may clobber.
void LoopTreeOptimization::analyzeCurrentLoop(
    std::unique_ptr<LoopNestSummary> &CurrSummary, ReadSet &SafeReads) {
  WriteSet &Writes = CurrSummary->MayWrites;
  SILLoop *TheLoop = CurrSummary->Loop;
  DEBUG(llvm::dbgs() << " Analyzing accesses.\n");

  // Function calls in the loop that only read from memory. They are checked
  // against the complete write list once the whole loop has been scanned.
  SmallVector<ApplyInst *, 8> PendingApplies;

  for (auto *Block : TheLoop->getBlocks()) {
    for (auto &I : *Block) {
      // fix_lifetime instructions are ignored entirely.
      if (isa<FixLifetimeInst>(&I))
        continue;

      // Loads are checked immediately against the writes seen so far.
      if (auto Load = dyn_cast<LoadInst>(&I)) {
        if (!mayWriteTo(AA, Writes, Load))
          SafeReads.insert(Load);
        continue;
      }

      // In contrast to loads, read-only calls are only collected here and
      // added to SafeReads after the scan.
      if (auto *Apply = dyn_cast<ApplyInst>(&I)) {
        SideEffectAnalysis::FunctionEffects Effects;
        SEA->getEffects(Effects, Apply);

        auto Behavior = Effects.getMemBehavior(RetainObserveKind::ObserveRetains);
        if (Behavior <= SILInstruction::MemoryBehavior::MayRead)
          PendingApplies.push_back(Apply);
      }

      if (!I.mayHaveSideEffects())
        continue;

      Writes.push_back(&I);
      // Drop previously collected reads that this instruction may clobber.
      removeWrittenTo(AA, SafeReads, &I);
    }
  }

  for (auto *Apply : PendingApplies) {
    if (!mayWriteTo(AA, SEA, Writes, Apply))
      SafeReads.insert(Apply);
  }
}

示例#8

0

显示文件

文件： CassandraStorage.cpp 项目： pathorn/sirikata

// Executes a commit. Runs in a separate thread, so the transaction is
// passed in directly
void CassandraStorage::executeCommit(const Bucket& bucket, Transaction* trans, CommitCallback cb, const String& timestamp) {
    ReadSet* rs = new ReadSet;
    Columns* columns = new Columns;
    Keys* eraseKeys = new Keys;
    Keys* readKeys = new Keys;

    for (Transaction::iterator it = trans->begin(); it != trans->end(); it++) {
        (*it).execute(bucket, columns, eraseKeys, readKeys, timestamp);
    }

    bool success = true;
    success = CassandraCommit(mDB, bucket, columns, eraseKeys, readKeys, rs, timestamp);

    if (rs->empty() || !success) {
        delete rs;
        rs = NULL;
    }

    mContext->mainStrand->post(std::tr1::bind(&CassandraStorage::completeCommit, this, trans, cb, success, rs));
}

示例#9

0

显示文件

文件： DistributedNucleatingAssembler.cpp 项目： JGI-Bioinformatics/Kmernator

/**
 * Attempts to extend every contig with the k-mer based ContigExtender.
 *
 * For contig i, contigReadSet[i] is the pool of reads used for the extension.
 * When the pool holds more than minimumCoverage reads, extension is retried
 * with k-mer sizes minKmerSize, minKmerSize + kmerStep, ... up to maxKmerSize
 * until the contig becomes longer. Contigs that grew are appended to
 * changedContigs (as the new read); all others are appended unchanged to
 * finalContigs.
 *
 * @return a log with one entry per contig describing the outcome
 */
std::string extendContigsWithContigExtender(ReadSet & contigs,
		ReadSet::ReadSetVector &contigReadSet, ReadSet & changedContigs,
		ReadSet & finalContigs, SequenceLengthType minKmerSize,
		double minimumCoverage, SequenceLengthType maxKmerSize,
		SequenceLengthType maxExtend, SequenceLengthType kmerStep) {

	std::stringstream extendLog;
	//#pragma omp parallel for
	for (ReadSet::ReadSetSizeType i = 0; i < contigs.getSize(); i++) {
		const Read &oldRead = contigs.getRead(i);
		// Start from the unmodified contig; it is replaced only when an
		// extension attempt actually produced a read.
		Read newRead = oldRead;
		SequenceLengthType oldLen = oldRead.getLength(), newLen = 0;
		ReadSet::ReadSetSizeType poolSize = contigReadSet[i].getSize();
		SequenceLengthType myKmerSize = minKmerSize;
		if (poolSize > minimumCoverage) {
			LOG_VERBOSE_OPTIONAL(2, true, "kmer-Extending " << oldRead.getName() << " with " << poolSize << " pool of reads");
			ReadSet myContig;
			myContig.append(oldRead);
			ReadSet newContig;

			while (newLen <= oldLen && myKmerSize <= maxKmerSize) {
				newContig = ContigExtender<KS>::extendContigs(myContig,
						contigReadSet[i], maxExtend, myKmerSize, myKmerSize);
				// Nothing came back: stop instead of reading element 0 of
				// an empty set.
				if (newContig.getSize() == 0)
					break;
				newLen = newContig.getRead(0).getLength();
				myKmerSize += kmerStep;
			}
			// newContig is still empty when the loop never ran (for example
			// minKmerSize > maxKmerSize) or the extender returned nothing.
			if (newContig.getSize() > 0)
				newRead = newContig.getRead(0);
		}
		long deltaLen = (long) newLen - (long) oldLen;
		if (deltaLen > 0) {
			// myKmerSize was already advanced past the size that succeeded.
			extendLog << std::endl << "Kmer Extended " << oldRead.getName() << " "
					<< deltaLen << " bases to " << newRead.getLength() << ": "
					<< newRead.getName() << " with " << poolSize
					<< " reads in the pool K " << (myKmerSize - kmerStep);
			//#pragma omp critical
			changedContigs.append(newRead);
		} else {
			extendLog << std::endl << "Did not extend " << oldRead.getName() << " with " << poolSize << " reads in the pool";
			//#pragma omp critical
			finalContigs.append(oldRead);
		}
	}
	return extendLog.str();
}

示例#10

0

显示文件

文件： DistributedNucleatingAssembler.cpp 项目： JGI-Bioinformatics/Kmernator

/**
 * Extends one batch of the local contigs and returns the extension log.
 *
 * The batch consists of at most maxContigsPerBatch contigs of _contigs,
 * starting at index batchIdx. The batch is written to a temporary global
 * FASTA file, matched against the reads through `matcher`, and then extended
 * with Cap3, Newbler or the built-in ContigExtender, depending on which
 * external tool path is configured. Extended contigs end up in changedContigs
 * and all others in finalContigs. The temporary file is unlinked before
 * returning.
 *
 * Returns an empty log without matching when the batch is globally empty.
 */
std::string runPartialBatch(mpi::communicator world, boost::shared_ptr< MatcherInterface > &matcher, ReadSet &_contigs, std::string _contigFile, ReadSet & changedContigs,
		ReadSet & finalContigs, int batchIdx, int maxContigsPerBatch, SequenceLengthType minKmerSize,
		double minimumCoverage, SequenceLengthType maxKmerSize,
		SequenceLengthType maxExtend, SequenceLengthType kmerStep) {

	LOG_DEBUG(1, "Starting runPartialBatch(" << batchIdx << " of " << _contigs.getSize() << "): " << MemoryUtils::getMemoryUsage());

	// Copy this batch's slice of the local contigs into its own ReadSet.
	ReadSet contigs;
	const int localCount = (int) _contigs.getSize();
	const int batchLimit = batchIdx + maxContigsPerBatch;
	const int batchEnd = (localCount < batchLimit) ? localCount : batchLimit;
	for (int idx = batchIdx; idx < batchEnd; ++idx)
		contigs.append(_contigs.getRead(idx));

	std::string extendLog;
	setGlobalReadSetConstants(world, contigs);
	if (contigs.getGlobalSize() == 0)
		return extendLog;

	std::string contigFile = DistributedOfstreamMap::writeGlobalReadSet(world, contigs, UniqueName::generateUniqueGlobalName(".tmp-batch" + UniqueName::getOurUniqueHandle() + "-", batchIdx), ".fasta", FormatOutput::Fasta());

	MatcherInterface::MatchReadResults contigReadSet = matcher->match(contigs, contigFile);
	assert(contigs.getSize() == contigReadSet.size());

	LOG_VERBOSE_OPTIONAL(1, world.rank() == 0, " batch " << contigs.getSize() << ". Matches made");

	// One log slot per OpenMP thread; slots stay empty on the ContigExtender path.
	int numThreads = omp_get_max_threads();
	std::string extendLogs[numThreads];

	const bool useCap3 = !Cap3Options::getOptions().getCap3Path().empty();
	if (useCap3) {
		Cap3 cap3Instances[numThreads];
		#pragma omp parallel for
		for (int t = 0; t < numThreads; t++) {
			extendLogs[t] = cap3Instances[t].extendContigs(contigs, contigReadSet, changedContigs, finalContigs, minimumCoverage, t, numThreads);
		}
	} else if (!NewblerOptions::getOptions().getNewblerPath().empty()) {
		Newbler newblerInstances[numThreads];
		#pragma omp parallel for
		for (int t = 0; t < numThreads; t++) {
			extendLogs[t] = newblerInstances[t].extendContigs(contigs, contigReadSet, changedContigs, finalContigs, minimumCoverage, t, numThreads);
		}
	} else {
		extendLog = extendContigsWithContigExtender(contigs, contigReadSet,
				changedContigs, finalContigs,
				minKmerSize, minimumCoverage, maxKmerSize, maxExtend, kmerStep);
	}

	// Concatenate the per-thread logs in thread order.
	for (int t = 0; t < numThreads; t++)
		extendLog += extendLogs[t];

	unlink(contigFile.c_str());

	return extendLog;
}

示例#11

0

显示文件

文件： DistributedNucleatingAssembler.cpp 项目： JGI-Bioinformatics/Kmernator

void finishLongContigs(long maxContigLength, ReadSet &changedContigs, ReadSet &finalContigs) {
	ReadSet keepContigs;
	for(long i = 0; i < (long) changedContigs.getSize(); i++) {
		const Read &read = changedContigs.getRead(i);
		if ((long) read.getLength() >= maxContigLength) {
			LOG_VERBOSE_OPTIONAL(1, true, read.getName() << " (" << read.getLength() << ") has exceeded maxContiglength, terminating extension");
			finalContigs.append(read);
		} else
			keepContigs.append(read);
	}
	changedContigs.swap(keepContigs);
}

示例#12

0

显示文件

/// Returns true when \p Inst has no side effects that would prevent hoisting.
/// \p SafeReads holds the instructions that were already proven to have no
/// such side effects.
static bool hasNoSideEffect(SILInstruction *Inst, ReadSet &SafeReads) {
  // The three accepted cases, checked in this order:
  //  1. cond_fail instructions. These can (and must) be hoisted if the
  //     operand is invariant, to preserve memory safety: a cond_fail that
  //     protected (executed before) a memory access must also execute before
  //     that access after hoisting.
  //  2. Instructions already marked as safe reads. An instruction that could
  //     read from memory and is not in this set must not be hoisted.
  //  3. Instructions with no memory behavior at all.
  return isa<CondFailInst>(Inst) ||
         SafeReads.count(Inst) != 0 ||
         Inst->getMemoryBehavior() == SILInstruction::MemoryBehavior::None;
}

示例#13

0

显示文件

文件： Depot.cpp 项目： mariokostelac/ra

// Stores the reads in `src` by handing them to store() together with this
// depot's read_data_ and read_index_ members.
// `src` must not be empty; this is checked with ASSERT before storing.
void Depot::store_reads(const ReadSet& src)  {

    ASSERT(src.size() != 0, "Depot", "Can not store an empty ReadSet!");
    store(src, read_data_, read_index_);
}

示例#14

0

显示文件

文件： Fastq2Fasta.cpp 项目： JGI-Bioinformatics/Kmernator

int main(int argc, char *argv[]) {

	if (!Fastq2FastaOptions::parseOpts(argc, argv)) exit(1);

	Cleanup::prepare();

	OptionsBaseInterface::FileListType &inputs = Options::getOptions().getInputFiles();
	long splitSizeBase = Fastq2FastaOptions::getOptions().getSplitSizeMegaBase() * 1000000;

	ReadSet reads;
	LOG_VERBOSE(1, "Reading Input Files" );
	reads.appendAllFiles(inputs);

	LOG_VERBOSE(1, "loaded " << reads.getSize() << " Reads, " << reads.getBaseCount()
			<< " Bases ");

	reads.identifyPairs();

	long currentBase = 0;
	OfstreamMap ofmap;
	string outputFilename = Options::getOptions().getOutputFile();
	bool hasOfMap = false;
	ostream *out = &cout;

	int partitionNum = 1;
	if (!outputFilename.empty()) {
		ofmap = OfstreamMap(outputFilename);
		hasOfMap = true;
	} else {
		splitSizeBase = 0; // do not support splitting when no output is specified
	}

	bool splitPairs = Fastq2FastaOptions::getOptions().getSplitPairs() != 0;
	string filekey;
	for(ReadSet::ReadSetSizeType pairIdx = 0 ; pairIdx < reads.getPairSize(); pairIdx++) {
		ReadSet::Pair pair = reads.getPair(pairIdx);

		ReadSet::ReadSetSizeType lesserIdx  = std::min(pair.read1, pair.read2);

		if (hasOfMap) {
			filekey = reads.getReadFileNamePrefix(lesserIdx);
		} else {
			filekey.clear();
		}

		if (splitSizeBase > 0) {
			SequenceLengthType len = reads.getRead(lesserIdx).getLength();
			currentBase += len;
			if (currentBase > splitSizeBase) {
				// new output handle
				partitionNum++;
				currentBase = len;
			}
			filekey += "-" + boost::lexical_cast<string>( partitionNum );
		}


		if (reads.isValidRead(pair.read1) && reads.isValidRead(pair.read2)) {

			const Read read = reads.getRead(pair.read1);
			if (hasOfMap) {
				if (splitPairs) {
					filekey += "-1";
				}
				out = &( ofmap.getOfstream(filekey) );
			}

			reads.getRead(pair.read1).write(*out);
			if (splitPairs) {
				filekey[filekey.length()-1] = '2';
				out = &( ofmap.getOfstream(filekey) );
			}
			reads.getRead(pair.read2).write(*out);

		} else {
			if (hasOfMap) {
				out = &( ofmap.getOfstream(filekey) );
			}
			reads.getRead(lesserIdx).write(*out);
		}

	}

}

示例#15

0

显示文件

文件： SQLiteObjectStorage.cpp 项目： MikeSofaer/sirikata

/**
 * Executes every read in `rs` against the database and stores the results in
 * `retval`, one response element per requested read (same index).
 *
 * For each read the value column is selected by object and key. A read that
 * yields no row is answered with cleared data and status KEY_MISSING. If the
 * request carries an index it is copied to the response element. When the
 * RETURN_READ_NAMES option is set, the request's keys are merged into the
 * response as well.
 *
 * @return DatabaseLocked (with all reads cleared from `retval`) as soon as
 *         any SQLite call reports SQLITE_LOCKED or SQLITE_BUSY; None otherwise.
 */
template <class ReadSet> SQLiteObjectStorage::Error SQLiteObjectStorage::applyReadSet(const SQLiteDBPtr& db, const ReadSet& rs, Protocol::Response&retval) {

    int num_reads=rs.reads_size();
    retval.clear_reads();
    while (retval.reads_size()<num_reads)
        retval.add_reads();
    SQLiteObjectStorage::Error databaseError=None;
    for (int rs_it=0;rs_it<num_reads;++rs_it) {
        String object_hex = getTableName(rs.reads(rs_it));
        String key_name = getKeyName(rs.reads(rs_it));

        String value_query = "SELECT value FROM ";
        value_query += "\"" TABLE_NAME "\"";
        value_query += " WHERE object == x\'" + object_hex + "\' AND key == ?";
        int rc;
        char* remain;
        // Initialized so that the finalize call below is well defined on
        // every path.
        sqlite3_stmt* value_query_stmt = NULL;
        bool newStep=true;   // stays true while no row has been returned
        bool locked=false;
        rc = sqlite3_prepare_v2(db->db(), value_query.c_str(), -1, &value_query_stmt, (const char**)&remain);
        SQLite::check_sql_error(db->db(), rc, NULL, "Error preparing value query statement");
        // Remember lock contention now: rc is overwritten by later calls, and
        // a lost BUSY/LOCKED would be misreported as a missing key.
        if (rc==SQLITE_LOCKED||rc==SQLITE_BUSY)
            locked=true;
        if (rc==SQLITE_OK) {
            rc = sqlite3_bind_text(value_query_stmt, 1, key_name.data(), (int)key_name.size(), SQLITE_TRANSIENT);
            SQLite::check_sql_error(db->db(), rc, NULL, "Error binding key name to value query statement");
            if (rc==SQLITE_LOCKED||rc==SQLITE_BUSY)
                locked=true;
            if (rc==SQLITE_OK) {
                int step_rc = sqlite3_step(value_query_stmt);
                while(step_rc == SQLITE_ROW) {
                    newStep=false;
                    retval.reads(rs_it).set_data((const char*)sqlite3_column_text(value_query_stmt, 0),sqlite3_column_bytes(value_query_stmt, 0));
                    step_rc = sqlite3_step(value_query_stmt);
                }
                if (step_rc != SQLITE_DONE) {
                    if (step_rc==SQLITE_LOCKED||step_rc==SQLITE_BUSY)
                        locked=true;
                    // reset the statement so it'll clean up properly
                    rc = sqlite3_reset(value_query_stmt);
                    SQLite::check_sql_error(db->db(), rc, NULL, "Error resetting value query statement");
                    if (rc==SQLITE_LOCKED||rc==SQLITE_BUSY)
                        locked=true;
                }

            }
        }
        rc = sqlite3_finalize(value_query_stmt);
        SQLite::check_sql_error(db->db(), rc, NULL, "Error finalizing value query statement");
        if (locked||rc == SQLITE_LOCKED||rc==SQLITE_BUSY) {
            // Discard partial results so the caller can retry the whole set.
            retval.clear_reads();
            return DatabaseLocked;
        }

        if (newStep) {
            retval.reads(rs_it).clear_data();
            retval.reads(rs_it).set_return_status(Protocol::StorageElement::KEY_MISSING);
        }
        if(rs.reads(rs_it).has_index()) {
            retval.reads(rs_it).set_index(rs.reads(rs_it).index());
        }
    }
    if (rs.has_options()&&(rs.options()&Protocol::ReadWriteSet::RETURN_READ_NAMES)!=0) {
        // The caller asked for the read names: copy the request's keys into
        // the response elements.
        mergeKeysFromStorageSet( retval, rs );
    }
    return databaseError;
}

示例#16

0

显示文件

文件： Depot.cpp 项目： mariokostelac/ra

// Loads the single read at position `index` and returns it.
// Returns nullptr when load_reads() produced no read for that index.
// NOTE(review): presumably the caller takes ownership of the returned
// Read -- confirm against load_reads().
Read* Depot::load_read(uint32_t index) {

    ReadSet temp;
    load_reads(temp, index, 1);
    // front() on an empty container is undefined behaviour.
    if (temp.size() == 0) {
        return nullptr;
    }
    return temp.front();
}

示例#17

0

显示文件

文件： DistributedNucleatingAssembler.cpp 项目： JGI-Bioinformatics/Kmernator

/**
 * DistributedNucleatingAssembler entry point.
 *
 * Loads the input reads across all MPI ranks, optionally applies the artifact
 * filter, builds a matcher (Vmatch when the configured k-mer size is 0,
 * KmerMatch otherwise) and then iteratively extends the contigs of the contig
 * file in batches. After every iteration the current final and changed
 * contigs are written out so that progress survives a crash. The loop ends
 * when no contig changed, no contigs remain, or the configured maximum number
 * of iterations is reached. Finally all finished and still unfinished contigs
 * are written to the configured output file.
 *
 * Any exception aborts the whole MPI job with exit code 1.
 */
int main(int argc, char *argv[]) {

	ForkDaemon::initialize();

	ScopedMPIComm< DistributedNucleatingAssemblerOptions > world(argc, argv);

	Cleanup::prepare();

	try {

		double timing1, timing2;

		timing1 = MPI_Wtime();

		OptionsBaseInterface::FileListType &inputFiles =
				Options::getOptions().getInputFiles();
		std::string contigFile =
				ContigExtenderBaseOptions::getOptions().getContigFile();
		std::string finalContigFile;
		double minimumCoverage =
				ContigExtenderBaseOptions::getOptions().getMinimumCoverage();
		long maxIterations =
				DistributedNucleatingAssemblerOptions::getOptions().getMaxIterations();

		ReadSet reads;
		LOG_VERBOSE_OPTIONAL(1, world.rank() == 0, "Reading Input Files" );
		reads.appendAllFiles(inputFiles, world.rank(), world.size());
		reads.identifyPairs();
		setGlobalReadSetConstants(world, reads);

		timing2 = MPI_Wtime();

		LOG_VERBOSE_OPTIONAL(1, world.rank() == 0, "loaded " << reads.getGlobalSize() << " Reads, (local:" << reads.getSize() << " pair:" << reads.getPairSize() << ") in " << (timing2-timing1) << " seconds" );
		LOG_DEBUG_GATHER(1, MemoryUtils::getMemoryUsage());

		if (FilterKnownOdditiesOptions::getOptions().getSkipArtifactFilter() == 0) {

			LOG_VERBOSE_OPTIONAL(1, world.rank() == 0, "Preparing artifact filter: ");

			FilterKnownOddities filter;
			LOG_VERBOSE_OPTIONAL(2, world.rank() == 0, "Applying sequence artifact filter to Input Files");

			unsigned long filtered = filter.applyFilter(reads);

			LOG_VERBOSE_GATHER(2, "local filter affected (trimmed/removed) " << filtered << " Reads ");
			LOG_DEBUG_GATHER(1, MemoryUtils::getMemoryUsage());

			unsigned long allFiltered;
			mpi::reduce(world, filtered, allFiltered, std::plus<unsigned long>(), 0);
			LOG_VERBOSE_OPTIONAL(1, world.rank() == 0, "distributed filter (trimmed/removed) " << allFiltered << " Reads.");

		}

		// A k-mer size of 0 selects the Vmatch based matcher.
		boost::shared_ptr< MatcherInterface > matcher;
		if (KmerBaseOptions::getOptions().getKmerSize() == 0) {
			matcher.reset( new Vmatch(world, UniqueName::generateHashName(inputFiles), reads) );
		} else {
			matcher.reset( new KmerMatch(world, reads) );
		}

		SequenceLengthType minKmerSize, maxKmerSize, kmerStep, maxExtend;
		ContigExtender<KS>::getMinMaxKmerSize(reads, minKmerSize, maxKmerSize,
				kmerStep);
		// All ranks agree on the smallest maximum k-mer size.
		maxKmerSize = boost::mpi::all_reduce(world, maxKmerSize, mpi::minimum<
				SequenceLengthType>());
		LOG_VERBOSE_OPTIONAL(1, world.rank() == 0, "Kmer size ranges: " << minKmerSize << "\t" << maxKmerSize << "\t" << kmerStep);
		maxExtend = maxKmerSize;

		timing1 = timing2;
		timing2 = MPI_Wtime();
		LOG_VERBOSE_OPTIONAL(1, world.rank() == 0, "Prepared Matcher indexes in " << (timing2-timing1) << " seconds");

		ReadSet finalContigs;
		ReadSet contigs;
		contigs.appendFastaFile(contigFile, world.rank(), world.size());

		int maxContigsPerBatch = DistributedNucleatingAssemblerOptions::getOptions().getMaxContigsPerBatch();

		// Same width as maxIterations, so the counter cannot wrap before the
		// configured limit is reached.
		long iteration = 0;
		while (++iteration <= maxIterations) {
			LOG_DEBUG_GATHER(1, "Iteration " << iteration << " " << MemoryUtils::getMemoryUsage());
			int batchIdx = 0;

			matcher->resetTimes("Start Iteration", MPI_Wtime());

			setGlobalReadSetConstants(world, contigs);

			LOG_VERBOSE_OPTIONAL(1, world.rank() == 0, "Iteration: " << iteration << ". Contig File: " << contigFile << ". contains " << contigs.getGlobalSize() << " Reads");
			if (contigs.getGlobalSize() == 0) {
				LOG_VERBOSE_OPTIONAL(1, true, "There are no contigs to extend in " << contigFile);
				break;
			}

			std::string extendLog;
			ReadSet changedContigs;
			// Every rank must run the same number of batches, so loop up to
			// the largest local contig count of any rank.
			int lastBatch = contigs.getSize();
			MPI_Allreduce(MPI_IN_PLACE, &lastBatch, 1, MPI_INT, MPI_MAX, world);
			LOG_DEBUG_OPTIONAL(1, world.rank() == 0, "Iteration: " << iteration << " Last batch is " << lastBatch);

			while (batchIdx < lastBatch) {
				extendLog += runPartialBatch(world, matcher, contigs, contigFile, changedContigs, finalContigs, batchIdx, maxContigsPerBatch, minKmerSize, minimumCoverage, maxKmerSize, maxExtend, kmerStep);
				batchIdx += maxContigsPerBatch;
			}

			matcher->recordTime("extendContigs", MPI_Wtime());
			LOG_DEBUG_GATHER(1, (extendLog));

			finishLongContigs(DistributedNucleatingAssemblerOptions::getOptions().getMaxContigLength(), changedContigs, finalContigs);

			LOG_DEBUG_GATHER(1, "Changed contigs: " << changedContigs.getSize() << " finalContigs: " << finalContigs.getSize());
			setGlobalReadSetConstants(world, changedContigs);
			setGlobalReadSetConstants(world, finalContigs);
			LOG_VERBOSE_OPTIONAL(1, world.rank() == 0, "Changed contigs: " << changedContigs.getGlobalSize() << " finalContigs: " << finalContigs.getGlobalSize());

			std::string oldFinalContigFile = finalContigFile;
			std::string oldContigFile = contigFile;
			{
				// write out the state of the contig files (so far) so we do not lose them
				DistributedOfstreamMap om(world,
						Options::getOptions().getOutputFile(), "");
				om.setBuildInMemory();
				if (finalContigs.getGlobalSize() > 0) {
					std::string fileKey = "final-" + boost::lexical_cast<
							std::string>(iteration);
					finalContigs.writeAll(om.getOfstream(fileKey),
							FormatOutput::Fasta());
					finalContigFile = om.getRealFilePath(fileKey);
				}
				if (changedContigs.getGlobalSize() > 0) {
					std::string filekey = "-inputcontigs-" + boost::lexical_cast<
							std::string>(iteration) + ".fasta";
					changedContigs.writeAll(om.getOfstream(filekey),
							FormatOutput::Fasta());
					contigFile = om.getRealFilePath(filekey);
				}
				// The changed contigs are the input of the next iteration.
				contigs = changedContigs;
			}

			if (world.rank() == 0) {
				// preserve the final contigs in case of crash
				unlink(Options::getOptions().getOutputFile().c_str());
				link(finalContigFile.c_str(), Options::getOptions().getOutputFile().c_str());
			}

			matcher->recordTime("writeFinalTime", MPI_Wtime());

			if (!Log::isDebug(1) && world.rank() == 0) {
				// remove most recent contig files (if not debugging)
				if (!oldFinalContigFile.empty()) {
					LOG_VERBOSE_OPTIONAL(1, true, "Removing " << oldFinalContigFile);
					unlink(oldFinalContigFile.c_str());
				}

				// Never delete the contig file the user supplied.
				if (ContigExtenderBaseOptions::getOptions().getContigFile().compare(
						oldContigFile) != 0) {
					LOG_VERBOSE_OPTIONAL(1, true, "Removing " << oldContigFile);
					unlink(oldContigFile.c_str());
				}
			}

			if (changedContigs.getGlobalSize() == 0) {
				// Logged on rank 0 like every other status message, so it is
				// also emitted in a single-rank run.
				LOG_VERBOSE_OPTIONAL(1, world.rank() == 0, "No more contigs to extend " << changedContigs.getSize());
				break;
			}

			matcher->recordTime("finishIteration", MPI_Wtime());
			LOG_DEBUG_GATHER(1, matcher->getTimes("") + ". " + MemoryUtils::getMemoryUsage());

		}

		matcher.reset(); // release the matcher interface

		if (world.rank() == 0 && !Log::isDebug(1)) {
			if (ContigExtenderBaseOptions::getOptions().getContigFile().compare(
					contigFile) != 0) {
				LOG_DEBUG_OPTIONAL(1, true, "Removing " << contigFile);
				unlink(contigFile.c_str());
			}
		}

		// write final contigs (and any unfinished contigs still remaining)
		finalContigs.append(contigs);
		std::string tmpFinalFile = DistributedOfstreamMap::writeGlobalReadSet(world, finalContigs, Options::getOptions().getOutputFile(), ".tmp", FormatOutput::Fasta());
		if (world.rank() == 0 && !finalContigFile.empty()) {
			LOG_DEBUG_OPTIONAL(1, true, "Removing " << finalContigFile);
			unlink(finalContigFile.c_str());
		}
		finalContigFile = tmpFinalFile;
		if (world.rank() == 0) {
			// Move the freshly written temporary file over the output file.
			unlink(Options::getOptions().getOutputFile().c_str());
			rename(finalContigFile.c_str(), Options::getOptions().getOutputFile().c_str());
		}
		finalContigFile = Options::getOptions().getOutputFile();
		LOG_VERBOSE_OPTIONAL(1, world.rank() == 0, "Final contigs are in: " << finalContigFile);

		LOG_VERBOSE_OPTIONAL(1, world.rank() == 0, "Finished");

		ForkDaemon::finalize();

	} catch (std::exception &e) {
		LOG_ERROR(1, "DistributedNucleatingAssembler threw an exception! Aborting..." << e.what());
		world.abort(1);
	} catch (...) {
		LOG_ERROR(1, "DistributedNucleatingAssembler threw an error!" );
		world.abort(1);
	}

	return 0;
}

示例#18

0

显示文件

文件： EstimateSize-P.cpp 项目： JGI-Bioinformatics/Kmernator

int main(int argc, char *argv[]) {

	ScopedMPIComm< MPIEstimateSizeOptions > world(argc, argv);

	Cleanup::prepare();

	try {

		MemoryUtils::getMemoryUsage();
		std::string outputFilename = Options::getOptions().getOutputFile();

		OptionsBaseInterface::FileListType &inputs = Options::getOptions().getInputFiles();
		LOG_VERBOSE_OPTIONAL(1, world.rank() == 0, "Reading Input Files");

		long partitions = MPIEstimateSizeOptions::getOptions().getSamplePartitions();
		double maxFraction = MPIEstimateSizeOptions::getOptions().getMaxSampleFraction();
		assert(maxFraction < 1.0);
		double fraction = 0.0;
		long totalPartitions = (long) partitions / maxFraction;

		unsigned long totalReads = 0;
		unsigned long totalBases = 0;
		long rawKmers = 0;
		KS spectrum(world, 0);
		for (long iter = 0 ; iter < partitions && fraction < maxFraction; iter++) {
			fraction += (double) 1. / (double) totalPartitions;
	
			LOG_VERBOSE_OPTIONAL(1, world.rank() == 0,  "Starting iteration " << iter << " at " << fraction*100 << "%");
	
			ReadSet reads;
			reads.appendAllFiles(inputs, world.rank()*totalPartitions + iter, world.size()*totalPartitions);
			setGlobalReadSetConstants(world, reads);
	
			unsigned long counts[3], totalCounts[3];
	
			mpi::all_reduce(world, (unsigned long*) counts, 3, (unsigned long*) totalCounts, std::plus<unsigned long>());
			totalReads += totalCounts[0];
			totalBases += totalCounts[2];
	
			if (KmerBaseOptions::getOptions().getKmerSize() > 0) {
	
				// lazy allocate
				if (rawKmers == 0) {
					rawKmers = KS::estimateRawKmers(world, inputs);
					spectrum = KS(world, rawKmers);
				}
				spectrum.buildKmerSpectrum(reads);
				spectrum.trackSpectrum(true);
				LOG_DEBUG_OPTIONAL(1, true, "SizeTracker: " << spectrum.getSizeTracker().toString());
	
				if (Log::isDebug(1)) {
					KS::MPIHistogram h = spectrum._getHistogram(false);
					std::string hist = h.toString();
					LOG_DEBUG_OPTIONAL(1, world.rank() == 0, "Collective Kmer Histogram\n" << hist);
				}
			}
	
		}
	
		std::string hist = spectrum.getHistogram(false);
		LOG_VERBOSE_OPTIONAL(1, world.rank() == 0, "Collective Kmer Histogram\n" << hist);
	
		KS::SizeTracker reducedSizeTracker = spectrum.reduceSizeTracker(world);
		std::string reducedSizeTrackerFile = outputFilename;
		if (reducedSizeTrackerFile.empty()) {
			reducedSizeTrackerFile = UniqueName::generateUniqueName("tmp-estimateSize");
		}
		float errorRate = TrackingData::getErrorRate();
		LOG_DEBUG_OPTIONAL(1, true, "Kmer error rate: " << errorRate);
		float commonErrorRate = 0.0;
		MPI_Reduce(&errorRate, &commonErrorRate, 1, MPI_FLOAT, MPI_SUM, 0, world);
		commonErrorRate /= (float) world.size();
		if (world.rank() == 0) {
			{
				LOG_VERBOSE_OPTIONAL(1, true, "Writing size tracking file to:" << reducedSizeTrackerFile);
				LOG_DEBUG_OPTIONAL(1, true, "SizeTracker:\n" << reducedSizeTracker.toString());
				OfstreamMap ofm(reducedSizeTrackerFile, "");
				ofm.getOfstream("") << reducedSizeTracker.toString();
			}
			std::string basePath = FileUtils::getBasePath(argv[0]);
			std::stringstream cmdss;
			cmdss << "Rscript " << basePath << "/EstimateSize.R " << reducedSizeTrackerFile; // << " " << (commonErrorRate*1.25);
			std::string command = cmdss.str();
	
			if (!FileUtils::getBasePath("Rscript").empty() || basePath.empty()) {
	
				LOG_DEBUG_OPTIONAL(1, true, "Executing: " << command);
				IPipestream ipipe(command);
				double errorRate = 0.0, genomeSize = 0.0;
				bool readValues = false;
				while (ipipe.good() && !ipipe.eof()) {
					std::string line;
					std::getline(ipipe, line);
					LOG_DEBUG_OPTIONAL(2, true, "Read: " << line);
					if (line.find("errorRate") != std::string::npos) {
						LOG_DEBUG_OPTIONAL(2, true, "Found headers in: " << line);
						readValues = true;
						continue;
					}
					if (readValues) {
						readValues = false;
						LOG_DEBUG_OPTIONAL(2, true, "Reading errorRate and GenomeSize from " << line);
						std::stringstream ss;
						ss << line;
						ss >> errorRate;
						ss >> genomeSize;
					}
				}
				LOG_VERBOSE_OPTIONAL(1, true, "Estimated Kmer-quality errorRate: " << commonErrorRate);
				LOG_VERBOSE_OPTIONAL(1, true, "Distributed readCount: " << totalReads);
				LOG_VERBOSE_OPTIONAL(1, true, "Estimated fractionRead: " << fraction);
				LOG_VERBOSE_OPTIONAL(1, true, "Estimated errorRate: " << errorRate);
				LOG_VERBOSE_OPTIONAL(1, true, "Estimated genomeSize: " << genomeSize);
	
				ipipe.close();
	
				double totalRawKmers = reducedSizeTracker.getLastElement().rawKmers / fraction;
				double estimatedUniqueKmers = totalRawKmers * errorRate + genomeSize;
				LOG_VERBOSE_OPTIONAL(1, true, "Estimated totalRawKmers: " << totalRawKmers);
				LOG_VERBOSE_OPTIONAL(1, true, "Estimated totalUniqueKmers: " << estimatedUniqueKmers);
				if (reducedSizeTrackerFile.compare(outputFilename) != 0) {
					LOG_DEBUG_OPTIONAL(1, true, "Removing temporary size tracking file: " << reducedSizeTrackerFile);
					unlink(reducedSizeTrackerFile.c_str());
				}
			} else {