/**
 * Entry point of the distributed nucleating assembler.
 *
 * Overall flow, as visible in this function:
 *   1. Load the input reads (each rank passes its rank and the world size
 *      to the loader) and identify pairs.
 *   2. Unless disabled by option, apply the known-oddities artifact filter.
 *   3. Build a matcher (Vmatch when the configured kmer size is 0,
 *      KmerMatch otherwise).
 *   4. Repeatedly extend the contigs in batches, up to maxIterations times,
 *      writing intermediate contig files after every iteration.
 *   5. Write the final contigs plus any still-unfinished contigs to the
 *      configured output file.
 *
 * Any exception is logged and the MPI world is aborted with code 1.
 *
 * @return 0 on normal completion.
 */
int main(int argc, char *argv[]) {

	ForkDaemon::initialize();

	ScopedMPIComm< DistributedNucleatingAssemblerOptions > world(argc, argv);

	Cleanup::prepare();

	try {

		double timing1, timing2;

		timing1 = MPI_Wtime();

		OptionsBaseInterface::FileListType &inputFiles =
				Options::getOptions().getInputFiles();
		std::string contigFile =
				ContigExtenderBaseOptions::getOptions().getContigFile();
		std::string finalContigFile;
		double minimumCoverage =
				ContigExtenderBaseOptions::getOptions().getMinimumCoverage();
		long maxIterations =
				DistributedNucleatingAssemblerOptions::getOptions().getMaxIterations();

		ReadSet reads;
		LOG_VERBOSE_OPTIONAL(1, world.rank() == 0, "Reading Input Files" );
		reads.appendAllFiles(inputFiles, world.rank(), world.size());
		reads.identifyPairs();
		setGlobalReadSetConstants(world, reads);

		timing2 = MPI_Wtime();

		LOG_VERBOSE_OPTIONAL(1, world.rank() == 0, "loaded " << reads.getGlobalSize() << " Reads, (local:" << reads.getSize() << " pair:" << reads.getPairSize() << ") in " << (timing2-timing1) << " seconds" );
		LOG_DEBUG_GATHER(1, MemoryUtils::getMemoryUsage());

		if (FilterKnownOdditiesOptions::getOptions().getSkipArtifactFilter() == 0) {

			LOG_VERBOSE_OPTIONAL(1, world.rank() == 0, "Preparing artifact filter: ");

			FilterKnownOddities filter;
			LOG_VERBOSE_OPTIONAL(2, world.rank() == 0, "Applying sequence artifact filter to Input Files");

			unsigned long filtered = filter.applyFilter(reads);

			LOG_VERBOSE_GATHER(2, "local filter affected (trimmed/removed) " << filtered << " Reads ");
			LOG_DEBUG_GATHER(1, MemoryUtils::getMemoryUsage());

			// sum the per-rank counts onto rank 0 for the summary message
			unsigned long allFiltered;
			mpi::reduce(world, filtered, allFiltered, std::plus<unsigned long>(), 0);
			LOG_VERBOSE_OPTIONAL(1, world.rank() == 0, "distributed filter (trimmed/removed) " << allFiltered << " Reads.");

		}

		// a kmer size of 0 selects the Vmatch-based matcher
		boost::shared_ptr< MatcherInterface > matcher;
		if (KmerBaseOptions::getOptions().getKmerSize() == 0) {
			matcher.reset( new Vmatch(world, UniqueName::generateHashName(inputFiles), reads) );
		} else {
			matcher.reset( new KmerMatch(world, reads) );
		}

		SequenceLengthType minKmerSize, maxKmerSize, kmerStep, maxExtend;
		ContigExtender<KS>::getMinMaxKmerSize(reads, minKmerSize, maxKmerSize,
				kmerStep);
		// every rank must use the same upper bound: take the smallest across ranks
		maxKmerSize = boost::mpi::all_reduce(world, maxKmerSize, mpi::minimum<
				SequenceLengthType>());
		LOG_VERBOSE_OPTIONAL(1, world.rank() == 0, "Kmer size ranges: " << minKmerSize << "\t" << maxKmerSize << "\t" << kmerStep);
		maxExtend = maxKmerSize;

		timing1 = timing2;
		timing2 = MPI_Wtime();
		LOG_VERBOSE_OPTIONAL(1, world.rank() == 0, "Prepared Matcher indexes in " << (timing2-timing1) << " seconds");

		ReadSet finalContigs;
		ReadSet contigs;
		contigs.appendFastaFile(contigFile, world.rank(), world.size());

		int maxContigsPerBatch = DistributedNucleatingAssemblerOptions::getOptions().getMaxContigsPerBatch();

		// same width as maxIterations so the counter cannot overflow before
		// reaching the configured limit
		long iteration = 0;
		while (++iteration <= maxIterations) {
			LOG_DEBUG_GATHER(1, "Iteration " << iteration << " " << MemoryUtils::getMemoryUsage());
			int batchIdx = 0;

			matcher->resetTimes("Start Iteration", MPI_Wtime());

			setGlobalReadSetConstants(world, contigs);

			LOG_VERBOSE_OPTIONAL(1, world.rank() == 0, "Iteration: " << iteration << ". Contig File: " << contigFile << ". contains " << contigs.getGlobalSize() << " Reads");
			if (contigs.getGlobalSize() == 0) {
				LOG_VERBOSE_OPTIONAL(1, true, "There are no contigs to extend in " << contigFile);
				break;
			}

			std::string extendLog;
			ReadSet changedContigs;
			// all ranks must execute the same number of batches, so loop up to
			// the largest local contig count found on any rank
			int lastBatch = contigs.getSize();
			MPI_Allreduce(MPI_IN_PLACE, &lastBatch, 1, MPI_INT, MPI_MAX, world);
			LOG_DEBUG_OPTIONAL(1, world.rank() == 0, "Iteration: " << iteration << " Last batch is " << lastBatch);

			while (batchIdx < lastBatch) {
				extendLog += runPartialBatch(world, matcher, contigs, contigFile, changedContigs, finalContigs, batchIdx, maxContigsPerBatch, minKmerSize, minimumCoverage, maxKmerSize, maxExtend, kmerStep);
				batchIdx += maxContigsPerBatch;
			}

			matcher->recordTime("extendContigs", MPI_Wtime());
			LOG_DEBUG_GATHER(1, (extendLog));

			finishLongContigs(DistributedNucleatingAssemblerOptions::getOptions().getMaxContigLength(), changedContigs, finalContigs);

			LOG_DEBUG_GATHER(1, "Changed contigs: " << changedContigs.getSize() << " finalContigs: " << finalContigs.getSize());
			setGlobalReadSetConstants(world, changedContigs);
			setGlobalReadSetConstants(world, finalContigs);
			LOG_VERBOSE_OPTIONAL(1, world.rank() == 0, "Changed contigs: " << changedContigs.getGlobalSize() << " finalContigs: " << finalContigs.getGlobalSize());

			std::string oldFinalContigFile = finalContigFile;
			std::string oldContigFile = contigFile;
			{
				// write out the state of the contig files (so far) so we do not lose them
				DistributedOfstreamMap om(world,
						Options::getOptions().getOutputFile(), "");
				om.setBuildInMemory();
				if (finalContigs.getGlobalSize() > 0) {
					std::string fileKey = "final-" + boost::lexical_cast<
							std::string>(iteration);
					finalContigs.writeAll(om.getOfstream(fileKey),
							FormatOutput::Fasta());
					finalContigFile = om.getRealFilePath(fileKey);
				}
				if (changedContigs.getGlobalSize() > 0) {
					std::string filekey = "-inputcontigs-" + boost::lexical_cast<
							std::string>(iteration) + ".fasta";
					changedContigs.writeAll(om.getOfstream(filekey),
							FormatOutput::Fasta());
					contigFile = om.getRealFilePath(filekey);
				}
				// the changed contigs become the input of the next iteration
				contigs = changedContigs;
			}

			// Only replace the output file when there is a final contig file
			// to link to; otherwise the unlink would just delete the output
			// and the link() of an empty path would fail.
			if (world.rank() == 0 && !finalContigFile.empty()) {
				// preserve the final contigs in case of crash
				unlink(Options::getOptions().getOutputFile().c_str());
				link(finalContigFile.c_str(), Options::getOptions().getOutputFile().c_str());
			}

			matcher->recordTime("writeFinalTime", MPI_Wtime());

			if (!Log::isDebug(1) && world.rank() == 0) {
				// remove most recent contig files (if not debugging)
				if (!oldFinalContigFile.empty()) {
					LOG_VERBOSE_OPTIONAL(1, true, "Removing " << oldFinalContigFile);
					unlink(oldFinalContigFile.c_str());
				}

				// never delete the contig file the user supplied as input
				if (ContigExtenderBaseOptions::getOptions().getContigFile().compare(
						oldContigFile) != 0) {
					LOG_VERBOSE_OPTIONAL(1, true, "Removing " << oldContigFile);
					unlink(oldContigFile.c_str());
				}
			}

			if (changedContigs.getGlobalSize() == 0) {
				// report from rank 0, like every other rank-gated message here
				LOG_VERBOSE_OPTIONAL(1, world.rank() == 0, "No more contigs to extend " << changedContigs.getSize());
				break;
			}

			matcher->recordTime("finishIteration", MPI_Wtime());
			LOG_DEBUG_GATHER(1, matcher->getTimes("") + ". " + MemoryUtils::getMemoryUsage());

		}

		matcher.reset(); // release the matcher interface

		if (world.rank() == 0 && !Log::isDebug(1)) {
			// remove the last intermediate contig file, but never the user's input
			if (ContigExtenderBaseOptions::getOptions().getContigFile().compare(
					contigFile) != 0) {
				LOG_DEBUG_OPTIONAL(1, true, "Removing " << contigFile);
				unlink(contigFile.c_str());
			}
		}

		// write final contigs (and any unfinished contigs still remaining)
		finalContigs.append(contigs);
		std::string tmpFinalFile = DistributedOfstreamMap::writeGlobalReadSet(world, finalContigs, Options::getOptions().getOutputFile(), ".tmp", FormatOutput::Fasta());
		if (world.rank() == 0 && !finalContigFile.empty()) {
			LOG_DEBUG_OPTIONAL(1, true, "Removing " << finalContigFile);
			unlink(finalContigFile.c_str());
		}
		finalContigFile = tmpFinalFile;
		if (world.rank() == 0) {
			// move the freshly written temporary file over the output path
			unlink(Options::getOptions().getOutputFile().c_str());
			rename(finalContigFile.c_str(), Options::getOptions().getOutputFile().c_str());
		}
		finalContigFile = Options::getOptions().getOutputFile();
		LOG_VERBOSE_OPTIONAL(1, world.rank() == 0, "Final contigs are in: " << finalContigFile);

		LOG_VERBOSE_OPTIONAL(1, world.rank() == 0, "Finished");

		ForkDaemon::finalize();

	} catch (const std::exception &e) {
		LOG_ERROR(1, "DistributedNucleatingAssembler threw an exception! Aborting..." << e.what());
		world.abort(1);
	} catch (...) {
		LOG_ERROR(1, "DistributedNucleatingAssembler threw an error!" );
		world.abort(1);
	}

	return 0;
}
/**
 * Entry point of the Fastq-to-Fasta style converter.
 *
 * Loads all input files into a ReadSet, identifies pairs, and writes every
 * pair (or the single read of an incomplete pair) either to stdout or,
 * when an output file name is configured, to streams obtained from an
 * OfstreamMap keyed by the read's file-name prefix.
 *
 * Optional behaviours, both only honoured when an output file is given:
 *   - partitioning: a "-<N>" suffix is appended to the key and N is bumped
 *     whenever the running base count exceeds the configured split size;
 *   - split pairs: the two reads of a pair go to keys ending in "-1"/"-2".
 *
 * Exits with status 1 when option parsing fails.
 *
 * @return 0 on normal completion.
 */
int main(int argc, char *argv[]) {

	if (!Fastq2FastaOptions::parseOpts(argc, argv)) exit(1);

	Cleanup::prepare();

	OptionsBaseInterface::FileListType &inputs = Options::getOptions().getInputFiles();
	long splitSizeBase = Fastq2FastaOptions::getOptions().getSplitSizeMegaBase() * 1000000;

	ReadSet reads;
	LOG_VERBOSE(1, "Reading Input Files" );
	reads.appendAllFiles(inputs);

	LOG_VERBOSE(1, "loaded " << reads.getSize() << " Reads, " << reads.getBaseCount()
			<< " Bases ");

	reads.identifyPairs();

	long currentBase = 0;
	OfstreamMap ofmap;
	string outputFilename = Options::getOptions().getOutputFile();
	bool hasOfMap = false;
	ostream *out = &cout;

	int partitionNum = 1;
	if (!outputFilename.empty()) {
		ofmap = OfstreamMap(outputFilename);
		hasOfMap = true;
	} else {
		splitSizeBase = 0; // do not support splitting when no output is specified
	}

	// Splitting pairs needs named output streams. Without an output file
	// the key is empty, so rewriting its last character would index out of
	// bounds and the stream would come from an unconfigured OfstreamMap;
	// in that case both reads simply go to stdout.
	const bool splitPairs = hasOfMap
			&& Fastq2FastaOptions::getOptions().getSplitPairs() != 0;
	string filekey;
	for(ReadSet::ReadSetSizeType pairIdx = 0 ; pairIdx < reads.getPairSize(); pairIdx++) {
		ReadSet::Pair pair = reads.getPair(pairIdx);

		// the smaller index names the output key and is the read written
		// when the pair is incomplete
		ReadSet::ReadSetSizeType lesserIdx  = std::min(pair.read1, pair.read2);

		if (hasOfMap) {
			filekey = reads.getReadFileNamePrefix(lesserIdx);
		} else {
			filekey.clear();
		}

		if (splitSizeBase > 0) {
			SequenceLengthType len = reads.getRead(lesserIdx).getLength();
			currentBase += len;
			if (currentBase > splitSizeBase) {
				// new output handle
				partitionNum++;
				currentBase = len;
			}
			filekey += "-" + boost::lexical_cast<string>( partitionNum );
		}


		if (reads.isValidRead(pair.read1) && reads.isValidRead(pair.read2)) {

			if (hasOfMap) {
				if (splitPairs) {
					filekey += "-1";
				}
				out = &( ofmap.getOfstream(filekey) );
			}

			reads.getRead(pair.read1).write(*out);
			if (splitPairs) {
				// key ends in "-1" here; flip it to "-2" for the mate
				filekey[filekey.length()-1] = '2';
				out = &( ofmap.getOfstream(filekey) );
			}
			reads.getRead(pair.read2).write(*out);

		} else {
			if (hasOfMap) {
				out = &( ofmap.getOfstream(filekey) );
			}
			reads.getRead(lesserIdx).write(*out);
		}

	}

	return 0;
}