예제 #1
0
파일: PathOverlap.cpp 프로젝트: bcgsc/abyss
/** Index the first and last contig of each path to facilitate finding
 * overlaps between paths. */
static SeedMap makeSeedMap(const Paths& paths)
{
	SeedMap seedMap;
	for (Paths::const_iterator it = paths.begin();
			it != paths.end(); ++it) {
		if (it->empty())
			continue;
		assert(!it->front().ambiguous());
		seedMap.insert(make_pair(it->front(),
					Vertex(it - paths.begin(), false)));
		assert(!it->back().ambiguous());
		seedMap.insert(make_pair(it->back() ^ 1,
					Vertex(it - paths.begin(), true)));
	}
	return seedMap;
}
예제 #2
0
파일: PathOverlap.cpp 프로젝트: bcgsc/abyss
int main(int argc, char** argv)
{
	string commandLine;
	{
		ostringstream ss;
		char** last = argv + argc - 1;
		copy(argv, last, ostream_iterator<const char *>(ss, " "));
		ss << *last;
		commandLine = ss.str();
	}

	if (!opt::db.empty())
		opt::metaVars.resize(3);

	bool die = false;
	for (int c; (c = getopt_long(argc, argv,
					shortopts, longopts, NULL)) != -1;) {
		istringstream arg(optarg != NULL ? optarg : "");
		switch (c) {
			case '?': die = true; break;
			case 'g': arg >> opt::graphPath; break;
			case 'k': arg >> opt::k; break;
			case 'r': arg >> opt::repeatContigs; break;
			case 'v': opt::verbose++; break;
			case OPT_HELP:
				cout << USAGE_MESSAGE;
				exit(EXIT_SUCCESS);
			case OPT_VERSION:
				cout << VERSION_MESSAGE;
				exit(EXIT_SUCCESS);
			case OPT_DB:
				arg >> opt::db; break;
			case OPT_LIBRARY:
				arg >> opt::metaVars[0]; break;
			case OPT_STRAIN:
				arg >> opt::metaVars[1]; break;
			case OPT_SPECIES:
				arg >> opt::metaVars[2]; break;
		}
		if (optarg != NULL && !arg.eof()) {
			cerr << PROGRAM ": invalid option: `-"
				<< (char)c << optarg << "'\n";
			exit(EXIT_FAILURE);
		}
	}

	if (opt::k <= 0) {
		cerr << PROGRAM ": missing -k,--kmer option\n";
		die = true;
	}

	if (argc - optind < 2) {
		cerr << PROGRAM ": missing arguments\n";
		die = true;
	} else if (argc - optind > 2) {
		cerr << PROGRAM ": too many arguments\n";
		die = true;
	}

	if (die) {
		cerr << "Try `" << PROGRAM
			<< " --help' for more information.\n";
		exit(EXIT_FAILURE);
	}

	const char *adjPath = argv[optind++];
	if (opt::verbose > 0)
		cerr << "Reading `" << adjPath << "'..." << endl;
	ifstream fin(adjPath);
	assert_good(fin, adjPath);
	Graph g;
	fin >> g;
	Vertex::s_offset = g.num_vertices() / 2;

	string pathsFile(argv[optind++]);
	vector<string> pathIDs;
	Paths paths = readPaths(g, pathsFile, pathIDs);

	switch (opt::mode) {
	  case opt::OVERLAP:
		// Find overlapping paths, do not assemble.
		addPathOverlapEdges(g, paths, pathIDs,
				findOverlaps(g, paths));
		paths.clear();
		if (opt::graphPath.empty())
			opt::graphPath = "-";
		break;

	  case opt::ASSEMBLE:
		// Assemble overlapping paths.
		assembleOverlappingPaths(g, paths, pathIDs);
		break;

	  case opt::TRIM:
		// Trim overlapping paths.
		trimOverlaps(g, paths);
		// Remove paths consisting of a single contig.
		for_each_if(paths.begin(), paths.end(),
				mem_fun_ref(&ContigPath::clear),
				compose1(
					bind2nd(equal_to<ContigPath::size_type>(), 1),
					mem_fun_ref(&ContigPath::size)));
		// Add the paths to the graph.
		addPathOverlapEdges(g, paths, pathIDs, Overlaps());
		break;
	}

	// Output the paths.
	for (Paths::const_iterator it = paths.begin();
			it != paths.end(); ++it) {
		if (it->empty())
			continue;
		assert(it->size() != 1);
		cout << pathIDs[it - paths.begin()] << '\t' << *it << '\n';
	}
	assert(cout.good());

	// Output the graph.
	if (!opt::graphPath.empty()) {
		ofstream fout;
		ostream& out = opt::graphPath == "-" ? cout
		   	: (fout.open(opt::graphPath.c_str()), fout);
		assert_good(out, opt::graphPath);
		write_graph(out, g, PROGRAM, commandLine);
		assert_good(out, opt::graphPath);
	}

	// Output the repeat contigs.
	if (!opt::repeatContigs.empty()) {
		sort(s_trimmedContigs.begin(), s_trimmedContigs.end());
		s_trimmedContigs.erase(
				unique(s_trimmedContigs.begin(),
					s_trimmedContigs.end()), s_trimmedContigs.end());
		ofstream out(opt::repeatContigs.c_str());
		assert_good(out, opt::repeatContigs);
		for (vector<ContigID>::const_iterator it
				= s_trimmedContigs.begin();
				it != s_trimmedContigs.end(); ++it)
			out << get(g_contigNames, *it) << '\n';
		assert_good(out, opt::repeatContigs);
	}

	if (!opt::db.empty()) {
		init(db,
				opt::db,
				opt::verbose,
				PROGRAM,
				opt::getCommand(argc, argv),
				opt::metaVars);
		addToDb(db, "SS", opt::ss);
		addToDb(db, "K", opt::k);
	}

	return 0;
}