예제 #1
0
// Adds to TargetIndexes the indexes of database sequences whose word count
// is close to that of the best-ranked sequence for Query.
//
//   DB             database to search; nothing is added when it is empty.
//   Query          query sequence handed to USort.
//   TargetIndexes  output set; matching sequence indexes are inserted into
//                  it (existing contents are left in place).
//
// USort supplies per-sequence word counts plus an ordering of sequence
// indexes. Order[0] is treated as the top-ranked sequence, and the walk
// stops at the first sequence whose word count falls more than
// MAX_WORD_COUNT_DROP below the top one.
// NOTE(review): this early stop presumably relies on USort returning the
// order sorted by descending word count -- confirm against USort.
void AddTargets(SeqDB &DB, const SeqData &Query, set<unsigned> &TargetIndexes)
	{
	const unsigned DBSize = DB.GetSeqCount();
	if (DBSize == 0)
		return;

	vector<float> Scores;
	vector<unsigned> Ranking;
	USort(Query, DB, Scores, Ranking);
	asserta(SIZE(Ranking) == DBSize);

	// Word count of the top-ranked sequence is the reference for the cutoff.
	const float BestScore = Scores[Ranking[0]];

	unsigned Rank = 0;
	while (Rank < DBSize)
		{
		const unsigned Candidate = Ranking[Rank];
		++Rank;
		if (BestScore - Scores[Candidate] > MAX_WORD_COUNT_DROP)
			break;
		TargetIndexes.insert(Candidate);
		}
	}
예제 #2
0
파일: biodb.c 프로젝트: darthsuogles/BioSeq
int main()
{
  // open the database named mein_database.db
  // if the database does not exist, create it
  /* init(); */
  
  /* DB *db; */
  /* open(&db, "mein_database.db");   */
  /* close(db); */

  /* shutdown(); */

  SeqDB db;
  db.init();
  db.protein_parser("../data/NC_004347.faa");
  db.protein_parser("../data/NC_004349.faa");
  db.protein_parser("../data/NC_010622.faa");
  db.protein_parser("../data/NC_010623.faa");
  db.protein_parser("../data/NC_010625.faa");
  db.protein_parser("../data/NC_010627.faa");  

  db.shutdown();
  return 0;
}
예제 #3
0
int main(int argc, char *argv[])
	{
		
	MyCmdLine(argc, argv);

	if (argc < 2)
		{
		Usage();
		return 0;
		}

	if (opt_version)
		{
		printf("uchime v" MY_VERSION ".%s\n", SVN_VERSION);
		return 0;
		}

	printf("uchime v" MY_VERSION ".%s\n", SVN_VERSION);
	printf("by Robert C. Edgar\n");
	printf("http://drive5.com/uchime\n");
	printf("This code is donated to the public domain.\n");
	printf("\n");
	if (!optset_w)
		opt_w = 8;
	
	float MinFractId = 0.95f;
	if (optset_id)
		MinFractId = (float) opt_id;

	Log("%8.2f  minh\n", opt_minh);
	Log("%8.2f  xn\n", opt_xn);
	Log("%8.2f  dn\n", opt_dn);
	Log("%8.2f  xa\n", opt_xa);
	Log("%8.2f  mindiv\n", opt_mindiv);
	Log("%8u  maxp\n", opt_maxp);

	if (opt_input == "" && opt_uchime != "")
		opt_input = opt_uchime;

	if (opt_input == "")
		Die("Missing --input");

	g_UchimeDeNovo = (opt_db == "");

	if (opt_uchimeout != "")
		g_fUChime = CreateStdioFile(opt_uchimeout);

	if (opt_uchimealns != "")
		g_fUChimeAlns = CreateStdioFile(opt_uchimealns);

	SeqDB Input;
	SeqDB DB;

	Input.FromFasta(opt_input);
	if (!Input.IsNucleo())
		Die("Input contains amino acid sequences");

	const unsigned QuerySeqCount = Input.GetSeqCount();
	vector<unsigned> Order;
	for (unsigned i = 0; i < QuerySeqCount; ++i)
		Order.push_back(i);

	if (g_UchimeDeNovo)
		{
		vector<float> Abs;
		for (unsigned i = 0; i < QuerySeqCount; ++i)
			{
			const char *Label = Input.GetLabel(i);
			float Ab = GetAbFromLabel(Label);
			Abs.push_back(Ab);
			}
		SortDescending(Abs, Order);
		DB.m_IsNucleoSet = true;
		DB.m_IsNucleo = true;
		}
	else
		{
		DB.FromFasta(opt_db);
		if (!DB.IsNucleo())
			Die("Database contains amino acid sequences");
		}

	vector<ChimeHit2> Hits;
	unsigned HitCount = 0;
	for (unsigned i = 0; i < QuerySeqCount; ++i)
		{
		unsigned QuerySeqIndex = Order[i];

		SeqData QSD;
		Input.GetSeqData(QuerySeqIndex, QSD);

		float QAb = -1.0;
		if (g_UchimeDeNovo)
			QAb = GetAbFromLabel(QSD.Label);

		ChimeHit2 Hit;
		AlnParams &AP = *(AlnParams *) 0;
		AlnHeuristics &AH = *(AlnHeuristics *) 0;
		HSPFinder &HF = *(HSPFinder *) 0;
		bool Found = SearchChime(DB, QSD, QAb, AP, AH, HF, MinFractId, Hit);
		if (Found)
			++HitCount;
		else
			{
			if (g_UchimeDeNovo)
				DB.AddSeq(QSD.Label, QSD.Seq, QSD.L);
			}

		WriteChimeHit(g_fUChime, Hit);

		ProgressStep(i, QuerySeqCount, "%u/%u chimeras found (%.1f%%)", HitCount, i, Pct(HitCount, i+1));
		}

	Log("\n");
	Log("%s: %u/%u chimeras found (%.1f%%)\n",
	  opt_input.c_str(), HitCount, QuerySeqCount, Pct(HitCount, QuerySeqCount));

	CloseStdioFile(g_fUChime);
	CloseStdioFile(g_fUChimeAlns);

	ProgressExit();
	return 0;
	}