Esempio n. 1
0
// Writes the sequence of each TRS record to a FASTA file, one file per family.
//
// Command-line options read:
//	trs2fasta	(required) name of the TRS file
//	seq		(required) name of the sequence file read by ReadMFA
//	path		(optional) passed to FamFileName; defaults to "."
//	maxfam		(optional) maximum number of members written per family;
//			defaults to DEFAULT_MAX_FAM
//	prefix		(optional) passed to TRSLabel when building sequence labels
void TRS2Fasta()
	{
	const char *TRSFileName = RequiredValueOpt("trs2fasta");
	const char *SeqFileName = RequiredValueOpt("seq");
	const char *Path = ValueOpt("path");
	const char *strMaxFam = ValueOpt("maxfam");
	const char *Prefix = ValueOpt("prefix");

	int MaxFam = DEFAULT_MAX_FAM;
	if (strMaxFam != 0)
		MaxFam = atoi(strMaxFam);

	if (0 == Path)
		Path = ".";

	ProgressStart("Reading seq file");
	int SeqLength;
	const char *Seq = ReadMFA(SeqFileName, &SeqLength);
	ProgressDone();

	Progress("Seq length %d bases, %.3g Mb", SeqLength, SeqLength/1e6);

	ProgressStart("Read TRS file");
	int TRSCount;
	TRSData *TRSs = ReadTRS(TRSFileName, &TRSCount);
	ProgressDone();

	Progress("%d records", TRSCount);

	// The loop below starts a new output file whenever FamIndex changes, so it
	// relies on the sort placing records of the same family next to each other
	// (presumably what CmpTRS does -- verify against its definition).
	ProgressStart("Sorting by family");
	qsort((void *) TRSs, TRSCount, sizeof(TRSData), CmpTRS);
	ProgressDone();

	FILE *f = 0;
	int CurrentFamily = -1;
	int MemberCount = 0;
	for (int TRSIndex = 0; TRSIndex < TRSCount; ++TRSIndex)
		{
		const TRSData &TRS = TRSs[TRSIndex];
		if (TRS.FamIndex != CurrentFamily)
			{
			// New family: finish the previous file and open the next one.
			if (f != 0)
				fclose(f);
			// NOTE(review): ownership of the string returned by FamFileName
			// is not visible here; it is not released in this function.
			char *FastaFileName = FamFileName(Path, TRS.FamIndex, TRS.SuperFamIndex);
			f = OpenStdioFile(FastaFileName, FILEIO_MODE_WriteOnly);
			CurrentFamily = TRS.FamIndex;
			MemberCount = 0;
			}
		++MemberCount;
		// Only the first MaxFam members of each family are written.
		if (MemberCount > MaxFam)
			continue;
		const int From = ContigToGlobal(TRS.ContigFrom, TRS.ContigLabel);
		const int Length = TRS.ContigTo - TRS.ContigFrom + 1;
		char *Label = TRSLabel(Prefix, TRS);
		WriteFasta(f, Seq + From, Length, Label, TRS.Rev);
		freemem(Label);
		}

	// Close the file of the last family; previously it was left open, so the
	// handle leaked and its buffered output was only flushed at process exit.
	if (f != 0)
		fclose(f);
	}
Esempio n. 2
0
// Writes the sequence of each motif record to a FASTA file, one file per
// family.
//
// Command-line options read:
//	tanmotif2fasta	(required) name of the motif file
//	seq		(required) name of the sequence file read by ReadMFA
//	path		(optional) passed to FamFileName; defaults to "."
//	maxfam		(optional) maximum number of members written per family;
//			defaults to DEFAULT_MAX_FAM
//	prefix		(optional) passed to MotifLabel when building sequence labels
void Tanmotif2Fasta()
	{
	const char *MotifFileName = RequiredValueOpt("tanmotif2fasta");
	const char *SeqFileName = RequiredValueOpt("seq");
	const char *Path = ValueOpt("path");
	const char *strMaxFam = ValueOpt("maxfam");
	const char *Prefix = ValueOpt("prefix");

	int MaxFam = DEFAULT_MAX_FAM;
	if (strMaxFam != 0)
		MaxFam = atoi(strMaxFam);

	if (0 == Path)
		Path = ".";

	ProgressStart("Reading seq file");
	int SeqLength;
	const char *Seq = ReadMFA(SeqFileName, &SeqLength);
	ProgressDone();

	Progress("Seq length %d bases, %.3g Mb", SeqLength, SeqLength/1e6);

	ProgressStart("Read Motif file");
	int MotifCount;
	MotifData *Motifs = ReadMotif(MotifFileName, &MotifCount);
	ProgressDone();

	Progress("%d records", MotifCount);

	// The loop below starts a new output file whenever FamIndex changes, so it
	// relies on the sort placing records of the same family next to each other
	// (presumably what CmpMotif does -- verify against its definition).
	ProgressStart("Sorting by family");
	qsort((void *) Motifs, MotifCount, sizeof(MotifData), CmpMotif);
	ProgressDone();

	FILE *f = 0;
	int CurrentFamily = -1;
	int MemberCount = 0;
	for (int MotifIndex = 0; MotifIndex < MotifCount; ++MotifIndex)
		{
		const MotifData &Motif = Motifs[MotifIndex];
		if (Motif.FamIndex != CurrentFamily)
			{
			// New family: finish the previous file and open the next one.
			if (f != 0)
				fclose(f);
			// NOTE(review): ownership of the string returned by FamFileName
			// is not visible here; it is not released in this function.
			char *FastaFileName = FamFileName(Path, Motif.FamIndex);
			f = OpenStdioFile(FastaFileName, FILEIO_MODE_WriteOnly);
			CurrentFamily = Motif.FamIndex;
			MemberCount = 0;
			}
		++MemberCount;
		// Only the first MaxFam members of each family are written.
		if (MemberCount > MaxFam)
			continue;
		const int From = ContigToGlobal(Motif.ContigFrom, Motif.ContigLabel);
		const int Length = Motif.ContigTo - Motif.ContigFrom + 1;
		char *Label = MotifLabel(Prefix, Motif);
		// Motifs are always written with the reverse flag set to false.
		WriteFasta(f, Seq + From, Length, Label, false);
		freemem(Label);
		}

	// Close the file of the last family; previously it was left open, so the
	// handle leaked and its buffered output was only flushed at process exit.
	if (f != 0)
		fclose(f);
	}
Esempio n. 3
0
void AnnotEdge()
	{
	const char *InputFileName = RequiredValueOpt("annotedge");
	const char *RepeatFileName = RequiredValueOpt("rep");
	const char *OutputFileName = RequiredValueOpt("out");

	ProgressStart("Reading repeat file");
	int RepCount;
	RepData *Reps = ReadReps(RepeatFileName, &RepCount);
	ProgressDone();

	Progress("%d records", RepCount);

	FILE *fInput = OpenStdioFile(InputFileName);
	FILE *fOutput = OpenStdioFile(OutputFileName, FILEIO_MODE_WriteOnly);

	ProgressStart("Transferring annotation");
	GFFRecord Rec;
	while (GetNextGFFRecord(fInput, Rec))
		{
		const bool Rev = (Rec.Strand == '-');
		const char *Annot = MakeAnnotEdge(Rec.SeqName, Rec.Start-1, Rec.End-1, Rev,
		  Reps, RepCount);

		fprintf(fOutput, "%s\t%s\t%s\t%d\t%d\t%.3g\t%c",
		//                0   1   2   3   4   5     6
		  Rec.SeqName,	// 0
		  Rec.Source,	// 1
		  Rec.Feature,	// 2
		  Rec.Start,	// 3
		  Rec.End,		// 4
		  Rec.Score,	// 5
		  Rec.Strand);	// 6

		if (-1 == Rec.Frame)
			fprintf(fOutput, "\t.");
		else
			fprintf(fOutput, "\t%d", Rec.Frame);

		fprintf(fOutput, "\t%s ; Annot \"%s\"\n", Rec.Attrs, Annot);
		}
	fclose(fInput);
	fclose(fOutput);
	ProgressDone();
	}
Esempio n. 4
0
// Driver for the -tan command: reads hits from a GFF file, groups them into
// piles and calls FindPyramids on each pile.
//
// Command-line options read:
//	tan		(required) GFF hit file
//	out		(required) output file, opened into fOut
//	pyramid		(optional) output file, opened into fPyramid
//	motif		(optional) output file, opened into fMotif
//	minhits		(optional) overrides MIN_HIT_COUNT
//	maxmargin	(optional) overrides MAX_FRACT_MARGIN
//	minratio	(optional) overrides MIN_RATIO
//
// The hit file is read twice: the first pass calls AddHit for every usable
// record, then each pile's Hits array is allocated from its HitCount, and the
// second pass calls AssignHit for the same records. A record is used only if
// its feature is "hit" and its query and target pile indexes are equal.
void Tan()
	{
// Image file annotated with from-to pile indexes
// Produced by:
//		piler2 -trs banded_hits.gff -images mainband_images.gff
	const char *HitFileName = RequiredValueOpt("tan");
	const char *OutFileName = RequiredValueOpt("out");
	const char *PyramidFileName = ValueOpt("pyramid");
	const char *MotifFileName = ValueOpt("motif");
	const char *strMinHits = ValueOpt("minhits");
	const char *strMaxMargin = ValueOpt("maxmargin");
	const char *strMinRatio = ValueOpt("minratio");

	if (0 != strMinHits)
		MIN_HIT_COUNT = atoi(strMinHits);
	if (0 != strMaxMargin)
		MAX_FRACT_MARGIN = atof(strMaxMargin);
	if (0 != strMinRatio)
		MIN_RATIO = atof(strMinRatio);

	FILE *fInput = OpenStdioFile(HitFileName);

	// Pass 1: register every usable hit with AddHit.
	ProgressStart("Initialize piles");
	GFFRecord Rec;
	int HitCount = 0;
	while (GetNextGFFRecord(fInput, Rec))
		{
		if (0 != strcmp(Rec.Feature, "hit"))
			continue;

		int QueryPileIndex = -1;
		int TargetPileIndex = -1;
		ParsePilesAttrs(Rec.Attrs, &QueryPileIndex, &TargetPileIndex);
		if (QueryPileIndex != TargetPileIndex)
			continue;

		char TargetLabel[128];
		int TargetStart;
		int TargetEnd;
		ParseTargetAttrs(Rec.Attrs, TargetLabel, sizeof(TargetLabel), &TargetStart, &TargetEnd);
		if (0 != strcmp(Rec.SeqName, TargetLabel))
			Quit("Labels don't match");

		// GFF coordinates are converted by subtracting one.
		const int QueryFrom = Rec.Start - 1;
		const int QueryTo = Rec.End - 1;
		const int TargetFrom = TargetStart - 1;
		const int TargetTo = TargetEnd - 1;
		const bool Rev = (Rec.Strand == '-');

		AddHit(QueryPileIndex, Rec.SeqName, QueryFrom, QueryTo, TargetFrom, TargetTo, Rev);
		++HitCount;
		}
	ProgressDone();

	Progress("%d hits, %d piles", HitCount, PileCount);

	// Size each pile's hit array from the count gathered so far, then reset
	// the count before the second pass.
	ProgressStart("Allocate piles");
	for (int PileIndex = 0; PileIndex < PileCount; ++PileIndex)
		{
		TanPile &Pile = Piles[PileIndex];
		Pile.Hits = all(HitData, Pile.HitCount);
		Pile.HitCount = 0;
		}
	ProgressDone();

	// Pass 2: same filtering as pass 1, this time calling AssignHit.
	ProgressStart("Assign hits to piles");
	Rewind(fInput);
	while (GetNextGFFRecord(fInput, Rec))
		{
		if (0 != strcmp(Rec.Feature, "hit"))
			continue;

		int QueryPileIndex = -1;
		int TargetPileIndex = -1;
		ParsePilesAttrs(Rec.Attrs, &QueryPileIndex, &TargetPileIndex);
		if (QueryPileIndex != TargetPileIndex)
			continue;

		char TargetLabel[128];
		int TargetStart;
		int TargetEnd;
		ParseTargetAttrs(Rec.Attrs, TargetLabel, sizeof(TargetLabel), &TargetStart, &TargetEnd);
		if (0 != strcmp(Rec.SeqName, TargetLabel))
			Quit("Labels don't match");

		const int QueryFrom = Rec.Start - 1;
		const int QueryTo = Rec.End - 1;
		const int TargetFrom = TargetStart - 1;
		const int TargetTo = TargetEnd - 1;
		const bool Rev = (Rec.Strand == '-');

		AssignHit(QueryPileIndex, Rec.SeqName, QueryFrom, QueryTo, TargetFrom, TargetTo, Rev);
		}
	// The hit file is not read again after this point.
	fclose(fInput);
	fInput = 0;
	ProgressDone();

	// fOut, fPyramid and fMotif are declared outside this function.
	// Each optional file is opened only if its own option was given; the
	// motif file used to be gated on the -pyramid option by mistake.
	fOut = OpenStdioFile(OutFileName, FILEIO_MODE_WriteOnly);
	fPyramid = (0 == PyramidFileName ? 0 : OpenStdioFile(PyramidFileName, FILEIO_MODE_WriteOnly));
	fMotif = (0 == MotifFileName ? 0 : OpenStdioFile(MotifFileName, FILEIO_MODE_WriteOnly));

	ProgressStart("Find pyramids");
	for (int PileIndex = 0; PileIndex < PileCount; ++PileIndex)
		FindPyramids(PileIndex);
	int PyramidCount = PyramidIndex;
	ProgressDone();

	// NOTE(review): the output files are not closed here; since they are not
	// local to this function, confirm whether other code still uses them.
	Progress("%d pyramids", PyramidCount);
	}
Esempio n. 5
0
// Driver for the -tr command: indexes the hits in a GFF file, collects
// candidates, builds a graph of edges between them, groups the edges into
// families with FindConnectedComponents and writes the result.
//
// Command-line options read:
//   tr               (required) GFF hit file
//   out              (required) output file
//   cand             (optional) file that receives the candidates
//   mintrspacing     (optional) overrides MIN_LENGTH_LINE
//   maxtrspacing     (optional) overrides MAX_LENGTH_LINE
//   mintrlength      (optional) overrides MIN_LENGTH_LTR
//   minspacingratio  (optional) overrides MAX_LENGTH_LTR (see note below)
//   minfam           (optional) overrides MIN_FAM_SIZE
//   minhitratio      (optional) overrides MIN_HIT_LENGTH_RATIO
//   mindistpairs     (optional) overrides MIN_DIST_EDGE
//
// The hit file is read three times: once to build the index, once to find
// candidates and once to find edges.
void TR()
{
#if defined(DEBUG) && defined(_MSC_VER)
    _CrtSetDbgFlag(0);	// too expensive
#endif

    const char *HitFileName = RequiredValueOpt("tr");
    const char *OutFileName = RequiredValueOpt("out");
    const char *CandFileName = ValueOpt("cand");

    const char *strMinTrSpacing = ValueOpt("mintrspacing");
    const char *strMaxTrSpacing = ValueOpt("maxtrspacing");
    const char *strMinTrLength = ValueOpt("mintrlength");
    // NOTE(review): the option name "minspacingratio" does not match the
    // variable it feeds (MAX_LENGTH_LTR); looks like a copy/paste slip, but it
    // is kept as-is because the set of registered option names is not visible
    // here -- confirm before renaming.
    const char *strMaxTrLength = ValueOpt("minspacingratio");
    const char *strMinFam = ValueOpt("minfam");
    const char *strMinHitRatio = ValueOpt("minhitratio");
    const char *strMinDistPairs = ValueOpt("mindistpairs");

    if (0 != strMinTrSpacing)
        MIN_LENGTH_LINE = atoi(strMinTrSpacing);
    if (0 != strMaxTrSpacing)
        MAX_LENGTH_LINE = atoi(strMaxTrSpacing);
    if (0 != strMinTrLength)
        MIN_LENGTH_LTR = atoi(strMinTrLength);
    if (0 != strMaxTrLength)
        MAX_LENGTH_LTR = atoi(strMaxTrLength);
    if (0 != strMinFam)
        MIN_FAM_SIZE = atoi(strMinFam);
    // NOTE(review): atoi discards any fractional part; if
    // MIN_HIT_LENGTH_RATIO is a floating-point value this probably wants
    // atof -- confirm against its declaration.
    if (0 != strMinHitRatio)
        MIN_HIT_LENGTH_RATIO = atoi(strMinHitRatio);
    if (0 != strMinDistPairs)
        MIN_DIST_EDGE = atoi(strMinDistPairs);

    FILE *fHit = OpenStdioFile(HitFileName, FILEIO_MODE_ReadOnly);

    ProgressStart("Index hits");
    GLIX HitGlix;
    HitGlix.Init();
    HitGlix.FromGFFFile(fHit);
    HitGlix.MakeGlobalToLocalIndex();
    ProgressDone();

    const int GlobalLength = HitGlix.GetGlobalLength();
    IIX IntervalIndex;
    IntervalIndex.Init(GlobalLength);

    // Second read of the hit file: keep the hits accepted by IsCandLTR.
    ProgressStart("Find candidate TRs");
    Rewind(fHit);
    GFFRecord Rec;
    while (GetNextGFFRecord(fHit, Rec))
    {
        HitData Hit;
        GFFRecordToHit(HitGlix, Rec, Hit);
        if (IsCandLTR(Hit))
            AddCand(Hit, IntervalIndex);
    }
    ProgressDone();

    Progress("%d candidates", CandCount);

    if (0 != CandFileName)
    {
        ProgressStart("Write candidates");
        FILE *fCand = OpenStdioFile(CandFileName, FILEIO_MODE_WriteOnly);
        WriteCands(fCand, HitGlix);
        // Previously left open: the handle leaked and buffered output was
        // only flushed at process exit.
        fclose(fCand);
        ProgressDone();
    }

    // Third read of the hit file: find edges for every hit.
    ProgressStart("Make graph");
    Rewind(fHit);
    while (GetNextGFFRecord(fHit, Rec))
    {
        HitData Hit;
        GFFRecordToHit(HitGlix, Rec, Hit);
        FindEdges(Hit, HitGlix, IntervalIndex);
    }
    fclose(fHit);
    fHit = 0;

    ProgressDone();

    // Edges is not declared in this function; FindEdges presumably fills it.
    Progress("%d edges", (int) Edges.size());

    ProgressStart("Find families");
    FamList Fams;
    FindConnectedComponents(Edges, Fams, MIN_FAM_SIZE);
    ProgressDone();

    Progress("%d families", (int) Fams.size());

    FILE *fOut = OpenStdioFile(OutFileName, FILEIO_MODE_WriteOnly);
    WriteOutputFile(fOut, HitGlix, Fams);
    // Close the result file so its contents are flushed before returning.
    fclose(fOut);
}
Esempio n. 6
0
void TRS()
	{
	const char *InputFileName = RequiredValueOpt("trs");

	const char *OutputFileName = ValueOpt("out");
	const char *PilesFileName = ValueOpt("piles");
	const char *ImagesFileName = ValueOpt("images");

	const char *strMinFamSize = ValueOpt("famsize");
	const char *strMaxLengthDiffPct = ValueOpt("maxlengthdiffpct");
	g_paramSingleHitCoverage = !FlagOpt("multihit");

	if (0 == OutputFileName && 0 == PilesFileName && 0 == ImagesFileName)
		Quit("No output file specified, must be at least one of -out, -piles, -images");

	if (0 != strMinFamSize)
		g_paramMinFamSize = atoi(strMinFamSize);
	if (0 != strMaxLengthDiffPct)
		g_paramMaxLengthDiffPct = atoi(strMaxLengthDiffPct);

	Log("singlehit=%s famsize=%d maxlengthdiffpct=%d\n",
	  g_paramSingleHitCoverage ? "True" : "False",
	  g_paramMinFamSize,
	  g_paramMaxLengthDiffPct);

	ProgressStart("Read hit file");
	int HitCount;
	int SeqLength;
	HitData *Hits = ReadHits(InputFileName, &HitCount, &SeqLength);
	ProgressDone();

	Progress("%d hits", HitCount);

	SeqLengthChunks = (SeqLength + CHUNK_LENGTH - 1)/CHUNK_LENGTH;

	const int BitVectorLength = (SeqLengthChunks + BITS_PER_INT - 1)/BITS_PER_INT;
	int *CopyCount = all(int, BitVectorLength);
	zero(CopyCount, int, BitVectorLength);

	ProgressStart("Compute copy counts");
	for (int i = 0; i < HitCount; ++i)
		IncCopyCount(CopyCount, Hits[i]);
	ProgressDone();

	ProgressStart("Identify piles");
	PILE_INDEX_TYPE *PileIndexes = IdentifyPiles(CopyCount);
	ProgressDone();

	Progress("%d stacks", PileCount);

	freemem(CopyCount);
	CopyCount = 0;

	CreatePiles(Hits, HitCount, PileIndexes);

	if (0 != ImagesFileName)
		{
		ProgressStart("Writing images file");
		WriteImages(ImagesFileName, Hits, HitCount, PileIndexes);
		ProgressDone();
		}

	freemem(Hits);
	Hits = 0;

	if (0 != PilesFileName)
		{
		ProgressStart("Writing piles file");
		WritePiles(PilesFileName, Piles, PileCount);
		ProgressDone();
		}

	freemem(PileIndexes);
	PileIndexes = 0;

	if (0 == OutputFileName)
		return;

	ProgressStart("Find edges");
	EdgeList Edges;
	FindGlobalEdges(Edges, MaxImageCount);
	ProgressDone();

	Progress("%d edges", (int) Edges.size());

	ProgressStart("Find families");
	FamList Fams;
	FindConnectedComponents(Edges, Fams, g_paramMinFamSize);
	AssignFamsToPiles(Fams);
	ProgressDone();

	Progress("%d families", (int) Fams.size());

	ProgressStart("Find superfamilies");
	EdgeList SuperEdges;
	FindSuperFamEdges(Fams, SuperEdges);

	FamList SuperFams;
	FindConnectedComponents(SuperEdges, SuperFams, 1);
	FindSingletonSuperFams(Fams, SuperFams);

	AssignSuperFamsToPiles(Fams, SuperFams);
	ProgressDone();

	Progress("%d superfamilies", (int) SuperFams.size());

	ProgressStart("Write TRS output file");
	WriteTRSFile(OutputFileName, Piles, PileCount);
	ProgressDone();
	}
Esempio n. 7
0
// Builds the Piles array (PileCount entries) and fills each pile's image list
// from the hits.
//
// Hits, HitCount	hits to distribute over the piles
// PileIndexes		lookup table indexed by position/CHUNK_LENGTH that gives
//			the pile index for that chunk
//
// Works in three steps: count the images per pile, allocate each pile's
// Images array from that count, then reset the counts and fill the arrays.
// Every hit contributes two images, one to the pile of its query interval and
// one to the pile of its target interval. Also sets each pile's From/To to
// the smallest start and largest end seen, and raises MaxImageCount to the
// largest image count of any pile touched.
static void CreatePiles(const HitData *Hits, int HitCount,
  PILE_INDEX_TYPE *PileIndexes)
	{
	Piles = all(PileData, PileCount);
	zero(Piles, PileData, PileCount);
	// -1 marks family, superfamily and orientation as not yet assigned here.
	for (int i = 0; i < PileCount; ++i)
		{
		Piles[i].FamIndex = -1;
		Piles[i].SuperFamIndex = -1;
		Piles[i].Rev = -1;
		}

// Count images in stack: one for the query side and one for the target side
// of every hit.
	ProgressStart("Create stacks: count images");
	for (int HitIndex = 0; HitIndex < HitCount; ++HitIndex)
		{
		const HitData &Hit = Hits[HitIndex];

		int Pos = Hit.QueryFrom/CHUNK_LENGTH;
		PILE_INDEX_TYPE PileIndex = PileIndexes[Pos];
		// Both ends of the interval are expected to map to the same pile.
		assert(PileIndex == PileIndexes[Hit.QueryTo/CHUNK_LENGTH]);
		assert(PileIndex >= 0 && PileIndex < PileCount);
		++(Piles[PileIndex].ImageCount);

		Pos = Hit.TargetFrom/CHUNK_LENGTH;
		PileIndex = PileIndexes[Pos];
		assert(PileIndex >= 0 && PileIndex < PileCount);
		assert(PileIndex == PileIndexes[Hit.TargetTo/CHUNK_LENGTH]);
		++(Piles[PileIndex].ImageCount);
		}
	ProgressDone();

// Allocate memory for image list
	// TotalImageCount is accumulated but not read again in this function.
	int TotalImageCount = 0;
	ProgressStart("Create stacks: allocate image memory");
	for (int PileIndex = 0; PileIndex < PileCount; ++PileIndex)
		{
		PileData &Pile = Piles[PileIndex];
		const int ImageCount = Pile.ImageCount;
		TotalImageCount += ImageCount;
		assert(ImageCount > 0);
		Pile.Images = all(PileImageData, ImageCount);
		}
	ProgressDone();

// Build image list
	ProgressStart("Create stacks: build image list");
	// ImageCount is reused as the fill position, so restart it at zero;
	// -1 in From/To means no interval has been recorded yet.
	for (int PileIndex = 0; PileIndex < PileCount; ++PileIndex)
		{
		PileData &Pile = Piles[PileIndex];
		Pile.ImageCount = 0;
		Pile.From = -1;
		Pile.To = -1;
		}

	for (int HitIndex = 0; HitIndex < HitCount; ++HitIndex)
		{
		const HitData &Hit = Hits[HitIndex];

		const bool Rev = Hit.Rev;

		// NOTE(review): lengths are To - From with no +1, and the asserts
		// below therefore check the chunk of To - 1, whereas the counting
		// pass above checks the chunk of To -- confirm this is intended.
		const int Length1 = Hit.QueryTo - Hit.QueryFrom;
		const int Length2 = Hit.TargetTo - Hit.TargetFrom;

		const int From1 = Hit.QueryFrom;
		const int From2 = Hit.TargetFrom;

		const int To1 = Hit.QueryTo;
		const int To2 = Hit.TargetTo;

		const int Pos1 = From1/CHUNK_LENGTH;
		const int Pos2 = From2/CHUNK_LENGTH;

		PILE_INDEX_TYPE PileIndex1 = PileIndexes[Pos1];
		PILE_INDEX_TYPE PileIndex2 = PileIndexes[Pos2];

		assert(PileIndex1 == PileIndexes[(From1 + Length1 - 1)/CHUNK_LENGTH]);
		assert(PileIndex1 >= 0 && PileIndex1 < PileCount);

		assert(PileIndex2 == PileIndexes[(From2 + Length2 - 1)/CHUNK_LENGTH]);
		assert(PileIndex2 >= 0 && PileIndex2 < PileCount);

		// The image stored in the query pile describes the target side
		// (its length and pile), and vice versa. Pile1 and Pile2 may refer to
		// the same pile, in which case the two images occupy consecutive
		// slots because the count is advanced between the two lookups.
		PileData &Pile1 = Piles[PileIndex1];
		PileImageData &Image1 = Pile1.Images[Pile1.ImageCount++];
		Image1.SILength = Length2;
		Image1.SIPile = PileIndex2;
		Image1.SIRev = Rev;

		PileData &Pile2 = Piles[PileIndex2];
		PileImageData &Image2 = Pile2.Images[Pile2.ImageCount++];
		Image2.SILength = Length1;
		Image2.SIPile = PileIndex1;
		Image2.SIRev = Rev;

		// Widen each pile's extent to cover this hit's interval.
		if (Pile1.From == -1 || From1 < Pile1.From)
			Pile1.From = From1;
		if (Pile1.To == -1 || To1 > Pile1.To)
			Pile1.To = To1;

		if (Pile2.From == -1 || From2 < Pile2.From)
			Pile2.From = From2;
		if (Pile2.To == -1 || To2 > Pile2.To)
			Pile2.To = To2;

		// Track the largest image count seen in any pile.
		if (Pile1.ImageCount > MaxImageCount)
			MaxImageCount = Pile1.ImageCount;
		if (Pile2.ImageCount > MaxImageCount)
			MaxImageCount = Pile2.ImageCount;
		}
	ProgressDone();
	}
Esempio n. 8
0
// Message handler that calls ProgressDone() and always returns 0.
// Neither message parameter is consulted.
LRESULT CPIMDIFrameWndEx::OnProgressDone(WPARAM wParam, LPARAM lParam)
{
	(void) wParam;
	(void) lParam;

	ProgressDone();
	return 0;
}