Example #1
0
static int ReadMotifPass2(FILE *f, MotifData *Motifs)
	{
	rewind(f);

	GFFLineNr = 0;
	int MotifCount = 0;
	GFFRecord Rec;
	while (GetNextGFFRecord(f, Rec))
		{
		if (0 != strcmp(Rec.Feature, "tandemmotif"))
			continue;
		if (Rec.Start <= 0 || Rec.End <= 0 || Rec.Start > Rec.End)
			Warning("GFF line %d: invalid start %d / end %d",
			  GFFLineNr, Rec.Start, Rec.End);

		int FamIndex = GetFam(Rec);

		MotifData &Motif = Motifs[MotifCount];

		const int Length = Rec.End - Rec.Start + 1;

		Motif.ContigLabel = strsave(Rec.SeqName);
		Motif.ContigFrom = Rec.Start - 1;
		Motif.ContigTo = Motif.ContigFrom + Length - 1;
		Motif.FamIndex = FamIndex;

		++MotifCount;
		}
	return MotifCount;
	}
Example #2
0
void GFFSet::FromFile(FILE *f)
	{
	Free();

	GFFRecord Rec;
	while (GetNextGFFRecord(f, Rec))
		{
		SaveGFFStrings(Rec);
		Add(Rec);
		}
	}
Example #3
0
static int ReadTRSPass1(FILE *f)
	{
	GFFLineNr = 0;
	int TRSCount = 0;
	GFFRecord Rec;
	while (GetNextGFFRecord(f, Rec))
		{
		if (0 != strcmp(Rec.Feature, "trs"))
			continue;
		if (Rec.Start <= 0 || Rec.End <= 0 || Rec.Start > Rec.End)
			Warning("GFF line %d: invalid start %d / end %d",
			  GFFLineNr, Rec.Start, Rec.End);
		++TRSCount;
		}
	return TRSCount;
	}
Example #4
0
static int ReadTRSPass2(FILE *f, TRSData *TRSs)
	{
	rewind(f);

	GFFLineNr = 0;
	int TRSCount = 0;
	GFFRecord Rec;
	while (GetNextGFFRecord(f, Rec))
		{
		if (0 != strcmp(Rec.Feature, "trs"))
			continue;
		if (Rec.Start <= 0 || Rec.End <= 0 || Rec.Start > Rec.End)
			Warning("GFF line %d: invalid start %d / end %d",
			  GFFLineNr, Rec.Start, Rec.End);

		static char *Fam = GetFam(Rec);

		int FamIndex = 0;
		int SuperFamIndex = 0;
		int n = sscanf(Fam, "%d.%d", &FamIndex, &SuperFamIndex);
		if (n == 1)
			SuperFamIndex = -1;
		else if (n != 2)
			Quit("Invalid Family %s", Fam);

		TRSData &TRS = TRSs[TRSCount];

		const int Length = Rec.End - Rec.Start + 1;

		TRS.ContigLabel = strsave(Rec.SeqName);
		TRS.ContigFrom = Rec.Start - 1;
		TRS.ContigTo = TRS.ContigFrom + Length - 1;
		TRS.FamIndex = FamIndex;
		TRS.SuperFamIndex = SuperFamIndex;

		if (Rec.Strand == '+')
			TRS.Rev = false;
		else if (Rec.Strand == '-')
			TRS.Rev = true;
		else
			Quit("GFF line %d, Invalid strand %c", GFFLineNr, Rec.Strand);

		++TRSCount;
		}
	return TRSCount;
	}
Example #5
0
void AnnotEdge()
	{
	const char *InputFileName = RequiredValueOpt("annotedge");
	const char *RepeatFileName = RequiredValueOpt("rep");
	const char *OutputFileName = RequiredValueOpt("out");

	ProgressStart("Reading repeat file");
	int RepCount;
	RepData *Reps = ReadReps(RepeatFileName, &RepCount);
	ProgressDone();

	Progress("%d records", RepCount);

	FILE *fInput = OpenStdioFile(InputFileName);
	FILE *fOutput = OpenStdioFile(OutputFileName, FILEIO_MODE_WriteOnly);

	ProgressStart("Transferring annotation");
	GFFRecord Rec;
	while (GetNextGFFRecord(fInput, Rec))
		{
		const bool Rev = (Rec.Strand == '-');
		const char *Annot = MakeAnnotEdge(Rec.SeqName, Rec.Start-1, Rec.End-1, Rev,
		  Reps, RepCount);

		fprintf(fOutput, "%s\t%s\t%s\t%d\t%d\t%.3g\t%c",
		//                0   1   2   3   4   5     6
		  Rec.SeqName,	// 0
		  Rec.Source,	// 1
		  Rec.Feature,	// 2
		  Rec.Start,	// 3
		  Rec.End,		// 4
		  Rec.Score,	// 5
		  Rec.Strand);	// 6

		if (-1 == Rec.Frame)
			fprintf(fOutput, "\t.");
		else
			fprintf(fOutput, "\t%d", Rec.Frame);

		fprintf(fOutput, "\t%s ; Annot \"%s\"\n", Rec.Attrs, Annot);
		}
	fclose(fInput);
	fclose(fOutput);
	ProgressDone();
	}
Example #6
0
int GLIX::FromGFFFile(FILE *f)
	{
	int RecordCount = 0;
	GFFRecord Rec;
	while (GetNextGFFRecord(f, Rec))
		{
		++RecordCount;
		Add(Rec.SeqName, Rec.End - 1);
		if (HasTargetAttrs(Rec.Attrs))
			{
			char TargetName[MAX_GFF_FEATURE_LENGTH+1];
			int Start;
			int End;
			ParseTargetAttrs(Rec.Attrs, TargetName, sizeof(TargetName), &Start, &End);
			Add(TargetName, End - 1);
			}
		}
	AssignOffsets();
	return RecordCount;
	}
Example #7
0
static int ReadRepsPass2(FILE *f, RepData *Reps)
	{
	rewind(f);

	GFFLineNr = 0;
	int RepCount = 0;
	GFFRecord Rec;
	while (GetNextGFFRecord(f, Rec))
		{
		if (0 != strcmp(Rec.Feature, "repeat"))
			continue;

		static char *Repeat = GetRepeat(Rec);

		RepData &Rep = Reps[RepCount];
		ParseRepeat(Repeat, Rep);

		if (Rec.Start <= 0 || Rec.End <= 0 || Rec.Start > Rec.End)
			Warning("GFF line %d: invalid start %d / end %d",
			  GFFLineNr, Rec.Start, Rec.End);

		const int Length = Rec.End - Rec.Start + 1;

		Rep.ContigLabel = strsave(Rec.SeqName);
		Rep.ContigFrom = Rec.Start - 1;
		Rep.ContigTo = Rep.ContigFrom + Length - 1;

		if (Rec.Strand == '+')
			Rep.Rev = false;
		else if (Rec.Strand == '-')
			Rep.Rev = true;
		else
			Quit("GFF line %d, Invalid strand %c", GFFLineNr, Rec.Strand);

		++RepCount;
		}
	return RepCount;
	}
Example #8
0
static int ReadRepsPass1(FILE *f)
	{
	GFFLineNr = 0;
	int RepCount = 0;
	GFFRecord Rec;
	while (GetNextGFFRecord(f, Rec))
		{
		if (0 != strcmp(Rec.Feature, "repeat"))
			{
			static bool WarningIssued = false;
			if (!WarningIssued)
				{
				Warning("GFF record feature '%s' is not a repeat", Rec.Feature);
				WarningIssued = true;
				}
			continue;
			}
		if (Rec.Start <= 0 || Rec.End <= 0 || Rec.Start > Rec.End)
			Warning("GFF line %d: invalid start %d / end %d",
			  GFFLineNr, Rec.Start, Rec.End);
		++RepCount;
		}
	return RepCount;
	}
Example #9
0
void Tan()
	{
// Image file annotated with from-to pile indexes
// Produced by:
//		piler2 -trs banded_hits.gff -images mainband_images.gff
	const char *HitFileName = RequiredValueOpt("tan");
	const char *OutFileName = RequiredValueOpt("out");
	const char *PyramidFileName = ValueOpt("pyramid");
	const char *MotifFileName = ValueOpt("motif");
	const char *strMinHits = ValueOpt("minhits");
	const char *strMaxMargin = ValueOpt("maxmargin");
	const char *strMinRatio = ValueOpt("minratio");

	if (0 != strMinHits)
		MIN_HIT_COUNT = atoi(strMinHits);
	if (0 != strMaxMargin)
		MAX_FRACT_MARGIN = atof(strMaxMargin);
	if (0 != strMinRatio)
		MIN_RATIO = atof(strMinRatio);

	FILE *fInput = OpenStdioFile(HitFileName);

	ProgressStart("Initialize piles");
	GFFRecord Rec;
	int HitCount = 0;
	while (GetNextGFFRecord(fInput, Rec))
		{
		if (0 != strcmp(Rec.Feature, "hit"))
			continue;

		int QueryPileIndex = -1;
		int TargetPileIndex = -1;
		ParsePilesAttrs(Rec.Attrs, &QueryPileIndex, &TargetPileIndex);
		if (QueryPileIndex != TargetPileIndex)
			continue;

		char TargetLabel[128];
		int TargetStart;
		int TargetEnd;
		ParseTargetAttrs(Rec.Attrs, TargetLabel, sizeof(TargetLabel), &TargetStart, &TargetEnd);
		if (0 != strcmp(Rec.SeqName, TargetLabel))
			Quit("Labels don't match");

		const int QueryFrom = Rec.Start - 1;
		const int QueryTo = Rec.End - 1;
		const int TargetFrom = TargetStart - 1;
		const int TargetTo = TargetEnd - 1;
		const bool Rev = (Rec.Strand == '-');

		AddHit(QueryPileIndex, Rec.SeqName, QueryFrom, QueryTo, TargetFrom, TargetTo, Rev);
		++HitCount;
		}
	ProgressDone();

	Progress("%d hits, %d piles", HitCount, PileCount);

	ProgressStart("Allocate piles");
	for (int PileIndex = 0; PileIndex < PileCount; ++PileIndex)
		{
		TanPile &Pile = Piles[PileIndex];
		Pile.Hits = all(HitData, Pile.HitCount);
		Pile.HitCount = 0;
		}
	ProgressDone();

	ProgressStart("Assign hits to piles");
	Rewind(fInput);
	while (GetNextGFFRecord(fInput, Rec))
		{
		if (0 != strcmp(Rec.Feature, "hit"))
			continue;

		int QueryPileIndex = -1;
		int TargetPileIndex = -1;
		ParsePilesAttrs(Rec.Attrs, &QueryPileIndex, &TargetPileIndex);
		if (QueryPileIndex != TargetPileIndex)
			continue;

		char TargetLabel[128];
		int TargetStart;
		int TargetEnd;
		ParseTargetAttrs(Rec.Attrs, TargetLabel, sizeof(TargetLabel), &TargetStart, &TargetEnd);
		if (0 != strcmp(Rec.SeqName, TargetLabel))
			Quit("Labels don't match");

		const int QueryFrom = Rec.Start - 1;
		const int QueryTo = Rec.End - 1;
		const int TargetFrom = TargetStart - 1;
		const int TargetTo = TargetEnd - 1;
		const bool Rev = (Rec.Strand == '-');

		AssignHit(QueryPileIndex, Rec.SeqName, QueryFrom, QueryTo, TargetFrom, TargetTo, Rev);
		}
	ProgressDone();

	fOut = OpenStdioFile(OutFileName, FILEIO_MODE_WriteOnly);
	fPyramid = (0 == PyramidFileName ? 0 : OpenStdioFile(PyramidFileName, FILEIO_MODE_WriteOnly));
	fMotif = (0 == PyramidFileName ? 0 : OpenStdioFile(MotifFileName, FILEIO_MODE_WriteOnly));

	ProgressStart("Find pyramids");
	for (int PileIndex = 0; PileIndex < PileCount; ++PileIndex)
		FindPyramids(PileIndex);
	int PyramidCount = PyramidIndex;
	ProgressDone();

	Progress("%d pyramids", PyramidCount);
	}
Example #10
0
void TR()
{
#if defined(DEBUG) && defined(_MSC_VER)
    _CrtSetDbgFlag(0);	// too expensive
#endif

    const char *HitFileName = RequiredValueOpt("tr");
    const char *OutFileName = RequiredValueOpt("out");
    const char *CandFileName = ValueOpt("cand");

    const char *strMinTrSpacing = ValueOpt("mintrspacing");
    const char *strMaxTrSpacing = ValueOpt("maxtrspacing");
    const char *strMinTrLength = ValueOpt("mintrlength");
    const char *strMaxTrLength = ValueOpt("minspacingratio");
    const char *strMinFam = ValueOpt("minfam");
    const char *strMinHitRatio = ValueOpt("minhitratio");
    const char *strMinDistPairs = ValueOpt("mindistpairs");

    if (0 != strMinTrSpacing)
        MIN_LENGTH_LINE = atoi(strMinTrSpacing);
    if (0 != strMaxTrSpacing)
        MAX_LENGTH_LINE = atoi(strMaxTrSpacing);
    if (0 != strMinTrLength)
        MIN_LENGTH_LTR = atoi(strMinTrLength);
    if (0 != strMaxTrLength)
        MAX_LENGTH_LTR = atoi(strMaxTrLength);
    if (0 != strMinFam)
        MIN_FAM_SIZE = atoi(strMinFam);
    if (0 != strMinHitRatio)
        MIN_HIT_LENGTH_RATIO = atoi(strMinHitRatio);
    if (0 != strMinDistPairs)
        MIN_DIST_EDGE = atoi(strMinDistPairs);

    FILE *fHit = OpenStdioFile(HitFileName, FILEIO_MODE_ReadOnly);

    ProgressStart("Index hits");
    GLIX HitGlix;
    HitGlix.Init();
    HitGlix.FromGFFFile(fHit);
    HitGlix.MakeGlobalToLocalIndex();
    ProgressDone();

    const int GlobalLength = HitGlix.GetGlobalLength();
    IIX IntervalIndex;
    IntervalIndex.Init(GlobalLength);

    ProgressStart("Find candidate TRs");
    Rewind(fHit);
    GFFRecord Rec;
    while (GetNextGFFRecord(fHit, Rec))
    {
        HitData Hit;
        GFFRecordToHit(HitGlix, Rec, Hit);
        if (IsCandLTR(Hit))
            AddCand(Hit, IntervalIndex);
    }
    ProgressDone();

    Progress("%d candidates", CandCount);

    if (0 != CandFileName)
    {
        ProgressStart("Write candidates");
        FILE *fCand = OpenStdioFile(CandFileName, FILEIO_MODE_WriteOnly);
        WriteCands(fCand, HitGlix);
        ProgressDone();
    }

    ProgressStart("Make graph");
    Rewind(fHit);
    while (GetNextGFFRecord(fHit, Rec))
    {
        HitData Hit;
        GFFRecordToHit(HitGlix, Rec, Hit);
        FindEdges(Hit, HitGlix, IntervalIndex);
    }
    fclose(fHit);
    fHit = 0;

    ProgressDone();

    Progress("%d edges", (int) Edges.size());

    ProgressStart("Find families");
    FamList Fams;
    FindConnectedComponents(Edges, Fams, MIN_FAM_SIZE);
    ProgressDone();

    Progress("%d families", (int) Fams.size());

    FILE *fOut = OpenStdioFile(OutFileName, FILEIO_MODE_WriteOnly);
    WriteOutputFile(fOut, HitGlix, Fams);
}