예제 #1
0
char *ReadAFA(const char FileName[], int *ptrSeqLength, int *ptrSeqCount)
	{
	FILE *f = OpenStdioFile(FileName);
	char *Seqs = ReadAFA(f, ptrSeqLength, ptrSeqCount);
	fclose(f);
	return Seqs;
	}
예제 #2
0
void Tanmotif2Fasta()
	{
	const char *MotifFileName = RequiredValueOpt("tanmotif2fasta");
	const char *SeqFileName = RequiredValueOpt("seq");
	const char *Path = ValueOpt("path");
	const char *strMaxFam = ValueOpt("maxfam");
	const char *Prefix = ValueOpt("prefix");

	int MaxFam = DEFAULT_MAX_FAM;
	if (strMaxFam != 0)
		MaxFam = atoi(strMaxFam);

	if (0 == Path)
		Path = ".";

	ProgressStart("Reading seq file");
	int SeqLength;
	const char *Seq = ReadMFA(SeqFileName, &SeqLength);
	ProgressDone();

	Progress("Seq length %d bases, %.3g Mb", SeqLength, SeqLength/1e6);

	ProgressStart("Read Motif file");
	int MotifCount;
	MotifData *Motifs = ReadMotif(MotifFileName, &MotifCount);
	ProgressDone();

	Progress("%d records", MotifCount);

	ProgressStart("Sorting by family");
	qsort((void *) Motifs, MotifCount, sizeof(MotifData), CmpMotif);
	ProgressDone();

	FILE *f = 0;
	int CurrentFamily = -1;
	int MemberCount = 0;
	for (int MotifIndex = 0; MotifIndex < MotifCount; ++MotifIndex)
		{
		const MotifData &Motif = Motifs[MotifIndex];
		if (Motif.FamIndex != CurrentFamily)
			{
			if (f != 0)
				fclose(f);
			char *FastaFileName = FamFileName(Path, Motif.FamIndex);
			f = OpenStdioFile(FastaFileName, FILEIO_MODE_WriteOnly);
			CurrentFamily = Motif.FamIndex;
			MemberCount = 0;
			}
		++MemberCount;
		if (MemberCount > MaxFam)
			continue;
		const int From = ContigToGlobal(Motif.ContigFrom, Motif.ContigLabel);
		const int Length = Motif.ContigTo - Motif.ContigFrom + 1;
		char *Label = MotifLabel(Prefix, Motif);
		WriteFasta(f, Seq + From, Length, Label, false);
		freemem(Label);
		}
	}
예제 #3
0
void TRS2Fasta()
	{
	const char *TRSFileName = RequiredValueOpt("trs2fasta");
	const char *SeqFileName = RequiredValueOpt("seq");
	const char *Path = ValueOpt("path");
	const char *strMaxFam = ValueOpt("maxfam");
	const char *Prefix = ValueOpt("prefix");

	int MaxFam = DEFAULT_MAX_FAM;
	if (strMaxFam != 0)
		MaxFam = atoi(strMaxFam);

	if (0 == Path)
		Path = ".";

	ProgressStart("Reading seq file");
	int SeqLength;
	const char *Seq = ReadMFA(SeqFileName, &SeqLength);
	ProgressDone();

	Progress("Seq length %d bases, %.3g Mb", SeqLength, SeqLength/1e6);

	ProgressStart("Read TRS file");
	int TRSCount;
	TRSData *TRSs = ReadTRS(TRSFileName, &TRSCount);
	ProgressDone();

	Progress("%d records", TRSCount);

	ProgressStart("Sorting by family");
	qsort((void *) TRSs, TRSCount, sizeof(TRSData), CmpTRS);
	ProgressDone();

	FILE *f = 0;
	int CurrentFamily = -1;
	int MemberCount = 0;
	for (int TRSIndex = 0; TRSIndex < TRSCount; ++TRSIndex)
		{
		const TRSData &TRS = TRSs[TRSIndex];
		if (TRS.FamIndex != CurrentFamily)
			{
			if (f != 0)
				fclose(f);
			char *FastaFileName = FamFileName(Path, TRS.FamIndex, TRS.SuperFamIndex);
			f = OpenStdioFile(FastaFileName, FILEIO_MODE_WriteOnly);
			CurrentFamily = TRS.FamIndex;
			MemberCount = 0;
			}
		++MemberCount;
		if (MemberCount > MaxFam)
			continue;
		const int From = ContigToGlobal(TRS.ContigFrom, TRS.ContigLabel);
		const int Length = TRS.ContigTo - TRS.ContigFrom + 1;
		char *Label = TRSLabel(Prefix, TRS);
		WriteFasta(f, Seq + From, Length, Label, TRS.Rev);
		freemem(Label);
		}
	}
예제 #4
0
void AnnotEdge()
	{
	const char *InputFileName = RequiredValueOpt("annotedge");
	const char *RepeatFileName = RequiredValueOpt("rep");
	const char *OutputFileName = RequiredValueOpt("out");

	ProgressStart("Reading repeat file");
	int RepCount;
	RepData *Reps = ReadReps(RepeatFileName, &RepCount);
	ProgressDone();

	Progress("%d records", RepCount);

	FILE *fInput = OpenStdioFile(InputFileName);
	FILE *fOutput = OpenStdioFile(OutputFileName, FILEIO_MODE_WriteOnly);

	ProgressStart("Transferring annotation");
	GFFRecord Rec;
	while (GetNextGFFRecord(fInput, Rec))
		{
		const bool Rev = (Rec.Strand == '-');
		const char *Annot = MakeAnnotEdge(Rec.SeqName, Rec.Start-1, Rec.End-1, Rev,
		  Reps, RepCount);

		fprintf(fOutput, "%s\t%s\t%s\t%d\t%d\t%.3g\t%c",
		//                0   1   2   3   4   5     6
		  Rec.SeqName,	// 0
		  Rec.Source,	// 1
		  Rec.Feature,	// 2
		  Rec.Start,	// 3
		  Rec.End,		// 4
		  Rec.Score,	// 5
		  Rec.Strand);	// 6

		if (-1 == Rec.Frame)
			fprintf(fOutput, "\t.");
		else
			fprintf(fOutput, "\t%d", Rec.Frame);

		fprintf(fOutput, "\t%s ; Annot \"%s\"\n", Rec.Attrs, Annot);
		}
	fclose(fInput);
	fclose(fOutput);
	ProgressDone();
	}
예제 #5
0
TRSData *ReadTRS(const char *FileName, int *ptrTRSCount)
	{
	FILE *f = OpenStdioFile(FileName);

	int TRSCount = ReadTRSPass1(f);
	TRSData *TRSs = all(TRSData, TRSCount);
	ReadTRSPass2(f, TRSs);
	fclose(f);

	*ptrTRSCount = TRSCount;
	return TRSs;
	}
예제 #6
0
MotifData *ReadMotif(const char *FileName, int *ptrMotifCount)
	{
	FILE *f = OpenStdioFile(FileName);

	int MotifCount = ReadMotifPass1(f);
	MotifData *Motifs = all(MotifData, MotifCount);
	ReadMotifPass2(f, Motifs);
	fclose(f);

	*ptrMotifCount = MotifCount;
	return Motifs;
	}
예제 #7
0
RepData *ReadReps(const char *FileName, int *ptrRepCount)
	{
	FILE *f = OpenStdioFile(FileName);

	int RepCount = ReadRepsPass1(f);
	RepData *Reps = all(RepData, RepCount);
	ReadRepsPass2(f, Reps);
	fclose(f);

	*ptrRepCount = RepCount;
	return Reps;
	}
예제 #8
0
void Tan()
	{
// Image file annotated with from-to pile indexes
// Produced by:
//		piler2 -trs banded_hits.gff -images mainband_images.gff
	const char *HitFileName = RequiredValueOpt("tan");
	const char *OutFileName = RequiredValueOpt("out");
	const char *PyramidFileName = ValueOpt("pyramid");
	const char *MotifFileName = ValueOpt("motif");
	const char *strMinHits = ValueOpt("minhits");
	const char *strMaxMargin = ValueOpt("maxmargin");
	const char *strMinRatio = ValueOpt("minratio");

	if (0 != strMinHits)
		MIN_HIT_COUNT = atoi(strMinHits);
	if (0 != strMaxMargin)
		MAX_FRACT_MARGIN = atof(strMaxMargin);
	if (0 != strMinRatio)
		MIN_RATIO = atof(strMinRatio);

	FILE *fInput = OpenStdioFile(HitFileName);

	ProgressStart("Initialize piles");
	GFFRecord Rec;
	int HitCount = 0;
	while (GetNextGFFRecord(fInput, Rec))
		{
		if (0 != strcmp(Rec.Feature, "hit"))
			continue;

		int QueryPileIndex = -1;
		int TargetPileIndex = -1;
		ParsePilesAttrs(Rec.Attrs, &QueryPileIndex, &TargetPileIndex);
		if (QueryPileIndex != TargetPileIndex)
			continue;

		char TargetLabel[128];
		int TargetStart;
		int TargetEnd;
		ParseTargetAttrs(Rec.Attrs, TargetLabel, sizeof(TargetLabel), &TargetStart, &TargetEnd);
		if (0 != strcmp(Rec.SeqName, TargetLabel))
			Quit("Labels don't match");

		const int QueryFrom = Rec.Start - 1;
		const int QueryTo = Rec.End - 1;
		const int TargetFrom = TargetStart - 1;
		const int TargetTo = TargetEnd - 1;
		const bool Rev = (Rec.Strand == '-');

		AddHit(QueryPileIndex, Rec.SeqName, QueryFrom, QueryTo, TargetFrom, TargetTo, Rev);
		++HitCount;
		}
	ProgressDone();

	Progress("%d hits, %d piles", HitCount, PileCount);

	ProgressStart("Allocate piles");
	for (int PileIndex = 0; PileIndex < PileCount; ++PileIndex)
		{
		TanPile &Pile = Piles[PileIndex];
		Pile.Hits = all(HitData, Pile.HitCount);
		Pile.HitCount = 0;
		}
	ProgressDone();

	ProgressStart("Assign hits to piles");
	Rewind(fInput);
	while (GetNextGFFRecord(fInput, Rec))
		{
		if (0 != strcmp(Rec.Feature, "hit"))
			continue;

		int QueryPileIndex = -1;
		int TargetPileIndex = -1;
		ParsePilesAttrs(Rec.Attrs, &QueryPileIndex, &TargetPileIndex);
		if (QueryPileIndex != TargetPileIndex)
			continue;

		char TargetLabel[128];
		int TargetStart;
		int TargetEnd;
		ParseTargetAttrs(Rec.Attrs, TargetLabel, sizeof(TargetLabel), &TargetStart, &TargetEnd);
		if (0 != strcmp(Rec.SeqName, TargetLabel))
			Quit("Labels don't match");

		const int QueryFrom = Rec.Start - 1;
		const int QueryTo = Rec.End - 1;
		const int TargetFrom = TargetStart - 1;
		const int TargetTo = TargetEnd - 1;
		const bool Rev = (Rec.Strand == '-');

		AssignHit(QueryPileIndex, Rec.SeqName, QueryFrom, QueryTo, TargetFrom, TargetTo, Rev);
		}
	ProgressDone();

	fOut = OpenStdioFile(OutFileName, FILEIO_MODE_WriteOnly);
	fPyramid = (0 == PyramidFileName ? 0 : OpenStdioFile(PyramidFileName, FILEIO_MODE_WriteOnly));
	fMotif = (0 == PyramidFileName ? 0 : OpenStdioFile(MotifFileName, FILEIO_MODE_WriteOnly));

	ProgressStart("Find pyramids");
	for (int PileIndex = 0; PileIndex < PileCount; ++PileIndex)
		FindPyramids(PileIndex);
	int PyramidCount = PyramidIndex;
	ProgressDone();

	Progress("%d pyramids", PyramidCount);
	}
예제 #9
0
void TR()
{
#if defined(DEBUG) && defined(_MSC_VER)
    _CrtSetDbgFlag(0);	// too expensive
#endif

    const char *HitFileName = RequiredValueOpt("tr");
    const char *OutFileName = RequiredValueOpt("out");
    const char *CandFileName = ValueOpt("cand");

    const char *strMinTrSpacing = ValueOpt("mintrspacing");
    const char *strMaxTrSpacing = ValueOpt("maxtrspacing");
    const char *strMinTrLength = ValueOpt("mintrlength");
    const char *strMaxTrLength = ValueOpt("minspacingratio");
    const char *strMinFam = ValueOpt("minfam");
    const char *strMinHitRatio = ValueOpt("minhitratio");
    const char *strMinDistPairs = ValueOpt("mindistpairs");

    if (0 != strMinTrSpacing)
        MIN_LENGTH_LINE = atoi(strMinTrSpacing);
    if (0 != strMaxTrSpacing)
        MAX_LENGTH_LINE = atoi(strMaxTrSpacing);
    if (0 != strMinTrLength)
        MIN_LENGTH_LTR = atoi(strMinTrLength);
    if (0 != strMaxTrLength)
        MAX_LENGTH_LTR = atoi(strMaxTrLength);
    if (0 != strMinFam)
        MIN_FAM_SIZE = atoi(strMinFam);
    if (0 != strMinHitRatio)
        MIN_HIT_LENGTH_RATIO = atoi(strMinHitRatio);
    if (0 != strMinDistPairs)
        MIN_DIST_EDGE = atoi(strMinDistPairs);

    FILE *fHit = OpenStdioFile(HitFileName, FILEIO_MODE_ReadOnly);

    ProgressStart("Index hits");
    GLIX HitGlix;
    HitGlix.Init();
    HitGlix.FromGFFFile(fHit);
    HitGlix.MakeGlobalToLocalIndex();
    ProgressDone();

    const int GlobalLength = HitGlix.GetGlobalLength();
    IIX IntervalIndex;
    IntervalIndex.Init(GlobalLength);

    ProgressStart("Find candidate TRs");
    Rewind(fHit);
    GFFRecord Rec;
    while (GetNextGFFRecord(fHit, Rec))
    {
        HitData Hit;
        GFFRecordToHit(HitGlix, Rec, Hit);
        if (IsCandLTR(Hit))
            AddCand(Hit, IntervalIndex);
    }
    ProgressDone();

    Progress("%d candidates", CandCount);

    if (0 != CandFileName)
    {
        ProgressStart("Write candidates");
        FILE *fCand = OpenStdioFile(CandFileName, FILEIO_MODE_WriteOnly);
        WriteCands(fCand, HitGlix);
        ProgressDone();
    }

    ProgressStart("Make graph");
    Rewind(fHit);
    while (GetNextGFFRecord(fHit, Rec))
    {
        HitData Hit;
        GFFRecordToHit(HitGlix, Rec, Hit);
        FindEdges(Hit, HitGlix, IntervalIndex);
    }
    fclose(fHit);
    fHit = 0;

    ProgressDone();

    Progress("%d edges", (int) Edges.size());

    ProgressStart("Find families");
    FamList Fams;
    FindConnectedComponents(Edges, Fams, MIN_FAM_SIZE);
    ProgressDone();

    Progress("%d families", (int) Fams.size());

    FILE *fOut = OpenStdioFile(OutFileName, FILEIO_MODE_WriteOnly);
    WriteOutputFile(fOut, HitGlix, Fams);
}