static int ReadMotifPass2(FILE *f, MotifData *Motifs) { rewind(f); GFFLineNr = 0; int MotifCount = 0; GFFRecord Rec; while (GetNextGFFRecord(f, Rec)) { if (0 != strcmp(Rec.Feature, "tandemmotif")) continue; if (Rec.Start <= 0 || Rec.End <= 0 || Rec.Start > Rec.End) Warning("GFF line %d: invalid start %d / end %d", GFFLineNr, Rec.Start, Rec.End); int FamIndex = GetFam(Rec); MotifData &Motif = Motifs[MotifCount]; const int Length = Rec.End - Rec.Start + 1; Motif.ContigLabel = strsave(Rec.SeqName); Motif.ContigFrom = Rec.Start - 1; Motif.ContigTo = Motif.ContigFrom + Length - 1; Motif.FamIndex = FamIndex; ++MotifCount; } return MotifCount; }
// Replaces the contents of this set with the records read from f.
// Free() is called first, then each record returned by GetNextGFFRecord is
// passed through SaveGFFStrings and appended with Add, in file order.
void GFFSet::FromFile(FILE *f)
{
    Free();

    for (GFFRecord Rec; GetNextGFFRecord(f, Rec); )
    {
        // NOTE(review): SaveGFFStrings presumably makes the record's strings
        // outlive the reader's buffers before the record is stored -- confirm.
        SaveGFFStrings(Rec);
        Add(Rec);
    }
}
// First pass over a TRS GFF file: counts the records whose feature is "trs".
// Records with invalid coordinates trigger a warning but are still counted.
// Reading starts at the stream's current position (no rewind is done here).
//
// Returns the number of "trs" records seen.
static int ReadTRSPass1(FILE *f)
{
    GFFLineNr = 0;

    int Seen = 0;
    GFFRecord Rec;
    while (GetNextGFFRecord(f, Rec))
    {
        if (strcmp(Rec.Feature, "trs") == 0)
        {
            const bool CoordsOk = (Rec.Start > 0 && Rec.End > 0 && Rec.Start <= Rec.End);
            if (!CoordsOk)
                Warning("GFF line %d: invalid start %d / end %d", GFFLineNr, Rec.Start, Rec.End);
            ++Seen;
        }
    }
    return Seen;
}
static int ReadTRSPass2(FILE *f, TRSData *TRSs) { rewind(f); GFFLineNr = 0; int TRSCount = 0; GFFRecord Rec; while (GetNextGFFRecord(f, Rec)) { if (0 != strcmp(Rec.Feature, "trs")) continue; if (Rec.Start <= 0 || Rec.End <= 0 || Rec.Start > Rec.End) Warning("GFF line %d: invalid start %d / end %d", GFFLineNr, Rec.Start, Rec.End); static char *Fam = GetFam(Rec); int FamIndex = 0; int SuperFamIndex = 0; int n = sscanf(Fam, "%d.%d", &FamIndex, &SuperFamIndex); if (n == 1) SuperFamIndex = -1; else if (n != 2) Quit("Invalid Family %s", Fam); TRSData &TRS = TRSs[TRSCount]; const int Length = Rec.End - Rec.Start + 1; TRS.ContigLabel = strsave(Rec.SeqName); TRS.ContigFrom = Rec.Start - 1; TRS.ContigTo = TRS.ContigFrom + Length - 1; TRS.FamIndex = FamIndex; TRS.SuperFamIndex = SuperFamIndex; if (Rec.Strand == '+') TRS.Rev = false; else if (Rec.Strand == '-') TRS.Rev = true; else Quit("GFF line %d, Invalid strand %c", GFFLineNr, Rec.Strand); ++TRSCount; } return TRSCount; }
void AnnotEdge() { const char *InputFileName = RequiredValueOpt("annotedge"); const char *RepeatFileName = RequiredValueOpt("rep"); const char *OutputFileName = RequiredValueOpt("out"); ProgressStart("Reading repeat file"); int RepCount; RepData *Reps = ReadReps(RepeatFileName, &RepCount); ProgressDone(); Progress("%d records", RepCount); FILE *fInput = OpenStdioFile(InputFileName); FILE *fOutput = OpenStdioFile(OutputFileName, FILEIO_MODE_WriteOnly); ProgressStart("Transferring annotation"); GFFRecord Rec; while (GetNextGFFRecord(fInput, Rec)) { const bool Rev = (Rec.Strand == '-'); const char *Annot = MakeAnnotEdge(Rec.SeqName, Rec.Start-1, Rec.End-1, Rev, Reps, RepCount); fprintf(fOutput, "%s\t%s\t%s\t%d\t%d\t%.3g\t%c", // 0 1 2 3 4 5 6 Rec.SeqName, // 0 Rec.Source, // 1 Rec.Feature, // 2 Rec.Start, // 3 Rec.End, // 4 Rec.Score, // 5 Rec.Strand); // 6 if (-1 == Rec.Frame) fprintf(fOutput, "\t."); else fprintf(fOutput, "\t%d", Rec.Frame); fprintf(fOutput, "\t%s ; Annot \"%s\"\n", Rec.Attrs, Annot); } fclose(fInput); fclose(fOutput); ProgressDone(); }
// Reads every record from f and registers, via Add, the record's sequence
// name with its end position minus one. When the record's attributes carry
// target information (HasTargetAttrs), the target name and target end minus
// one are registered as well. AssignOffsets() is called once after the last
// record.
//
// Returns the number of records read.
int GLIX::FromGFFFile(FILE *f)
{
    int Total = 0;

    for (GFFRecord Rec; GetNextGFFRecord(f, Rec); )
    {
        ++Total;
        Add(Rec.SeqName, Rec.End - 1);

        if (!HasTargetAttrs(Rec.Attrs))
            continue;

        char TargetName[MAX_GFF_FEATURE_LENGTH+1];
        int TargetStart;    // filled by the parser; not used here
        int TargetEnd;
        ParseTargetAttrs(Rec.Attrs, TargetName, sizeof(TargetName), &TargetStart, &TargetEnd);
        Add(TargetName, TargetEnd - 1);
    }

    AssignOffsets();
    return Total;
}
static int ReadRepsPass2(FILE *f, RepData *Reps) { rewind(f); GFFLineNr = 0; int RepCount = 0; GFFRecord Rec; while (GetNextGFFRecord(f, Rec)) { if (0 != strcmp(Rec.Feature, "repeat")) continue; static char *Repeat = GetRepeat(Rec); RepData &Rep = Reps[RepCount]; ParseRepeat(Repeat, Rep); if (Rec.Start <= 0 || Rec.End <= 0 || Rec.Start > Rec.End) Warning("GFF line %d: invalid start %d / end %d", GFFLineNr, Rec.Start, Rec.End); const int Length = Rec.End - Rec.Start + 1; Rep.ContigLabel = strsave(Rec.SeqName); Rep.ContigFrom = Rec.Start - 1; Rep.ContigTo = Rep.ContigFrom + Length - 1; if (Rec.Strand == '+') Rep.Rev = false; else if (Rec.Strand == '-') Rep.Rev = true; else Quit("GFF line %d, Invalid strand %c", GFFLineNr, Rec.Strand); ++RepCount; } return RepCount; }
// First pass over a repeat GFF file: counts the records whose feature is
// "repeat". Non-repeat records are skipped; the first one encountered (over
// the lifetime of the process) produces a single warning. Records with
// invalid coordinates trigger a warning but are still counted.
// Reading starts at the stream's current position (no rewind is done here).
//
// Returns the number of "repeat" records seen.
static int ReadRepsPass1(FILE *f)
{
    // Static so the "not a repeat" warning is emitted at most once.
    static bool WarnedNonRepeat = false;

    GFFLineNr = 0;

    int Seen = 0;
    GFFRecord Rec;
    while (GetNextGFFRecord(f, Rec))
    {
        if (strcmp(Rec.Feature, "repeat") == 0)
        {
            const bool CoordsOk = (Rec.Start > 0 && Rec.End > 0 && Rec.Start <= Rec.End);
            if (!CoordsOk)
                Warning("GFF line %d: invalid start %d / end %d", GFFLineNr, Rec.Start, Rec.End);
            ++Seen;
        }
        else if (!WarnedNonRepeat)
        {
            Warning("GFF record feature '%s' is not a repeat", Rec.Feature);
            WarnedNonRepeat = true;
        }
    }
    return Seen;
}
void Tan() { // Image file annotated with from-to pile indexes // Produced by: // piler2 -trs banded_hits.gff -images mainband_images.gff const char *HitFileName = RequiredValueOpt("tan"); const char *OutFileName = RequiredValueOpt("out"); const char *PyramidFileName = ValueOpt("pyramid"); const char *MotifFileName = ValueOpt("motif"); const char *strMinHits = ValueOpt("minhits"); const char *strMaxMargin = ValueOpt("maxmargin"); const char *strMinRatio = ValueOpt("minratio"); if (0 != strMinHits) MIN_HIT_COUNT = atoi(strMinHits); if (0 != strMaxMargin) MAX_FRACT_MARGIN = atof(strMaxMargin); if (0 != strMinRatio) MIN_RATIO = atof(strMinRatio); FILE *fInput = OpenStdioFile(HitFileName); ProgressStart("Initialize piles"); GFFRecord Rec; int HitCount = 0; while (GetNextGFFRecord(fInput, Rec)) { if (0 != strcmp(Rec.Feature, "hit")) continue; int QueryPileIndex = -1; int TargetPileIndex = -1; ParsePilesAttrs(Rec.Attrs, &QueryPileIndex, &TargetPileIndex); if (QueryPileIndex != TargetPileIndex) continue; char TargetLabel[128]; int TargetStart; int TargetEnd; ParseTargetAttrs(Rec.Attrs, TargetLabel, sizeof(TargetLabel), &TargetStart, &TargetEnd); if (0 != strcmp(Rec.SeqName, TargetLabel)) Quit("Labels don't match"); const int QueryFrom = Rec.Start - 1; const int QueryTo = Rec.End - 1; const int TargetFrom = TargetStart - 1; const int TargetTo = TargetEnd - 1; const bool Rev = (Rec.Strand == '-'); AddHit(QueryPileIndex, Rec.SeqName, QueryFrom, QueryTo, TargetFrom, TargetTo, Rev); ++HitCount; } ProgressDone(); Progress("%d hits, %d piles", HitCount, PileCount); ProgressStart("Allocate piles"); for (int PileIndex = 0; PileIndex < PileCount; ++PileIndex) { TanPile &Pile = Piles[PileIndex]; Pile.Hits = all(HitData, Pile.HitCount); Pile.HitCount = 0; } ProgressDone(); ProgressStart("Assign hits to piles"); Rewind(fInput); while (GetNextGFFRecord(fInput, Rec)) { if (0 != strcmp(Rec.Feature, "hit")) continue; int QueryPileIndex = -1; int TargetPileIndex = -1; 
ParsePilesAttrs(Rec.Attrs, &QueryPileIndex, &TargetPileIndex); if (QueryPileIndex != TargetPileIndex) continue; char TargetLabel[128]; int TargetStart; int TargetEnd; ParseTargetAttrs(Rec.Attrs, TargetLabel, sizeof(TargetLabel), &TargetStart, &TargetEnd); if (0 != strcmp(Rec.SeqName, TargetLabel)) Quit("Labels don't match"); const int QueryFrom = Rec.Start - 1; const int QueryTo = Rec.End - 1; const int TargetFrom = TargetStart - 1; const int TargetTo = TargetEnd - 1; const bool Rev = (Rec.Strand == '-'); AssignHit(QueryPileIndex, Rec.SeqName, QueryFrom, QueryTo, TargetFrom, TargetTo, Rev); } ProgressDone(); fOut = OpenStdioFile(OutFileName, FILEIO_MODE_WriteOnly); fPyramid = (0 == PyramidFileName ? 0 : OpenStdioFile(PyramidFileName, FILEIO_MODE_WriteOnly)); fMotif = (0 == PyramidFileName ? 0 : OpenStdioFile(MotifFileName, FILEIO_MODE_WriteOnly)); ProgressStart("Find pyramids"); for (int PileIndex = 0; PileIndex < PileCount; ++PileIndex) FindPyramids(PileIndex); int PyramidCount = PyramidIndex; ProgressDone(); Progress("%d pyramids", PyramidCount); }
void TR() { #if defined(DEBUG) && defined(_MSC_VER) _CrtSetDbgFlag(0); // too expensive #endif const char *HitFileName = RequiredValueOpt("tr"); const char *OutFileName = RequiredValueOpt("out"); const char *CandFileName = ValueOpt("cand"); const char *strMinTrSpacing = ValueOpt("mintrspacing"); const char *strMaxTrSpacing = ValueOpt("maxtrspacing"); const char *strMinTrLength = ValueOpt("mintrlength"); const char *strMaxTrLength = ValueOpt("minspacingratio"); const char *strMinFam = ValueOpt("minfam"); const char *strMinHitRatio = ValueOpt("minhitratio"); const char *strMinDistPairs = ValueOpt("mindistpairs"); if (0 != strMinTrSpacing) MIN_LENGTH_LINE = atoi(strMinTrSpacing); if (0 != strMaxTrSpacing) MAX_LENGTH_LINE = atoi(strMaxTrSpacing); if (0 != strMinTrLength) MIN_LENGTH_LTR = atoi(strMinTrLength); if (0 != strMaxTrLength) MAX_LENGTH_LTR = atoi(strMaxTrLength); if (0 != strMinFam) MIN_FAM_SIZE = atoi(strMinFam); if (0 != strMinHitRatio) MIN_HIT_LENGTH_RATIO = atoi(strMinHitRatio); if (0 != strMinDistPairs) MIN_DIST_EDGE = atoi(strMinDistPairs); FILE *fHit = OpenStdioFile(HitFileName, FILEIO_MODE_ReadOnly); ProgressStart("Index hits"); GLIX HitGlix; HitGlix.Init(); HitGlix.FromGFFFile(fHit); HitGlix.MakeGlobalToLocalIndex(); ProgressDone(); const int GlobalLength = HitGlix.GetGlobalLength(); IIX IntervalIndex; IntervalIndex.Init(GlobalLength); ProgressStart("Find candidate TRs"); Rewind(fHit); GFFRecord Rec; while (GetNextGFFRecord(fHit, Rec)) { HitData Hit; GFFRecordToHit(HitGlix, Rec, Hit); if (IsCandLTR(Hit)) AddCand(Hit, IntervalIndex); } ProgressDone(); Progress("%d candidates", CandCount); if (0 != CandFileName) { ProgressStart("Write candidates"); FILE *fCand = OpenStdioFile(CandFileName, FILEIO_MODE_WriteOnly); WriteCands(fCand, HitGlix); ProgressDone(); } ProgressStart("Make graph"); Rewind(fHit); while (GetNextGFFRecord(fHit, Rec)) { HitData Hit; GFFRecordToHit(HitGlix, Rec, Hit); FindEdges(Hit, HitGlix, IntervalIndex); } fclose(fHit); fHit = 0; 
ProgressDone(); Progress("%d edges", (int) Edges.size()); ProgressStart("Find families"); FamList Fams; FindConnectedComponents(Edges, Fams, MIN_FAM_SIZE); ProgressDone(); Progress("%d families", (int) Fams.size()); FILE *fOut = OpenStdioFile(OutFileName, FILEIO_MODE_WriteOnly); WriteOutputFile(fOut, HitGlix, Fams); }