// File-name overload of ReadAFA: opens FileName, delegates to the FILE*-based
// ReadAFA overload, closes the file again and hands back whatever that
// overload returned. ptrSeqLength and ptrSeqCount are passed through untouched.
char *ReadAFA(const char FileName[], int *ptrSeqLength, int *ptrSeqCount)
{
    FILE *fInput = OpenStdioFile(FileName);
    char *Result = ReadAFA(fInput, ptrSeqLength, ptrSeqCount);
    fclose(fInput);
    return Result;
}
void Tanmotif2Fasta() { const char *MotifFileName = RequiredValueOpt("tanmotif2fasta"); const char *SeqFileName = RequiredValueOpt("seq"); const char *Path = ValueOpt("path"); const char *strMaxFam = ValueOpt("maxfam"); const char *Prefix = ValueOpt("prefix"); int MaxFam = DEFAULT_MAX_FAM; if (strMaxFam != 0) MaxFam = atoi(strMaxFam); if (0 == Path) Path = "."; ProgressStart("Reading seq file"); int SeqLength; const char *Seq = ReadMFA(SeqFileName, &SeqLength); ProgressDone(); Progress("Seq length %d bases, %.3g Mb", SeqLength, SeqLength/1e6); ProgressStart("Read Motif file"); int MotifCount; MotifData *Motifs = ReadMotif(MotifFileName, &MotifCount); ProgressDone(); Progress("%d records", MotifCount); ProgressStart("Sorting by family"); qsort((void *) Motifs, MotifCount, sizeof(MotifData), CmpMotif); ProgressDone(); FILE *f = 0; int CurrentFamily = -1; int MemberCount = 0; for (int MotifIndex = 0; MotifIndex < MotifCount; ++MotifIndex) { const MotifData &Motif = Motifs[MotifIndex]; if (Motif.FamIndex != CurrentFamily) { if (f != 0) fclose(f); char *FastaFileName = FamFileName(Path, Motif.FamIndex); f = OpenStdioFile(FastaFileName, FILEIO_MODE_WriteOnly); CurrentFamily = Motif.FamIndex; MemberCount = 0; } ++MemberCount; if (MemberCount > MaxFam) continue; const int From = ContigToGlobal(Motif.ContigFrom, Motif.ContigLabel); const int Length = Motif.ContigTo - Motif.ContigFrom + 1; char *Label = MotifLabel(Prefix, Motif); WriteFasta(f, Seq + From, Length, Label, false); freemem(Label); } }
void TRS2Fasta() { const char *TRSFileName = RequiredValueOpt("trs2fasta"); const char *SeqFileName = RequiredValueOpt("seq"); const char *Path = ValueOpt("path"); const char *strMaxFam = ValueOpt("maxfam"); const char *Prefix = ValueOpt("prefix"); int MaxFam = DEFAULT_MAX_FAM; if (strMaxFam != 0) MaxFam = atoi(strMaxFam); if (0 == Path) Path = "."; ProgressStart("Reading seq file"); int SeqLength; const char *Seq = ReadMFA(SeqFileName, &SeqLength); ProgressDone(); Progress("Seq length %d bases, %.3g Mb", SeqLength, SeqLength/1e6); ProgressStart("Read TRS file"); int TRSCount; TRSData *TRSs = ReadTRS(TRSFileName, &TRSCount); ProgressDone(); Progress("%d records", TRSCount); ProgressStart("Sorting by family"); qsort((void *) TRSs, TRSCount, sizeof(TRSData), CmpTRS); ProgressDone(); FILE *f = 0; int CurrentFamily = -1; int MemberCount = 0; for (int TRSIndex = 0; TRSIndex < TRSCount; ++TRSIndex) { const TRSData &TRS = TRSs[TRSIndex]; if (TRS.FamIndex != CurrentFamily) { if (f != 0) fclose(f); char *FastaFileName = FamFileName(Path, TRS.FamIndex, TRS.SuperFamIndex); f = OpenStdioFile(FastaFileName, FILEIO_MODE_WriteOnly); CurrentFamily = TRS.FamIndex; MemberCount = 0; } ++MemberCount; if (MemberCount > MaxFam) continue; const int From = ContigToGlobal(TRS.ContigFrom, TRS.ContigLabel); const int Length = TRS.ContigTo - TRS.ContigFrom + 1; char *Label = TRSLabel(Prefix, TRS); WriteFasta(f, Seq + From, Length, Label, TRS.Rev); freemem(Label); } }
void AnnotEdge() { const char *InputFileName = RequiredValueOpt("annotedge"); const char *RepeatFileName = RequiredValueOpt("rep"); const char *OutputFileName = RequiredValueOpt("out"); ProgressStart("Reading repeat file"); int RepCount; RepData *Reps = ReadReps(RepeatFileName, &RepCount); ProgressDone(); Progress("%d records", RepCount); FILE *fInput = OpenStdioFile(InputFileName); FILE *fOutput = OpenStdioFile(OutputFileName, FILEIO_MODE_WriteOnly); ProgressStart("Transferring annotation"); GFFRecord Rec; while (GetNextGFFRecord(fInput, Rec)) { const bool Rev = (Rec.Strand == '-'); const char *Annot = MakeAnnotEdge(Rec.SeqName, Rec.Start-1, Rec.End-1, Rev, Reps, RepCount); fprintf(fOutput, "%s\t%s\t%s\t%d\t%d\t%.3g\t%c", // 0 1 2 3 4 5 6 Rec.SeqName, // 0 Rec.Source, // 1 Rec.Feature, // 2 Rec.Start, // 3 Rec.End, // 4 Rec.Score, // 5 Rec.Strand); // 6 if (-1 == Rec.Frame) fprintf(fOutput, "\t."); else fprintf(fOutput, "\t%d", Rec.Frame); fprintf(fOutput, "\t%s ; Annot \"%s\"\n", Rec.Attrs, Annot); } fclose(fInput); fclose(fOutput); ProgressDone(); }
// Loads a TRS file into a newly allocated array.
//   FileName     file to read
//   ptrTRSCount  receives the value returned by ReadTRSPass1, which is also
//                the number of array elements allocated
// Returns the allocated array after it has been handed to ReadTRSPass2.
TRSData *ReadTRS(const char *FileName, int *ptrTRSCount)
{
    FILE *fInput = OpenStdioFile(FileName);

    // Two passes over the same handle: the first yields the record count
    // used to size the array, the second receives the array
    // (presumably to fill it -- see ReadTRSPass2).
    const int RecordCount = ReadTRSPass1(fInput);
    TRSData *Records = all(TRSData, RecordCount);
    ReadTRSPass2(fInput, Records);

    fclose(fInput);

    *ptrTRSCount = RecordCount;
    return Records;
}
// Loads a motif file into a newly allocated array.
//   FileName       file to read
//   ptrMotifCount  receives the value returned by ReadMotifPass1, which is
//                  also the number of array elements allocated
// Returns the allocated array after it has been handed to ReadMotifPass2.
MotifData *ReadMotif(const char *FileName, int *ptrMotifCount)
{
    FILE *fInput = OpenStdioFile(FileName);

    // Two passes over the same handle: the first yields the record count
    // used to size the array, the second receives the array
    // (presumably to fill it -- see ReadMotifPass2).
    const int RecordCount = ReadMotifPass1(fInput);
    MotifData *Records = all(MotifData, RecordCount);
    ReadMotifPass2(fInput, Records);

    fclose(fInput);

    *ptrMotifCount = RecordCount;
    return Records;
}
// Loads a repeat file into a newly allocated array.
//   FileName     file to read
//   ptrRepCount  receives the value returned by ReadRepsPass1, which is also
//                the number of array elements allocated
// Returns the allocated array after it has been handed to ReadRepsPass2.
RepData *ReadReps(const char *FileName, int *ptrRepCount)
{
    FILE *fInput = OpenStdioFile(FileName);

    // Two passes over the same handle: the first yields the record count
    // used to size the array, the second receives the array
    // (presumably to fill it -- see ReadRepsPass2).
    const int RecordCount = ReadRepsPass1(fInput);
    RepData *Records = all(RepData, RecordCount);
    ReadRepsPass2(fInput, Records);

    fclose(fInput);

    *ptrRepCount = RecordCount;
    return Records;
}
void Tan() { // Image file annotated with from-to pile indexes // Produced by: // piler2 -trs banded_hits.gff -images mainband_images.gff const char *HitFileName = RequiredValueOpt("tan"); const char *OutFileName = RequiredValueOpt("out"); const char *PyramidFileName = ValueOpt("pyramid"); const char *MotifFileName = ValueOpt("motif"); const char *strMinHits = ValueOpt("minhits"); const char *strMaxMargin = ValueOpt("maxmargin"); const char *strMinRatio = ValueOpt("minratio"); if (0 != strMinHits) MIN_HIT_COUNT = atoi(strMinHits); if (0 != strMaxMargin) MAX_FRACT_MARGIN = atof(strMaxMargin); if (0 != strMinRatio) MIN_RATIO = atof(strMinRatio); FILE *fInput = OpenStdioFile(HitFileName); ProgressStart("Initialize piles"); GFFRecord Rec; int HitCount = 0; while (GetNextGFFRecord(fInput, Rec)) { if (0 != strcmp(Rec.Feature, "hit")) continue; int QueryPileIndex = -1; int TargetPileIndex = -1; ParsePilesAttrs(Rec.Attrs, &QueryPileIndex, &TargetPileIndex); if (QueryPileIndex != TargetPileIndex) continue; char TargetLabel[128]; int TargetStart; int TargetEnd; ParseTargetAttrs(Rec.Attrs, TargetLabel, sizeof(TargetLabel), &TargetStart, &TargetEnd); if (0 != strcmp(Rec.SeqName, TargetLabel)) Quit("Labels don't match"); const int QueryFrom = Rec.Start - 1; const int QueryTo = Rec.End - 1; const int TargetFrom = TargetStart - 1; const int TargetTo = TargetEnd - 1; const bool Rev = (Rec.Strand == '-'); AddHit(QueryPileIndex, Rec.SeqName, QueryFrom, QueryTo, TargetFrom, TargetTo, Rev); ++HitCount; } ProgressDone(); Progress("%d hits, %d piles", HitCount, PileCount); ProgressStart("Allocate piles"); for (int PileIndex = 0; PileIndex < PileCount; ++PileIndex) { TanPile &Pile = Piles[PileIndex]; Pile.Hits = all(HitData, Pile.HitCount); Pile.HitCount = 0; } ProgressDone(); ProgressStart("Assign hits to piles"); Rewind(fInput); while (GetNextGFFRecord(fInput, Rec)) { if (0 != strcmp(Rec.Feature, "hit")) continue; int QueryPileIndex = -1; int TargetPileIndex = -1; 
ParsePilesAttrs(Rec.Attrs, &QueryPileIndex, &TargetPileIndex); if (QueryPileIndex != TargetPileIndex) continue; char TargetLabel[128]; int TargetStart; int TargetEnd; ParseTargetAttrs(Rec.Attrs, TargetLabel, sizeof(TargetLabel), &TargetStart, &TargetEnd); if (0 != strcmp(Rec.SeqName, TargetLabel)) Quit("Labels don't match"); const int QueryFrom = Rec.Start - 1; const int QueryTo = Rec.End - 1; const int TargetFrom = TargetStart - 1; const int TargetTo = TargetEnd - 1; const bool Rev = (Rec.Strand == '-'); AssignHit(QueryPileIndex, Rec.SeqName, QueryFrom, QueryTo, TargetFrom, TargetTo, Rev); } ProgressDone(); fOut = OpenStdioFile(OutFileName, FILEIO_MODE_WriteOnly); fPyramid = (0 == PyramidFileName ? 0 : OpenStdioFile(PyramidFileName, FILEIO_MODE_WriteOnly)); fMotif = (0 == PyramidFileName ? 0 : OpenStdioFile(MotifFileName, FILEIO_MODE_WriteOnly)); ProgressStart("Find pyramids"); for (int PileIndex = 0; PileIndex < PileCount; ++PileIndex) FindPyramids(PileIndex); int PyramidCount = PyramidIndex; ProgressDone(); Progress("%d pyramids", PyramidCount); }
void TR() { #if defined(DEBUG) && defined(_MSC_VER) _CrtSetDbgFlag(0); // too expensive #endif const char *HitFileName = RequiredValueOpt("tr"); const char *OutFileName = RequiredValueOpt("out"); const char *CandFileName = ValueOpt("cand"); const char *strMinTrSpacing = ValueOpt("mintrspacing"); const char *strMaxTrSpacing = ValueOpt("maxtrspacing"); const char *strMinTrLength = ValueOpt("mintrlength"); const char *strMaxTrLength = ValueOpt("minspacingratio"); const char *strMinFam = ValueOpt("minfam"); const char *strMinHitRatio = ValueOpt("minhitratio"); const char *strMinDistPairs = ValueOpt("mindistpairs"); if (0 != strMinTrSpacing) MIN_LENGTH_LINE = atoi(strMinTrSpacing); if (0 != strMaxTrSpacing) MAX_LENGTH_LINE = atoi(strMaxTrSpacing); if (0 != strMinTrLength) MIN_LENGTH_LTR = atoi(strMinTrLength); if (0 != strMaxTrLength) MAX_LENGTH_LTR = atoi(strMaxTrLength); if (0 != strMinFam) MIN_FAM_SIZE = atoi(strMinFam); if (0 != strMinHitRatio) MIN_HIT_LENGTH_RATIO = atoi(strMinHitRatio); if (0 != strMinDistPairs) MIN_DIST_EDGE = atoi(strMinDistPairs); FILE *fHit = OpenStdioFile(HitFileName, FILEIO_MODE_ReadOnly); ProgressStart("Index hits"); GLIX HitGlix; HitGlix.Init(); HitGlix.FromGFFFile(fHit); HitGlix.MakeGlobalToLocalIndex(); ProgressDone(); const int GlobalLength = HitGlix.GetGlobalLength(); IIX IntervalIndex; IntervalIndex.Init(GlobalLength); ProgressStart("Find candidate TRs"); Rewind(fHit); GFFRecord Rec; while (GetNextGFFRecord(fHit, Rec)) { HitData Hit; GFFRecordToHit(HitGlix, Rec, Hit); if (IsCandLTR(Hit)) AddCand(Hit, IntervalIndex); } ProgressDone(); Progress("%d candidates", CandCount); if (0 != CandFileName) { ProgressStart("Write candidates"); FILE *fCand = OpenStdioFile(CandFileName, FILEIO_MODE_WriteOnly); WriteCands(fCand, HitGlix); ProgressDone(); } ProgressStart("Make graph"); Rewind(fHit); while (GetNextGFFRecord(fHit, Rec)) { HitData Hit; GFFRecordToHit(HitGlix, Rec, Hit); FindEdges(Hit, HitGlix, IntervalIndex); } fclose(fHit); fHit = 0; 
ProgressDone(); Progress("%d edges", (int) Edges.size()); ProgressStart("Find families"); FamList Fams; FindConnectedComponents(Edges, Fams, MIN_FAM_SIZE); ProgressDone(); Progress("%d families", (int) Fams.size()); FILE *fOut = OpenStdioFile(OutFileName, FILEIO_MODE_WriteOnly); WriteOutputFile(fOut, HitGlix, Fams); }