void TRS2Fasta() { const char *TRSFileName = RequiredValueOpt("trs2fasta"); const char *SeqFileName = RequiredValueOpt("seq"); const char *Path = ValueOpt("path"); const char *strMaxFam = ValueOpt("maxfam"); const char *Prefix = ValueOpt("prefix"); int MaxFam = DEFAULT_MAX_FAM; if (strMaxFam != 0) MaxFam = atoi(strMaxFam); if (0 == Path) Path = "."; ProgressStart("Reading seq file"); int SeqLength; const char *Seq = ReadMFA(SeqFileName, &SeqLength); ProgressDone(); Progress("Seq length %d bases, %.3g Mb", SeqLength, SeqLength/1e6); ProgressStart("Read TRS file"); int TRSCount; TRSData *TRSs = ReadTRS(TRSFileName, &TRSCount); ProgressDone(); Progress("%d records", TRSCount); ProgressStart("Sorting by family"); qsort((void *) TRSs, TRSCount, sizeof(TRSData), CmpTRS); ProgressDone(); FILE *f = 0; int CurrentFamily = -1; int MemberCount = 0; for (int TRSIndex = 0; TRSIndex < TRSCount; ++TRSIndex) { const TRSData &TRS = TRSs[TRSIndex]; if (TRS.FamIndex != CurrentFamily) { if (f != 0) fclose(f); char *FastaFileName = FamFileName(Path, TRS.FamIndex, TRS.SuperFamIndex); f = OpenStdioFile(FastaFileName, FILEIO_MODE_WriteOnly); CurrentFamily = TRS.FamIndex; MemberCount = 0; } ++MemberCount; if (MemberCount > MaxFam) continue; const int From = ContigToGlobal(TRS.ContigFrom, TRS.ContigLabel); const int Length = TRS.ContigTo - TRS.ContigFrom + 1; char *Label = TRSLabel(Prefix, TRS); WriteFasta(f, Seq + From, Length, Label, TRS.Rev); freemem(Label); } }
void Tanmotif2Fasta() { const char *MotifFileName = RequiredValueOpt("tanmotif2fasta"); const char *SeqFileName = RequiredValueOpt("seq"); const char *Path = ValueOpt("path"); const char *strMaxFam = ValueOpt("maxfam"); const char *Prefix = ValueOpt("prefix"); int MaxFam = DEFAULT_MAX_FAM; if (strMaxFam != 0) MaxFam = atoi(strMaxFam); if (0 == Path) Path = "."; ProgressStart("Reading seq file"); int SeqLength; const char *Seq = ReadMFA(SeqFileName, &SeqLength); ProgressDone(); Progress("Seq length %d bases, %.3g Mb", SeqLength, SeqLength/1e6); ProgressStart("Read Motif file"); int MotifCount; MotifData *Motifs = ReadMotif(MotifFileName, &MotifCount); ProgressDone(); Progress("%d records", MotifCount); ProgressStart("Sorting by family"); qsort((void *) Motifs, MotifCount, sizeof(MotifData), CmpMotif); ProgressDone(); FILE *f = 0; int CurrentFamily = -1; int MemberCount = 0; for (int MotifIndex = 0; MotifIndex < MotifCount; ++MotifIndex) { const MotifData &Motif = Motifs[MotifIndex]; if (Motif.FamIndex != CurrentFamily) { if (f != 0) fclose(f); char *FastaFileName = FamFileName(Path, Motif.FamIndex); f = OpenStdioFile(FastaFileName, FILEIO_MODE_WriteOnly); CurrentFamily = Motif.FamIndex; MemberCount = 0; } ++MemberCount; if (MemberCount > MaxFam) continue; const int From = ContigToGlobal(Motif.ContigFrom, Motif.ContigLabel); const int Length = Motif.ContigTo - Motif.ContigFrom + 1; char *Label = MotifLabel(Prefix, Motif); WriteFasta(f, Seq + From, Length, Label, false); freemem(Label); } }
void AnnotEdge() { const char *InputFileName = RequiredValueOpt("annotedge"); const char *RepeatFileName = RequiredValueOpt("rep"); const char *OutputFileName = RequiredValueOpt("out"); ProgressStart("Reading repeat file"); int RepCount; RepData *Reps = ReadReps(RepeatFileName, &RepCount); ProgressDone(); Progress("%d records", RepCount); FILE *fInput = OpenStdioFile(InputFileName); FILE *fOutput = OpenStdioFile(OutputFileName, FILEIO_MODE_WriteOnly); ProgressStart("Transferring annotation"); GFFRecord Rec; while (GetNextGFFRecord(fInput, Rec)) { const bool Rev = (Rec.Strand == '-'); const char *Annot = MakeAnnotEdge(Rec.SeqName, Rec.Start-1, Rec.End-1, Rev, Reps, RepCount); fprintf(fOutput, "%s\t%s\t%s\t%d\t%d\t%.3g\t%c", // 0 1 2 3 4 5 6 Rec.SeqName, // 0 Rec.Source, // 1 Rec.Feature, // 2 Rec.Start, // 3 Rec.End, // 4 Rec.Score, // 5 Rec.Strand); // 6 if (-1 == Rec.Frame) fprintf(fOutput, "\t."); else fprintf(fOutput, "\t%d", Rec.Frame); fprintf(fOutput, "\t%s ; Annot \"%s\"\n", Rec.Attrs, Annot); } fclose(fInput); fclose(fOutput); ProgressDone(); }
void Tan() { // Image file annotated with from-to pile indexes // Produced by: // piler2 -trs banded_hits.gff -images mainband_images.gff const char *HitFileName = RequiredValueOpt("tan"); const char *OutFileName = RequiredValueOpt("out"); const char *PyramidFileName = ValueOpt("pyramid"); const char *MotifFileName = ValueOpt("motif"); const char *strMinHits = ValueOpt("minhits"); const char *strMaxMargin = ValueOpt("maxmargin"); const char *strMinRatio = ValueOpt("minratio"); if (0 != strMinHits) MIN_HIT_COUNT = atoi(strMinHits); if (0 != strMaxMargin) MAX_FRACT_MARGIN = atof(strMaxMargin); if (0 != strMinRatio) MIN_RATIO = atof(strMinRatio); FILE *fInput = OpenStdioFile(HitFileName); ProgressStart("Initialize piles"); GFFRecord Rec; int HitCount = 0; while (GetNextGFFRecord(fInput, Rec)) { if (0 != strcmp(Rec.Feature, "hit")) continue; int QueryPileIndex = -1; int TargetPileIndex = -1; ParsePilesAttrs(Rec.Attrs, &QueryPileIndex, &TargetPileIndex); if (QueryPileIndex != TargetPileIndex) continue; char TargetLabel[128]; int TargetStart; int TargetEnd; ParseTargetAttrs(Rec.Attrs, TargetLabel, sizeof(TargetLabel), &TargetStart, &TargetEnd); if (0 != strcmp(Rec.SeqName, TargetLabel)) Quit("Labels don't match"); const int QueryFrom = Rec.Start - 1; const int QueryTo = Rec.End - 1; const int TargetFrom = TargetStart - 1; const int TargetTo = TargetEnd - 1; const bool Rev = (Rec.Strand == '-'); AddHit(QueryPileIndex, Rec.SeqName, QueryFrom, QueryTo, TargetFrom, TargetTo, Rev); ++HitCount; } ProgressDone(); Progress("%d hits, %d piles", HitCount, PileCount); ProgressStart("Allocate piles"); for (int PileIndex = 0; PileIndex < PileCount; ++PileIndex) { TanPile &Pile = Piles[PileIndex]; Pile.Hits = all(HitData, Pile.HitCount); Pile.HitCount = 0; } ProgressDone(); ProgressStart("Assign hits to piles"); Rewind(fInput); while (GetNextGFFRecord(fInput, Rec)) { if (0 != strcmp(Rec.Feature, "hit")) continue; int QueryPileIndex = -1; int TargetPileIndex = -1; 
ParsePilesAttrs(Rec.Attrs, &QueryPileIndex, &TargetPileIndex); if (QueryPileIndex != TargetPileIndex) continue; char TargetLabel[128]; int TargetStart; int TargetEnd; ParseTargetAttrs(Rec.Attrs, TargetLabel, sizeof(TargetLabel), &TargetStart, &TargetEnd); if (0 != strcmp(Rec.SeqName, TargetLabel)) Quit("Labels don't match"); const int QueryFrom = Rec.Start - 1; const int QueryTo = Rec.End - 1; const int TargetFrom = TargetStart - 1; const int TargetTo = TargetEnd - 1; const bool Rev = (Rec.Strand == '-'); AssignHit(QueryPileIndex, Rec.SeqName, QueryFrom, QueryTo, TargetFrom, TargetTo, Rev); } ProgressDone(); fOut = OpenStdioFile(OutFileName, FILEIO_MODE_WriteOnly); fPyramid = (0 == PyramidFileName ? 0 : OpenStdioFile(PyramidFileName, FILEIO_MODE_WriteOnly)); fMotif = (0 == PyramidFileName ? 0 : OpenStdioFile(MotifFileName, FILEIO_MODE_WriteOnly)); ProgressStart("Find pyramids"); for (int PileIndex = 0; PileIndex < PileCount; ++PileIndex) FindPyramids(PileIndex); int PyramidCount = PyramidIndex; ProgressDone(); Progress("%d pyramids", PyramidCount); }
void TR() { #if defined(DEBUG) && defined(_MSC_VER) _CrtSetDbgFlag(0); // too expensive #endif const char *HitFileName = RequiredValueOpt("tr"); const char *OutFileName = RequiredValueOpt("out"); const char *CandFileName = ValueOpt("cand"); const char *strMinTrSpacing = ValueOpt("mintrspacing"); const char *strMaxTrSpacing = ValueOpt("maxtrspacing"); const char *strMinTrLength = ValueOpt("mintrlength"); const char *strMaxTrLength = ValueOpt("minspacingratio"); const char *strMinFam = ValueOpt("minfam"); const char *strMinHitRatio = ValueOpt("minhitratio"); const char *strMinDistPairs = ValueOpt("mindistpairs"); if (0 != strMinTrSpacing) MIN_LENGTH_LINE = atoi(strMinTrSpacing); if (0 != strMaxTrSpacing) MAX_LENGTH_LINE = atoi(strMaxTrSpacing); if (0 != strMinTrLength) MIN_LENGTH_LTR = atoi(strMinTrLength); if (0 != strMaxTrLength) MAX_LENGTH_LTR = atoi(strMaxTrLength); if (0 != strMinFam) MIN_FAM_SIZE = atoi(strMinFam); if (0 != strMinHitRatio) MIN_HIT_LENGTH_RATIO = atoi(strMinHitRatio); if (0 != strMinDistPairs) MIN_DIST_EDGE = atoi(strMinDistPairs); FILE *fHit = OpenStdioFile(HitFileName, FILEIO_MODE_ReadOnly); ProgressStart("Index hits"); GLIX HitGlix; HitGlix.Init(); HitGlix.FromGFFFile(fHit); HitGlix.MakeGlobalToLocalIndex(); ProgressDone(); const int GlobalLength = HitGlix.GetGlobalLength(); IIX IntervalIndex; IntervalIndex.Init(GlobalLength); ProgressStart("Find candidate TRs"); Rewind(fHit); GFFRecord Rec; while (GetNextGFFRecord(fHit, Rec)) { HitData Hit; GFFRecordToHit(HitGlix, Rec, Hit); if (IsCandLTR(Hit)) AddCand(Hit, IntervalIndex); } ProgressDone(); Progress("%d candidates", CandCount); if (0 != CandFileName) { ProgressStart("Write candidates"); FILE *fCand = OpenStdioFile(CandFileName, FILEIO_MODE_WriteOnly); WriteCands(fCand, HitGlix); ProgressDone(); } ProgressStart("Make graph"); Rewind(fHit); while (GetNextGFFRecord(fHit, Rec)) { HitData Hit; GFFRecordToHit(HitGlix, Rec, Hit); FindEdges(Hit, HitGlix, IntervalIndex); } fclose(fHit); fHit = 0; 
ProgressDone(); Progress("%d edges", (int) Edges.size()); ProgressStart("Find families"); FamList Fams; FindConnectedComponents(Edges, Fams, MIN_FAM_SIZE); ProgressDone(); Progress("%d families", (int) Fams.size()); FILE *fOut = OpenStdioFile(OutFileName, FILEIO_MODE_WriteOnly); WriteOutputFile(fOut, HitGlix, Fams); }
void TRS() { const char *InputFileName = RequiredValueOpt("trs"); const char *OutputFileName = ValueOpt("out"); const char *PilesFileName = ValueOpt("piles"); const char *ImagesFileName = ValueOpt("images"); const char *strMinFamSize = ValueOpt("famsize"); const char *strMaxLengthDiffPct = ValueOpt("maxlengthdiffpct"); g_paramSingleHitCoverage = !FlagOpt("multihit"); if (0 == OutputFileName && 0 == PilesFileName && 0 == ImagesFileName) Quit("No output file specified, must be at least one of -out, -piles, -images"); if (0 != strMinFamSize) g_paramMinFamSize = atoi(strMinFamSize); if (0 != strMaxLengthDiffPct) g_paramMaxLengthDiffPct = atoi(strMaxLengthDiffPct); Log("singlehit=%s famsize=%d maxlengthdiffpct=%d\n", g_paramSingleHitCoverage ? "True" : "False", g_paramMinFamSize, g_paramMaxLengthDiffPct); ProgressStart("Read hit file"); int HitCount; int SeqLength; HitData *Hits = ReadHits(InputFileName, &HitCount, &SeqLength); ProgressDone(); Progress("%d hits", HitCount); SeqLengthChunks = (SeqLength + CHUNK_LENGTH - 1)/CHUNK_LENGTH; const int BitVectorLength = (SeqLengthChunks + BITS_PER_INT - 1)/BITS_PER_INT; int *CopyCount = all(int, BitVectorLength); zero(CopyCount, int, BitVectorLength); ProgressStart("Compute copy counts"); for (int i = 0; i < HitCount; ++i) IncCopyCount(CopyCount, Hits[i]); ProgressDone(); ProgressStart("Identify piles"); PILE_INDEX_TYPE *PileIndexes = IdentifyPiles(CopyCount); ProgressDone(); Progress("%d stacks", PileCount); freemem(CopyCount); CopyCount = 0; CreatePiles(Hits, HitCount, PileIndexes); if (0 != ImagesFileName) { ProgressStart("Writing images file"); WriteImages(ImagesFileName, Hits, HitCount, PileIndexes); ProgressDone(); } freemem(Hits); Hits = 0; if (0 != PilesFileName) { ProgressStart("Writing piles file"); WritePiles(PilesFileName, Piles, PileCount); ProgressDone(); } freemem(PileIndexes); PileIndexes = 0; if (0 == OutputFileName) return; ProgressStart("Find edges"); EdgeList Edges; FindGlobalEdges(Edges, MaxImageCount); 
ProgressDone(); Progress("%d edges", (int) Edges.size()); ProgressStart("Find families"); FamList Fams; FindConnectedComponents(Edges, Fams, g_paramMinFamSize); AssignFamsToPiles(Fams); ProgressDone(); Progress("%d families", (int) Fams.size()); ProgressStart("Find superfamilies"); EdgeList SuperEdges; FindSuperFamEdges(Fams, SuperEdges); FamList SuperFams; FindConnectedComponents(SuperEdges, SuperFams, 1); FindSingletonSuperFams(Fams, SuperFams); AssignSuperFamsToPiles(Fams, SuperFams); ProgressDone(); Progress("%d superfamilies", (int) SuperFams.size()); ProgressStart("Write TRS output file"); WriteTRSFile(OutputFileName, Piles, PileCount); ProgressDone(); }
static void CreatePiles(const HitData *Hits, int HitCount, PILE_INDEX_TYPE *PileIndexes) { Piles = all(PileData, PileCount); zero(Piles, PileData, PileCount); for (int i = 0; i < PileCount; ++i) { Piles[i].FamIndex = -1; Piles[i].SuperFamIndex = -1; Piles[i].Rev = -1; } // Count images in stack ProgressStart("Create stacks: count images"); for (int HitIndex = 0; HitIndex < HitCount; ++HitIndex) { const HitData &Hit = Hits[HitIndex]; int Pos = Hit.QueryFrom/CHUNK_LENGTH; PILE_INDEX_TYPE PileIndex = PileIndexes[Pos]; assert(PileIndex == PileIndexes[Hit.QueryTo/CHUNK_LENGTH]); assert(PileIndex >= 0 && PileIndex < PileCount); ++(Piles[PileIndex].ImageCount); Pos = Hit.TargetFrom/CHUNK_LENGTH; PileIndex = PileIndexes[Pos]; assert(PileIndex >= 0 && PileIndex < PileCount); assert(PileIndex == PileIndexes[Hit.TargetTo/CHUNK_LENGTH]); ++(Piles[PileIndex].ImageCount); } ProgressDone(); // Allocate memory for image list int TotalImageCount = 0; ProgressStart("Create stacks: allocate image memory"); for (int PileIndex = 0; PileIndex < PileCount; ++PileIndex) { PileData &Pile = Piles[PileIndex]; const int ImageCount = Pile.ImageCount; TotalImageCount += ImageCount; assert(ImageCount > 0); Pile.Images = all(PileImageData, ImageCount); } ProgressDone(); // Build image list ProgressStart("Create stacks: build image list"); for (int PileIndex = 0; PileIndex < PileCount; ++PileIndex) { PileData &Pile = Piles[PileIndex]; Pile.ImageCount = 0; Pile.From = -1; Pile.To = -1; } for (int HitIndex = 0; HitIndex < HitCount; ++HitIndex) { const HitData &Hit = Hits[HitIndex]; const bool Rev = Hit.Rev; const int Length1 = Hit.QueryTo - Hit.QueryFrom; const int Length2 = Hit.TargetTo - Hit.TargetFrom; const int From1 = Hit.QueryFrom; const int From2 = Hit.TargetFrom; const int To1 = Hit.QueryTo; const int To2 = Hit.TargetTo; const int Pos1 = From1/CHUNK_LENGTH; const int Pos2 = From2/CHUNK_LENGTH; PILE_INDEX_TYPE PileIndex1 = PileIndexes[Pos1]; PILE_INDEX_TYPE PileIndex2 = PileIndexes[Pos2]; 
assert(PileIndex1 == PileIndexes[(From1 + Length1 - 1)/CHUNK_LENGTH]); assert(PileIndex1 >= 0 && PileIndex1 < PileCount); assert(PileIndex2 == PileIndexes[(From2 + Length2 - 1)/CHUNK_LENGTH]); assert(PileIndex2 >= 0 && PileIndex2 < PileCount); PileData &Pile1 = Piles[PileIndex1]; PileImageData &Image1 = Pile1.Images[Pile1.ImageCount++]; Image1.SILength = Length2; Image1.SIPile = PileIndex2; Image1.SIRev = Rev; PileData &Pile2 = Piles[PileIndex2]; PileImageData &Image2 = Pile2.Images[Pile2.ImageCount++]; Image2.SILength = Length1; Image2.SIPile = PileIndex1; Image2.SIRev = Rev; if (Pile1.From == -1 || From1 < Pile1.From) Pile1.From = From1; if (Pile1.To == -1 || To1 > Pile1.To) Pile1.To = To1; if (Pile2.From == -1 || From2 < Pile2.From) Pile2.From = From2; if (Pile2.To == -1 || To2 > Pile2.To) Pile2.To = To2; if (Pile1.ImageCount > MaxImageCount) MaxImageCount = Pile1.ImageCount; if (Pile2.ImageCount > MaxImageCount) MaxImageCount = Pile2.ImageCount; } ProgressDone(); }
// Calls ProgressDone() and returns 0. Neither parameter is examined.
// NOTE(review): the WPARAM/LPARAM signature suggests this is a window-message
// handler; the message it is mapped to is not visible here.
LRESULT CPIMDIFrameWndEx::OnProgressDone(WPARAM wParam, LPARAM lParam)
{
    (void) wParam;
    (void) lParam;

    ProgressDone();

    const LRESULT Result = 0;
    return Result;
}