void TRS2Fasta() { const char *TRSFileName = RequiredValueOpt("trs2fasta"); const char *SeqFileName = RequiredValueOpt("seq"); const char *Path = ValueOpt("path"); const char *strMaxFam = ValueOpt("maxfam"); const char *Prefix = ValueOpt("prefix"); int MaxFam = DEFAULT_MAX_FAM; if (strMaxFam != 0) MaxFam = atoi(strMaxFam); if (0 == Path) Path = "."; ProgressStart("Reading seq file"); int SeqLength; const char *Seq = ReadMFA(SeqFileName, &SeqLength); ProgressDone(); Progress("Seq length %d bases, %.3g Mb", SeqLength, SeqLength/1e6); ProgressStart("Read TRS file"); int TRSCount; TRSData *TRSs = ReadTRS(TRSFileName, &TRSCount); ProgressDone(); Progress("%d records", TRSCount); ProgressStart("Sorting by family"); qsort((void *) TRSs, TRSCount, sizeof(TRSData), CmpTRS); ProgressDone(); FILE *f = 0; int CurrentFamily = -1; int MemberCount = 0; for (int TRSIndex = 0; TRSIndex < TRSCount; ++TRSIndex) { const TRSData &TRS = TRSs[TRSIndex]; if (TRS.FamIndex != CurrentFamily) { if (f != 0) fclose(f); char *FastaFileName = FamFileName(Path, TRS.FamIndex, TRS.SuperFamIndex); f = OpenStdioFile(FastaFileName, FILEIO_MODE_WriteOnly); CurrentFamily = TRS.FamIndex; MemberCount = 0; } ++MemberCount; if (MemberCount > MaxFam) continue; const int From = ContigToGlobal(TRS.ContigFrom, TRS.ContigLabel); const int Length = TRS.ContigTo - TRS.ContigFrom + 1; char *Label = TRSLabel(Prefix, TRS); WriteFasta(f, Seq + From, Length, Label, TRS.Rev); freemem(Label); } }
void Tanmotif2Fasta() { const char *MotifFileName = RequiredValueOpt("tanmotif2fasta"); const char *SeqFileName = RequiredValueOpt("seq"); const char *Path = ValueOpt("path"); const char *strMaxFam = ValueOpt("maxfam"); const char *Prefix = ValueOpt("prefix"); int MaxFam = DEFAULT_MAX_FAM; if (strMaxFam != 0) MaxFam = atoi(strMaxFam); if (0 == Path) Path = "."; ProgressStart("Reading seq file"); int SeqLength; const char *Seq = ReadMFA(SeqFileName, &SeqLength); ProgressDone(); Progress("Seq length %d bases, %.3g Mb", SeqLength, SeqLength/1e6); ProgressStart("Read Motif file"); int MotifCount; MotifData *Motifs = ReadMotif(MotifFileName, &MotifCount); ProgressDone(); Progress("%d records", MotifCount); ProgressStart("Sorting by family"); qsort((void *) Motifs, MotifCount, sizeof(MotifData), CmpMotif); ProgressDone(); FILE *f = 0; int CurrentFamily = -1; int MemberCount = 0; for (int MotifIndex = 0; MotifIndex < MotifCount; ++MotifIndex) { const MotifData &Motif = Motifs[MotifIndex]; if (Motif.FamIndex != CurrentFamily) { if (f != 0) fclose(f); char *FastaFileName = FamFileName(Path, Motif.FamIndex); f = OpenStdioFile(FastaFileName, FILEIO_MODE_WriteOnly); CurrentFamily = Motif.FamIndex; MemberCount = 0; } ++MemberCount; if (MemberCount > MaxFam) continue; const int From = ContigToGlobal(Motif.ContigFrom, Motif.ContigLabel); const int Length = Motif.ContigTo - Motif.ContigFrom + 1; char *Label = MotifLabel(Prefix, Motif); WriteFasta(f, Seq + From, Length, Label, false); freemem(Label); } }
// Callback invoked with the item whose extraction is starting.
//
// The entire body is compiled out by "#if 0", so this function currently does
// nothing. The disabled code uses CPython API calls: it restores the saved
// thread state, then either prints an " Extracting <name>" line (when
// pStartExtractFunc is Py_None and bSilent is not set) or calls
// pStartExtractFunc with the item's name and type, and finally saves the
// thread state again.
hlVoid ExtractItemStartCallback(HLDirectoryItem *pItem)
{
#if 0
	PyEval_RestoreThread(g_extract_save);

	if( pStartExtractFunc == Py_None )
	{
		if(!bSilent)
		{
			if(hlItemGetType(pItem) == HL_ITEM_FILE)
			{
				// Files get a progress display on the same line.
				PySys_WriteStdout(" Extracting %s: ", hlItemGetName(pItem));
				ProgressStart();
			}
			else
			{
				PySys_WriteStdout(" Extracting %s:\n", hlItemGetName(pItem));
			}
		}
	}
	else
	{
		PyEval_CallFunction(pStartExtractFunc, "si", hlItemGetName(pItem), hlItemGetType(pItem));
	}

	g_extract_save = PyEval_SaveThread();
#endif // 0
}
void AnnotEdge() { const char *InputFileName = RequiredValueOpt("annotedge"); const char *RepeatFileName = RequiredValueOpt("rep"); const char *OutputFileName = RequiredValueOpt("out"); ProgressStart("Reading repeat file"); int RepCount; RepData *Reps = ReadReps(RepeatFileName, &RepCount); ProgressDone(); Progress("%d records", RepCount); FILE *fInput = OpenStdioFile(InputFileName); FILE *fOutput = OpenStdioFile(OutputFileName, FILEIO_MODE_WriteOnly); ProgressStart("Transferring annotation"); GFFRecord Rec; while (GetNextGFFRecord(fInput, Rec)) { const bool Rev = (Rec.Strand == '-'); const char *Annot = MakeAnnotEdge(Rec.SeqName, Rec.Start-1, Rec.End-1, Rev, Reps, RepCount); fprintf(fOutput, "%s\t%s\t%s\t%d\t%d\t%.3g\t%c", // 0 1 2 3 4 5 6 Rec.SeqName, // 0 Rec.Source, // 1 Rec.Feature, // 2 Rec.Start, // 3 Rec.End, // 4 Rec.Score, // 5 Rec.Strand); // 6 if (-1 == Rec.Frame) fprintf(fOutput, "\t."); else fprintf(fOutput, "\t%d", Rec.Frame); fprintf(fOutput, "\t%s ; Annot \"%s\"\n", Rec.Attrs, Annot); } fclose(fInput); fclose(fOutput); ProgressDone(); }
// Callback invoked with the item whose extraction is starting.
// Prints an " Extracting <name>" line unless bSilent is set. For files the
// line is left open and ProgressStart() is called; for any other item type
// the line is printed via Print() with a bright white colour attribute.
hlVoid ExtractItemStartCallback(HLDirectoryItem *pItem)
{
	if(bSilent)
	{
		return;
	}

	if(hlItemGetType(pItem) != HL_ITEM_FILE)
	{
		Print(FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE | FOREGROUND_INTENSITY, " Extracting %s:\n", hlItemGetName(pItem));
		return;
	}

	printf(" Extracting %s: ", hlItemGetName(pItem));
	ProgressStart();
}
void Tan() { // Image file annotated with from-to pile indexes // Produced by: // piler2 -trs banded_hits.gff -images mainband_images.gff const char *HitFileName = RequiredValueOpt("tan"); const char *OutFileName = RequiredValueOpt("out"); const char *PyramidFileName = ValueOpt("pyramid"); const char *MotifFileName = ValueOpt("motif"); const char *strMinHits = ValueOpt("minhits"); const char *strMaxMargin = ValueOpt("maxmargin"); const char *strMinRatio = ValueOpt("minratio"); if (0 != strMinHits) MIN_HIT_COUNT = atoi(strMinHits); if (0 != strMaxMargin) MAX_FRACT_MARGIN = atof(strMaxMargin); if (0 != strMinRatio) MIN_RATIO = atof(strMinRatio); FILE *fInput = OpenStdioFile(HitFileName); ProgressStart("Initialize piles"); GFFRecord Rec; int HitCount = 0; while (GetNextGFFRecord(fInput, Rec)) { if (0 != strcmp(Rec.Feature, "hit")) continue; int QueryPileIndex = -1; int TargetPileIndex = -1; ParsePilesAttrs(Rec.Attrs, &QueryPileIndex, &TargetPileIndex); if (QueryPileIndex != TargetPileIndex) continue; char TargetLabel[128]; int TargetStart; int TargetEnd; ParseTargetAttrs(Rec.Attrs, TargetLabel, sizeof(TargetLabel), &TargetStart, &TargetEnd); if (0 != strcmp(Rec.SeqName, TargetLabel)) Quit("Labels don't match"); const int QueryFrom = Rec.Start - 1; const int QueryTo = Rec.End - 1; const int TargetFrom = TargetStart - 1; const int TargetTo = TargetEnd - 1; const bool Rev = (Rec.Strand == '-'); AddHit(QueryPileIndex, Rec.SeqName, QueryFrom, QueryTo, TargetFrom, TargetTo, Rev); ++HitCount; } ProgressDone(); Progress("%d hits, %d piles", HitCount, PileCount); ProgressStart("Allocate piles"); for (int PileIndex = 0; PileIndex < PileCount; ++PileIndex) { TanPile &Pile = Piles[PileIndex]; Pile.Hits = all(HitData, Pile.HitCount); Pile.HitCount = 0; } ProgressDone(); ProgressStart("Assign hits to piles"); Rewind(fInput); while (GetNextGFFRecord(fInput, Rec)) { if (0 != strcmp(Rec.Feature, "hit")) continue; int QueryPileIndex = -1; int TargetPileIndex = -1; 
ParsePilesAttrs(Rec.Attrs, &QueryPileIndex, &TargetPileIndex); if (QueryPileIndex != TargetPileIndex) continue; char TargetLabel[128]; int TargetStart; int TargetEnd; ParseTargetAttrs(Rec.Attrs, TargetLabel, sizeof(TargetLabel), &TargetStart, &TargetEnd); if (0 != strcmp(Rec.SeqName, TargetLabel)) Quit("Labels don't match"); const int QueryFrom = Rec.Start - 1; const int QueryTo = Rec.End - 1; const int TargetFrom = TargetStart - 1; const int TargetTo = TargetEnd - 1; const bool Rev = (Rec.Strand == '-'); AssignHit(QueryPileIndex, Rec.SeqName, QueryFrom, QueryTo, TargetFrom, TargetTo, Rev); } ProgressDone(); fOut = OpenStdioFile(OutFileName, FILEIO_MODE_WriteOnly); fPyramid = (0 == PyramidFileName ? 0 : OpenStdioFile(PyramidFileName, FILEIO_MODE_WriteOnly)); fMotif = (0 == PyramidFileName ? 0 : OpenStdioFile(MotifFileName, FILEIO_MODE_WriteOnly)); ProgressStart("Find pyramids"); for (int PileIndex = 0; PileIndex < PileCount; ++PileIndex) FindPyramids(PileIndex); int PyramidCount = PyramidIndex; ProgressDone(); Progress("%d pyramids", PyramidCount); }
// Command-line entry point: opens a package and extracts, validates, lists
// and/or defragments it, or drops into the interactive console.
//
// Return values: 0 on success, 1 if the HLLib version is too old, 2 for
// usage errors (including exceeding MAX_ITEMS), 3 if the package type cannot
// be determined or the package cannot be created or opened.
//
// With exactly one argument, that argument is taken as the package and
// console mode with volatile access is selected.
int main(hlInt argc, hlChar* argv[])
{
	hlUInt i;

	// Arguments.
	hlUInt uiArgumentCount = (hlUInt)argc;
	hlChar *lpPackage = 0;
	hlUInt uiExtractItems = 0;
	hlChar *lpExtractItems[MAX_ITEMS];
	hlUInt uiValidateItems = 0;
	hlChar *lpValidateItems[MAX_ITEMS];
	hlChar *lpList = 0;
	hlBool bDefragment = hlFalse;
	hlChar *lpNCFRootPath = 0;

	hlBool bList = hlFalse;
	hlBool bListFolders = hlFalse;
	hlBool bListFiles = hlFalse;
	FILE *pFile = 0;

	hlBool bConsoleMode = hlFalse;
	hlUInt uiConsoleCommands = 0;
	hlChar *lpConsoleCommands[MAX_ITEMS];
	hlBool bFileMapping = hlFalse;
	hlBool bQuickFileMapping = hlFalse;
	hlBool bVolatileAccess = hlFalse;
	hlBool bOverwriteFiles = hlTrue;
	hlBool bForceDefragment = hlFalse;

	// Package stuff.
	HLPackageType ePackageType = HL_PACKAGE_NONE;
	hlUInt uiPackage = HL_ID_INVALID, uiMode = HL_MODE_INVALID;
	HLDirectoryItem *pItem = 0;

	if(hlGetUnsignedInteger(HL_VERSION) < HL_VERSION_NUMBER)
	{
		printf("Wrong HLLib version: v%s.\n", hlGetString(HL_VERSION));
		return 1;
	}

	// Process switches.
	if(uiArgumentCount == 2)
	{
		// The user just specified a file, drop into console mode.
		lpPackage = argv[1];
		bConsoleMode = hlTrue;
		bVolatileAccess = hlTrue;
	}
	else
	{
		for(i = 1; i < uiArgumentCount; i++)
		{
			if(stricmp(argv[i], "-p") == 0 || stricmp(argv[i], "--package") == 0)
			{
				if(lpPackage == 0 && i + 1 < uiArgumentCount)
				{
					lpPackage = argv[++i];
				}
				else
				{
					PrintUsage();
					return 2;
				}
			}
			else if(stricmp(argv[i], "-d") == 0 || stricmp(argv[i], "--dest") == 0)
			{
				if(*lpDestination == 0 && i + 1 < uiArgumentCount)
				{
					// NOTE(review): unbounded copy; the size of lpDestination
					// is declared outside this function, so no bound is
					// enforced here.
					strcpy(lpDestination, argv[++i]);
				}
				else
				{
					PrintUsage();
					return 2;
				}
			}
			else if(stricmp(argv[i], "-e") == 0 || stricmp(argv[i], "--extract") == 0)
			{
				if(i + 1 < uiArgumentCount)
				{
					if(uiExtractItems == MAX_ITEMS)
					{
						Print(FOREGROUND_RED | FOREGROUND_INTENSITY, "Error loading package:\nMAX_ITEMS\n");
						return 2;
					}
					lpExtractItems[uiExtractItems++] = argv[++i];
				}
				else
				{
					PrintUsage();
					return 2;
				}
			}
			else if(stricmp(argv[i], "-t") == 0 || stricmp(argv[i], "--validate") == 0)
			{
				if(i + 1 < uiArgumentCount)
				{
					if(uiValidateItems == MAX_ITEMS)
					{
						Print(FOREGROUND_RED | FOREGROUND_INTENSITY, "Error loading package:\nMAX_ITEMS\n");
						return 2;
					}
					lpValidateItems[uiValidateItems++] = argv[++i];
				}
				else
				{
					PrintUsage();
					return 2;
				}
			}
			else if(strnicmp(argv[i], "-l", 2) == 0 || stricmp(argv[i], "--list") == 0)
			{
				if(bList)
				{
					PrintUsage();
					return 2;
				}

				bList = hlTrue;

				if(stricmp(argv[i], "-l") == 0 || stricmp(argv[i], "--list") == 0)
				{
					// By default list everything.
					bListFolders = hlTrue;
					bListFiles = hlTrue;
				}
				else
				{
					// List folders and files if specified.
					bListFolders = strcspn(argv[i], "dD") != strlen(argv[i]);
					bListFiles = strcspn(argv[i], "fF") != strlen(argv[i]);
				}

				// Check to see if we need to dump our list to a file.
				if(i + 1 < uiArgumentCount && *argv[i + 1] != '-')
				{
					lpList = argv[++i];
				}
			}
			else if(stricmp(argv[i], "-f") == 0 || stricmp(argv[i], "--defragment") == 0)
			{
				bDefragment = hlTrue;
			}
			else if(stricmp(argv[i], "-n") == 0 || stricmp(argv[i], "--ncfroot") == 0)
			{
				if(lpNCFRootPath == 0 && i + 1 < uiArgumentCount)
				{
					lpNCFRootPath = argv[++i];
				}
				else
				{
					PrintUsage();
					return 2;
				}
			}
			else if(stricmp(argv[i], "-s") == 0 || stricmp(argv[i], "--silent") == 0)
			{
				bSilent = hlTrue;
			}
			else if(stricmp(argv[i], "-c") == 0 || stricmp(argv[i], "--console") == 0)
			{
				bConsoleMode = hlTrue;
			}
			else if(stricmp(argv[i], "-x") == 0 || stricmp(argv[i], "--execute") == 0)
			{
				if(i + 1 < uiArgumentCount)
				{
					if(uiConsoleCommands == MAX_ITEMS)
					{
						Print(FOREGROUND_RED | FOREGROUND_INTENSITY, "Error loading package:\nMAX_ITEMS\n");
						return 2;
					}
					lpConsoleCommands[uiConsoleCommands++] = argv[++i];
				}
				else
				{
					PrintUsage();
					return 2;
				}
			}
			else if(stricmp(argv[i], "-m") == 0 || stricmp(argv[i], "--filemapping") == 0)
			{
				bFileMapping = hlTrue;
			}
			else if(stricmp(argv[i], "-q") == 0 || stricmp(argv[i], "--quick-filemapping") == 0)
			{
				bFileMapping = hlTrue;
				bQuickFileMapping = hlTrue;
			}
			else if(stricmp(argv[i], "-v") == 0 || stricmp(argv[i], "--volatile") == 0)
			{
				bVolatileAccess = hlTrue;
			}
			else if(stricmp(argv[i], "-o") == 0 || stricmp(argv[i], "--overwrite") == 0)
			{
				// This switch turns overwriting off (the default is hlTrue).
				bOverwriteFiles = hlFalse;
			}
			else if(stricmp(argv[i], "-r") == 0 || stricmp(argv[i], "--force-defragment") == 0)
			{
				bDefragment = hlTrue;
				bForceDefragment = hlTrue;
			}
			else
			{
				PrintUsage();
				return 2;
			}
		}
	}

	// Make sure we have something to do.
	if(lpPackage == 0 || (uiExtractItems == 0 && uiValidateItems == 0 && !bList && !bDefragment && !bConsoleMode))
	{
		PrintUsage();
		return 2;
	}

	// If the destination directory is not specified, make it the input directory.
	if(*lpDestination == 0)
	{
		// Pick whichever path separator occurs last. A null result is never
		// compared relationally with a valid pointer (the original did
		// "pForward > pBackward" even when one or both were null).
		const hlChar *pBackslash = strrchr(lpPackage, '\\');
		const hlChar *pSlash = strrchr(lpPackage, '/');
		const hlChar *pEnd = pBackslash;
		if(pSlash != 0 && (pEnd == 0 || pSlash > pEnd))
		{
			pEnd = pSlash;
		}
		if(pEnd != 0)
		{
			// NOTE(review): unbounded with respect to lpDestination's size,
			// which is declared outside this function.
			strncpy(lpDestination, lpPackage, pEnd - lpPackage);
			lpDestination[pEnd - lpPackage] = '\0';
		}
	}

	hlInitialize();

	hlSetBoolean(HL_OVERWRITE_FILES, bOverwriteFiles);
	hlSetBoolean(HL_FORCE_DEFRAGMENT, bForceDefragment);
	hlSetVoid(HL_PROC_EXTRACT_ITEM_START, ExtractItemStartCallback);
	hlSetVoid(HL_PROC_EXTRACT_ITEM_END, ExtractItemEndCallback);
	hlSetVoid(HL_PROC_EXTRACT_FILE_PROGRESS, FileProgressCallback);
	hlSetVoid(HL_PROC_VALIDATE_FILE_PROGRESS, FileProgressCallback);
	hlSetVoid(HL_PROC_DEFRAGMENT_PROGRESS_EX, DefragmentProgressCallback);

	// Get the package type from the filename extension.
	ePackageType = hlGetPackageTypeFromName(lpPackage);

	// If the above fails, try getting the package type from the data at the start of the file.
	if(ePackageType == HL_PACKAGE_NONE)
	{
		pFile = fopen(lpPackage, "rb");
		if(pFile != 0)
		{
			hlByte lpBuffer[HL_DEFAULT_PACKAGE_TEST_BUFFER_SIZE];

			hlUInt uiBufferSize = (hlUInt)fread(lpBuffer, 1, HL_DEFAULT_PACKAGE_TEST_BUFFER_SIZE, pFile);

			ePackageType = hlGetPackageTypeFromMemory(lpBuffer, uiBufferSize);

			fclose(pFile);
			pFile = 0;
		}
	}

	if(ePackageType == HL_PACKAGE_NONE)
	{
		Print(FOREGROUND_RED | FOREGROUND_INTENSITY, "Error loading %s:\nUnsupported package type.\n", lpPackage);

		hlShutdown();
		return 3;
	}

	// Create a package element, the element is allocated by the library and cleaned
	// up by the library. An ID is generated which must be bound to apply operations
	// to the package.
	if(!hlCreatePackage(ePackageType, &uiPackage))
	{
		Print(FOREGROUND_RED | FOREGROUND_INTENSITY, "Error loading %s:\n%s\n", lpPackage, hlGetString(HL_ERROR_SHORT_FORMATED));

		hlShutdown();
		return 3;
	}

	hlBindPackage(uiPackage);

	uiMode = HL_MODE_READ | (bDefragment ? HL_MODE_WRITE : 0);
	uiMode |= !bFileMapping ? HL_MODE_NO_FILEMAPPING : 0;
	uiMode |= bQuickFileMapping ? HL_MODE_QUICK_FILEMAPPING : 0;
	uiMode |= bVolatileAccess ? HL_MODE_VOLATILE : 0;

	// Open the package.
	// Of the above modes, only HL_MODE_READ is required. HL_MODE_WRITE is present
	// only for future use. File mapping is recommended as an efficient way to load
	// packages. Quick file mapping maps the entire file (instead of bits as they are
	// needed) and thus should only be used in Windows 2000 and up (older versions of
	// Windows have poor virtual memory management which means large files won't be able
	// to find a continuous block and will fail to load). Volatile access allows HLLib
	// to share files with other applications that have those files open for writing.
	// This is useful for, say, loading .gcf files while Steam is running.
	if(!hlPackageOpenFile(lpPackage, uiMode))
	{
		Print(FOREGROUND_RED | FOREGROUND_INTENSITY, "Error loading %s:\n%s\n", lpPackage, hlGetString(HL_ERROR_SHORT_FORMATED));

		hlShutdown();
		return 3;
	}

	// If we have a .ncf file, the package file data is stored externally. In order to
	// validate the file data etc., HLLib needs to know where to look. Tell it where.
	if(ePackageType == HL_PACKAGE_NCF)
	{
		hlNCFFileSetRootPath(lpNCFRootPath);
	}

	if(!bSilent)
		Print(FOREGROUND_GREEN | FOREGROUND_INTENSITY, "%s opened.\n", lpPackage);

	// Extract the requested items.
	for(i = 0; i < uiExtractItems; i++)
	{
		// Find the item.
		pItem = hlFolderGetItemByPath(hlPackageGetRoot(), lpExtractItems[i], HL_FIND_ALL);

		if(pItem == 0)
		{
			printf("%s not found in package.\n", lpExtractItems[i]);
			continue;
		}

		if(!bSilent)
		{
			Print(FOREGROUND_GREEN | FOREGROUND_INTENSITY, "Extracting %s...\n", hlItemGetName(pItem));
			printf("\n");
		}

		// Extract the item.
		// Item is extracted to cDestination\Item->GetName().
		hlItemExtract(pItem, lpDestination);

		if(!bSilent)
		{
			printf("\n");
			printf("Done.\n");
		}
	}

	// Validate the requested items.
	for(i = 0; i < uiValidateItems; i++)
	{
		// Find the item.
		pItem = hlFolderGetItemByPath(hlPackageGetRoot(), lpValidateItems[i], HL_FIND_ALL);

		if(pItem == 0)
		{
			printf("%s not found in package.\n", lpValidateItems[i]);
			continue;
		}

		if(!bSilent)
		{
			Print(FOREGROUND_GREEN | FOREGROUND_INTENSITY, "Validating %s...\n", hlItemGetName(pItem));
			printf("\n");
		}

		// Validate the item.
		Validate(pItem);

		if(!bSilent)
		{
			printf("\n");
			printf("Done.\n");
		}
	}

	// List items in package.
	if(bList)
	{
		if(!bSilent)
		{
			printf("Listing...\n");
			printf("\n");
		}

		pFile = stdout;

		if(lpList != 0)
		{
			pFile = fopen(lpList, "wt");

			if(pFile == 0)
			{
				Print(FOREGROUND_RED | FOREGROUND_INTENSITY, "Error opening %s:\n%s\n", lpList, "fopen() failed.");

				// Fall back to listing on standard output.
				pFile = stdout;
			}
		}

		List(pFile, hlPackageGetRoot(), bListFolders, bListFiles);

		// Only close a file we opened ourselves. The original closed pFile
		// whenever a list file had been requested, which closed stdout after
		// a failed fopen() and broke all later console output.
		if(pFile != stdout)
		{
			fclose(pFile);
			pFile = 0;
		}

		if(!bSilent)
		{
			printf("\n");
			printf("Done.\n");
		}
	}

	if(bDefragment)
	{
		if(!bSilent)
		{
			printf("Defragmenting...\n");
			printf("\n");

			ProgressStart();
			printf(" Progress: ");
		}

		if(!hlPackageDefragment())
		{
			Print(FOREGROUND_RED | FOREGROUND_INTENSITY, " %s", hlGetString(HL_ERROR_SHORT_FORMATED));
		}

		if(!bSilent)
		{
			printf("\n");

			printf("\n");
			printf("Done.\n");
		}
	}

	// Interactive console mode.
	// Commands: dir, cd, root, info, extract, find, type, cls, help, exit.
	if(bConsoleMode)
	{
		EnterConsole(uiPackage, uiConsoleCommands, lpConsoleCommands);
	}

	// Close the package.
	hlPackageClose();

	if(!bSilent)
		Print(FOREGROUND_GREEN | FOREGROUND_INTENSITY, "%s closed.\n", lpPackage);

	// Free up the allocated memory.
	hlDeletePackage(uiPackage);

	hlShutdown();

	return 0;
}
// Validate an item and report the result.
//
// Folders: every child is validated recursively and the largest child result
// is returned (HL_VALIDATES_OK when there are no children).
// Files: the result of hlFileGetValidation() is returned.
// Any other item type returns HL_VALIDATES_OK.
//
// When bSilent is set, only files that validate as incomplete or corrupt are
// printed, using their full path; otherwise every item is reported by name.
HLValidation Validate(HLDirectoryItem *pItem)
{
	HLValidation eResult = HL_VALIDATES_OK;

	switch(hlItemGetType(pItem))
	{
	case HL_ITEM_FOLDER:
	{
		hlUInt uiIndex, uiChildren;

		if(!bSilent)
		{
			Print(FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE | FOREGROUND_INTENSITY, " Validating %s:\n", hlItemGetName(pItem));
		}

		// Keep the largest validation value seen among the children.
		uiChildren = hlFolderGetCount(pItem);
		for(uiIndex = 0; uiIndex < uiChildren; uiIndex++)
		{
			HLValidation eChild = Validate(hlFolderGetItem(pItem, uiIndex));
			if(eChild > eResult)
			{
				eResult = eChild;
			}
		}

		if(!bSilent)
		{
			Print(FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE | FOREGROUND_INTENSITY, " Done %s: ", hlItemGetName(pItem));
			PrintValidation(eResult);
			printf("\n");
		}
		break;
	}
	case HL_ITEM_FILE:
	{
		if(!bSilent)
		{
			printf(" Validating %s: ", hlItemGetName(pItem));
			ProgressStart();
		}

		eResult = hlFileGetValidation(pItem);

		if(!bSilent)
		{
			PrintValidation(eResult);
			printf(" \n");
		}
		else if(eResult == HL_VALIDATES_INCOMPLETE || eResult == HL_VALIDATES_CORRUPT)
		{
			// Silent mode still reports problem files, by full path.
			hlChar lpPath[512] = "";

			hlItemGetPath(pItem, lpPath, sizeof(lpPath));
			printf(" Validating %s: ", lpPath);
			PrintValidation(eResult);
			printf("\n");
		}
		break;
	}
	}

	return eResult;
}
void TR() { #if defined(DEBUG) && defined(_MSC_VER) _CrtSetDbgFlag(0); // too expensive #endif const char *HitFileName = RequiredValueOpt("tr"); const char *OutFileName = RequiredValueOpt("out"); const char *CandFileName = ValueOpt("cand"); const char *strMinTrSpacing = ValueOpt("mintrspacing"); const char *strMaxTrSpacing = ValueOpt("maxtrspacing"); const char *strMinTrLength = ValueOpt("mintrlength"); const char *strMaxTrLength = ValueOpt("minspacingratio"); const char *strMinFam = ValueOpt("minfam"); const char *strMinHitRatio = ValueOpt("minhitratio"); const char *strMinDistPairs = ValueOpt("mindistpairs"); if (0 != strMinTrSpacing) MIN_LENGTH_LINE = atoi(strMinTrSpacing); if (0 != strMaxTrSpacing) MAX_LENGTH_LINE = atoi(strMaxTrSpacing); if (0 != strMinTrLength) MIN_LENGTH_LTR = atoi(strMinTrLength); if (0 != strMaxTrLength) MAX_LENGTH_LTR = atoi(strMaxTrLength); if (0 != strMinFam) MIN_FAM_SIZE = atoi(strMinFam); if (0 != strMinHitRatio) MIN_HIT_LENGTH_RATIO = atoi(strMinHitRatio); if (0 != strMinDistPairs) MIN_DIST_EDGE = atoi(strMinDistPairs); FILE *fHit = OpenStdioFile(HitFileName, FILEIO_MODE_ReadOnly); ProgressStart("Index hits"); GLIX HitGlix; HitGlix.Init(); HitGlix.FromGFFFile(fHit); HitGlix.MakeGlobalToLocalIndex(); ProgressDone(); const int GlobalLength = HitGlix.GetGlobalLength(); IIX IntervalIndex; IntervalIndex.Init(GlobalLength); ProgressStart("Find candidate TRs"); Rewind(fHit); GFFRecord Rec; while (GetNextGFFRecord(fHit, Rec)) { HitData Hit; GFFRecordToHit(HitGlix, Rec, Hit); if (IsCandLTR(Hit)) AddCand(Hit, IntervalIndex); } ProgressDone(); Progress("%d candidates", CandCount); if (0 != CandFileName) { ProgressStart("Write candidates"); FILE *fCand = OpenStdioFile(CandFileName, FILEIO_MODE_WriteOnly); WriteCands(fCand, HitGlix); ProgressDone(); } ProgressStart("Make graph"); Rewind(fHit); while (GetNextGFFRecord(fHit, Rec)) { HitData Hit; GFFRecordToHit(HitGlix, Rec, Hit); FindEdges(Hit, HitGlix, IntervalIndex); } fclose(fHit); fHit = 0; 
ProgressDone(); Progress("%d edges", (int) Edges.size()); ProgressStart("Find families"); FamList Fams; FindConnectedComponents(Edges, Fams, MIN_FAM_SIZE); ProgressDone(); Progress("%d families", (int) Fams.size()); FILE *fOut = OpenStdioFile(OutFileName, FILEIO_MODE_WriteOnly); WriteOutputFile(fOut, HitGlix, Fams); }
void TRS() { const char *InputFileName = RequiredValueOpt("trs"); const char *OutputFileName = ValueOpt("out"); const char *PilesFileName = ValueOpt("piles"); const char *ImagesFileName = ValueOpt("images"); const char *strMinFamSize = ValueOpt("famsize"); const char *strMaxLengthDiffPct = ValueOpt("maxlengthdiffpct"); g_paramSingleHitCoverage = !FlagOpt("multihit"); if (0 == OutputFileName && 0 == PilesFileName && 0 == ImagesFileName) Quit("No output file specified, must be at least one of -out, -piles, -images"); if (0 != strMinFamSize) g_paramMinFamSize = atoi(strMinFamSize); if (0 != strMaxLengthDiffPct) g_paramMaxLengthDiffPct = atoi(strMaxLengthDiffPct); Log("singlehit=%s famsize=%d maxlengthdiffpct=%d\n", g_paramSingleHitCoverage ? "True" : "False", g_paramMinFamSize, g_paramMaxLengthDiffPct); ProgressStart("Read hit file"); int HitCount; int SeqLength; HitData *Hits = ReadHits(InputFileName, &HitCount, &SeqLength); ProgressDone(); Progress("%d hits", HitCount); SeqLengthChunks = (SeqLength + CHUNK_LENGTH - 1)/CHUNK_LENGTH; const int BitVectorLength = (SeqLengthChunks + BITS_PER_INT - 1)/BITS_PER_INT; int *CopyCount = all(int, BitVectorLength); zero(CopyCount, int, BitVectorLength); ProgressStart("Compute copy counts"); for (int i = 0; i < HitCount; ++i) IncCopyCount(CopyCount, Hits[i]); ProgressDone(); ProgressStart("Identify piles"); PILE_INDEX_TYPE *PileIndexes = IdentifyPiles(CopyCount); ProgressDone(); Progress("%d stacks", PileCount); freemem(CopyCount); CopyCount = 0; CreatePiles(Hits, HitCount, PileIndexes); if (0 != ImagesFileName) { ProgressStart("Writing images file"); WriteImages(ImagesFileName, Hits, HitCount, PileIndexes); ProgressDone(); } freemem(Hits); Hits = 0; if (0 != PilesFileName) { ProgressStart("Writing piles file"); WritePiles(PilesFileName, Piles, PileCount); ProgressDone(); } freemem(PileIndexes); PileIndexes = 0; if (0 == OutputFileName) return; ProgressStart("Find edges"); EdgeList Edges; FindGlobalEdges(Edges, MaxImageCount); 
ProgressDone(); Progress("%d edges", (int) Edges.size()); ProgressStart("Find families"); FamList Fams; FindConnectedComponents(Edges, Fams, g_paramMinFamSize); AssignFamsToPiles(Fams); ProgressDone(); Progress("%d families", (int) Fams.size()); ProgressStart("Find superfamilies"); EdgeList SuperEdges; FindSuperFamEdges(Fams, SuperEdges); FamList SuperFams; FindConnectedComponents(SuperEdges, SuperFams, 1); FindSingletonSuperFams(Fams, SuperFams); AssignSuperFamsToPiles(Fams, SuperFams); ProgressDone(); Progress("%d superfamilies", (int) SuperFams.size()); ProgressStart("Write TRS output file"); WriteTRSFile(OutputFileName, Piles, PileCount); ProgressDone(); }
static void CreatePiles(const HitData *Hits, int HitCount, PILE_INDEX_TYPE *PileIndexes) { Piles = all(PileData, PileCount); zero(Piles, PileData, PileCount); for (int i = 0; i < PileCount; ++i) { Piles[i].FamIndex = -1; Piles[i].SuperFamIndex = -1; Piles[i].Rev = -1; } // Count images in stack ProgressStart("Create stacks: count images"); for (int HitIndex = 0; HitIndex < HitCount; ++HitIndex) { const HitData &Hit = Hits[HitIndex]; int Pos = Hit.QueryFrom/CHUNK_LENGTH; PILE_INDEX_TYPE PileIndex = PileIndexes[Pos]; assert(PileIndex == PileIndexes[Hit.QueryTo/CHUNK_LENGTH]); assert(PileIndex >= 0 && PileIndex < PileCount); ++(Piles[PileIndex].ImageCount); Pos = Hit.TargetFrom/CHUNK_LENGTH; PileIndex = PileIndexes[Pos]; assert(PileIndex >= 0 && PileIndex < PileCount); assert(PileIndex == PileIndexes[Hit.TargetTo/CHUNK_LENGTH]); ++(Piles[PileIndex].ImageCount); } ProgressDone(); // Allocate memory for image list int TotalImageCount = 0; ProgressStart("Create stacks: allocate image memory"); for (int PileIndex = 0; PileIndex < PileCount; ++PileIndex) { PileData &Pile = Piles[PileIndex]; const int ImageCount = Pile.ImageCount; TotalImageCount += ImageCount; assert(ImageCount > 0); Pile.Images = all(PileImageData, ImageCount); } ProgressDone(); // Build image list ProgressStart("Create stacks: build image list"); for (int PileIndex = 0; PileIndex < PileCount; ++PileIndex) { PileData &Pile = Piles[PileIndex]; Pile.ImageCount = 0; Pile.From = -1; Pile.To = -1; } for (int HitIndex = 0; HitIndex < HitCount; ++HitIndex) { const HitData &Hit = Hits[HitIndex]; const bool Rev = Hit.Rev; const int Length1 = Hit.QueryTo - Hit.QueryFrom; const int Length2 = Hit.TargetTo - Hit.TargetFrom; const int From1 = Hit.QueryFrom; const int From2 = Hit.TargetFrom; const int To1 = Hit.QueryTo; const int To2 = Hit.TargetTo; const int Pos1 = From1/CHUNK_LENGTH; const int Pos2 = From2/CHUNK_LENGTH; PILE_INDEX_TYPE PileIndex1 = PileIndexes[Pos1]; PILE_INDEX_TYPE PileIndex2 = PileIndexes[Pos2]; 
assert(PileIndex1 == PileIndexes[(From1 + Length1 - 1)/CHUNK_LENGTH]); assert(PileIndex1 >= 0 && PileIndex1 < PileCount); assert(PileIndex2 == PileIndexes[(From2 + Length2 - 1)/CHUNK_LENGTH]); assert(PileIndex2 >= 0 && PileIndex2 < PileCount); PileData &Pile1 = Piles[PileIndex1]; PileImageData &Image1 = Pile1.Images[Pile1.ImageCount++]; Image1.SILength = Length2; Image1.SIPile = PileIndex2; Image1.SIRev = Rev; PileData &Pile2 = Piles[PileIndex2]; PileImageData &Image2 = Pile2.Images[Pile2.ImageCount++]; Image2.SILength = Length1; Image2.SIPile = PileIndex1; Image2.SIRev = Rev; if (Pile1.From == -1 || From1 < Pile1.From) Pile1.From = From1; if (Pile1.To == -1 || To1 > Pile1.To) Pile1.To = To1; if (Pile2.From == -1 || From2 < Pile2.From) Pile2.From = From2; if (Pile2.To == -1 || To2 > Pile2.To) Pile2.To = To2; if (Pile1.ImageCount > MaxImageCount) MaxImageCount = Pile1.ImageCount; if (Pile2.ImageCount > MaxImageCount) MaxImageCount = Pile2.ImageCount; } ProgressDone(); }
////////////////////////////////////////////////////////////////////////////////
// Preprocessing
// (07.07.2000) Originally taken from puma.dll without changes
// tightly coupled to puma
// first we deal with component extraction
//
// Steps performed, each skipped once rc has become FALSE:
//   1. Fill the page info of hCPAGE from the bitmap header (name, bit depth,
//      DPI, size) and store it with SetPageInfo.
//   2. Unless LDPUMA_Skip() on hDebugCancelComponent returns false, run
//      ExtractComponents and then checkResolution.
//   3. Re-read the page info, raise DPIX/DPIY to at least 200 and store it
//      again.
//
// Returns TRUE on success; FALSE if a ProgressStep call returned a false
// value or ExtractComponents did.
Bool32 PreProcessImage( PRSPreProcessImage Image )
{
	// Local copies of the fields of *Image used below.
	Bool32 gbAutoRotate = Image->gbAutoRotate;
	puchar *gpRecogDIB = Image->pgpRecogDIB;
	Handle hCPAGE = Image->hCPAGE;
	const char * glpRecogName = *Image->pglpRecogName;
	PCIMAGEBITMAPINFOHEADER info = (PCIMAGEBITMAPINFOHEADER)Image->pinfo;
	/////////////////////////////////
	Bool32 rc = TRUE;
	//char * lpRecogName = NULL;
	// Angle stays 0: the only code that changed it is commented out below.
	uint32_t Angle = 0;

	hWndTurn = 0;

	if(InitPRGTIME())
		ProgressStart();

	if(!ProgressStep(1,5))
		rc = FALSE;

	// Andrey 12.11.01
	// Initialize the CPAGE container
	//
	if(rc)
	{
		PAGEINFO PInfo = {0};
		GetPageInfo(hCPAGE,&PInfo);
		strcpy((char*)PInfo.szImageName, glpRecogName);
		PInfo.BitPerPixel = info->biBitCount;
		// DPI is derived from the header's pixels-per-meter fields
		// (multiplied by 254/10000).
		PInfo.DPIX = info->biXPelsPerMeter*254L/10000;
//		PInfo.DPIX = PInfo.DPIX < 200 ? 200 : PInfo.DPIX;
		PInfo.DPIY = info->biYPelsPerMeter*254L/10000;
//		PInfo.DPIY = PInfo.DPIY < 200 ? 200 : PInfo.DPIY;
		PInfo.Height = info->biHeight;
		PInfo.Width = info->biWidth;
//		PInfo.X = 0; already set
//		PInfo.Y = 0;
		PInfo.Incline2048 = 0;
		PInfo.Page = 1;
		PInfo.Angle = Angle;

		SetPageInfo(hCPAGE,PInfo);
	}

	////////////////////////////////////////////////////////
	// Extract components
	//
	if(!ProgressStep(2,65))
		rc = FALSE;

	if(rc)
	{
		if(LDPUMA_Skip(Image->hDebugCancelComponent)/*DPumaSkipComponent()*/)
		{
//			uchar ori;
			// The value returned by StorePRGTIME(65, 85) is handed back to
			// RestorePRGTIME after the extraction.
			PRGTIME prev = StorePRGTIME(65, 85);
			rc = ExtractComponents( gbAutoRotate, NULL, (puchar)glpRecogName, Image);
			RestorePRGTIME(prev);
/*
			if(rc && gbAutoRotate)
			{
				//if(!REXC_GetOrient(&ori))
				//if(!REXC_GetOrient(&ori) && db_spec_prj!=SPEC_PRJ_GIP )
				if(!RNORM_GetOrient(&ori, *(Image->phCCOM)) && db_spec_prj!=SPEC_PRJ_GIP )
				{
					SetReturnCode_rstuff(RNORM_GetReturnCode());
					rc = FALSE;
				}
				else
				{
					//if(ori)
					if(ori && !(db_spec_prj==SPEC_PRJ_GIP&&ori==4))
					{
						uint32_t dwTurn = 0;
						switch(ori)
						{
						case 1:
							Angle = 270;
							dwTurn = RIMAGE_TURN_270;
							break;
						case 2:
							Angle = 90;
							dwTurn = RIMAGE_TURN_90;
							break;
						case 3:
							Angle = 180;
							dwTurn = RIMAGE_TURN_180;
							break;
						}

						if( LDPUMA_Skip(Image->hDebugCancelTurn) /*umaSkipTurn()*/ /*)
						{
							if(!RIMAGE_Turn((puchar)glpRecogName,(puchar)PUMA_IMAGE_TURN,dwTurn,0))
							{
								SetReturnCode_rstuff_rstuff(RIMAGE_GetReturnCode());
								rc = FALSE;
							}
							else
							{
								if(!CIMAGE_ReadDIB((puchar)PUMA_IMAGE_TURN,(Handle*)gpRecogDIB,TRUE))
								{
									SetReturnCode_rstuff_rstuff(CIMAGE_GetReturnCode());
									rc = FALSE;
								}
								else
								{
									//
									// delete the shared container
									//
									glpRecogName = PUMA_IMAGE_TURN;
									hWndTurn = LDPUMA_CreateWindow(PUMA_IMAGE_TURN,(*gpRecogDIB));
									PRGTIME prev = StorePRGTIME(85, 100);
									rc = ExtractComponents( FALSE, NULL, (puchar)glpRecogName, Image);
									PAGEINFO info = {0};
									GetPageInfo(hCPAGE,&info);
									info.Images|=IMAGE_TURN;
//									strcpy((char*)info.szImageName,PUMA_IMAGE_TURN);
									SetPageInfo(hCPAGE,info);
									RestorePRGTIME(prev);
								}
							}
						}
					}
				}
			}*/

			// check that a resolution is present and, if it is not, try to
			// determine it from the components
			checkResolution(*(Image->phCCOM), hCPAGE);
			if(!ProgressStep(2,100))
				rc = FALSE;
		}
		else
			LDPUMA_Console("Пропущен этап выделения компонент.\n");
	}

	//
	// Re-initialize the CPAGE container
	//
	if(rc)
	{
		PAGEINFO PInfo = {0};
		GetPageInfo(hCPAGE,&PInfo);
		strcpy((char*)PInfo.szImageName, glpRecogName);
		PInfo.BitPerPixel = info->biBitCount;
		// Unlike the first pass, the DPI read back from the page is kept and
		// only raised to a minimum of 200.
//		PInfo.DPIX = info->biXPelsPerMeter*254L/10000;
		PInfo.DPIX = PInfo.DPIX < 200 ? 200 : PInfo.DPIX;
//		PInfo.DPIY = info->biYPelsPerMeter*254L/10000;
		PInfo.DPIY = PInfo.DPIY < 200 ? 200 : PInfo.DPIY;
		PInfo.Height = info->biHeight;
		PInfo.Width = info->biWidth;
//		PInfo.X = 0; already set
//		PInfo.Y = 0;
		PInfo.Incline2048 = 0;
		PInfo.Page = 1;
		PInfo.Angle = Angle;

		SetPageInfo(hCPAGE,PInfo);
	}

	if(DonePRGTIME())
		ProgressFinish();

	return rc;
}