/*!
 * Callback invoked when the "Add" button is clicked.
 *
 * Logs a debug trace, then calls addToDict() followed by ignoreWord(),
 * in that order.
 */
void
AP_UnixDialog_Spell::onAddClicked ()
{
	UT_DEBUGMSG (("ROB: AP_UnixDialog_Spell::onAddClicked()\n"));

	// Add first, then ignore; the order of these two calls is preserved.
	addToDict ();
	ignoreWord ();
}
int main(int argc, char **argv) { uint32 pNum = 0; uint32 pAlloc = 8388608; uint32 estID = ~uint32ZERO; bool *found = 0L; // From fixPolishesIID.c IIDdict = 0L; SEQdict = 0L; GENdict = 0L; // Incorporated from sortPolishes mergeFilesLen = 0; mergeFilesMax = sysconf(_SC_OPEN_MAX); mergeFiles = new FILE * [mergeFilesMax]; mergeNames = new char * [mergeFilesMax]; mergePolishes = new sim4polish * [mergeFilesMax]; // Default to printing stats on stdout. sFile = stdout; int arg = 1; while (arg < argc) { if (strcmp(argv[arg], "-n") == 0) { pAlloc = strtouint32(argv[++arg], 0L); } else if (strcmp(argv[arg], "-fpart") == 0) { arg++; fprintf(stderr, "reading query deflines from '%s'\n", argv[arg]); IIDdict = dict_create(DICTCOUNT_T_MAX, headerCompare); addToDict(IIDdict, argv[arg]); } else if (strcmp(argv[arg], "-g") == 0) { ++arg; fprintf(stderr, "reading genomic deflines from '%s'\n", argv[arg]); GENdict = dict_create(DICTCOUNT_T_MAX, headerCompare); addToDict(GENdict, argv[arg]); } else if (strcmp(argv[arg], "-F") == 0) { ++arg; fprintf(stderr, "reading query deflines from '%s'\n", argv[arg]); SEQdict = dict_create(DICTCOUNT_T_MAX, headerCompare); addToDict(SEQdict, argv[arg]); } else if (strcmp(argv[arg], "-f") == 0) { ++arg; SEQ = new seqCache(argv[arg]); } else if (strcmp(argv[arg], "-q") == 0) { ++arg; QLT = new seqCache(argv[arg]); } else if (strcmp(argv[arg], "-filter") == 0) { filter = atof(argv[++arg]); doFiltering = true; } else if (strcmp(argv[arg], "-output") == 0) { char cmd[1024] = {0}; errno = 0; ++arg; if (strcmp(argv[arg] + strlen(argv[arg]) - 4, ".bz2") == 0) { sprintf(cmd, "bzip2 -1c > %s", argv[arg]); oFile = popen(cmd, "w"); oFileIsPipe = 1; } else if (strcmp(argv[arg] + strlen(argv[arg]) - 3, ".gz") == 0) { sprintf(cmd, "gzip -1c > %s", argv[arg]); oFile = popen(cmd, "w"); oFileIsPipe = 1; } else { fprintf(stderr, "Got %s, not .bz2 not .gz!\n", argv[arg]); exit(1); } if (errno) fprintf(stderr, "Failed to open '%s': %s\n", cmd, strerror(errno)); 
doFiltering = true; } else if (strcmp(argv[arg], "-scores") == 0) { errno = 0; sFile = fopen(argv[++arg], "w"); if (errno) fprintf(stderr, "Failed to open '%s': %s\n", argv[arg-1], strerror(errno)); doFiltering = true; } else if (strcmp(argv[arg], "-unique") == 0) { char cmd[1024] = {0}; errno = 0; arg++; if (strcmp(argv[arg] + strlen(argv[arg]) - 4, ".bz2") == 0) sprintf(cmd, "bzip2 -1c > %s", argv[arg]); else if (strcmp(argv[arg] + strlen(argv[arg]) - 3, ".gz") == 0) sprintf(cmd, "gzip -1c > %s", argv[arg]); else sprintf(cmd, "cat > %s", argv[arg]); uFile = popen(cmd, "w"); if (errno) fprintf(stderr, "Failed to open '%s': %s\n", cmd, strerror(errno)); doFiltering = true; } else if (strncmp(argv[arg], "-M", 2) == 0) { arg++; while ((arg < argc) && (fileExists(argv[arg]))) { if (mergeFilesLen >= mergeFilesMax) { fprintf(stderr, "%s: ERROR! Too many input files! Should be less than %d\n", argv[0], mergeFilesMax); exit(1); } mergeNames[mergeFilesLen] = argv[arg]; mergeFiles[mergeFilesLen++] = openFile(argv[arg], "r"); arg++; } arg--; } else { fprintf(stderr, "unknown option: %s\n", argv[arg]); } arg++; } if (doFiltering) { if (uFile == 0L) fprintf(stderr, "ERROR: -unique is required\n"), exit(1); if (sFile == 0L) fprintf(stderr, "ERROR: -scores is required\n"), exit(1); if ((filter < 0.0) || (filter > 1.0)) fprintf(stderr, "ERROR: -filter value of %f invalid. 0 <= F <= 100.\n", filter), exit(1); } if ((IIDdict == 0L) || (SEQdict == 0L) || (GENdict == 0L)) { fprintf(stderr, "WARNING! No sequence dictionaries, NOT FIXING IIDs! (supply -fpart, -f and -g)\n"); } if ((SEQ == 0L) || (QLT == 0L)) { fprintf(stderr, "I need -f and -q\n"); exit(1); } // We no longer require that input polishes be sorted increasingly; // now they only must be grouped. This remembers if we've seen a // match or not. At the end, we'll analyze() those we haven't done // already. 
// found = new bool [ SEQ->getNumberOfSequences() ]; for (uint32 i=0; i<SEQ->getNumberOfSequences(); i++) found[i] = false; // Initialize the merge -- if no merge files, nothing done! // for (int i=0; i<mergeFilesLen; i++) { mergePolishes[i] = new sim4polish(mergeFiles[i]); fixIID(mergePolishes[i], IIDdict); } // Read polishes, picking the best when we see a change in the // estID. sim4polish **p = new sim4polish * [pAlloc]; sim4polish *q; while ((q = nextPolish()) != 0L) { if ((q->_estID != estID) && (pNum > 0)) { //fprintf(stderr, "PickBest for estID "uint32FMT"\n", estID); found[estID] = true; pickBest(p, pNum); pNum = 0; } if (pNum >= pAlloc) { sim4polish **P = new sim4polish * [pAlloc * 2]; memcpy(p, P, sizeof(sim4polish *) * pAlloc); delete [] p; p = P; pAlloc *= 2; } p[pNum++] = q; estID = q->_estID; } if (pNum > 0) { found[estID] = true; pickBest(p, pNum); } // Attempt cleanup // for (int i=0; i<mergeFilesLen; i++) closeFile(mergeFiles[i], mergeNames[i]); for (estID=0; estID < SEQ->getNumberOfSequences(); estID++) if (found[estID] == false) analyze(estID, 0, SEQ->getSequenceLength(estID), SEQ->getSequenceLength(estID), true, 'M'); delete [] mergeFiles; delete [] mergeNames; delete [] mergePolishes; if (oFile) pclose(oFile); if (uFile) pclose(uFile); if (sFile) fclose(sFile); fprintf(stderr, "Uni:"uint32FMTW(8)" Con:"uint32FMTW(8)" (T:"uint32FMTW(8)" M:"uint32FMTW(8)" I:"uint32FMTW(8)" S:"uint32FMTW(8)" N:"uint32FMTW(8)") Inc:"uint32FMTW(8)" -- Save:"uint32FMTW(8)" Lost:"uint32FMTW(8)"\n", statOneMatch, statConsistent, consistentTie, consistentMatches, consistentIdentity, consistentTooShort, consistentNot, statInconsistent, statUnique, statLost); fprintf(stderr, "total: LQ:"uint32FMT" MQ:"uint32FMT" RQ:"uint32FMT"\n", totLQ, totMQ, totRQ); return(0); }