void tabPepPred(char *database, int fileCount, char *fileNames[], char *table) /* Load a tab separated peptide file. */ { struct hash *uniq = newHash(16); struct lineFile *lf = lineFileOpen(fileNames[0], TRUE); char *words[2]; if (fileCount != 1) errAbort("Only one file allowed for tab separated peptides"); makeCustomTable(database, table, createString); printf("Processing %s\n", fileNames[0]); while (lineFileRow(lf, words)) { char *upperCase; if (hashLookupUpperCase(uniq, words[0]) != NULL) errAbort("Duplicate (case insensitive) '%s' line %d of %s", words[0], lf->lineIx, lf->fileName); upperCase = cloneString(words[0]); touppers(upperCase); hashAdd(uniq, upperCase, NULL); freeMem(upperCase); } lineFileClose(&lf); printf("Loading %s\n", fileNames[0]); loadTableFromTabFile(database, table, fileNames[0]); freeHash(&uniq); }
void *hashFindValUpperCase(struct hash *hash, char *name)
/* Look name up via hashLookupUpperCase and hand back the value stored in
 * the matching element, or NULL when there is no such element.
 * (Assumes all elements of hash are themselves already in upper case.) */
{
struct hashEl *found = hashLookupUpperCase(hash, name);
return (found != NULL) ? found->val : NULL;
}
void oneGenieFile(char *fileName, struct hash *uniq, FILE *f) /* Process one genie peptide prediction file into known and alt tab files. */ { struct lineFile *lf = lineFileOpen(fileName, TRUE); char *line; int lineSize; boolean firstTime = TRUE; char *trans; boolean skip = FALSE; /* Do cursory sanity check. */ if (!lineFileNext(lf, &line, &lineSize)) errAbort("%s is empty", fileName); if (line[0] != '>') errAbort("%s is badly formatted, doesn't begin with '>'", fileName); lineFileReuse(lf); while (lineFileNext(lf, &line, &lineSize)) { if (line[0] == '>') { /* End last line. */ if (firstTime) firstTime = FALSE; else fputc('\n', f); trans = firstWordInLine(line+1); if (abbr != NULL && startsWith(abbr, trans)) trans += strlen(abbr); if (hashLookupUpperCase(uniq, trans) != NULL) { warn("Duplicate (case insensitive) '%s' line %d of %s. Ignoring all but first.", trans, lf->lineIx, lf->fileName); skip = TRUE; } else { char *upperCase; upperCase = cloneString(trans); touppers(upperCase); hashAdd(uniq, upperCase, NULL); freeMem(upperCase); fprintf(f, "%s\t", trans); skip = FALSE; } } else if (!skip) { mustWrite(f, line, lineSize-1); } } fputc('\n', f); lineFileClose(&lf); }
void genericOne(char *fileName, struct hash *uniq, FILE *f) /* Process one ensemble peptide prediction file into tab delimited * output f, using uniq hash to make sure no dupes. */ { struct lineFile *lf = lineFileOpen(fileName, TRUE); char *line; int lineSize; boolean firstTime = TRUE; char *trans, transBuf[128]; /* Do cursory sanity check. */ if (!lineFileNext(lf, &line, &lineSize)) errAbort("%s is empty", fileName); if (line[0] != '>') errAbort("%s is badly formatted, doesn't begin with '>'", fileName); lineFileReuse(lf); while (lineFileNext(lf, &line, &lineSize)) { if (line[0] == '>') { char *upperCase; /* End last line. */ if (firstTime) firstTime = FALSE; else fputc('\n', f); trans = firstWordInLine(line+1); if (abbr != NULL && startsWith(abbr, trans)) trans += strlen(abbr); if (suffix != NULL) { safef(transBuf, sizeof(transBuf), "%s%s", trans, suffix); trans = transBuf; } if (hashLookupUpperCase(uniq, trans) != NULL) errAbort("Duplicate (case insensitive) '%s' line %d of %s", trans, lf->lineIx, lf->fileName); upperCase = cloneString(trans); touppers(upperCase); hashAdd(uniq, upperCase, NULL); freeMem(upperCase); fprintf(f, "%s\t", trans); } else { mustWrite(f, line, lineSize-1); } } fputc('\n', f); lineFileClose(&lf); }
void oneEnsFile(char *ensFile, struct hash *uniq, struct hash *pToT, FILE *f) /* Process one ensemble peptide prediction file into tab delimited * output f, using uniq hash to make sure no dupes. */ { struct lineFile *lf = lineFileOpen(ensFile, TRUE); char *line; int lineSize; boolean firstTime = TRUE; char *translation; /* Do cursory sanity check. */ if (!lineFileNext(lf, &line, &lineSize)) errAbort("%s is empty", ensFile); if (line[0] != '>') errAbort("%s is badly formatted, doesn't begin with '>'", ensFile); lineFileReuse(lf); while (lineFileNext(lf, &line, &lineSize)) { if (line[0] == '>') { char *upperCase; char *transcript; /* End last line. */ if (firstTime) firstTime = FALSE; else fputc('\n', f); translation = findEnsTrans(lf, line); if (hashLookupUpperCase(uniq, translation) != NULL) errAbort("Duplicate (case insensitive) '%s' line %d of %s", translation, lf->lineIx, lf->fileName); upperCase = cloneString(translation); touppers(upperCase); hashAdd(uniq, upperCase, NULL); freeMem(upperCase); transcript = hashFindVal(pToT, translation); if (transcript == NULL) errAbort("Can't find transcript for %s", translation); fprintf(f, "%s\t", transcript); } else { mustWrite(f, line, lineSize-1); } } fputc('\n', f); lineFileClose(&lf); }
struct subjInfo *stringAdvFilter(struct column *col, struct sqlConnection *conn, struct subjInfo *list) /* Do advanced filter on string in main table. */ { char *wild = advFilterVal(col, "wild"); struct hash *keyHash = keyFileHash(col); if (keyHash != NULL) { struct subjInfo *newList = NULL, *next, *si; for (si = list; si != NULL; si = next) { char *cell = col->cellVal(col, si, conn); next = si->next; if (hashLookupUpperCase(keyHash, cell)) { slAddHead(&newList, si); } freez(&cell); } slReverse(&newList); list = newList; } if (wild != NULL) { boolean orLogic = advFilterOrLogic(col, "logic", TRUE); struct subjInfo *newList = NULL, *next, *si; struct slName *wildList = stringToSlNames(wild); for (si = list; si != NULL; si = next) { char *cell = col->cellVal(col, si, conn); next = si->next; if (wildMatchList(cell, wildList, orLogic)) { slAddHead(&newList, si); } freez(&cell); } slReverse(&newList); list = newList; } hashFree(&keyHash); return list; }