Пример #1
0
void tabPepPred(char *database, int fileCount, char *fileNames[], char *table)
/* Load a tab separated peptide file. */
{
struct hash *uniq = newHash(16);
struct lineFile *lf = lineFileOpen(fileNames[0], TRUE);
char *words[2];

if (fileCount != 1)
    errAbort("Only one file allowed for tab separated peptides");

makeCustomTable(database, table, createString);

printf("Processing %s\n", fileNames[0]);
while (lineFileRow(lf, words))
    {
    char *upperCase;
    if (hashLookupUpperCase(uniq, words[0]) != NULL)
	errAbort("Duplicate (case insensitive) '%s' line %d of %s", words[0], lf->lineIx, lf->fileName);
    upperCase = cloneString(words[0]);
    touppers(upperCase);
    hashAdd(uniq, upperCase, NULL);
    freeMem(upperCase);
    }
lineFileClose(&lf);
printf("Loading %s\n", fileNames[0]);
loadTableFromTabFile(database, table, fileNames[0]);
freeHash(&uniq);
}
Пример #2
0
void *hashFindValUpperCase(struct hash *hash, char *name)
/* Lookup upper cased name in hash and return val or return NULL if not found.
 * (Assumes all elements of hash are themselves already in upper case.) */
{
struct hashEl *hel = hashLookupUpperCase(hash, name);
if (hel == NULL)
    return NULL;
return hel->val;
}
Пример #3
0
void oneGenieFile(char *fileName, struct hash *uniq, FILE *f)
/* Process one genie peptide prediction file into known and alt tab files. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *line;
int lineSize;
boolean firstTime = TRUE;
char *trans;
boolean skip = FALSE;

/* Do cursory sanity check. */
if (!lineFileNext(lf, &line, &lineSize))
    errAbort("%s is empty", fileName);
if (line[0] != '>')
    errAbort("%s is badly formatted, doesn't begin with '>'", fileName);
lineFileReuse(lf);

while (lineFileNext(lf, &line, &lineSize))
    {
    if (line[0] == '>')
        {
	/* End last line. */
	if (firstTime)
	    firstTime = FALSE;
	else
	    fputc('\n', f);
	trans = firstWordInLine(line+1);
	if (abbr != NULL && startsWith(abbr, trans))
	    trans += strlen(abbr);
	if (hashLookupUpperCase(uniq, trans) != NULL)
	    {
	    warn("Duplicate (case insensitive) '%s' line %d of %s. Ignoring all but first.", trans, lf->lineIx, lf->fileName);
	    skip = TRUE;
	    }
	else
	    {
	    char *upperCase;
	    upperCase = cloneString(trans);
	    touppers(upperCase);
	    hashAdd(uniq, upperCase, NULL);
	    freeMem(upperCase);
	    fprintf(f, "%s\t", trans);
	    skip = FALSE;
	    }
	}
    else if (!skip)
        {
	mustWrite(f, line, lineSize-1);
	}
    }
fputc('\n', f);
lineFileClose(&lf);
}
Пример #4
0
void genericOne(char *fileName, struct hash *uniq, FILE *f)
/* Process one ensemble peptide prediction file into tab delimited
 * output f, using uniq hash to make sure no dupes. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *line;
int lineSize;
boolean firstTime = TRUE;
char *trans, transBuf[128];

/* Do cursory sanity check. */
if (!lineFileNext(lf, &line, &lineSize))
    errAbort("%s is empty", fileName);
if (line[0] != '>')
    errAbort("%s is badly formatted, doesn't begin with '>'", fileName);
lineFileReuse(lf);

while (lineFileNext(lf, &line, &lineSize))
    {
    if (line[0] == '>')
        {
	char *upperCase;
	/* End last line. */
	if (firstTime)
	    firstTime = FALSE;
	else
	    fputc('\n', f);
	trans = firstWordInLine(line+1);
	if (abbr != NULL && startsWith(abbr, trans))
	    trans += strlen(abbr);
        if (suffix != NULL)
            {
            safef(transBuf, sizeof(transBuf), "%s%s", trans, suffix);
            trans = transBuf;
            }
	if (hashLookupUpperCase(uniq, trans) != NULL)
	    errAbort("Duplicate (case insensitive) '%s' line %d of %s", trans, lf->lineIx, lf->fileName);
	upperCase = cloneString(trans);
	touppers(upperCase);
	hashAdd(uniq, upperCase, NULL);
	freeMem(upperCase);
	fprintf(f, "%s\t", trans);
	}
    else
        {
	mustWrite(f, line, lineSize-1);
	}
    }
fputc('\n', f);
lineFileClose(&lf);
}
Пример #5
0
void oneEnsFile(char *ensFile, struct hash *uniq, struct hash *pToT, FILE *f)
/* Process one ensemble peptide prediction file into tab delimited
 * output f, using uniq hash to make sure no dupes. */
{
struct lineFile *lf = lineFileOpen(ensFile, TRUE);
char *line;
int lineSize;
boolean firstTime = TRUE;
char *translation;

/* Do cursory sanity check. */
if (!lineFileNext(lf, &line, &lineSize))
    errAbort("%s is empty", ensFile);
if (line[0] != '>')
    errAbort("%s is badly formatted, doesn't begin with '>'", ensFile);
lineFileReuse(lf);

while (lineFileNext(lf, &line, &lineSize))
    {
    if (line[0] == '>')
        {
	char *upperCase;
	char *transcript;
	/* End last line. */
	if (firstTime)
	    firstTime = FALSE;
	else
	    fputc('\n', f);
	translation = findEnsTrans(lf, line);
	if (hashLookupUpperCase(uniq, translation) != NULL)
	    errAbort("Duplicate (case insensitive) '%s' line %d of %s", translation, lf->lineIx, lf->fileName);
	upperCase = cloneString(translation);
	touppers(upperCase);
	hashAdd(uniq, upperCase, NULL);
	freeMem(upperCase);
	transcript = hashFindVal(pToT, translation);
	if (transcript == NULL)
	    errAbort("Can't find transcript for %s", translation);
	fprintf(f, "%s\t", transcript);
	}
    else
        {
	mustWrite(f, line, lineSize-1);
	}
    }
fputc('\n', f);
lineFileClose(&lf);
}
Пример #6
0
struct subjInfo *stringAdvFilter(struct column *col,
	struct sqlConnection *conn, struct subjInfo *list)
/* Do advanced filter on string in main table. */
{
char *wild = advFilterVal(col, "wild");
struct hash *keyHash = keyFileHash(col);
if (keyHash != NULL)
    {
    struct subjInfo *newList = NULL, *next, *si;
    for (si = list; si != NULL; si = next)
        {
	char *cell = col->cellVal(col, si, conn);
	next = si->next;
	if (hashLookupUpperCase(keyHash, cell))
	    {
	    slAddHead(&newList, si);
	    }
	freez(&cell);
	}
    slReverse(&newList);
    list = newList;
    }
if (wild != NULL)
    {
    boolean orLogic = advFilterOrLogic(col, "logic", TRUE);
    struct subjInfo *newList = NULL, *next, *si;
    struct slName *wildList = stringToSlNames(wild);
    for (si = list; si != NULL; si = next)
        {
	char *cell = col->cellVal(col, si, conn);
	next = si->next;
	if (wildMatchList(cell, wildList, orLogic))
	    {
	    slAddHead(&newList, si);
	    }
	freez(&cell);
	}
    slReverse(&newList);
    list = newList;
    }
hashFree(&keyHash);
return list;
}