Ejemplo n.º 1
0
static void checkTagIsInside(struct htmlPage *page, char *outsiders, char *insiders,  
	struct htmlTag *startTag, struct htmlTag *endTag)
/* Make sure each tag named in insiders only shows up while at least one tag
 * named in outsiders is open.  Both arguments are lists of tag names split
 * with nextWord(); the words are upper-cased before being hashed and are then
 * looked up by tag->name.  The scan starts at startTag and follows ->next
 * until NULL; endTag is not consulted by this function.  A violation is
 * reported through tagAbort(), except for tags whose text starts with
 * "<INPUT TYPE=HIDDEN NAME=". */
{
char *insideCopy = cloneString(insiders);
char *outsideCopy = cloneString(outsiders);
struct hash *insideNames = newHash(8);
struct hash *openers = newHash(8);
struct hash *closers = newHash(8);
char closerName[256];
char *cursor, *name;
struct htmlTag *cur;
int nesting = 0;

/* Names of the tags that must be enclosed. */
for (cursor = insideCopy; (name = nextWord(&cursor)) != NULL; )
    {
    touppers(name);
    hashAdd(insideNames, name, NULL);
    }

/* Names of the enclosing tags, plus their "/NAME" closing forms. */
for (cursor = outsideCopy; (name = nextWord(&cursor)) != NULL; )
    {
    touppers(name);
    hashAdd(openers, name, NULL);
    safef(closerName, sizeof(closerName), "/%s", name);
    hashAdd(closers, closerName, NULL);
    }

/* Walk the tags, tracking how many enclosing tags are currently open. */
for (cur = startTag; cur != NULL; cur = cur->next)
    {
    char *tagName = cur->name;
    if (hashLookup(openers, tagName))
        {
        nesting += 1;
        continue;
        }
    if (hashLookup(closers, tagName))
        {
        nesting -= 1;
        continue;
        }
    /* One exception is hardwired: hidden inputs are tolerated anywhere. */
    if (hashLookup(insideNames, tagName) && nesting <= 0
        && !startsWith("<INPUT TYPE=HIDDEN NAME=", cur->start))
        tagAbort(page, cur, "%s outside of any of %s", tagName, outsiders);
    }

freeMem(insideCopy);
freeMem(outsideCopy);
freeHash(&closers);
freeHash(&openers);
freeHash(&insideNames);
}
Ejemplo n.º 2
0
void makeKnownGeneHashes(int knownDbCount, char **knownDbs)
/* Create hashes containing info on known genes, pooling the contents of every
 * database named in knownDbs[0..knownDbCount-1].  Fills these file-level hashes:
 *   nameToKnown    - upper-cased geneSymbol    -> upper-cased kgID
 *   uniProtToKnown - spID and spDisplayID      -> upper-cased kgID
 *   refSeqToKnown  - refseq                    -> upper-cased kgID
 *   knownTextHash  - upper-cased kgID          -> copy of description
 *   aliasToKnown   - alias (case as stored)    -> upper-cased kgID
 * The kgID and description strings are cloned and stay referenced by the hashes. */
{
    /* Both alias tables have the same (kgID, alias) shape and feed the same hash. */
    static char *aliasQueries[] = {
        "NOSQLINJ select kgID,alias from kgAlias",
        "NOSQLINJ select kgID,alias from kgProtAlias",
    };
    int aliasQueryCount = (int)(sizeof(aliasQueries) / sizeof(aliasQueries[0]));
    int i, q;

    knownTextHash = hashNew(18);
    uniProtToKnown = hashNew(18);
    refSeqToKnown = hashNew(18);
    aliasToKnown = hashNew(19);
    nameToKnown = hashNew(18);

    for (i = 0; i < knownDbCount; i += 1)
    {
        char *gdb = knownDbs[i];
        struct sqlConnection *conn = sqlConnect(gdb);
        struct sqlResult *sr;
        char **row;

        sr = sqlGetResult(conn, "NOSQLINJ select kgID,geneSymbol,spID,spDisplayID,refseq,description from kgXref");
        while ((row = sqlNextRow(sr)) != NULL)
        {
            /* One upper-cased copy of the ID is shared as the value in all hashes. */
            char *kgID = cloneString(row[0]);
            touppers(kgID);
            touppers(row[1]);
            hashAdd(nameToKnown, row[1], kgID);
            hashAdd(uniProtToKnown, row[2], kgID);
            hashAdd(uniProtToKnown, row[3], kgID);
            hashAdd(refSeqToKnown, row[4], kgID);
            hashAdd(knownTextHash, kgID, cloneString(row[5]));
        }
        sqlFreeResult(&sr);

        for (q = 0; q < aliasQueryCount; q++)
        {
            sr = sqlGetResult(conn, aliasQueries[q]);
            while ((row = sqlNextRow(sr)) != NULL)
            {
                char *upc = cloneString(row[0]);
                touppers(upc);
                hashAdd(aliasToKnown, row[1], upc);
            }
            sqlFreeResult(&sr);
        }

        /* Release the connection; previously one was leaked per database. */
        sqlDisconnect(&conn);
    }
}
Ejemplo n.º 3
0
void readNameOmim(char *fileName, struct nameOmim **retList, struct hash **retNameOmimHash,
	struct hash **retOmimNameHash)
/* Load a two-column file.  Every row becomes one struct nameOmim that is
 * returned in file order via retList and is reachable from two hashes:
 * *retNameOmimHash is keyed by the upper-cased first column and
 * *retOmimNameHash by the second column as read.  hashAddSaveName() is given
 * the address of el->name / el->omimId, presumably so those fields end up
 * pointing at the key text kept by the hash. */
{
struct lineFile *in = lineFileOpen(fileName, TRUE);
struct hash *byName = newHash(0);
struct hash *byOmim = newHash(0);
struct nameOmim *head = NULL;
char *fields[2];

while (lineFileRow(in, fields))
    {
    struct nameOmim *rec;
    AllocVar(rec);
    slAddHead(&head, rec);
    touppers(fields[0]);
    hashAddSaveName(byName, fields[0], rec, &rec->name);
    hashAddSaveName(byOmim, fields[1], rec, &rec->omimId);
    }
lineFileClose(&in);

/* Records were pushed on the front, so flip them back into file order. */
slReverse(&head);

*retNameOmimHash = byName;
*retOmimNameHash = byOmim;
*retList = head;
}
Ejemplo n.º 4
0
void printGeneText(struct gene *gene, FILE *f)
/* Print extended text associated with gene to f.  The gene's refSeq, uniProt
 * and genbank fields are each tried (all three, unconditionally), then every
 * nameToKnown entry for the upper-cased gene name.  Only if nothing at all
 * matched is the upper-cased name also tried as an alias.  A scratch hash is
 * threaded through the helpers, which apparently use it to avoid repeats. */
{
    struct hash *seen = hashNew(8);
    boolean printed = FALSE;

    if (printGeneFromHashOrAlias(gene->refSeq, refSeqToKnown, aliasToKnown, seen, f))
        printed = TRUE;
    if (printGeneFromHashOrAlias(gene->uniProt, uniProtToKnown, aliasToKnown, seen, f))
        printed = TRUE;
    if (printGeneFromHashOrAlias(gene->genbank, aliasToKnown, aliasToKnown, seen, f))
        printed = TRUE;

    if (gene->name[0] != 0)
    {
        char *upperName = cloneString(gene->name);
        struct hashEl *match;

        touppers(upperName);
        match = hashLookup(nameToKnown, upperName);
        while (match != NULL)
        {
            printed = TRUE;
            uniqPrintGene(match->val, seen, f);
            match = hashLookupNext(match);
        }

        /* Last resort: treat the name itself as an alias. */
        if (!printed)
            printGeneFromHashOrAlias(upperName, aliasToKnown, aliasToKnown, seen, f);

        freeMem(upperName);
    }
    hashFree(&seen);
}
Ejemplo n.º 5
0
void tabPepPred(char *database, int fileCount, char *fileNames[], char *table)
/* Load a tab separated peptide file into table of database.  Exactly one
 * input file is accepted.  The file is first scanned to make sure the first
 * column holds no two names that differ only by case (errAbort otherwise),
 * then handed to loadTableFromTabFile(). */
{
struct hash *uniq;
struct lineFile *lf;
char *words[2];

/* Check the count before touching fileNames[0]; with fileCount == 0 that
 * slot need not hold a usable file name. */
if (fileCount != 1)
    errAbort("Only one file allowed for tab separated peptides");

uniq = newHash(16);
lf = lineFileOpen(fileNames[0], TRUE);

makeCustomTable(database, table, createString);

printf("Processing %s\n", fileNames[0]);
while (lineFileRow(lf, words))
    {
    char *upperCase;
    if (hashLookupUpperCase(uniq, words[0]) != NULL)
	errAbort("Duplicate (case insensitive) '%s' line %d of %s", words[0], lf->lineIx, lf->fileName);
    /* Names are stored upper-cased so later lookups are case-insensitive. */
    upperCase = cloneString(words[0]);
    touppers(upperCase);
    hashAdd(uniq, upperCase, NULL);
    freeMem(upperCase);
    }
lineFileClose(&lf);
printf("Loading %s\n", fileNames[0]);
loadTableFromTabFile(database, table, fileNames[0]);
freeHash(&uniq);
}
Ejemplo n.º 6
0
struct annoRow *aggvIntergenicRow(struct annoGratorGpVar *self, struct variant *variant,
				  boolean *retRJFilterFailed, struct lm *callerLm)
/* Build the output row used for an intergenic variant: the leading columns
 * (as many as the gene source has) are empty strings, and the remaining ones
 * are filled by aggvStringifyGpFx() from a gpFx that carries only the first
 * alternate allele, soNumber intergenic_variant and detailType none.
 * Scratch data comes from self->lm; the returned row is made with callerLm. */
{
struct annoGrator *grator = &(self->grator);
struct annoStreamer *streamer = &(grator->streamer);
int totalCols = streamer->numCols;
int genePredCols = grator->mySource->numCols;
char **words;
lmAllocArray(self->lm, words, totalCols);

// The genePred columns are left blank.
int col = 0;
while (col < genePredCols)
    words[col++] = "";

struct gpFx *fx;
lmAllocVar(self->lm, fx);
char *allele = firstAltAllele(variant->alleles);
fx->allele = allele;
// A purely nucleotide allele is upper-cased in place.
if (isAllNt(allele, strlen(allele)))
    touppers(allele);
fx->soNumber = intergenic_variant;
fx->detailType = none;
aggvStringifyGpFx(&words[genePredCols], fx, self->lm);

// The caller may pass no flag at all; that counts as "did not fail".
boolean rjFail = FALSE;
if (retRJFilterFailed != NULL && *retRJFilterFailed)
    rjFail = TRUE;
return annoRowFromStringArray(variant->chrom, variant->chromStart, variant->chromEnd, rjFail,
			      words, totalCols, callerLm);
}
Ejemplo n.º 7
0
static void addPrimaryIdsToHash(struct sqlConnection *conn, struct hash *hash,
				char *idField, struct slName *tableList,
				struct lm *lm, char *extraWhere)
/* For each table in tableList, query all idField values and add them to hash
 * as upper-cased id -> id in its original case, so that callers can match
 * case-insensitively and still recover the stored spelling.  Empty values are
 * skipped.  The original-case copies are allocated from lm.  If extraWhere is
 * not NULL it is appended to each query as its where clause. */
{
struct slName *table;
struct sqlResult *sr;
char **row;
struct dyString *query = dyStringNew(0);
for (table = tableList;  table != NULL;  table = table->next)
    {
    dyStringClear(query);
    sqlDyStringPrintf(query, "select %s from %s", idField, table->name);
    if (extraWhere != NULL)
	dyStringPrintf(query, " where %s", extraWhere);
    sr = sqlGetResult(conn, query->string);
    while ((row = sqlNextRow(sr)) != NULL)
	{
	if (isNotEmpty(row[0]))
	    {
	    /* Save the original spelling before the row buffer is upper-cased. */
	    char *origCase = lmCloneString(lm, row[0]);
	    touppers(row[0]);
	    hashAdd(hash, row[0], origCase);
	    }
	}
    sqlFreeResult(&sr);
    }
/* The query buffer was previously leaked on every call. */
dyStringFree(&query);
}
Ejemplo n.º 8
0
static void checkTerm(char *term, char *target, enum dbDbMatchType type, struct dbDb *dbDb,
                      struct hash *matchHash, struct dbDbMatch **pMatchList)
/* Compare term with an upper-cased copy of target.  When wordMatchOffset()
 * reports a hit (offset >= 0) the match is passed to addIfFirstMatch().
 * Otherwise, for scientific names only, a term written as an initial plus
 * species ('P. trog' for 'Pan troglodytes') is accepted when the initial
 * equals the first character of the upper-cased target and the species part
 * is a case-insensitive prefix of the target's second word; in that case the
 * negative offset is passed along unchanged. */
{
// Upper-cased copy of target for case-insensitive matching.
int len = strlen(target);
char upper[len + 1];
safencpy(upper, sizeof(upper), target, len);
touppers(upper);

int offset = wordMatchOffset(term, upper);
if (offset >= 0)
    {
    addIfFirstMatch(dbDb, type, offset, upper, term, matchHash, pMatchList);
    return;
    }

// From here on there was no direct match; only "Genus species" names get a second chance.
if (type != ddmtSciName || term[0] != upper[0])
    return;

regmatch_t groups[3];
if (!regexMatchSubstrNoCase(term, "^[a-z](\\.| ) *([a-z]+)", groups, ArraySize(groups)))
    return;

char *termSpecies = term + groups[2].rm_so;
char *targetSpecies = skipLeadingSpaces(skipToSpaces(upper));
// Keep the negative offset since we can't just bold one chunk of target...
if (targetSpecies && startsWithNoCase(termSpecies, targetSpecies))
    addIfFirstMatch(dbDb, type, offset, upper, term, matchHash, pMatchList);
}
void readGbZfin (struct lineFile *gzf)
/* Read in Genbank IDs and ZFIN IDs.  Each tab-separated line supplies a ZFIN
 * ID (words[0]), a marker name (words[1]) and an accession (words[2]).  The
 * marker is looked up by upper-cased name in zfinMarkerHash; when found, the
 * accession becomes its acc field or is appended to it after a comma.
 * Markers that are not in the hash are reported on stderr. */
{
char *words[24], addAcc[20];
struct zfin *gz = NULL;

while (lineFileChopTab(gzf, words) )
    {
    /* copy name and change to upper case */
    /* NOTE(review): this copy is not released here because it is handed to
     * addHashElUnique(), whose handling of the key is not visible from this
     * function - confirm whether it can be freed afterwards. */
    char *name = cloneString(words[1]);
    touppers(name);
    gz = hashFindVal(zfinMarkerHash, name);
    if (gz == NULL)
        {
        fprintf(stderr, "The marker, %s, with ZFIN ID, %s, is not found in the mapping panels \n", words[1], words[0]);
        continue;
        }
    if (gz->acc != NULL)
        {
        /* Append ",<accession>".  The string built by addSuffix() is kept
         * directly; cloning it again, as was done before, leaked the
         * intermediate string on every appended accession. */
        safef(addAcc, sizeof(addAcc), ",%s", words[2]);
        gz->acc = addSuffix(gz->acc, addAcc);
        }
    else
        gz->acc = cloneString(words[2]);
    /* add structure back to hash */
    addHashElUnique(zfinMarkerHash, name, gz);
    }
}
void *addExtensionAndSearch(char *name, struct hash *hash, boolean alias)
/* Search hash for name under several alternative spellings and return the
 * value of the first one found, or NULL if none is found.  For each of the
 * NUMEXT entries of extensions[] the candidates are, in order:
 *   - (alias only) working name + extension, exactly as built
 *   - working name + extension, upper-cased
 *   - working name with its last suffix chopped off, if that differs from name
 *   - (alias only) original-case name with its last suffix chopped off,
 *     if that differs from name
 * The working name is a copy of name, upper-cased when alias is set.
 * The copies made here are not freed by this function. */
{
char *addName = NULL, *newName = NULL, *nameLower = NULL;
void *result = NULL;
boolean found = FALSE;
int i;

addName = cloneString(name);
if (alias)
    {
    /* Despite its name, nameLower is an unmodified copy of name; only
     * addName is upper-cased. */
    nameLower = cloneString(name);
    touppers(addName);
    }

for (i = 0; (i < NUMEXT) && (!found); i++)
    {
    newName = NULL;
    newName = addSuffix(addName, extensions[i]);
    /* for alias, first try newName exactly as built (before it is upper-cased below) */
    if (alias && ((result = hashFindVal(hash, newName)) != NULL) )
        found = TRUE;
    /* change name to upper case and check in hash */
    touppers(newName);
    if (!found && (result = hashFindVal(hash, newName)) != NULL)
       found = TRUE;
    else if (!found)
        {
        /* remove the suffix after the last '.' and compare */
        /* The chop is done in place, so it accumulates: each further pass
         * of the loop works with the already shortened names. */
        chopSuffix(addName);
        if (alias)
            chopSuffix(nameLower);
        if (!sameString(name, addName) && 
              (result = hashFindVal(hash, addName)) != NULL)
            found = TRUE; 
        else if (alias)  /* check also the original-case name */
            {
            if (!sameString(name, nameLower) &&
                (result = hashFindVal(hash, nameLower)) != NULL)
                found = TRUE; 
            }
        }
    }
    /* result may hold a stale NULL from the last failed lookup, so the
     * found flag decides what is returned. */
    if (found)
        return result;
    else
        return NULL;
}
struct dnaSeq *getSkinnySeq(char *sequenceFile, char *chromName)
/* Fetch the whole of chromName from sequenceFile, upper-case it, and mark
 * deletions with '-': every base in [chromStart, chromEnd) of each row of
 * snpTable that lies on chromName is overwritten.  Returns the modified
 * sequence. */
{
    char query[512];
    struct sqlConnection *conn = hAllocConn();
    struct sqlResult *sr;
    char **row;
    struct dnaSeq *seq;
    char *seqPtr = NULL;
    int chromSize = 0;
    int snpCount = 0;

    verbose(1, "sequence file = %s\n", sequenceFile);
    verbose(1, "chrom = %s\n", chromName);
    chromSize = hChromSize(chromName);
    verbose(1, "chromSize = %d\n", chromSize);
    seq = hFetchSeq(sequenceFile, chromName, 0, chromSize);
    touppers(seq->dna);
    seqPtr = seq->dna;

    sqlSafef(query, sizeof(query), "select chrom, chromStart, chromEnd, name from %s", snpTable);

    sr = sqlGetResult(conn, query);
    while ((row = sqlNextRow(sr)) != NULL)
    {
        /* The row fields are used in place.  Cloning chrom and name for every
         * row, as was done before, leaked both copies each time; the name
         * column was never used at all. */
        int start = sqlUnsigned(row[1]);
        int end = sqlUnsigned(row[2]);
        int pos;

        if (!sameString(row[0], chromName))
            continue;

        assert (end < chromSize);
        assert (end > start);

        snpCount++;

        for (pos = start; pos < end; pos++)
            seqPtr[pos] = '-';
    }
    sqlFreeResult(&sr);
    hFreeConn(&conn);

    if (snpCount == 0)
        verbose(1, "no matching SNPs\n");

    return seq;
}
Ejemplo n.º 12
0
struct hashEl *hashLookupUpperCase(struct hash *hash, char *name)
/* Look name up in hash after converting it to upper case, leaving name
 * itself untouched.  Only useful when the keys in hash were stored upper-cased.
 * The name is formatted with safef() into a 256 byte scratch buffer first. */
{
char upper[256];
struct hashEl *hel;

safef(upper, sizeof(upper), "%s", name);
touppers(upper);
hel = hashLookup(hash, upper);
return hel;
}
char *splatAliBasesOnly(char *aligned)
/* Return an upper-cased copy of aligned with every '^' and '-' character
 * removed.  The input is not modified.  FreeMem result when done. */
{
char *copy = cloneString(aligned);

/* Case conversion does not affect '^' or '-', so the order of these steps is free. */
touppers(copy);
stripChar(copy, '^');
stripChar(copy, '-');
return copy;
}
static char *stripSpacesEtc(char *s)
/* Return a newly cloned copy of s, converted to upper case, with all spaces,
 * periods and dashes removed.  s itself is left alone. */
{
static const char unwanted[] = " .-";
const char *c;
char *copy = cloneString(s);

for (c = unwanted; *c != 0; c++)
    stripChar(copy, *c);
touppers(copy);
return copy;
}
Ejemplo n.º 15
0
struct hash *tableToAliasHash(struct sqlConnection *conn, char *table,
	char *query)
/* Run query, which is used as a format string with table as its argument,
 * and return a new hash built from the result: the upper-cased first column
 * is the key and an upper-cased copy of the second column is the value.  The
 * value copies are allocated from the hash's own local memory (hash->lm). */
{
struct hash *aliasHash = hashNew(19);
char sql[256];
struct sqlResult *sr;
char **row;

sqlSafef(sql, sizeof(sql), query, table);
sr = sqlGetResult(conn, sql);
for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
    {
    char *key = row[0];
    char *val = row[1];
    touppers(key);
    touppers(val);
    hashAdd(aliasHash, key, lmCloneString(aliasHash->lm, val));
    }
sqlFreeResult(&sr);
return aliasHash;
}
Ejemplo n.º 16
0
double oligoTm(char *dna, double DNA_nM, double K_mM)
/* Calculate melting point of short DNA sequence given DNA concentration in 
 * nanomoles, and salt concentration in millimoles.  This is calculated using eqn
 * (ii) in Rychlik, Spencer, Roads, Nucleic Acids Research, vol 18, no 21, page
 * 6410, with tables of nearest-neighbor thermodynamics for DNA bases as
 * provided in Breslauer, Frank, Bloecker, and Markey,
 * Proc. Natl. Acad. Sci. USA, vol 83, page 3748.
 * The input is not modified: an upper-cased private copy is scanned.
 * Calls errAbort() if the sequence is not a valid oligo. */
{
    register int dh = 0, ds = 108;
    register char c;
    char *dupe = cloneString(dna);
    char *s = dupe;
    double delta_H, delta_S;

    touppers(s);
    /* Use a finite-state machine (DFA) to calculate dh and ds for s.
     * The first base selects the starting state; anything other than
     * A, G, T, C or N is rejected right away. */
    c = *s; s++;
    if (c == 'A') goto A_STATE;
    else if (c == 'G') goto G_STATE;
    else if (c == 'T') goto T_STATE;
    else if (c == 'C') goto C_STATE;
    else if (c == 'N') goto N_STATE;
    else goto ERROR;
    /* STATE() is a macro defined outside this function; presumably each use
     * expands to the <base>_STATE label jumped to above together with the
     * code that updates dh/ds and moves on to the next state, DONE or ERROR. */
    STATE(A);
    STATE(T);
    STATE(G);
    STATE(C);
    STATE(N);

    DONE:  /* dh and ds are now computed for the given sequence. */
    delta_H = dh * -100.0;  /* 
			     * Nearest-neighbor thermodynamic values for dh
			     * are given in 100 cal/mol of interaction.
			     */
    delta_S = ds * -0.1;     /*
			      * Nearest-neighbor thermodynamic values for ds
			      * are in .1 cal/K per mol of interaction.
			      */

    /* 
     * See Rychlik, Spencer, Roads, Nucleic Acids Research, vol 18, no 21,
     * page 6410, eqn (ii).
     */
    freeMem(dupe);
    return delta_H / (delta_S + 1.987 * log(DNA_nM/4000000000.0))
	- 273.15 + 16.6 * log10(K_mM/1000.0);

    ERROR:  /* 
	  * length of s was less than 2 or there was an illegal character in
	  * s.
	  */
    freeMem(dupe);
    errAbort("Not a valid oligo in oligoTm.");
    return 0;
}
Ejemplo n.º 17
0
boolean mahoneyNameAgrees(char *mName, char *name)
/* Return TRUE if mahoney name agrees with name.  Both names are compared in
 * upper case with spaces, dashes and periods removed.  mName may list several
 * alternatives separated by '/'; it agrees if any one of them equals name.
 * A NULL or empty argument never agrees.  Neither input is modified. */
{
if (mName == NULL || mName[0] == 0 || name == NULL || name[0] == 0)
    return FALSE;
else
    {
    char *mNameDupe = cloneString(mName);
    char *nameDupe = cloneString(name);
    char *s, *e;
    boolean match = FALSE;

    touppers(mNameDupe);
    touppers(nameDupe);
    stripChar(mNameDupe, ' ');
    stripChar(mNameDupe, '-');
    stripChar(mNameDupe, '.');
    stripChar(nameDupe, ' ');
    stripChar(nameDupe, '-');
    /* Both sides must be normalized the same way.  The period used to be
     * stripped from mNameDupe a second time and never from nameDupe, so a
     * name containing '.' could not match. */
    stripChar(nameDupe, '.');
    verbose(2, "mahoneyNameAgrees %s (%s) %s (%s)", mName, mNameDupe, name, nameDupe);

    /* Walk the '/'-separated alternatives, terminating each one in place. */
    s = mNameDupe;
    while (s != NULL && s[0] != 0)
	{
	e = strchr(s, '/');
	if (e != NULL)
	   *e++ = 0;
	if (sameString(s, nameDupe))
	    {
	    match = TRUE;
	    break;
	    }
	s = e;
	}

    verbose(2, "  matches %d\n", match);
    freeMem(mNameDupe);
    freeMem(nameDupe);
    return match;
    }
}
Ejemplo n.º 18
0
void motifLogoAndMatrix(struct dnaSeq **seqs, int count, struct dnaMotif *motif)
/* Print out motif sequence logo and text (possibly with multiple occurrences)
 * as an HTML table on stdout.  seqs holds count sequences; each is upper-cased
 * in place and printed as one table row.  motif may be NULL, in which case
 * only the sequence rows are printed.  If the motif's column count disagrees
 * with the length of the first sequence, a warning is issued and nothing is
 * printed. */
{
// Detect inconsistent motif/pwm tables and suppress confusing display.
// seqs[0] is only looked at when the caller says there is at least one sequence.
if (motif != NULL && seqs != NULL && count > 0
    && motif->columnCount != seqs[0]->size)
    {
    warn("Motif seq length doesn't match PWM\n");
    return;
    }
#define MOTIF_HELP_PAGE "../goldenPath/help/hgRegMotifHelp.html"
printf("<PRE>\n");
printf("<table>\n");
if (motif != NULL)
    {
    struct tempName pngTn;
    dnaMotifMakeProbabalistic(motif);
    makeTempName(&pngTn, "logo", ".png");
    dnaMotifToLogoPng(motif, 47, 140, NULL, "../trash", pngTn.forCgi);
    // Attributes must be separated by white space: colspan='N' align=right
    printf("<tr><td></td><td colspan='%d' align=right><a href=\"%s\" target=_blank>Motif display help</a></td></tr>", 
        motif->columnCount, MOTIF_HELP_PAGE);
    printf("<tr><td></td><td colspan='%d'>", motif->columnCount);
    printf("<IMG SRC=\"%s\" BORDER=1>", pngTn.forHtml);
    printf("</td><td></td></tr>\n");
    }
if (count > 0)
    {
    int i;
    for (i = 0; i < count; i++)
        {
        struct dnaSeq *seq = seqs[i];
        printf("<tr><td></td>");
        touppers(seq->dna);
        printDnaCells(seq->dna, seq->size);
        if (count == 1)
            printf("<td>this occurrence</td></tr>\n");
        else
            // is there a library routine to get 1st, 2nd ...?
            printf("<td>occurrence #%d</td></tr>\n", i + 1);
        }
    }
if (motif != NULL)
    {
    printf("<tr><td></td>");
    printConsensus(motif);
    printf("<td>motif consensus</td></tr>\n");
    dnaMotifPrintProbTable(motif, stdout);
    }
printf("</table>\n");
printf("</PRE>");
}
Ejemplo n.º 19
0
static char *getSearchTermUpperCase()
/* If we don't have the SEARCH_TERM cgi param, exit with an HTTP Bad Request response.
 * If we do, convert it to upper case (in place) for case-insensitive matching
 * and return it. */
{
pushAbortHandler(htmlVaBadRequestAbort);
char *term = cgiOptionalString(SEARCH_TERM);
/* Check before converting: cgiOptionalString() yields NULL when the parameter
 * is absent, and the string must not be walked in that case. */
if (isEmpty(term))
    errAbort("Missing required CGI parameter %s", SEARCH_TERM);
touppers(term);
popAbortHandler();
return term;
}
Ejemplo n.º 20
0
int main(int argc, char *argv[])
/* Arguments: database, output file name, chromosome.  For every evofold row
 * on the chromosome, fetch its sequence, upper-case it, reverse-complement it
 * for '-' strand items, replace T with U, and write one VARNA command line
 * for it to the output file. */
{
char *database;
char *outFn;
struct dnaSeq *seq;

struct sqlConnection *conn2;
char query2[256];
struct sqlResult *sr2;
char **row2;

if (argc != 4) usage();

database = argv[1];
conn2= hAllocConn(database);

outFn   = argv[2];
outf    = mustOpen(outFn, "w");

tgtChrom = argv[3];

sqlSafef(query2, sizeof query2, "select secStr, name, chrom, chromStart, chromEnd, strand from evofold where chrom='%s'", tgtChrom);
sr2 = sqlMustGetResult(conn2, query2);
for (row2 = sqlNextRow(sr2); row2 != NULL; row2 = sqlNextRow(sr2))
    {
    /* These point into the current row and are only used within this pass. */
    secStr   	= row2[0];
    id  	= row2[1];
    chrom 	= row2[2];
    chromStart 	= atoi(row2[3]);
    chromEnd   	= atoi(row2[4]);
    strand     	= *row2[5];
    seq = hChromSeq(database, chrom, chromStart, chromEnd);
    touppers(seq->dna);
    if (strand == '-')
        reverseComplement(seq->dna, seq->size);

    /* The structure is for RNA, so show U rather than T. */
    memSwapChar(seq->dna, seq->size, 'T', 'U');

    safef(javaCmd, sizeof(javaCmd),
       "java -cp VARNAv3-7.jar fr.orsay.lri.varna.applications.VARNAcmd -sequenceDBN %s -structureDBN '%s' -o evoFold/%s/%s.png",
          seq->dna,  secStr, chrom, id);
    
    fprintf(outf, "%s\n", javaCmd);

    /* Each row fetches a fresh sequence; release it instead of leaking it. */
    dnaSeqFree(&seq);
    }
sqlFreeResult(&sr2);

fclose(outf);
hFreeConn(&conn2);
return(0);
}
Ejemplo n.º 21
0
void oneGenieFile(char *fileName, struct hash *uniq, FILE *f)
/* Convert one genie peptide prediction file to tab-separated lines on f: each
 * '>' header contributes its first word (minus the abbr prefix, if abbr is
 * set and matches) followed by a tab, and the lines after it are appended
 * without their final character.  uniq holds upper-cased names already seen;
 * a record whose name repeats, ignoring case, gets a warning and its lines
 * are dropped. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *line;
int lineSize;
boolean sawHeader = FALSE;
boolean skipping = FALSE;

/* Do cursory sanity check, then push the first line back for the main loop. */
if (!lineFileNext(lf, &line, &lineSize))
    errAbort("%s is empty", fileName);
if (line[0] != '>')
    errAbort("%s is badly formatted, doesn't begin with '>'", fileName);
lineFileReuse(lf);

while (lineFileNext(lf, &line, &lineSize))
    {
    char *id;

    if (line[0] != '>')
        {
        /* Sequence line: belongs to the current record unless it is a duplicate. */
        if (!skipping)
            mustWrite(f, line, lineSize-1);
        continue;
        }

    /* Header line: finish the previous record's output line, if there was one. */
    if (sawHeader)
        fputc('\n', f);
    sawHeader = TRUE;

    id = firstWordInLine(line+1);
    if (abbr != NULL && startsWith(abbr, id))
        id += strlen(abbr);

    skipping = (hashLookupUpperCase(uniq, id) != NULL);
    if (skipping)
        warn("Duplicate (case insensitive) '%s' line %d of %s. Ignoring all but first.", id, lf->lineIx, lf->fileName);
    else
        {
        char *upper = cloneString(id);
        touppers(upper);
        hashAdd(uniq, upper, NULL);
        freeMem(upper);
        fprintf(f, "%s\t", id);
        }
    }
fputc('\n', f);
lineFileClose(&lf);
}
Ejemplo n.º 22
0
void htmlPageValidateOrAbort(struct htmlPage *page)
/* Run basic structural checks on page, aborting on the first fatal problem.
 * Pages whose header carries a Content-Type that does not start with
 * "text/html" are not checked at all.  As a side effect every tag name in the
 * page is converted to upper case. */
{
struct htmlTag *tag;
boolean sawTitle = FALSE;
char *contentType = NULL;

if (page == NULL)
    errAbort("Can't validate NULL page");
if (page->header != NULL)
    contentType = hashFindVal(page->header, "Content-Type:");
if (contentType != NULL && !startsWith("text/html", contentType))
    return;

/* To simplify things upper case all tag names. */
for (tag = page->tags; tag != NULL; tag = tag->next)
    touppers(tag->name);

checkExactlyOne(page->tags, "BODY");

/* The page has to open with <HTML>. */
tag = page->tags;
if (tag == NULL)
    errAbort("No tags");
if (!sameWord(tag->name, "HTML"))
    errAbort("Doesn't start with <HTML> tag");

/* A <HEAD> section is optional (warning only); if present, scan to its
 * end, noting whether a title was seen on the way. */
tag = tag->next;
if (tag != NULL && sameWord(tag->name, "HEAD"))
    {
    do
        {
        tag = tag->next;
        if (tag == NULL)
            errAbort("Missing </HEAD>");
        if (sameWord(tag->name, "TITLE"))
            sawTitle = TRUE;
        }
    while (!sameWord(tag->name, "/HEAD"));
    if (!sawTitle)
        warn("No title in <HEAD>");
    validateNestingTags(page, page->tags, tag, headNesters, ArraySize(headNesters));
    tag = tag->next;
    }
else
    warn("<HEAD> tag does not follow <HTML> tag");

/* <BODY> must come next, and validateBody() must leave us on </HTML>. */
if (tag == NULL || !sameWord(tag->name, "BODY"))
    errAbort("<BODY> tag does not follow <HTML> tag");
tag = validateBody(page, tag->next);
if (tag == NULL || !sameWord(tag->name, "/HTML"))
    errAbort("Missing </HTML>");
validateCgiUrls(page);
}
Ejemplo n.º 23
0
void saveEntities(struct dlList *entList, char *dir, char *prefix, char *chrom)
/* Write out list of entities to the GFF file <dir>/<prefix><CHROM>.gff, where
 * <CHROM> is chrom in upper case.  Each entity produces a cdnaCluster line,
 * three lines (splice site, intron, splice site) per intron, and an IG line
 * when findIgRegion() reports a region.  Entities are numbered with a counter
 * that keeps increasing across calls. */
{
FILE *f;
struct dlNode *node;
struct entity *ent;
static int entCount = 0;
struct intron *intron;
int igStart, igEnd, igCount;
char *source = "genieCon";

/* Both buffers are sized from their inputs.  They used to be fixed at 16 and
 * 512 bytes and filled with strcpy/sprintf, which overran them for longer
 * chromosome names or paths. */
size_t chromLen = strlen(chrom);
char upcChrom[chromLen + 1];
strcpy(upcChrom, chrom);
touppers(upcChrom);

/* sizeof("/.gff") covers the '/' separator, the ".gff" suffix and the terminator. */
size_t fileNameSize = strlen(dir) + strlen(prefix) + chromLen + sizeof("/.gff");
char fileName[fileNameSize];
snprintf(fileName, fileNameSize, "%s/%s%s.gff", dir, prefix, upcChrom);

f = mustOpen(fileName, "w");
for (node = entList->head; node->next != NULL; node = node->next)
    {
    ent = node->val;
    ++entCount;
    fprintf(f, "%s\t%s\tcdnaCluster\t%d\t%d\t%d\t%c\t.\tgc%d\n",
        chrom, source, ent->start+1, ent->end, slCount(ent->cdaRefList), ent->strand, entCount);
    for (intron = ent->intronList; intron != NULL; intron = intron->next)
        {
        /* Which end of the intron is the 5' splice site depends on the strand. */
        char *startType, *endType;
        if (ent->strand == '+')
            {
            startType = "splice5";
            endType = "splice3";
            }
        else
            {
            startType = "splice3";
            endType = "splice5";
            }
        fprintf(f, "%s\t%s\t%s\t%d\t%d\t.\t%c\t.\tgc%d\n",
            chrom, source, startType, intron->start, intron->start+1, ent->strand, entCount);
        fprintf(f, "%s\t%s\tintron\t%d\t%d\t.\t%c\t.\tgc%d\n",
            chrom, source, intron->start+1, intron->end, ent->strand, entCount);
        fprintf(f, "%s\t%s\t%s\t%d\t%d\t.\t%c\t.\tgc%d\n",
            chrom, source, endType, intron->end, intron->end+1, ent->strand, entCount);
        }
    if (findIgRegion(ent, &igStart, &igEnd, &igCount))
        {
        fprintf(f, "%s\t%s\tIG\t%d\t%d\t%d\t%c\t.\tafter_gc%d\n",
            chrom, source, igStart+1, igEnd, igCount, ent->strand, entCount);
        }
    }        
fclose(f);
}
Ejemplo n.º 24
0
bool rnaPair(char a, char b)
/* Returns TRUE if a and b can pair, and FALSE otherwise.  The two characters
 * are run through dna2rna() and upper-cased, then compared against each entry
 * of the RNA_PAIRS table, which is scanned up to its terminating 0 entry. */
{
char pair[3];
int i = 0;

pair[0] = a;
pair[1] = b;
pair[2] = '\0';
dna2rna(pair);
touppers(pair);

while (RNA_PAIRS[i] != 0)
    {
    if (pair[0] == RNA_PAIRS[i][0] && pair[1] == RNA_PAIRS[i][1])
	return TRUE;
    i++;
    }
return FALSE;
}
Ejemplo n.º 25
0
void genericOne(char *fileName, struct hash *uniq, FILE *f)
/* Convert one peptide prediction file to tab-separated lines on f: each '>'
 * header contributes a name followed by a tab, and the lines after it are
 * appended without their final character.  The name is the header's first
 * word, minus the abbr prefix when abbr is set and matches, plus suffix when
 * suffix is set.  uniq holds upper-cased names already seen; a name that
 * repeats, ignoring case, is a fatal error. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *line;
int lineSize;
boolean sawHeader = FALSE;
char nameWithSuffix[128];

/* Do cursory sanity check, then push the first line back for the main loop. */
if (!lineFileNext(lf, &line, &lineSize))
    errAbort("%s is empty", fileName);
if (line[0] != '>')
    errAbort("%s is badly formatted, doesn't begin with '>'", fileName);
lineFileReuse(lf);

while (lineFileNext(lf, &line, &lineSize))
    {
    char *id, *upper;

    if (line[0] != '>')
        {
        mustWrite(f, line, lineSize-1);
        continue;
        }

    /* Header line: finish the previous record's output line, if there was one. */
    if (sawHeader)
        fputc('\n', f);
    sawHeader = TRUE;

    id = firstWordInLine(line+1);
    if (abbr != NULL && startsWith(abbr, id))
        id += strlen(abbr);
    if (suffix != NULL)
        {
        safef(nameWithSuffix, sizeof(nameWithSuffix), "%s%s", id, suffix);
        id = nameWithSuffix;
        }

    if (hashLookupUpperCase(uniq, id) != NULL)
        errAbort("Duplicate (case insensitive) '%s' line %d of %s", id, lf->lineIx, lf->fileName);
    upper = cloneString(id);
    touppers(upper);
    hashAdd(uniq, upper, NULL);
    freeMem(upper);
    fprintf(f, "%s\t", id);
    }
fputc('\n', f);
lineFileClose(&lf);
}
Ejemplo n.º 26
0
struct gpFx *gpFxNew(char *allele, char *transcript, enum soTerm soNumber,
		     enum detailType detailType, struct lm *lm)
/* Allocate a gpFx from lm and set the members shared by all effect types;
 * the soTerm-specific members are left for the caller.  The allele is copied,
 * passed through collapseDashes(), and upper-cased when isAllNt() accepts it.
 * The transcript is copied as is. */
{
struct gpFx *fx;
char *alleleCopy;

lmAllocVar(lm, fx);
alleleCopy = collapseDashes(lmCloneString(lm, allele));
fx->allele = alleleCopy;
if (isAllNt(alleleCopy, strlen(alleleCopy)))
    touppers(alleleCopy);
fx->transcript = lmCloneString(lm, transcript);
fx->detailType = detailType;
fx->soNumber = soNumber;
return fx;
}
Ejemplo n.º 27
0
struct hash *hashMahoneys(struct mahoney *list)
/* Put list of mahoneys into hash keyed by mahoney id, where the key is the
 * mtf field written as lower-case hexadecimal ("%x").  As a side effect the
 * genbank field of every list element is converted to upper case in place. */
{ 
struct hash *hash = hashNew(0);
struct mahoney *el;
for (el = list; el != NULL; el = el->next)
    {
    /* Two hex digits per byte of the value plus the terminator.  The buffer
     * used to be 8 bytes, too small for an 8-digit id. */
    char hex[2 * sizeof(unsigned int) + 1];
    touppers(el->genbank);
    safef(hex, sizeof(hex), "%x", el->mtf);
    hashAdd(hash, hex, el);
    }
return hash;
}
Ejemplo n.º 28
0
void doFetch(char *inputFileName, char *sequenceFileName, char *outputFileName)
/* Look up the sequence for each line of inputFileName and write the line back
 * out with the sequence appended as a seventh column.  Input lines must have
 * six tab-separated fields (chrom, start, end, name, score, strand); other
 * lines, and items with start == end, are skipped.  The sequence is read from
 * the twoBit file sequenceFileName, upper-cased, and reverse-complemented for
 * items on the "-" strand. */
{
struct lineFile *lf = NULL;
char *line;
char *row[6];
struct twoBitFile *tbf;
struct dnaSeq *chunk = NULL;
FILE *outputFileHandle = mustOpen(outputFileName, "w");

tbf = twoBitOpen(sequenceFileName);

lf = lineFileOpen(inputFileName, TRUE);
while (lineFileNext(lf, &line, NULL))
    {
    char *fileChrom, *name, *strand;
    int start, end, score;
    int elementCount = chopString(line, "\t", row, ArraySize(row));
    if (elementCount != 6) continue;

    /* The fields are only needed until the next line is read, so they are
     * used in place; cloning three of them per line, as before, leaked them. */
    fileChrom = row[0];
    start = sqlUnsigned(row[1]);
    end = sqlUnsigned(row[2]);
    name = row[3];
    score = sqlUnsigned(row[4]);
    strand = row[5];

    if (start == end) continue;
    assert (end > start);

    chunk = twoBitReadSeqFrag(tbf, fileChrom, start, end);
    touppers(chunk->dna);
    if (sameString(strand, "-"))
        reverseComplement(chunk->dna, chunk->size);
    fprintf(outputFileHandle, "%s\t%d\t%d\t%s\t%d\t%s\t%s\n", fileChrom, start, end, name, score, strand, chunk->dna);
    dnaSeqFree(&chunk);
    }

lineFileClose(&lf);
/* The sequence file was previously left open. */
twoBitClose(&tbf);
carefulClose(&outputFileHandle);
}
Ejemplo n.º 29
0
void oneEnsFile(char *ensFile, struct hash *uniq, struct hash *pToT, FILE *f)
/* Convert one ensemble peptide prediction file to tab-separated lines on f:
 * each '>' header contributes the transcript name followed by a tab, and the
 * lines after it are appended without their final character.  The translation
 * name comes from findEnsTrans() and is mapped to its transcript through
 * pToT.  uniq holds upper-cased translation names already seen; a repeat,
 * ignoring case, or a translation missing from pToT is a fatal error. */
{
struct lineFile *lf = lineFileOpen(ensFile, TRUE);
char *line;
int lineSize;
boolean sawHeader = FALSE;

/* Do cursory sanity check, then push the first line back for the main loop. */
if (!lineFileNext(lf, &line, &lineSize))
    errAbort("%s is empty", ensFile);
if (line[0] != '>')
    errAbort("%s is badly formatted, doesn't begin with '>'", ensFile);
lineFileReuse(lf);

while (lineFileNext(lf, &line, &lineSize))
    {
    char *pepName, *upper, *txName;

    if (line[0] != '>')
        {
        mustWrite(f, line, lineSize-1);
        continue;
        }

    /* Header line: finish the previous record's output line, if there was one. */
    if (sawHeader)
        fputc('\n', f);
    sawHeader = TRUE;

    pepName = findEnsTrans(lf, line);
    if (hashLookupUpperCase(uniq, pepName) != NULL)
        errAbort("Duplicate (case insensitive) '%s' line %d of %s", pepName, lf->lineIx, lf->fileName);
    upper = cloneString(pepName);
    touppers(upper);
    hashAdd(uniq, upper, NULL);
    freeMem(upper);

    txName = hashFindVal(pToT, pepName);
    if (txName == NULL)
        errAbort("Can't find transcript for %s", pepName);
    fprintf(f, "%s\t", txName);
    }
fputc('\n', f);
lineFileClose(&lf);
}
Ejemplo n.º 30
0
static struct hash *upcHashWordsInFile(char *fileName, int hashSize)
/* Return a new hash, created with newHash(hashSize), whose keys are the words
 * of fileName converted to upper case.  Words are split out of each line with
 * nextQuotedWord().  All values are NULL. */
{
struct hash *wordHash = newHash(hashSize);
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *line;

while (lineFileNext(lf, &line, NULL))
    {
    char *word;
    for (word = nextQuotedWord(&line); word != NULL; word = nextQuotedWord(&line))
	{
	touppers(word);
	hashAdd(wordHash, word, NULL);
	}
    }
lineFileClose(&lf);
return wordHash;
}