Example #1
0
static char *boldTerm(char *target, char *term, int offset, enum dbDbMatchType type)
/* Return a string (cloned from a local buffer) with <b>term</b> swapped in for term at offset.
 * If offset is negative and type is ddmtSciName, treat term as an abbreviated species
 * name (term = "G. species", "G species" or "G.species" vs. target = "Genus species"):
 * bold the first letter of the genus and the matching portion of the species.
 * Aborts if offset + strlen(term) runs past the end of target, if offset is negative for
 * any other type, or if target/term do not have the shape the abbreviated form needs. */
{
int termLen = strlen(term);
int targetLen = strlen(target);
if (offset + termLen > targetLen)
    errAbort("boldTerm: invalid offset (%d) for term '%s' (length %d) in target '%s' (length %d)",
             offset, term, termLen, target, targetLen);
else if (offset < 0 && type != ddmtSciName)
    errAbort("boldTerm: negative offset (%d) given for type %d", offset, type);
// Allocate enough to have two bolded chunks:
int resultSize = targetLen + 2*strlen("<b></b>") + 1;
char result[resultSize];
char *p = result;
int size = sizeof(result);
if (offset >= 0)
    {
    // The part of target before the term:
    safeAddN(&p, &size, target, offset);
    // The bolded term:
    safeAdd(&p, &size, "<b>");
    safeAddN(&p, &size, target+offset, termLen);
    safeAdd(&p, &size, "</b>");
    // The rest of the target after the term:
    safeAdd(&p, &size, target+offset+termLen);
    // Accounting tweak -- we allocate enough for two bolded chunks, but use only one here:
    size -= strlen("<b></b>");
    }
else
    {
    // Term is abbreviated scientific name.  Locate the species part of both strings
    // before writing anything, so malformed input is reported instead of crashing.
    char *targetSpecies = skipLeadingSpaces(skipToSpaces(target));
    if (targetSpecies == NULL)
        errAbort("boldTerm: no species found in target '%s' for abbreviated term '%s'",
                 target, term);
    // The species in term follows the genus initial, an optional '.', and any spaces.
    // Don't search for white space to find it: "G.species" contains none.
    char *termSpecies = term;
    if (*termSpecies != '\0')
        termSpecies++;
    if (*termSpecies == '.')
        termSpecies++;
    termSpecies = skipLeadingSpaces(termSpecies);
    termLen = strlen(termSpecies);
    if (termLen > (int)strlen(targetSpecies))
        errAbort("boldTerm: species in term '%s' is longer than species in target '%s'",
                 term, target);
    int targetOffset = targetSpecies - target;
    // bold the first letter of the genus:
    safeAdd(&p, &size, "<b>");
    safeAddN(&p, &size, target, 1);
    safeAdd(&p, &size, "</b>");
    // add the rest of the genus (and the white space after it):
    safeAddN(&p, &size, target+1, targetOffset-1);
    // bold the matching portion of the species:
    safeAdd(&p, &size, "<b>");
    safeAddN(&p, &size, targetSpecies, termLen);
    safeAdd(&p, &size, "</b>");
    // add the rest of the species:
    safeAdd(&p, &size, targetSpecies+termLen);
    }
// Sanity check: every byte except the terminator should have been accounted for.
if (*p != '\0' || size != 1)
    errAbort("boldTerm: bad arithmetic (size is %d, *p is '%c')", size, *p);
return cloneStringZ(result, resultSize);
}
Example #2
0
struct slName *stringToSlNames(char *string)
/* Split string on white space into a list of slNames.  A word that begins with a
 * single or double quote is handed to parseQuotedString, so it may contain spaces
 * and loses its quotes.  Parsing is done on a cloned copy of string, which is
 * released before returning.  Aborts if a quote is never closed. */
{
struct slName *nameList = NULL;
char *workCopy = cloneString(string);
char *word = workCopy;

while ((word = skipLeadingSpaces(word)) != NULL && *word != 0)
    {
    char first = *word;
    char *rest;
    if (first == '\'' || first == '"')
        {
        /* Unquote in place; rest is set to where scanning should resume. */
        if (!parseQuotedString(word, word, &rest))
            errAbort("missing closing %c in %s", first, string);
        }
    else
        {
        rest = skipToSpaces(word);
        if (rest != NULL)
            {
            *rest = 0;
            rest += 1;
            }
        }
    /* Keep the node in a variable rather than nesting the call in slAddHead. */
    struct slName *node = slNameNew(word);
    slAddHead(&nameList, node);
    word = rest;
    }
freeMem(workCopy);
/* Nodes were pushed on the front, so put them back in input order. */
slReverse(&nameList);
return nameList;
}
Example #3
0
struct htmlStatus *htmlStatusParse(char **pText)
/* Read in status from first line.  Update pText to point to next line
 * (or to the terminating zero if there is no newline).
 * Note unlike many routines here, this does not insert zeros into text.
 * Returns NULL, after a warning, if the line has no white space after the
 * version or if the status field does not start with a digit; *pText is
 * still advanced in that case. */
{
char *text = *pText;
char *lineEnd = strchr(text, '\n');
if (lineEnd != NULL)
    *pText = lineEnd + 1;
else
    *pText = text + strlen(text);

char *versionEnd = skipToSpaces(text);
if (versionEnd == NULL)
    {
    warn("Short status line.");
    return NULL;
    }
char *code = skipLeadingSpaces(versionEnd);
/* Cast because isdigit() is undefined for negative char values. */
if (!isdigit((unsigned char)code[0]))
    {
    warn("Not a number in status field");
    return NULL;
    }

/* Allocate only after validation so the failure paths have nothing to free. */
struct htmlStatus *status;
AllocVar(status);
status->version = cloneStringZ(text, versionEnd - text);
status->status = atoi(code);
return status;
}
void gsToUcsc(char *gsName, char *ucscName)
/* Convert from
 *    AC020585.5~1.2 Fragment 2 of 29 (AC020585.5:1..1195)
 * to
 *    AC020585.5_1_2
 * The result is written to ucscName, which must have room for it; it is never
 * longer than gsName.  Only the part after the tilde has '.' turned into '_'.
 * Aborts if gsName contains no '~'. */
{
char *s, *e, *d;
int size;

/* Copy in accession and version. */
d = ucscName;
s = gsName;
e = strchr(s, '~');
if (e == NULL)
    errAbort("Expecting ~ in %s", gsName);
size = e - s;
memcpy(d, s, size);
d += size;

/* Skip over tilde and replace it with _ */
s = e+1;
*d++ = '_';

/* Copy the fragment id, which runs to the first white space or end of string. */
e = skipToSpaces(s);
if (e == NULL)
    e = s + strlen(s);
size = e - s;
memcpy(d, s, size);
d[size] = 0;
/* d points just past the inserted '_', so the accession's own '.' is kept. */
subChar(d, '.', '_');
}
void recNameToFileName(char *dir, char *recName, char *fileName, char *suffix)
/* Convert UCSC style fragment name to name of file for a clone.
 * Writes into fileName: dir (plus a '/' if it does not end in one), then the part
 * of recName before the first '.' (or else before the first white space, or else all
 * of it), then suffix.  A leading '>' and leading white space in recName are skipped.
 * A NULL or empty dir contributes nothing.  The caller must supply a fileName buffer
 * large enough for the result. */
{
char *e;
char *d = fileName;
int size;

/* Start file name with directory if any. */
if (dir != NULL)
    {
    size = strlen(dir);
    memcpy(d, dir, size);
    d += size;
    /* Check size first: dir[size-1] would read before the string when dir is "". */
    if (size > 0 && dir[size-1] != '/')
        *d++ = '/';
    }
if (*recName == '>')
    ++recName;
recName = skipLeadingSpaces(recName);
e = strchr(recName, '.');
if (e == NULL)
    e = skipToSpaces(recName);
if (e == NULL)
    e = recName + strlen(recName);
size = e - recName;
memcpy(d, recName, size);
d += size;
strcpy(d, suffix);
}
Example #6
0
static void checkTerm(char *term, char *target, enum dbDbMatchType type, struct dbDb *dbDb,
                      struct hash *matchHash, struct dbDbMatch **pMatchList)
/* Compare term against an upper-cased copy of target and, if wordMatchOffset finds it,
 * pass the match to addIfFirstMatch with the offset found.  Otherwise, for type
 * ddmtSciName only, accept an abbreviated 'G. species' style term when its species
 * part is a prefix of the target's second word; that match keeps the negative offset. */
{
// Upper-cased copy of target, used for all comparisons below.
int len = strlen(target);
char upper[len + 1];
safencpy(upper, sizeof(upper), target, len);
touppers(upper);

int offset = wordMatchOffset(term, upper);
if (offset >= 0)
    {
    addIfFirstMatch(dbDb, type, offset, upper, term, matchHash, pMatchList);
    return;
    }

// From here on offset is negative; only scientific names sharing term's first letter qualify.
if (type != ddmtSciName || term[0] != upper[0])
    return;

// See if the user entered the term as 'G. species',
// e.g. term 'P. trog' for target 'Pan troglodytes'
regmatch_t groups[3];
if (!regexMatchSubstrNoCase(term, "^[a-z](\\.| ) *([a-z]+)", groups, ArraySize(groups)))
    return;

char *termSpecies = term + groups[2].rm_so;
char *targetSpecies = skipLeadingSpaces(skipToSpaces(upper));
if (targetSpecies != NULL && startsWithNoCase(termSpecies, targetSpecies))
    {
    // Pass the negative offset along since no single chunk of target can be bolded.
    addIfFirstMatch(dbDb, type, offset, upper, term, matchHash, pMatchList);
    }
}
Example #7
0
static void appendFirstWord(struct dyString *buf, char *str)
/* Append to buf the characters of str that come before its first white space,
 * or all of str if it contains no white space. */
{
char *wordEnd = skipToSpaces(str);
if (wordEnd != NULL)
    dyStringAppendN(buf, str, (wordEnd - str));
else
    dyStringAppendN(buf, str, strlen(str));
}
Example #8
0
char *lmCloneFirstWord(struct lm *lm, char *line)
/* Return a copy, made with lm, of the first white-space delimited word in line.
 * Leading white space is skipped.  Returns NULL if skipLeadingSpaces yields NULL. */
{
char *word = skipLeadingSpaces(line);
if (word == NULL)
    return NULL;

char *wordEnd = skipToSpaces(word);
if (wordEnd != NULL)
    return lmCloneStringZ(lm, word, wordEnd - word);
/* No white space after the word: it runs to the end of the string. */
return lmCloneString(lm, word);
}
char *skipWord(char *fw)
/* Skips over current word and the white space after it to the start of the next word.
 * Error for this not to exist: aborts if fw has no white space, or nothing but
 * white space after the first word. */
{
char *s = skipToSpaces(fw);
if (s != NULL)
    s = skipLeadingSpaces(s);
/* Landing on the terminating zero means only white space followed the first word,
 * so there is no second word to return. */
if (s == NULL || s[0] == 0)
    errAbort("Expecting two words in .ra file line %s\n", fw);
return s;
}
boolean gbFaReadNext(struct gbFa *fa)
/* Read the next fasta record header. The sequence is not read until
 * gbFaGetSeq is called.
 * Returns TRUE with fa->id and fa->comment set (both point into fa->headerBuf, except
 * that comment is "" when the header has no white space), or FALSE if end of file is
 * reached before another '>' at the beginning of a line.  Aborts on end of file in
 * the middle of a header line.  fa->recOff and fa->off are updated as bytes are read. */
{
boolean atBOLN = TRUE; /* always stops after a line */
int c;  /* int rather than char so EOF is distinct from every byte value */
char *next;
unsigned iHdr = 0, hdrCap = fa->headerCap;
off_t off = fa->off;

fa->seq = NULL;

/* find next header */
while (((c = getc_unlocked(fa->fh)) != EOF) && !((c == '>') && atBOLN))
    {
    off++;
    atBOLN = (c == '\n');
    }
fa->recOff = off; /* offset of '>' */
fa->off = ++off; /* count '>' */
if (c == EOF)
    return FALSE;

/* read header, up to and including the newline */
while ((c = getc_unlocked(fa->fh)) != EOF)
    {
    off++;
    if (iHdr == hdrCap)
        hdrCap = expandHeader(fa);
    fa->headerBuf[iHdr++] = (char)c;
    if (c == '\n')
        break; /* got it */
    }
fa->off = off;
if (c == EOF)
    errAbort("premature EOF in %s", fa->fileName);
fa->headerBuf[iHdr-1] = '\0';  /* wack newline */

/* Split header into id (up to first white space) and trimmed comment. */
next = fa->headerBuf;
fa->id = next;
next = skipToSpaces(next);
if (next != NULL)
    {
    *next++ = '\0';
    fa->comment = trimSpaces(next);
    }
else
    fa->comment = "";  /* empty string */
return TRUE;
}
Example #11
0
static void parseHeader(struct gff3File *g3f)
/* Parse and validate a GFF3 header: the first line read from g3f->lf must be
 * "##gff-version" followed by white space and "3".  Problems are reported
 * through gff3FileErr.  The line is split in place. */
{
char *line;
if (!lineFileNext(g3f->lf, &line, NULL))
    {
    gff3FileErr(g3f, "empty GFF file, must have header");
    return;  /* line was never set; don't touch it in case gff3FileErr returns */
    }
char *ver = skipToSpaces(line);
if (ver == NULL)
    {
    /* No white space at all, so there is no version; compare against "". */
    ver = line + strlen(line);
    }
else if (*ver != '\0')
    {
    *ver++ = '\0';
    ver = trimSpaces(ver);
    }
if (!(sameString(line, "##gff-version") && sameString(ver, "3")))
    gff3FileErr(g3f, "invalid GFF3 header");
}
Example #12
0
char *lmCloneSomeWord(struct lm *lm, char *line, int wordIx)
/* Return a clone of the space-delimited word number wordIx (counting from zero)
 * within line.  Returns NULL if wordIx is negative or the line runs out while
 * skipping the earlier words. */
{
if (wordIx < 0)
    return NULL;

int wordsToSkip = wordIx;
while (wordsToSkip-- > 0)
    {
    /* Step past any leading white space, then past one word. */
    line = skipToSpaces(skipLeadingSpaces(line));
    if (line == NULL)
        return NULL;
    }
return lmCloneFirstWord(lm, line);
}
Example #13
0
struct slInt *tabRowGuessFixedOffsets(struct slName *lineList, char *fileName)
/* Return our best guess list of starting positions for space-padded fixed
 * width fields.  A column position starts a field if it is non-space in at
 * least one line and follows a position that is space in every line.
 * All lines must be the same length; aborts otherwise (fileName is used only
 * in that message).  Returns NULL for an empty lineList. */
{
struct slInt *offList = NULL, *off;

if (lineList)
    {
    char *spaceRec = cloneString(lineList->name), *s;
    int lineSize = strlen(spaceRec);
    struct slName *line;
    int lineIx = 2;   /* The loop starts at the second line; the first is line 1. */

    /* First 'or' together all lines into spaceRec, which will
     * have a space wherever all columns of all lines are space and
     * non-space elsewhere. */
    for (line = lineList->next; line != NULL; line = line->next, ++lineIx)
        {
        int i;
        size_t curSize;
        s = line->name;
        curSize = strlen(s);
        if (curSize != lineSize)
           errAbort("Line %d of %s has %lu chars, but first line has just %d",
               lineIx, fileName, (unsigned long)curSize, lineSize);
        for (i=0; i<lineSize; ++i)
            {
            if (s[i] != ' ')
                spaceRec[i] = 'X';
            }
        }

    /* Now make up slInt list that describes where words begin */
    s = spaceRec;
    for (;;)
        {
        s = skipLeadingSpaces(s);
        if (s == NULL || s[0] == 0)
            break;
        AllocVar(off);
        off->val = s - spaceRec;
        slAddHead(&offList, off);
        s = skipToSpaces(s);
        }
    slReverse(&offList);
    /* spaceRec was only scratch space; release the clone. */
    freeMem(spaceRec);
    }
return offList;
}
boolean matchName(char *seqHeader)
/* Return whether the name at the start of seqHeader (everything before the
 * first white space) matches namePat according to wildMatch.  seqHeader is
 * cut temporarily at the end of the name and restored before returning. */
{
    char *nameEnd = skipToSpaces(seqHeader);

    /* No white space: the whole header is the name, nothing to cut. */
    if (nameEnd == NULL)
        return wildMatch(namePat, seqHeader);

    char savedChr = *nameEnd;
    *nameEnd = '\0';
    boolean isMatch = wildMatch(namePat, seqHeader);
    *nameEnd = savedChr;
    return isMatch;
}
Example #15
0
struct hash *hashThisEqThatLine(char *line, int lineIx, boolean firstStartsWithLetter)
/* Return a symbol table from a line of form:
 *   1-this1=val1 2-this='quoted val2' var3="another val"
 * If firstStartsWithLetter is true, then the left side of the equals must start with
 * a letter.  Each value is stored as a cloned string.  Parsing is done on a copy of
 * line; lineIx is used only in error messages.  Aborts on a pair without '=' or a
 * quoted value that is never closed. */
{
char *dupe = cloneString(line);
char *s = dupe, c;
char *var, *val;
struct hash *hash = newHash(8);

for (;;)
    {
    if ((var = skipLeadingSpaces(s)) == NULL)
        break;

    if ((c = *var) == 0)
        break;
    /* Cast because isalpha() is undefined for negative char values. */
    if (firstStartsWithLetter && !isalpha((unsigned char)c))
        errAbort("line %d of custom input: variable needs to start with letter '%s'", lineIx, var);
    val = strchr(var, '=');
    if (val == NULL)
        {
        errAbort("line %d of var %s in custom input: %s \n missing = in var/val pair", lineIx, var, line);
        }
    /* Terminate the name at '=' and step to the value. */
    *val++ = 0;
    c = *val;
    if (c == '\'' || c == '"')
        {
        /* Unquote in place; s is set to where the next pair may start. */
        if (!parseQuotedString(val, val, &s))
            errAbort("line %d of input: missing closing %c", lineIx, c);
        }
    else
        {
        s = skipToSpaces(val);
        if (s != NULL) *s++ = 0;
        }
    /* NOTE(review): var points into dupe, which is freed below, so this relies on
     * hashAdd keeping its own copy of the name -- confirm. */
    hashAdd(hash, var, cloneString(val));
    }
freez(&dupe);
return hash;
}
Example #16
0
struct hash *hashVarLine(char *line, int lineIx)
/* Return a symbol table from a line of form:
 *   var1=val1 var2='quoted val2' var3="another val"
 * Every variable name must start with a letter.  Each value is stored as a cloned
 * string.  Parsing is done on a copy of line; lineIx is used only in error messages.
 * Aborts on a name not starting with a letter, a pair without '=', or a quoted
 * value that is never closed. */
{
char *dupe = cloneString(line);
char *s = dupe;
struct hash *hash = newHash(8);

for (;;)
    {
    char *var = skipLeadingSpaces(s);
    if (var == NULL || *var == 0)
        break;
    /* Cast because isalpha() is undefined for negative char values. */
    if (!isalpha((unsigned char)*var))
        errAbort("line %d of custom input: variable needs to start with letter '%s'", lineIx, var);
    char *val = strchr(var, '=');
    if (val == NULL)
        errAbort("line %d of var %s in custom input: %s \n missing = in var/val pair", lineIx, var, line);
    /* Terminate the name at '=' and step to the value. */
    *val++ = 0;
    char quote = *val;
    if (quote == '\'' || quote == '"')
        {
        /* Unquote in place; s is set to where the next pair may start. */
        if (!parseQuotedString(val, val, &s))
            errAbort("line %d of input: missing closing %c", lineIx, quote);
        }
    else
        {
        s = skipToSpaces(val);
        if (s != NULL)
            *s++ = 0;
        }
    /* NOTE(review): var points into dupe, which is freed below, so this relies on
     * hashAdd keeping its own copy of the name -- confirm. */
    hashAdd(hash, var, cloneString(val));
    }
freez(&dupe);
return hash;
}
Example #17
0
static void addXrefIdsToHash(struct sqlConnection *conn, struct hash *hash,
			     char *idField, char *xrefTable, char *xrefIdField,
			     char *aliasField, struct lm *lm, char *extraWhere)
/* Query all id-alias pairs from xrefTable (where id actually appears
 * in curTable) and hash alias -> id.  Convert alias to upper case for
 * case-insensitive matching.
 * Ignore self (alias = id) mappings -- we already got those above. */
{
struct sqlResult *sr;
char **row;
struct dyString *query = dyStringNew(0);
if (sameString(xrefTable, curTable))
    sqlDyStringPrintf(query, "select %s,%s from %s", aliasField, xrefIdField, xrefTable);
else
    /* Get only the aliases for items actually in curTable.idField: */
    sqlDyStringPrintf(query,
	  "select %s.%s,%s.%s from %s,%s where %s.%s = %s.%s",
	  xrefTable, aliasField, xrefTable, xrefIdField,
	  xrefTable, curTable,
	  xrefTable, xrefIdField, curTable, idField);
if (extraWhere != NULL)
    // extraWhere begins w/ID field of curTable=xrefTable.  Skip that field name and
    // use "xrefTable.aliasField" with the IN (...) condition that follows:
    sqlDyStringPrintf(query, " %s %s.%s %-s",
		   (sameString(xrefTable, curTable) ? "where" : "and"),
		   xrefTable, aliasField, skipToSpaces(extraWhere));
sr = sqlGetResult(conn, query->string);
while ((row = sqlNextRow(sr)) != NULL)
    {
    if (sameString(row[0], row[1]))
	continue;
    touppers(row[0]);
    hashAdd(hash, row[0], lmCloneString(lm, row[1]));
    }
sqlFreeResult(&sr);
}
Example #18
0
static bioSeq *nextSeqFromMem(char **pText, boolean isDna, boolean doFilter)
/* Convert fa in memory to bioSeq.  Update *pText to point to next
 * record.  Returns NULL when no more sequences left.
 * The sequence letters are packed in place starting at the original *pText, and
 * the returned seq->dna points there, so the caller's buffer is modified (the
 * header text may be overwritten).  seq->name is a separate clone; it is "" when
 * the record has no '>' line.
 * Only alphabetic characters are kept.  If doFilter is set each one is mapped
 * through ntChars (isDna) or aaChars, and letters that map to 0 become 'n' for
 * DNA or 'X' otherwise. */
{
char *name = "";
char *s, *d;
struct dnaSeq *seq;
int size = 0;
char c;
char *filter = (isDna ? ntChars : aaChars);
char *text = *pText;
char *p = skipLeadingSpaces(text);
if (p == NULL)
    return NULL;
dnaUtilOpen();
if (*p == '>')
    {
    /* Header line: s becomes the start of the sequence (just past the newline),
     * name the first word after '>', cut at its end by writing a zero. */
    char *end;
    s = strchr(p, '\n');
    if (s != NULL) ++s;
    name = skipLeadingSpaces(p+1);
    end = skipToSpaces(name);
    /* The name must start and end before the sequence begins.
     * NOTE(review): if the '>' line has no newline, s is NULL here and these
     * comparisons are against NULL -- confirm that case is meant to abort. */
    if (end >= s || name >= s)
        errAbort("No name in line starting with '>'");
    if (end != NULL)
        *end = 0;
    }
else
    {
    /* No header: sequence starts right away; nothing left means no more records. */
    s = p; 
    if (s == NULL || s[0] == 0)
        return NULL;
    }
/* Clone the name now, before the packing loop below writes over the buffer. */
name = cloneString(name);
    
/* d is the write position (start of buffer), s the read position. */
d = text;
if (s != NULL)
    {
    for (;;)
	{
	c = *s;
	/* Stop at end of text or at the '>' of the next record, leaving s on it. */
	if (c == 0 || c == '>')
	    break;
	++s;
	/* Skip white space, digits, punctuation. */
	if (!isalpha(c))
	    continue;
	if (doFilter)
	    {
	    if ((c = filter[(int)c]) == 0) 
		{
		if (isDna)
		    c = 'n';
		else
		    c = 'X';
		}
	    }
	d[size++] = c;
	}
    }
d[size] = 0;

/* Put sequence into our little sequence structure. */
AllocVar(seq);
seq->name = name;
seq->dna = text;
seq->size = size;
/* Resume next time where the scan stopped. */
*pText = s;
return seq;
}
Example #19
0
void fixLine(struct lineFile *lf, char *line, FILE *f)
/* Fix up a single line and write the result to f.
 * Lines starting with '#' are passed through unchanged.  Other lines must have at
 * least 9 white-space separated fields: the first 8 are written tab-separated,
 * followed by the rest of the line (the "group" field) as is.  Lines whose third
 * field is start_codon or stop_codon are dropped.  line is modified in place.
 * lf is used only for the line number and file name in the error message. */
{
char *group;            /* Last. */
char *words[8];		/* First words. */
int i, wordCount;
char fixStart[16], fixEnd[16];
char *type, *strand;

/* Pass through comments. */
if (line[0] == '#')
    {
    fprintf(f, "%s\n", line);
    return;
    }

/* Find the start of the "group" field. */
group = line;
for (i=0; i<8; ++i)
    {
    group = skipToSpaces(group);
    if (group == NULL)
       errAbort("Expecting at least 9 fields line %d of %s\n", lf->lineIx, lf->fileName);
    group = skipLeadingSpaces(group);
    }

/* Truncate initial string before group field and chop it up. */
group[-1] = 0;
wordCount = chopLine(line, words);
/* A line that begins with white space passes the loop above with only 7 words in
 * front of group, which would leave words[7] unset. */
if (wordCount < 8)
    errAbort("Expecting at least 9 fields line %d of %s\n", lf->lineIx, lf->fileName);

#ifdef FLAKY    /* This doesn't fix all problems, we'll just ignore start/stop_codons. */
/* Fix up start and stop codons. */
type = words[2];
strand = words[6];
if (sameString(type, "start_codon") && sameString(strand, "-"))
    {
    sprintf(fixStart, "%d", atoi(words[3])-3);
    sprintf(fixEnd, "%d", atoi(words[4])-3);
    words[3] = fixStart;
    words[4] = fixEnd;
    }
else if (sameString(type, "stop_codon"))
    {
    /* Start and end reversed on both strands. */
    int start = atoi(words[4]);
    int end = atoi(words[3]);
    if (sameString(strand, "-"))
        {
	start += 3;
	end += 3;
	}
    sprintf(fixStart, "%d", start);
    sprintf(fixEnd, "%d", end);
    words[3] = fixStart;
    words[4] = fixEnd;
    }
#endif /* FLAKY */

/* Skip start/stop codons.  Code will then assume all exons are CDS. */
type = words[2];
if (sameString(type, "start_codon") || sameString(type, "stop_codon"))
    return;

/* Write fixed output. */
for (i=0; i<8; ++i)
    fprintf(f, "%s\t", words[i]);
fprintf(f, "%s\n", group);
}