static char *boldTerm(char *target, char *term, int offset, enum dbDbMatchType type)
/* Return a clone of target with <b>...</b> wrapped around the matched text.
 * If offset is non-negative, the strlen(term) characters of target starting at
 * offset are bolded.
 * If offset is negative and type is ddmtSciName, term is treated as an abbreviated
 * species name (term = "G. species" vs. target = "Genus species"): the first letter
 * of the genus is bolded, and so is the leading part of the species that is as long
 * as the species part of term.
 * errAborts if offset + strlen(term) runs past the end of target, if offset is
 * negative for any other type, or if the final size bookkeeping does not add up. */
{
int termLen = strlen(term);
int targetLen = strlen(target);
if (offset + termLen > targetLen)
    errAbort("boldTerm: invalid offset (%d) for term '%s' (length %d) in target '%s' (length %d)",
             offset, term, termLen, target, targetLen);
else if (offset < 0 && type != ddmtSciName)
    errAbort("boldTerm: negative offset (%d) given for type %d", offset, type);

// Room for target, two <b></b> pairs, and the terminator:
int resultSize = targetLen + 2*strlen("<b></b>") + 1;
char result[resultSize];
char *out = result;
int room = sizeof(result);

if (offset < 0)
    {
    // Abbreviated scientific name.  Locate the species word in both strings first.
    char *targetSpecies = skipLeadingSpaces(skipToSpaces(target));
    char *termSpecies = skipLeadingSpaces(skipToSpaces(term));
    int genusLen = targetSpecies - target;   // genus plus the whitespace after it
    int speciesLen = strlen(termSpecies);
    // First letter of the genus, bolded:
    safeAdd(&out, &room, "<b>");
    safeAddN(&out, &room, target, 1);
    safeAdd(&out, &room, "</b>");
    // Remainder of the genus, up to the start of the species:
    safeAddN(&out, &room, target+1, genusLen-1);
    // Matching part of the species, bolded:
    safeAdd(&out, &room, "<b>");
    safeAddN(&out, &room, targetSpecies, speciesLen);
    safeAdd(&out, &room, "</b>");
    // Whatever is left of the species:
    safeAdd(&out, &room, targetSpecies+speciesLen);
    }
else
    {
    char *match = target + offset;
    // Text preceding the match:
    safeAddN(&out, &room, target, offset);
    // The match itself, bolded:
    safeAdd(&out, &room, "<b>");
    safeAddN(&out, &room, match, termLen);
    safeAdd(&out, &room, "</b>");
    // Text following the match:
    safeAdd(&out, &room, match+termLen);
    // Only one of the two budgeted <b></b> pairs was used; adjust so the check below holds:
    room -= strlen("<b></b>");
    }

// Exactly the terminator's byte should remain, and out should be sitting on it.
if (*out != '\0' || room != 1)
    errAbort("boldTerm: bad arithmetic (size is %d, *p is '%c')", room, *out);
return cloneStringZ(result, resultSize);
}
struct slName *stringToSlNames(char *string)
/* Split string into a list of slNames.  Items are separated by white space,
 * except that an item starting with a single or double quote is parsed with
 * parseQuotedString, so it may contain several words; the quotes are stripped.
 * Works on a private copy, so string itself is not modified.
 * errAborts if a quoted item has no closing quote. */
{
struct slName *list = NULL;
char *dupe = cloneString(string);
char *word = skipLeadingSpaces(dupe);

while (word != NULL && *word != 0)
    {
    char first = *word;
    char *next;
    struct slName *name;
    if (first != '\'' && first != '"')
        {
        /* Plain word: cut it off at the next white space, if there is any. */
        next = skipToSpaces(word);
        if (next != NULL)
            *next++ = 0;
        }
    else
        {
        /* Quoted item: unquote in place; next is set to where parsing stopped. */
        if (!parseQuotedString(word, word, &next))
            errAbort("missing closing %c in %s", first, string);
        }
    name = slNameNew(word);
    slAddHead(&list, name);
    word = skipLeadingSpaces(next);
    }

freeMem(dupe);
slReverse(&list);
return list;
}
struct htmlStatus *htmlStatusParse(char **pText)
/* Read in status from first line.  Update pText to point to next line
 * (or to the terminating zero if there is no newline).
 * Note unlike many routines here, this does not insert zeros into text.
 * The text up to the first white space becomes status->version, and the
 * number that follows becomes status->status.
 * Returns NULL after a warning if there is no white space after the version
 * or if what follows does not start with a digit.  *pText is advanced even
 * in those cases. */
{
char *text = *pText;
char *end = strchr(text, '\n');
char *code;
struct htmlStatus *status;

if (end != NULL)
    *pText = end+1;
else
    *pText = text + strlen(text);

end = skipToSpaces(text);
if (end == NULL)
    {
    warn("Short status line.");
    return NULL;
    }

/* Validate the status code before allocating anything, so that the
 * failure path has nothing to clean up. */
code = skipLeadingSpaces(end);
if (!isdigit((unsigned char)code[0]))
    {
    warn("Not a number in status field");
    return NULL;
    }

AllocVar(status);
status->version = cloneStringZ(text, end-text);
status->status = atoi(code);
return status;
}
void gsToUcsc(char *gsName, char *ucscName)
/* Convert from
 *    AC020585.5~1.2 Fragment 2 of 29 (AC020585.5:1..1195)
 * to
 *    AC020585.5_1_2
 * Everything before the '~' is copied unchanged.  The '~' becomes '_', and
 * in the word that follows it (up to the first white space or end of string)
 * every '.' becomes '_'.
 * The result is written to ucscName with no bounds checking, so the caller
 * must supply a big enough buffer.  errAborts if gsName contains no '~'. */
{
char *s, *e, *d;
int size;

/* Copy in accession and version. */
d = ucscName;
s = gsName;
e = strchr(s, '~');
if (e == NULL)
    errAbort("Expecting ~ in %s", gsName);
size = e - s;
memcpy(d, s, size);
d += size;

/* Skip over tilde and replace it with _ */
s = e+1;
*d++ = '_';

/* Copy the fragment id and terminate the output. */
e = skipToSpaces(s);
if (e == NULL)
    e = s + strlen(s);
size = e - s;
memcpy(d, s, size);
d[size] = 0;

/* d still points at the start of the fragment id, so the '.' in the
 * accession.version part is left alone. */
subChar(d, '.', '_');
}
void recNameToFileName(char *dir, char *recName, char *fileName, char *suffix)
/* Convert UCSC style fragment name to name of file for a clone.
 * The result, written to fileName, is:
 *    dir (followed by '/' if it does not already end in one)
 *    + recName up to its first '.', or if there is none up to its first
 *      white space, or if there is none all of it
 *    + suffix
 * A single leading '>' and any leading white space in recName are skipped.
 * A NULL or empty dir contributes nothing to the result.
 * No bounds checking is done on fileName; the caller must supply a big
 * enough buffer. */
{
char *e;
char *d = fileName;
int size;

/* Start file name with directory if any. */
if (dir != NULL)
    {
    size = strlen(dir);
    /* Guard on size so that an empty dir never reads dir[-1]. */
    if (size > 0)
        {
        memcpy(d, dir, size);
        d += size;
        if (dir[size-1] != '/')
            *d++ = '/';
        }
    }

if (*recName == '>')
    ++recName;
recName = skipLeadingSpaces(recName);

/* Find the end of the clone part of the name. */
e = strchr(recName, '.');
if (e == NULL)
    e = skipToSpaces(recName);
if (e == NULL)
    e = recName + strlen(recName);

size = e - recName;
memcpy(d, recName, size);
d += size;
strcpy(d, suffix);
}
static void checkTerm(char *term, char *target, enum dbDbMatchType type, struct dbDb *dbDb, struct hash *matchHash, struct dbDbMatch **pMatchList) /* If target starts with term (case-insensitive), and target is not already in matchHash, * add target to matchHash and add a new match to pMatchList. */ { // Make uppercase version of target for case-insensitive matching. int targetLen = strlen(target); char targetUpcase[targetLen + 1]; safencpy(targetUpcase, sizeof(targetUpcase), target, targetLen); touppers(targetUpcase); int offset = wordMatchOffset(term, targetUpcase); if (offset >= 0) { addIfFirstMatch(dbDb, type, offset, targetUpcase, term, matchHash, pMatchList); } else if (offset < 0 && type == ddmtSciName && term[0] == targetUpcase[0]) { // For scientific names ("Genus species"), see if the user entered the term as 'G. species' // e.g. term 'P. trog' for target 'Pan troglodytes' regmatch_t substrArr[3]; if (regexMatchSubstrNoCase(term, "^[a-z](\\.| ) *([a-z]+)", substrArr, ArraySize(substrArr))) { char *termSpecies = term + substrArr[2].rm_so; char *targetSpecies = skipLeadingSpaces(skipToSpaces(targetUpcase)); if (targetSpecies && startsWithNoCase(termSpecies, targetSpecies)) { // Keep the negative offset since we can't just bold one chunk of target... addIfFirstMatch(dbDb, type, offset, targetUpcase, term, matchHash, pMatchList); } } } }
static void appendFirstWord(struct dyString *buf, char *str)
/* Append to buf the text of str up to its first white space, or all of str
 * if it has no white space. */
{
char *space = skipToSpaces(str);
char *wordEnd = (space != NULL) ? space : str + strlen(str);
dyStringAppendN(buf, str, (wordEnd - str));
}
char *lmCloneFirstWord(struct lm *lm, char *line)
/* Clone the first white-space-delimited word of line into lm.  Leading white
 * space is skipped.  If no white space follows the word, the rest of the line
 * is cloned.  Returns NULL if skipLeadingSpaces returns NULL for line. */
{
char *word = skipLeadingSpaces(line);
if (word == NULL)
    return NULL;

char *wordEnd = skipToSpaces(word);
if (wordEnd != NULL)
    return lmCloneStringZ(lm, word, wordEnd - word);
return lmCloneString(lm, word);
}
char *skipWord(char *fw)
/* Skips over current word to start of next.
 * Error for this not to exist: errAborts if fw has no white space after its
 * first word, or if nothing but white space follows it. */
{
char *s = skipToSpaces(fw);
if (s != NULL)
    s = skipLeadingSpaces(s);
/* skipLeadingSpaces stops on the terminating zero when only white space is
 * left, so an empty remainder has to be checked for as well as NULL. */
if (s == NULL || s[0] == 0)
    errAbort("Expecting two words in .ra file line %s\n", fw);
return s;
}
boolean gbFaReadNext(struct gbFa *fa) /* read the next fasta record header. The sequence is not read until * gbFaGetSeq is called */ { boolean atBOLN = TRUE; /* always stops after a line */ char c, *next; unsigned iHdr = 0, hdrCap = fa->headerCap; off_t off = fa->off; fa->seq = NULL; /* find next header */ while (((c = getc_unlocked(fa->fh)) != EOF) && !((c == '>') && atBOLN)) { off++; atBOLN = (c == '\n'); } fa->recOff = off; /* offset of '>' */ fa->off = ++off; /* count '>' */ if (c == EOF) return FALSE; /* read header */ while ((c = getc_unlocked(fa->fh)) != EOF) { off++; if (iHdr == hdrCap) hdrCap = expandHeader(fa); fa->headerBuf[iHdr++] = c; if (c == '\n') break; /* got it */ } fa->off = off; if (c == EOF) errAbort("premature EOF in %s", fa->fileName); fa->headerBuf[iHdr-1] = '\0'; /* wack newline */ next = fa->headerBuf; fa->id = next; next = skipToSpaces(next); if (next != NULL) { *next++ = '\0'; fa->comment = trimSpaces(next); } else fa->comment = ""; /* empty string */ return TRUE; }
static void parseHeader(struct gff3File *g3f)
/* Parse and validate a GFF3 header.  The first line of the file must be
 * "##gff-version" followed by white space and "3"; anything else, including
 * an empty file, is reported through gff3FileErr. */
{
char *line;
if (!lineFileNext(g3f->lf, &line, NULL))
    gff3FileErr(g3f, "empty GFF file, must have header");

/* Split the line into the directive and the version that follows it. */
char *ver = skipToSpaces(line);
if (ver == NULL)
    ver = "";  /* no white space on the line, so no version was given */
else if (*ver != '\0')
    {
    *ver++ = '\0';
    ver = trimSpaces(ver);
    }
if (!(sameString(line, "##gff-version") && sameString(ver, "3")))
    gff3FileErr(g3f, "invalid GFF3 header");
}
char *lmCloneSomeWord(struct lm *lm, char *line, int wordIx)
/* Return a clone, made with lmCloneFirstWord, of the space-delimited word at
 * index wordIx (0 for the first word) within line.  Returns NULL if wordIx is
 * negative or if the line runs out while the preceding words are skipped. */
{
if (wordIx < 0)
    return NULL;

char *pos = line;
int skipped = 0;
while (skipped < wordIx)
    {
    /* Step past one word: over its leading white space, then to its end. */
    pos = skipToSpaces(skipLeadingSpaces(pos));
    if (pos == NULL)
        return NULL;
    ++skipped;
    }
return lmCloneFirstWord(lm, pos);
}
struct slInt *tabRowGuessFixedOffsets(struct slName *lineList, char *fileName) /* Return our best guess list of starting positions for space-padded fixed * width fields. */ { struct slInt *offList = NULL, *off; if (lineList) { char *spaceRec = cloneString(lineList->name), *s; int lineSize = strlen(spaceRec); struct slName *line; int lineIx=1; /* First 'or' together all lines into spaceRec, which will * have a space wherever all columns of all lines are space and * non-space elsewhere. */ for (line = lineList->next; line != NULL; line = line->next, ++lineIx) { int i; s = line->name; if (strlen(s) != lineSize) errAbort("Line %d of %s has %lu chars, but first line has just %d", lineIx, fileName, (unsigned long)strlen(s), lineSize); for (i=0; i<lineSize; ++i) { if (s[i] != ' ') spaceRec[i] = 'X'; } } /* Now make up slInt list that describes where words begin */ s = spaceRec; for (;;) { s = skipLeadingSpaces(s); if (s == NULL || s[0] == 0) break; AllocVar(off); off->val = s - spaceRec; slAddHead(&offList, off); s = skipToSpaces(s); } slReverse(&offList); } return offList; }
boolean matchName(char *seqHeader)
/* See if the sequence name, which is the part of seqHeader before the first
 * white space, matches namePat according to wildMatch.  seqHeader is
 * terminated at the end of the name during the comparison and restored
 * afterwards. */
{
char *nameEnd = skipToSpaces(seqHeader);
if (nameEnd == NULL)
    {
    /* No white space: the whole header is the name. */
    return wildMatch(namePat, seqHeader);
    }

char saved = *nameEnd;
*nameEnd = '\0';       /* terminate name */
boolean isMatch = wildMatch(namePat, seqHeader);
*nameEnd = saved;      /* put the separator back */
return isMatch;
}
struct hash *hashThisEqThatLine(char *line, int lineIx, boolean firstStartsWithLetter)
/* Return a symbol table from a line of form:
 *   1-this1=val1 2-this='quoted val2' var3="another val"
 * If firstStartsWithLetter is true, then the left side of the equals must start with
 * a letter.
 * A value that starts with a single or double quote is parsed with
 * parseQuotedString; any other value ends at the next white space.
 * Parsing is done on a private copy, so line is not modified.  lineIx is
 * used only in error messages.
 * errAborts on a variable with no '=', on a missing closing quote, and,
 * when firstStartsWithLetter is set, on a variable not starting with a letter. */
{
char *dupe = cloneString(line);
char *s = dupe, c;
char *var, *val;
struct hash *hash = newHash(8);

for (;;)
    {
    if ((var = skipLeadingSpaces(s)) == NULL)
        break;
    if ((c = *var) == 0)
        break;
    /* Cast first: isalpha is undefined for negative char values. */
    if (firstStartsWithLetter && !isalpha((unsigned char)c))
        errAbort("line %d of custom input: variable needs to start with letter '%s'", lineIx, var);
    val = strchr(var, '=');
    if (val == NULL)
        {
        errAbort("line %d of var %s in custom input: %s \n missing = in var/val pair", lineIx, var, line);
        }
    *val++ = 0;  /* terminate var; val now points just past the '=' */
    c = *val;
    if (c == '\'' || c == '"')
        {
        /* Unquote in place; s is set to where parsing should resume. */
        if (!parseQuotedString(val, val, &s))
            errAbort("line %d of input: missing closing %c", lineIx, c);
        }
    else
        {
        /* A NULL s here means end of line, and ends the loop on the next pass. */
        s = skipToSpaces(val);
        if (s != NULL)
            *s++ = 0;
        }
    hashAdd(hash, var, cloneString(val));
    }
freez(&dupe);
return hash;
}
struct hash *hashVarLine(char *line, int lineIx) /* Return a symbol table from a line of form: * var1=val1 var2='quoted val2' var3="another val" */ { char *dupe = cloneString(line); char *s = dupe, c; char *var, *val; struct hash *hash = newHash(8); for (;;) { if ((var = skipLeadingSpaces(s)) == NULL) break; if ((c = *var) == 0) break; if (!isalpha(c)) errAbort("line %d of custom input: variable needs to start with letter '%s'", lineIx, var); val = strchr(var, '='); if (val == NULL) { errAbort("line %d of var %s in custom input: %s \n missing = in var/val pair", lineIx, var, line); } *val++ = 0; c = *val; if (c == '\'' || c == '"') { if (!parseQuotedString(val, val, &s)) errAbort("line %d of input: missing closing %c", lineIx, c); } else { s = skipToSpaces(val); if (s != NULL) *s++ = 0; } hashAdd(hash, var, cloneString(val)); } freez(&dupe); return hash; }
static void addXrefIdsToHash(struct sqlConnection *conn, struct hash *hash, char *idField, char *xrefTable, char *xrefIdField, char *aliasField, struct lm *lm, char *extraWhere) /* Query all id-alias pairs from xrefTable (where id actually appears * in curTable) and hash alias -> id. Convert alias to upper case for * case-insensitive matching. * Ignore self (alias = id) mappings -- we already got those above. */ { struct sqlResult *sr; char **row; struct dyString *query = dyStringNew(0); if (sameString(xrefTable, curTable)) sqlDyStringPrintf(query, "select %s,%s from %s", aliasField, xrefIdField, xrefTable); else /* Get only the aliases for items actually in curTable.idField: */ sqlDyStringPrintf(query, "select %s.%s,%s.%s from %s,%s where %s.%s = %s.%s", xrefTable, aliasField, xrefTable, xrefIdField, xrefTable, curTable, xrefTable, xrefIdField, curTable, idField); if (extraWhere != NULL) // extraWhere begins w/ID field of curTable=xrefTable. Skip that field name and // use "xrefTable.aliasField" with the IN (...) condition that follows: sqlDyStringPrintf(query, " %s %s.%s %-s", (sameString(xrefTable, curTable) ? "where" : "and"), xrefTable, aliasField, skipToSpaces(extraWhere)); sr = sqlGetResult(conn, query->string); while ((row = sqlNextRow(sr)) != NULL) { if (sameString(row[0], row[1])) continue; touppers(row[0]); hashAdd(hash, row[0], lmCloneString(lm, row[1])); } sqlFreeResult(&sr); }
static bioSeq *nextSeqFromMem(char **pText, boolean isDna, boolean doFilter)
/* Convert fa in memory to bioSeq.  Update *pText to point to next
 * record.  Returns NULL when no more sequences left.
 * The sequence is compacted in place: its letters are written over the start
 * of the text, and the returned seq->dna points there, so the text buffer is
 * modified and must outlive the sequence.  The name is a separate clone.
 * Non-letters are dropped.  If doFilter is set, each letter is mapped through
 * ntChars (isDna) or aaChars, and letters that map to 0 become 'n' or 'X'.
 * errAborts if a '>' line has no name or is not followed by a newline. */
{
char *name = "";
char *s, *d;
struct dnaSeq *seq;
int size = 0;
char c;
char *filter = (isDna ? ntChars : aaChars);
char *text = *pText;
char *p = skipLeadingSpaces(text);
if (p == NULL)
    return NULL;
dnaUtilOpen();
if (*p == '>')
    {
    char *end;
    /* s is the start of the line after the header, or NULL if there is none. */
    s = strchr(p, '\n');
    if (s != NULL)
        ++s;
    name = skipLeadingSpaces(p+1);
    end = skipToSpaces(name);
    /* The name has to begin and end within the header line.  The NULL cases
     * are tested explicitly rather than left to pointer comparison with NULL. */
    if (s == NULL || end == NULL || end >= s || name >= s)
        errAbort("No name in line starting with '>'");
    if (end != NULL)
        *end = 0;
    }
else
    {
    s = p;
    if (s == NULL || s[0] == 0)
        return NULL;
    }
/* Clone the name now: the copy loop below writes over the start of text. */
name = cloneString(name);

d = text;
if (s != NULL)
    {
    for (;;)
        {
        c = *s;
        if (c == 0 || c == '>')
            break;
        ++s;
        /* Cast first: isalpha is undefined for negative char values. */
        if (!isalpha((unsigned char)c))
            continue;
        if (doFilter)
            {
            /* Assumes the filter tables have an entry for every unsigned
             * char value -- TODO confirm against their declarations. */
            if ((c = filter[(unsigned char)c]) == 0)
                {
                if (isDna)
                    c = 'n';
                else
                    c = 'X';
                }
            }
        d[size++] = c;
        }
    }
d[size] = 0;

/* Put sequence into our little sequence structure. */
AllocVar(seq);
seq->name = name;
seq->dna = text;
seq->size = size;
*pText = s;
return seq;
}
void fixLine(struct lineFile *lf, char *line, FILE *f) /* Fix up a single line. */ { char *group; /* Last. */ char *words[8]; /* First words. */ int i; char fixStart[16], fixEnd[16]; char *type, *strand; /* Pass through comments. */ if (line[0] == '#') { fprintf(f, "%s\n", line); return; } /* Find the start of the "group" field. */ group = line; for (i=0; i<8; ++i) { group = skipToSpaces(group); if (group == NULL) errAbort("Expecting at least 9 fields line %d of %s\n", lf->lineIx, lf->fileName); group = skipLeadingSpaces(group); } /* Truncate initial string before group field and chop it up. */ group[-1] = 0; chopLine(line, words); #ifdef FLAKY /* This doesn't fix all problems, we'll just ignore start/stop_codons. */ /* Fix up start and stop codons. */ type = words[2]; strand = words[6]; if (sameString(type, "start_codon") && sameString(strand, "-")) { sprintf(fixStart, "%d", atoi(words[3])-3); sprintf(fixEnd, "%d", atoi(words[4])-3); words[3] = fixStart; words[4] = fixEnd; } else if (sameString(type, "stop_codon")) { /* Start and end reversed on both strands. */ int start = atoi(words[4]); int end = atoi(words[3]); if (sameString(strand, "-")) { start += 3; end += 3; } sprintf(fixStart, "%d", start); sprintf(fixEnd, "%d", end); words[3] = fixStart; words[4] = fixEnd; } #endif /* FLAKY */ /* Skip start/stop codons. Code will then assume all exons are CDS. */ type = words[2]; if (sameString(type, "start_codon") || sameString(type, "stop_codon")) return; /* Write fixed output. */ for (i=0; i<8; ++i) fprintf(f, "%s\t", words[i]); fprintf(f, "%s\n", group); }