static int eqn_do_define(struct eqn_node *ep) { const char *start; size_t sz; struct eqn_def *def; int i; if (NULL == (start = eqn_nextrawtok(ep, &sz))) { EQN_MSG(MANDOCERR_EQNEOF, ep); return(0); } /* * Search for a key that already exists. * Create a new key if none is found. */ if (NULL == (def = eqn_def_find(ep, start, sz))) { /* Find holes in string array. */ for (i = 0; i < (int)ep->defsz; i++) if (0 == ep->defs[i].keysz) break; if (i == (int)ep->defsz) { ep->defsz++; ep->defs = mandoc_realloc (ep->defs, ep->defsz * sizeof(struct eqn_def)); ep->defs[i].key = ep->defs[i].val = NULL; } ep->defs[i].keysz = sz; ep->defs[i].key = mandoc_realloc (ep->defs[i].key, sz + 1); memcpy(ep->defs[i].key, start, sz); ep->defs[i].key[(int)sz] = '\0'; def = &ep->defs[i]; } start = eqn_next(ep, ep->data[(int)ep->cur], &sz, 0); if (NULL == start) { EQN_MSG(MANDOCERR_EQNEOF, ep); return(0); } def->valsz = sz; def->val = mandoc_realloc(def->val, sz + 1); memcpy(def->val, start, sz); def->val[(int)sz] = '\0'; return(1); }
static int argv_multi(struct mdoc *mdoc, int line, struct mdoc_argv *v, int *pos, char *buf) { enum margserr ac; char *p; for (v->sz = 0; ; v->sz++) { if ('-' == buf[*pos]) break; ac = args(mdoc, line, pos, buf, ARGSFL_NONE, &p); if (ARGS_ERROR == ac) return(0); else if (ARGS_EOLN == ac) break; if (0 == v->sz % MULTI_STEP) v->value = mandoc_realloc(v->value, (v->sz + MULTI_STEP) * sizeof(char *)); v->value[(int)v->sz] = mandoc_strdup(p); } return(1); }
/*
 * Maintain a function-local, NUL-terminated stack of characters.
 *   c == '\0'      leave the stack untouched
 *   c == (char)-1  pop the topmost character (stack must be non-empty)
 *   anything else  push c, growing the storage 8 bytes at a time
 * Returns the current stack contents as a string, or "" if no
 * storage has been allocated yet.
 */
static const char *
md_stack(char c)
{
	static char	*stk;
	static size_t	 cap;
	static size_t	 depth;

	if (c == (char)-1) {
		assert(depth);
		depth--;
		stk[depth] = '\0';
	} else if (c != '\0') {
		/* Keep room for the new byte and the terminator. */
		if (depth + 1 >= cap) {
			cap += 8;
			stk = mandoc_realloc(stk, cap);
		}
		stk[depth++] = c;
		stk[depth] = '\0';
	}

	if (stk == NULL)
		return "";
	return stk;
}
/*
 * Enlarge a buffer: double its recorded size if that size already
 * exceeds half of `initial', otherwise set it to `initial'; then
 * reallocate the storage to the new size.
 */
static void
resize_buf(struct buf *buf, size_t initial)
{

	if (buf->sz > initial / 2)
		buf->sz = 2 * buf->sz;
	else
		buf->sz = initial;

	buf->buf = mandoc_realloc(buf->buf, buf->sz);
}
/*
 * Make sure p->maxcols is larger than sz and reallocate p->buf to
 * hold that many ints.  An unset (zero) maxcols starts at 1024;
 * it is then quadrupled until it exceeds sz.
 */
static void
adjbuf(struct termp *p, int sz)
{

	if (p->maxcols == 0)
		p->maxcols = 1024;

	while (p->maxcols <= sz)
		p->maxcols <<= 2;

	p->buf = mandoc_realloc(p->buf,
	    sizeof(int) * (size_t)p->maxcols);
}
/*
 * Ensure that sz more bytes fit behind psmargcur in the psmarg
 * buffer.  If they already fit, nothing happens; otherwise the
 * buffer size is increased by sz, but never by less than
 * PS_BUFSLOP, and the buffer is reallocated.
 */
static void
ps_growbuf(struct termp *p, size_t sz)
{
	size_t	 step;

	if (p->ps->psmargcur + sz > p->ps->psmargsz) {
		step = sz;
		if (step < PS_BUFSLOP)
			step = PS_BUFSLOP;

		p->ps->psmargsz += step;
		p->ps->psmarg = mandoc_realloc(p->ps->psmarg,
		    p->ps->psmargsz);
	}
}
/*
 * Append to the heap string *sql, whose current length is *sz.
 * If count is greater than 1, the first byte of newstr is appended
 * count times; otherwise the whole of newstr is appended once.
 * *sz is advanced and the result is always NUL-terminated.
 */
static void
sql_append(char **sql, size_t *sz, const char *newstr, int count)
{
	const int	 repeat = count > 1;
	size_t		 addsz;

	addsz = repeat ? (size_t)count : strlen(newstr);

	/* One extra byte for the terminator. */
	*sql = mandoc_realloc(*sql, *sz + addsz + 1);

	if (repeat)
		memset(*sql + *sz, *newstr, (size_t)count);
	else
		memcpy(*sql + *sz, newstr, addsz);

	*sz += addsz;
	(*sql)[*sz] = '\0';
}
/*
 * Grow the buffer (if necessary) and copy in a binary string.
 * buf->len is the number of bytes in use, buf->size the number of
 * bytes allocated.  After the call, the sz bytes at cp have been
 * appended and buf->len has been advanced by sz.
 */
static void
buf_appendb(struct buf *buf, const void *cp, size_t sz)
{

	/*
	 * Overshoot by MANDOC_BUFSZ.
	 * One resize always suffices, so no loop is needed.
	 */
	if (buf->len + sz >= buf->size) {
		buf->size = buf->len + sz + MANDOC_BUFSZ;
		buf->cp = mandoc_realloc(buf->cp, buf->size);
	}

	/* Use the size_t offset directly; a cast to int would truncate. */
	memcpy(buf->cp + buf->len, cp, sz);
	buf->len += sz;
}
/*
 * Push a font onto the font queue and switch the output to it.
 * The queue storage grows 8 bytes at a time.  After storing the
 * font, an empty word is printed, followed by the "\f" escape and
 * the font character, and MMAN_spc is cleared in outflags.
 */
static void
font_push(char newfont)
{
	size_t	 slot;

	/*
	 * Work with an index rather than advancing the tail pointer
	 * first: mandoc_realloc() may move the storage, and a tail
	 * pointer computed beforehand would then dangle.
	 */
	slot = (size_t)(fontqueue.tail - fontqueue.head) + 1;
	if (slot >= fontqueue.size) {
		fontqueue.size += 8;
		fontqueue.head = mandoc_realloc(fontqueue.head,
		    fontqueue.size);
	}
	fontqueue.tail = fontqueue.head + slot;
	*fontqueue.tail = newfont;

	print_word("");
	printf("\\f");
	putchar(newfont);
	outflags &= ~MMAN_spc;
}
/*
 * Append a binary value to a database entry.  This can be invoked
 * multiple times; the buffer is automatically resized.
 * key->size is the number of bytes in use, *ksz the number of bytes
 * allocated for key->data.  key->data must already be allocated.
 */
static void
dbt_appendb(DBT *key, size_t *ksz, const void *cp, size_t sz)
{

	assert(key->data);

	/*
	 * Overshoot by MANDOC_BUFSZ.
	 * One resize always suffices, so no loop is needed.
	 */
	if (key->size + sz >= *ksz) {
		*ksz = key->size + sz + MANDOC_BUFSZ;
		key->data = mandoc_realloc(key->data, *ksz);
	}

	/*
	 * The data pointer is used as a generic pointer above; do the
	 * offset arithmetic on a char pointer, which is well defined
	 * in standard C, and do not narrow the offset to int.
	 */
	memcpy((char *)key->data + key->size, cp, sz);
	key->size += sz;
}
/*
 * Scan for indexable paths.
 * Reads "manpath.conf" line by line and appends each line (without
 * its trailing newline) to the req->p array.  The process prints a
 * diagnostic, calls pg_error_internal() and exits if the file
 * cannot be opened, if a line fails validate_urifrag(), if a line
 * contains a slash, or if no line was read at all.
 */
static void
pathgen(struct req *req)
{
	FILE	*fp;
	char	*dp;
	size_t	 dpsz;
	ssize_t	 len;

	if (NULL == (fp = fopen("manpath.conf", "r"))) {
		fprintf(stderr, "%s/manpath.conf: %s\n",
			MAN_DIR, strerror(errno));
		pg_error_internal();
		exit(EXIT_FAILURE);
	}

	dp = NULL;
	dpsz = 0;

	while ((len = getline(&dp, &dpsz, fp)) != -1) {
		if (dp[len - 1] == '\n')
			dp[--len] = '\0';
		req->p = mandoc_realloc(req->p,
		    (req->psz + 1) * sizeof(char *));
		if ( ! validate_urifrag(dp)) {
			fprintf(stderr, "%s/manpath.conf contains "
			    "unsafe path \"%s\"\n", MAN_DIR, dp);
			pg_error_internal();
			exit(EXIT_FAILURE);
		}
		if (NULL != strchr(dp, '/')) {
			fprintf(stderr, "%s/manpath.conf contains "
			    "path with slash \"%s\"\n", MAN_DIR, dp);
			pg_error_internal();
			exit(EXIT_FAILURE);
		}
		/* The array takes over the line; get a fresh buffer. */
		req->p[req->psz++] = dp;
		dp = NULL;
		dpsz = 0;
	}
	free(dp);

	/* Done reading: do not leak the stream. */
	fclose(fp);

	if ( req->p == NULL ) {
		fprintf(stderr, "%s/manpath.conf is empty\n", MAN_DIR);
		pg_error_internal();
		exit(EXIT_FAILURE);
	}
}
enum rofferr eqn_read(struct eqn_node **epp, int ln, const char *p, int pos, int *offs) { size_t sz; struct eqn_node *ep; enum rofferr er; ep = *epp; /* * If we're the terminating mark, unset our equation status and * validate the full equation. */ if (0 == strncmp(p, ".EN", 3)) { er = eqn_end(epp); p += 3; while (' ' == *p || '\t' == *p) p++; if ('\0' == *p) return(er); mandoc_vmsg(MANDOCERR_ARG_SKIP, ep->parse, ln, pos, "EN %s", p); return(er); } /* * Build up the full string, replacing all newlines with regular * whitespace. */ sz = strlen(p + pos) + 1; ep->data = mandoc_realloc(ep->data, ep->sz + sz + 1); /* First invocation: nil terminate the string. */ if (0 == ep->sz) *ep->data = '\0'; ep->sz += sz; strlcat(ep->data, p + pos, ep->sz + 1); strlcat(ep->data, " ", ep->sz + 1); return(ROFF_IGN); }
/*
 * Add a directory to the array, ignoring bad directories.
 * Grow the array one-by-one for simplicity's sake.
 * The directory is resolved with realpath(3); directories that
 * cannot be resolved are skipped silently.  The resolved name is
 * what gets stored, so it is also what the duplicate check uses.
 */
static void
manpath_add(struct manpaths *dirs, const char *dir)
{
	char		 buf[PATH_MAX];
	char		*cp;
	int		 i;

	if (NULL == (cp = realpath(dir, buf)))
		return;

	/*
	 * Compare against the resolved name: the array holds resolved
	 * names, so comparing with the name as given would miss
	 * duplicates reached through symlinks or relative paths.
	 */
	for (i = 0; i < dirs->sz; i++)
		if (0 == strcmp(dirs->paths[i], cp))
			return;

	dirs->paths = mandoc_realloc(dirs->paths,
	    ((size_t)dirs->sz + 1) * sizeof(char *));

	dirs->paths[dirs->sz++] = mandoc_strdup(cp);
}
int tbl_cdata(struct tbl_node *tbl, int ln, const char *p, int pos) { struct tbl_dat *dat; size_t sz; dat = tbl->last_span->last; if (p[pos] == 'T' && p[pos + 1] == '}') { pos += 2; if (p[pos] == tbl->opts.tab) { tbl->part = TBL_PART_DATA; pos++; while (p[pos] != '\0') getdata(tbl, tbl->last_span, ln, p, &pos); return 1; } else if (p[pos] == '\0') { tbl->part = TBL_PART_DATA; return 1; } /* Fallthrough: T} is part of a word. */ } dat->pos = TBL_DATA_DATA; dat->block = 1; if (dat->string != NULL) { sz = strlen(p + pos) + strlen(dat->string) + 2; dat->string = mandoc_realloc(dat->string, sz); (void)strlcat(dat->string, " ", sz); (void)strlcat(dat->string, p + pos, sz); } else dat->string = mandoc_strdup(p + pos); if (dat->layout->pos == TBL_CELL_DOWN) mandoc_msg(MANDOCERR_TBLDATA_SPAN, tbl->parse, ln, pos, dat->string); return 0; }
/*
 * Scan for indexable paths.
 * Reads "manpath.conf" line by line and appends a copy of each line
 * (without its trailing newline) to the req->p array.  The process
 * prints a diagnostic, calls pg_error_internal() and exits if the
 * file cannot be opened, if a line fails validate_urifrag(), if a
 * line contains a slash, or if no line was read at all.
 */
static void
pathgen(struct req *req)
{
	FILE	*fp;
	char	*dp;
	size_t	 dpsz;

	if (NULL == (fp = fopen("manpath.conf", "r"))) {
		fprintf(stderr, "%s/manpath.conf: %s\n",
			MAN_DIR, strerror(errno));
		pg_error_internal();
		exit(EXIT_FAILURE);
	}

	while (NULL != (dp = fgetln(fp, &dpsz))) {
		if ('\n' == dp[dpsz - 1])
			dpsz--;
		req->p = mandoc_realloc(req->p,
		    (req->psz + 1) * sizeof(char *));
		/* fgetln() storage is transient; keep a private copy. */
		dp = mandoc_strndup(dp, dpsz);
		if ( ! validate_urifrag(dp)) {
			fprintf(stderr, "%s/manpath.conf contains "
			    "unsafe path \"%s\"\n", MAN_DIR, dp);
			pg_error_internal();
			exit(EXIT_FAILURE);
		}
		if (NULL != strchr(dp, '/')) {
			fprintf(stderr, "%s/manpath.conf contains "
			    "path with slash \"%s\"\n", MAN_DIR, dp);
			pg_error_internal();
			exit(EXIT_FAILURE);
		}
		req->p[req->psz++] = dp;
	}

	/* Done reading: do not leak the stream. */
	fclose(fp);

	if ( req->p == NULL ) {
		fprintf(stderr, "%s/manpath.conf is empty\n", MAN_DIR);
		pg_error_internal();
		exit(EXIT_FAILURE);
	}
}
/* ARGSUSED */ int tbl_cdata(struct tbl_node *tbl, int ln, const char *p) { struct tbl_dat *dat; size_t sz; int pos; pos = 0; dat = tbl->last_span->last; if (p[pos] == 'T' && p[pos + 1] == '}') { pos += 2; if (p[pos] == tbl->opts.tab) { tbl->part = TBL_PART_DATA; pos++; return(data(tbl, tbl->last_span, ln, p, &pos)); } else if ('\0' == p[pos]) { tbl->part = TBL_PART_DATA; return(1); } /* Fallthrough: T} is part of a word. */ } dat->pos = TBL_DATA_DATA; if (dat->string) { sz = strlen(p) + strlen(dat->string) + 2; dat->string = mandoc_realloc(dat->string, sz); strlcat(dat->string, " ", sz); strlcat(dat->string, p, sz); } else dat->string = mandoc_strdup(p); if (TBL_CELL_DOWN == dat->layout->pos) mandoc_msg(MANDOCERR_TBLIGNDATA, tbl->parse, ln, pos, NULL); return(0); }
/* * Scan for indexable paths. * This adds all paths with "etc/catman.conf" to the buffer. */ static void pathgen(DIR *dir, char *path, struct req *req) { struct dirent *d; char *cp; DIR *cd; int rc; size_t sz, ssz; sz = strlcat(path, "/", PATH_MAX); if (sz >= PATH_MAX) { fprintf(stderr, "%s: Path too long", path); return; } /* * First, scan for the "etc" directory. * If it's found, then see if it should cause us to stop. This * happens when a catman.conf is found in the directory. */ rc = 0; while (0 == rc && NULL != (d = readdir(dir))) { if (DT_DIR != d->d_type || strcmp(d->d_name, "etc")) continue; path[(int)sz] = '\0'; ssz = strlcat(path, d->d_name, PATH_MAX); if (ssz >= PATH_MAX) { fprintf(stderr, "%s: Path too long", path); return; } else if (NULL == (cd = opendir(path))) { perror(path); return; } rc = pathstop(cd); closedir(cd); } if (rc > 0) { /* This also strips the trailing slash. */ path[(int)--sz] = '\0'; req->p = mandoc_realloc (req->p, (req->psz + 1) * sizeof(struct paths)); /* * Strip out the leading "./" unless we're just a ".", * in which case use an empty string as our name. */ req->p[(int)req->psz].path = mandoc_strdup(path); req->p[(int)req->psz].name = cp = mandoc_strdup(path + (1 == sz ? 1 : 2)); req->psz++; /* * The name is just the path with all the slashes taken * out of it. Simple but effective. */ for ( ; '\0' != *cp; cp++) if ('/' == *cp) *cp = ' '; return; } /* * If no etc/catman.conf was found, recursively enter child * directory and continue scanning. */ rewinddir(dir); while (NULL != (d = readdir(dir))) { if (DT_DIR != d->d_type || '.' == d->d_name[0]) continue; path[(int)sz] = '\0'; ssz = strlcat(path, d->d_name, PATH_MAX); if (ssz >= PATH_MAX) { fprintf(stderr, "%s: Path too long", path); return; } else if (NULL == (cd = opendir(path))) { perror(path); return; } pathgen(cd, path, req); closedir(cd); } }
/*
 * Scan through all entries in the index file `idx' and prune those
 * entries in `ofile'.
 * Pruning consists of removing from `db', then invalidating the entry
 * in `idx' (zeroing its value size).
 *
 * Record numbers of entries that are (or have just become)
 * zero-sized are collected in recs->stack, with recs->cur counting
 * them.  On return, recs->last is one more than the record number
 * of the last index entry visited.
 * The process exits with MANDOCLEVEL_SYSERR if a database operation
 * fails or an entry is malformed.
 */
static void
index_prune(const struct of *ofile, struct mdb *mdb,
		struct recs *recs, const char *basedir)
{
	const struct of	*of;
	const char	*fn;
	uint64_t	 vbuf[2];
	unsigned	 seq, sseq;
	DBT		 key, val;
	int		 ch;

	recs->cur = 0;
	seq = R_FIRST;
	while (0 == (ch = (*mdb->idx->seq)(mdb->idx, &key, &val, seq))) {
		seq = R_NEXT;
		/* The key of an index entry is its record number. */
		assert(sizeof(recno_t) == key.size);
		memcpy(&recs->last, key.data, key.size);

		/* Deleted records are zero-sized.  Skip them. */

		if (0 == val.size)
			goto cont;

		/*
		 * Make sure we're sane.
		 * Read past our mdoc/man/cat type to the next string,
		 * then make sure it's bounded by a NUL.
		 * Failing any of these, we go into our error handler.
		 */

		fn = (char *)val.data + 1;
		if (NULL == memchr(fn, '\0', val.size - 1))
			break;

		/*
		 * Search for the file in those we care about.
		 * XXX: build this into a tree.  Too slow.
		 */

		for (of = ofile->first; of; of = of->next)
			if (0 == strcmp(fn, of->fname))
				break;

		/* Not a file to prune: leave the entry alone. */
		if (NULL == of)
			continue;

		/*
		 * Search through the keyword database, throwing out all
		 * references to our file.
		 * Each keyword value is expected to be two 64-bit words,
		 * the second being the big-endian record number.
		 *
		 * NOTE(review): this scan reuses `key' and `val', so
		 * `fn' and the `key' passed to put() below refer to
		 * whatever the last seq() call left behind -- confirm
		 * that this is safe with the database library in use.
		 */

		sseq = R_FIRST;
		while (0 == (ch = (*mdb->db->seq)(mdb->db,
					&key, &val, sseq))) {
			sseq = R_NEXT;
			if (sizeof(vbuf) != val.size)
				break;

			memcpy(vbuf, val.data, val.size);
			if (recs->last != betoh64(vbuf[1]))
				continue;

			if ((ch = (*mdb->db->del)(mdb->db,
					&key, R_CURSOR)) < 0)
				break;
		}

		/* Only ch == 1 counts as a clean end of the scan. */
		if (ch < 0) {
			perror(mdb->dbn);
			exit((int)MANDOCLEVEL_SYSERR);
		} else if (1 != ch) {
			fprintf(stderr, "%s: corrupt database\n",
					mdb->dbn);
			exit((int)MANDOCLEVEL_SYSERR);
		}

		if (verb)
			printf("%s: Deleting from index: %s\n",
					basedir, fn);

		/* Invalidate the index entry by zeroing its size. */
		val.size = 0;
		ch = (*mdb->idx->put)(mdb->idx, &key, &val, R_CURSOR);

		if (ch < 0)
			break;
cont:
		/* Remember the record number of the zero-sized entry. */
		if (recs->cur >= recs->size) {
			recs->size += MANDOC_SLOP;
			recs->stack = mandoc_realloc(recs->stack,
					recs->size * sizeof(recno_t));
		}

		recs->stack[(int)recs->cur] = recs->last;
		recs->cur++;
	}

	/* Only ch == 1 counts as a clean end of the scan. */
	if (ch < 0) {
		perror(mdb->idxn);
		exit((int)MANDOCLEVEL_SYSERR);
	} else if (1 != ch) {
		fprintf(stderr, "%s: corrupt index\n", mdb->idxn);
		exit((int)MANDOCLEVEL_SYSERR);
	}

	recs->last++;
}
/*
 * Set or extend an entry in the key/value list *r.
 * `name' and `string' are given with explicit lengths namesz and
 * stringsz.  If no entry with the given name exists, a new one is
 * put at the head of the list.  If `multiline' is zero, an existing
 * value is discarded first; otherwise the new content is appended
 * and followed by a newline.  A NULL `string' creates or clears the
 * entry without storing a value.  While copying, each pair of
 * backslashes is reduced to a single backslash.
 */
static void
roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
		const char *string, size_t stringsz, int multiline)
{
	struct roffkv	*n;
	char		*c;
	size_t		 i;
	size_t		 oldch, newch;

	/*
	 * Search for an existing string with the same name.
	 * The name is only namesz bytes long and need not be
	 * NUL-terminated there, so compare lengths and bytes
	 * instead of relying on strcmp().
	 */
	n = *r;

	while (n != NULL && ((size_t)n->key.sz != namesz ||
	    strncmp(n->key.p, name, namesz) != 0))
		n = n->next;

	if (NULL == n) {
		/* Create a new string table entry. */
		n = mandoc_malloc(sizeof(struct roffkv));
		n->key.p = mandoc_strndup(name, namesz);
		n->key.sz = namesz;
		n->val.p = NULL;
		n->val.sz = 0;
		n->next = *r;
		*r = n;
	} else if (0 == multiline) {
		/* In multiline mode, append; else replace. */
		free(n->val.p);
		n->val.p = NULL;
		n->val.sz = 0;
	}

	if (NULL == string)
		return;

	/*
	 * One additional byte for the '\n' in multiline mode,
	 * and one for the terminating '\0'.
	 */
	newch = stringsz + (multiline ? 2u : 1u);

	if (NULL == n->val.p) {
		n->val.p = mandoc_malloc(newch);
		*n->val.p = '\0';
		oldch = 0;
	} else {
		oldch = n->val.sz;
		n->val.p = mandoc_realloc(n->val.p, oldch + newch);
	}

	/* Skip existing content in the destination buffer. */
	c = n->val.p + oldch;

	/* Append new content to the destination buffer. */
	i = 0;
	while (i < stringsz) {
		/*
		 * Rudimentary roff copy mode:
		 * Handle escaped backslashes.
		 * Only look ahead while still inside the string.
		 */
		if (i + 1 < stringsz &&
		    '\\' == string[i] && '\\' == string[i + 1])
			i++;
		*c++ = string[i++];
	}

	/* Append terminating bytes. */
	if (multiline)
		*c++ = '\n';

	*c = '\0';
	n->val.sz = (int)(c - n->val.p);
}
/*
 * Walk a man(7) syntax tree depth-first, looking for the NAME
 * section, and record the manual names (TYPE_Nm) and the
 * description (TYPE_Nd) found in it.
 * The identifiers hash, buf, dbuf and n used below are not declared
 * in this function; presumably MAN_ARGS supplies them as
 * parameters -- TODO confirm against the macro definition.
 * Returns 1 to stop the walk, 0 otherwise.
 */
static int
pman_node(MAN_ARGS)
{
	const struct man_node *head, *body;
	char		*start, *sv, *title;
	size_t		 sz, titlesz;

	if (NULL == n)
		return(0);

	/*
	 * We're only searching for one thing: the first text child in
	 * the BODY of a NAME section.  Since we don't keep track of
	 * sections in -man, run some hoops to find out whether we're in
	 * the correct section or not.
	 */

	if (MAN_BODY == n->type && MAN_SH == n->tok) {
		body = n;
		assert(body->parent);
		if (NULL != (head = body->parent->head) &&
				1 == head->nchild &&
				NULL != (head = (head->child)) &&
				MAN_TEXT == head->type &&
				0 == strcmp(head->string, "NAME") &&
				NULL != (body = body->child) &&
				MAN_TEXT == body->type) {

			title = NULL;
			titlesz = 0;

			/*
			 * Suck the entire NAME section into memory.
			 * Yes, we might run away.
			 * But too many manuals have big, spread-out
			 * NAME sections over many lines.
			 * Each text node is followed by one blank.
			 */

			for ( ; NULL != body; body = body->next) {
				if (MAN_TEXT != body->type)
					break;
				if (0 == (sz = strlen(body->string)))
					continue;
				title = mandoc_realloc
					(title, titlesz + sz + 1);
				memcpy(title + titlesz, body->string, sz);
				titlesz += sz + 1;
				title[(int)titlesz - 1] = ' ';
			}
			if (NULL == title)
				return(0);

			title = mandoc_realloc(title, titlesz + 1);
			title[(int)titlesz] = '\0';

			/* Skip leading space.  */

			sv = title;
			while (isspace((unsigned char)*sv))
				sv++;

			if (0 == (sz = strlen(sv))) {
				free(title);
				return(0);
			}

			/* Erase trailing space. */

			start = &sv[sz - 1];
			while (start > sv && isspace((unsigned char)*start))
				*start-- = '\0';

			/* At most one character is left: give up. */
			if (start == sv) {
				free(title);
				return(0);
			}

			start = sv;

			/*
			 * Go through a special heuristic dance here.
			 * This is why -man manuals are great!
			 * (I'm being sarcastic: my eyes are bleeding.)
			 * Conventionally, one or more manual names are
			 * comma-specified prior to a whitespace, then a
			 * dash, then a description.  Try to puzzle out
			 * the name parts here.
			 */

			for ( ;; ) {
				sz = strcspn(start, " ,");
				if ('\0' == start[(int)sz])
					break;

				/* Store the name, NUL-terminated. */
				buf->len = 0;
				buf_appendb(buf, start, sz);
				buf_appendb(buf, "", 1);

				hash_put(hash, buf, TYPE_Nm);

				/* A blank ends the list of names. */
				if (' ' == start[(int)sz]) {
					start += (int)sz + 1;
					break;
				}

				assert(',' == start[(int)sz]);
				start += (int)sz + 1;
				while (' ' == *start)
					start++;
			}

			buf->len = 0;

			/* No delimiter at all: one word, no description. */
			if (sv == start) {
				buf_append(buf, start);
				free(title);
				return(1);
			}

			while (isspace((unsigned char)*start))
				start++;

			/* Skip one leading dash, in any of these spellings. */
			if (0 == strncmp(start, "-", 1))
				start += 1;
			else if (0 == strncmp(start, "\\-\\-", 4))
				start += 4;
			else if (0 == strncmp(start, "\\-", 2))
				start += 2;
			else if (0 == strncmp(start, "\\(en", 4))
				start += 4;
			else if (0 == strncmp(start, "\\(em", 4))
				start += 4;

			while (' ' == *start)
				start++;

			/* The rest is the description, NUL included. */
			sz = strlen(start) + 1;
			buf_appendb(dbuf, start, sz);
			buf_appendb(buf, start, sz);

			hash_put(hash, buf, TYPE_Nd);
			free(title);
		}
	}

	/* Descend into the children; stop at the first hit. */
	for (n = n->child; n; n = n->next)
		if (pman_node(hash, buf, dbuf, n))
			return(1);

	return(0);
}
/*
 * Parse a formatted manual page.
 * By necessity, this involves rather crude guesswork.
 * The page name (TYPE_Nm) is always taken from of->title.  The
 * description (TYPE_Nd) is taken from the indented lines following
 * the first section header; it is also appended to dbuf.  If no
 * such lines are found, the page name doubles as the description.
 * If the file cannot be opened, a warning is issued and nothing is
 * recorded.
 */
static void
pformatted(DB *hash, struct buf *buf, struct buf *dbuf,
		const struct of *of, const char *basedir)
{
	FILE		*stream;
	char		*line, *p, *title;
	size_t		 len, plen, titlesz;

	if (NULL == (stream = fopen(of->fname, "r"))) {
		WARNING(of->fname, basedir, "%s", strerror(errno));
		return;
	}

	/*
	 * Always use the title derived from the filename up front,
	 * do not even try to find it in the file.  This also makes
	 * sure we don't end up with an orphan index record, even if
	 * the file content turns out to be completely unintelligible.
	 */

	buf->len = 0;
	buf_append(buf, of->title);
	hash_put(hash, buf, TYPE_Nm);

	/* Skip to first blank line. */

	while (NULL != (line = fgetln(stream, &len)))
		if ('\n' == *line)
			break;

	/*
	 * Assume the first line that is not indented
	 * is the first section header.  Skip to it.
	 */

	while (NULL != (line = fgetln(stream, &len)))
		if ('\n' != *line && ' ' != *line)
			break;

	/*
	 * Read up until the next section into a buffer.
	 * Strip the leading and trailing newline from each read line,
	 * appending a trailing space.
	 * Ignore empty (whitespace-only) lines.
	 */

	titlesz = 0;
	title = NULL;

	while (NULL != (line = fgetln(stream, &len))) {
		if (' ' != *line || '\n' != line[len - 1])
			break;
		while (len > 0 && isspace((unsigned char)*line)) {
			line++;
			len--;
		}
		/*
		 * The newline is whitespace, too, so a blank line has
		 * been stripped down to nothing at this point.  It must
		 * be skipped, or the write of the trailing space below
		 * would land in front of the buffer.
		 */
		if (len <= 1)
			continue;
		title = mandoc_realloc(title, titlesz + len);
		memcpy(title + titlesz, line, len);
		titlesz += len;
		title[titlesz - 1] = ' ';
	}

	/*
	 * If no page content can be found, or the input line
	 * is already the next section header, or there is no
	 * trailing newline, reuse the page title as the page
	 * description.
	 */

	if (NULL == title || '\0' == *title) {
		WARNING(of->fname, basedir,
			"Cannot find NAME section");
		/* Copy the bytes in use, not the allocated capacity. */
		buf_appendb(dbuf, buf->cp, buf->len);
		hash_put(hash, buf, TYPE_Nd);
		fclose(stream);
		free(title);
		return;
	}

	title = mandoc_realloc(title, titlesz + 1);
	title[titlesz] = '\0';

	/*
	 * Skip to the first dash.
	 * Use the remaining line as the description.
	 */

	if (NULL != (p = strstr(title, "- "))) {
		for (p += 2; ' ' == *p || '\b' == *p; p++)
			/* Skip to next word. */ ;
	} else {
		WARNING(of->fname, basedir,
			"No dash in title line");
		p = title;
	}

	plen = strlen(p);

	/*
	 * Strip backspace-encoding from line: a backspace is removed
	 * together with the character in front of it; a backspace at
	 * the very beginning is removed alone.
	 */

	while (NULL != (line = memchr(p, '\b', plen))) {
		len = line - p;
		if (0 == len) {
			memmove(line, line + 1, plen--);
			continue;
		}
		memmove(line - 1, line + 1, plen - len);
		plen -= 2;
	}

	/* Store the description including its terminating NUL. */
	buf_appendb(dbuf, p, plen + 1);
	buf->len = 0;
	buf_appendb(buf, p, plen + 1);
	hash_put(hash, buf, TYPE_Nd);
	fclose(stream);
	free(title);
}
/* * Duplicate an input string, making the appropriate character * conversations (as stipulated by `tr') along the way. * Returns a heap-allocated string with all the replacements made. */ char * roff_strdup(const struct roff *r, const char *p) { const struct roffkv *cp; char *res; const char *pp; size_t ssz, sz; enum mandoc_esc esc; if (NULL == r->xmbtab && NULL == r->xtab) return(mandoc_strdup(p)); else if ('\0' == *p) return(mandoc_strdup("")); /* * Step through each character looking for term matches * (remember that a `tr' can be invoked with an escape, which is * a glyph but the escape is multi-character). * We only do this if the character hash has been initialised * and the string is >0 length. */ res = NULL; ssz = 0; while ('\0' != *p) { if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) { sz = r->xtab[(int)*p].sz; res = mandoc_realloc(res, ssz + sz + 1); memcpy(res + ssz, r->xtab[(int)*p].p, sz); ssz += sz; p++; continue; } else if ('\\' != *p) { res = mandoc_realloc(res, ssz + 2); res[ssz++] = *p++; continue; } /* Search for term matches. */ for (cp = r->xmbtab; cp; cp = cp->next) if (0 == strncmp(p, cp->key.p, cp->key.sz)) break; if (NULL != cp) { /* * A match has been found. * Append the match to the array and move * forward by its keysize. */ res = mandoc_realloc (res, ssz + cp->val.sz + 1); memcpy(res + ssz, cp->val.p, cp->val.sz); ssz += cp->val.sz; p += (int)cp->key.sz; continue; } /* * Handle escapes carefully: we need to copy * over just the escape itself, or else we might * do replacements within the escape itself. * Make sure to pass along the bogus string. */ pp = p++; esc = mandoc_escape(&p, NULL, NULL); if (ESCAPE_ERROR == esc) { sz = strlen(pp); res = mandoc_realloc(res, ssz + sz + 1); memcpy(res + ssz, pp, sz); break; } /* * We bail out on bad escapes. * No need to warn: we already did so when * roff_res() was called. 
*/ sz = (int)(p - pp); res = mandoc_realloc(res, ssz + sz + 1); memcpy(res + ssz, pp, sz); ssz += sz; } res[(int)ssz] = '\0'; return(res); }
/*
 * Main parse routine for a buffer.
 * It assumes encoding and line numbering are already set up.
 * It can recurse directly (for invocations of user-defined
 * macros, inline equations, and input line traps)
 * and indirectly (for .so file inclusion).
 *
 * Parsing starts at byte `i' of `blk'.  A non-zero `start' marks
 * a buffer whose lines are counted as lines of the real file; for
 * a zero `start', a NUL byte also ends the input.
 * Each input line is copied into the scratch buffer `ln', then
 * handed to roff_parseln() and, depending on its verdict, to the
 * mdoc or man parser.
 */
static void
mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
{
	const struct tbl_span	*span;
	struct buf	 ln;
	const char	*save_file;
	char		*cp;
	size_t		 pos; /* byte number in the ln buffer */
	enum rofferr	 rr;
	int		 of;
	int		 lnn; /* line number in the real file */
	int		 fd;
	unsigned char	 c;

	memset(&ln, 0, sizeof(ln));

	lnn = curp->line;
	pos = 0;

	while (i < blk.sz) {
		if (0 == pos && '\0' == blk.buf[i])
			break;

		if (start) {
			curp->line = lnn;
			curp->reparse_count = 0;

			/*
			 * While both encodings are still candidates,
			 * let preconv_cue() inspect the first two lines.
			 */
			if (lnn < 3 &&
			    curp->filenc & MPARSE_UTF8 &&
			    curp->filenc & MPARSE_LATIN1)
				curp->filenc = preconv_cue(&blk, i);
		}

		while (i < blk.sz && (start || blk.buf[i] != '\0')) {

			/*
			 * When finding an unescaped newline character,
			 * leave the character loop to process the line.
			 * Skip a preceding carriage return, if any.
			 */

			if ('\r' == blk.buf[i] &&
			    i + 1 < blk.sz &&
			    '\n' == blk.buf[i + 1])
				++i;
			if ('\n' == blk.buf[i]) {
				++i;
				++lnn;
				break;
			}

			/*
			 * Make sure we have space for the worst
			 * case of 11 bytes: "\\[u10ffff]\0"
			 */

			if (pos + 11 > ln.sz)
				resize_buf(&ln, 256);

			/*
			 * Encode 8-bit input.
			 * If that is not possible, complain and
			 * substitute a question mark.
			 */

			c = blk.buf[i];
			if (c & 0x80) {
				if ( ! (curp->filenc && preconv_encode(
				    &blk, &i, &ln, &pos, &curp->filenc))) {
					mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
					    curp->line, pos, "0x%x", c);
					ln.buf[pos++] = '?';
					i++;
				}
				continue;
			}

			/*
			 * Exclude control characters.
			 * All but the carriage return are replaced
			 * by question marks; tabs are let through.
			 */

			if (c == 0x7f || (c < 0x20 && c != 0x09)) {
				mandoc_vmsg(c == 0x00 || c == 0x04 ||
				    c > 0x0a ? MANDOCERR_CHAR_BAD :
				    MANDOCERR_CHAR_UNSUPP,
				    curp, curp->line, pos, "0x%x", c);
				i++;
				if (c != '\r')
					ln.buf[pos++] = '?';
				continue;
			}

			/* Trailing backslash = a plain char. */

			if (blk.buf[i] != '\\' || i + 1 == blk.sz) {
				ln.buf[pos++] = blk.buf[i++];
				continue;
			}

			/*
			 * Found escape and at least one other character.
			 * When it's a newline character, skip it.
			 * When there is a carriage return in between,
			 * skip that one as well.
			 */

			if ('\r' == blk.buf[i + 1] &&
			    i + 2 < blk.sz &&
			    '\n' == blk.buf[i + 2])
				++i;
			if ('\n' == blk.buf[i + 1]) {
				i += 2;
				++lnn;
				continue;
			}

			if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
				i += 2;
				/* Comment, skip to end of line */
				for (; i < blk.sz; ++i) {
					if ('\n' == blk.buf[i]) {
						++i;
						++lnn;
						break;
					}
				}

				/* Backout trailing whitespaces */
				for (; pos > 0; --pos) {
					if (ln.buf[pos - 1] != ' ')
						break;
					if (pos > 2 && ln.buf[pos - 2] == '\\')
						break;
				}
				break;
			}

			/* Catch escaped bogus characters. */

			c = (unsigned char) blk.buf[i+1];

			if ( ! (isascii(c) &&
			    (isgraph(c) || isblank(c)))) {
				mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
				    curp->line, pos, "0x%x", c);
				i += 2;
				ln.buf[pos++] = '?';
				continue;
			}

			/* Some other escape sequence, copy & cont. */

			ln.buf[pos++] = blk.buf[i++];
			ln.buf[pos++] = blk.buf[i++];
		}

		if (pos >= ln.sz)
			resize_buf(&ln, 256);

		ln.buf[pos] = '\0';

		/*
		 * A significant amount of complexity is contained by
		 * the roff preprocessor.  It's line-oriented but can be
		 * expressed on one line, so we need at times to
		 * readjust our starting point and re-run it.  The roff
		 * preprocessor can also readjust the buffers with new
		 * data, so we pass them in wholesale.
		 */

		of = 0;

		/*
		 * Maintain a lookaside buffer of all parsed lines.  We
		 * only do this if mparse_keep() has been invoked (the
		 * buffer may be accessed with mparse_getkeep()).
		 */

		if (curp->secondary) {
			curp->secondary->buf = mandoc_realloc(
			    curp->secondary->buf,
			    curp->secondary->sz + pos + 2);
			memcpy(curp->secondary->buf +
			    curp->secondary->sz,
			    ln.buf, pos);
			curp->secondary->sz += pos;
			curp->secondary->buf
				[curp->secondary->sz] = '\n';
			curp->secondary->sz++;
			curp->secondary->buf
				[curp->secondary->sz] = '\0';
		}
rerun:
		rr = roff_parseln(curp->roff, curp->line, &ln, &of);

		switch (rr) {
		case ROFF_REPARSE:
			/* Parse the line buffer itself, within limits. */
			if (REPARSE_LIMIT >= ++curp->reparse_count)
				mparse_buf_r(curp, ln, of, 0);
			else
				mandoc_msg(MANDOCERR_ROFFLOOP, curp,
				    curp->line, pos, NULL);
			pos = 0;
			continue;
		case ROFF_APPEND:
			/* Keep the line and add the next one to it. */
			pos = strlen(ln.buf);
			continue;
		case ROFF_RERUN:
			goto rerun;
		case ROFF_IGN:
			pos = 0;
			continue;
		case ROFF_SO:
			/*
			 * Without MPARSE_SO, a .so request at the
			 * very end of the input is not followed;
			 * only its target is recorded.
			 */
			if ( ! (curp->options & MPARSE_SO) &&
			    (i >= blk.sz || blk.buf[i] == '\0')) {
				curp->sodest = mandoc_strdup(ln.buf + of);
				free(ln.buf);
				return;
			}
			/*
			 * We remove `so' clauses from our lookaside
			 * buffer because we're going to descend into
			 * the file recursively.
			 */
			if (curp->secondary)
				curp->secondary->sz -= pos + 1;
			save_file = curp->file;
			if ((fd = mparse_open(curp, ln.buf + of)) != -1) {
				mparse_readfd(curp, fd, ln.buf + of);
				close(fd);
				curp->file = save_file;
			} else {
				/*
				 * The file cannot be opened: parse a
				 * short notice naming it instead.
				 */
				curp->file = save_file;
				mandoc_vmsg(MANDOCERR_SO_FAIL,
				    curp, curp->line, pos,
				    ".so %s", ln.buf + of);
				ln.sz = mandoc_asprintf(&cp,
				    ".sp\nSee the file %s.\n.sp",
				    ln.buf + of);
				free(ln.buf);
				ln.buf = cp;
				of = 0;
				mparse_buf_r(curp, ln, of, 0);
			}
			pos = 0;
			continue;
		default:
			break;
		}

		/*
		 * If input parsers have not been allocated, do so now.
		 * We keep these instanced between parsers, but set them
		 * locally per parse routine since we can use different
		 * parsers with each one.
		 */

		if (curp->man == NULL ||
		    curp->man->macroset == MACROSET_NONE)
			choose_parser(curp);

		/*
		 * Lastly, push down into the parsers themselves.
		 * If libroff returns ROFF_TBL, then add it to the
		 * currently open parse.  Since we only get here if
		 * there does exist data (see tbl_data.c), we're
		 * guaranteed that something's been allocated.
		 * Do the same for ROFF_EQN.
		 * A return value of 2 from a line parser ends the parse.
		 */

		if (rr == ROFF_TBL)
			while ((span = roff_span(curp->roff)) != NULL)
				roff_addtbl(curp->man, span);
		else if (rr == ROFF_EQN)
			roff_addeqn(curp->man, roff_eqn(curp->roff));
		else if ((curp->man->macroset == MACROSET_MDOC ?
		    mdoc_parseln(curp->man, curp->line, ln.buf, of) :
		    man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
				break;

		/* Temporary buffers typically are not full. */

		if (0 == start && '\0' == blk.buf[i])
			break;

		/* Start the next input line. */

		pos = 0;
	}

	free(ln.buf);
}
/*
 * Main parse routine for an opened file.  This is called for each
 * opened file and simply loops around the full input file, possibly
 * nesting (i.e., with `so').
 *
 * A non-zero `start' marks a buffer whose lines are counted as
 * lines of the real file; for a zero `start', a NUL byte also ends
 * the input.
 * Each input line is copied into the scratch buffer `ln', then
 * handed to roff_parseln() and, depending on its verdict, to the
 * man or mdoc parser.  Parsing stops as soon as curp->file_status
 * reaches MANDOCLEVEL_FATAL.
 */
static void
mparse_buf_r(struct mparse *curp, struct buf blk, int start)
{
	const struct tbl_span	*span;
	struct buf	 ln;
	enum rofferr	 rr;
	int		 i, of, rc;
	int		 pos; /* byte number in the ln buffer */
	int		 lnn; /* line number in the real file */
	unsigned char	 c;

	memset(&ln, 0, sizeof(struct buf));

	lnn = curp->line;
	pos = 0;

	for (i = 0; i < (int)blk.sz; ) {
		if (0 == pos && '\0' == blk.buf[i])
			break;

		if (start) {
			curp->line = lnn;
			curp->reparse_count = 0;
		}

		while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) {

			/*
			 * When finding an unescaped newline character,
			 * leave the character loop to process the line.
			 * Skip a preceding carriage return, if any.
			 */

			if ('\r' == blk.buf[i] && i + 1 < (int)blk.sz &&
			    '\n' == blk.buf[i + 1])
				++i;
			if ('\n' == blk.buf[i]) {
				++i;
				++lnn;
				break;
			}

			/*
			 * Warn about bogus characters.  If you're using
			 * non-ASCII encoding, you're screwing your
			 * readers.  Since I'd rather this not happen,
			 * I'll be helpful and drop these characters so
			 * we don't display gibberish.  Note to manual
			 * writers: use special characters.
			 */

			c = (unsigned char) blk.buf[i];

			if ( ! (isascii(c) &&
					(isgraph(c) || isblank(c)))) {
				mandoc_msg(MANDOCERR_BADCHAR, curp,
						curp->line, pos, "ignoring byte");
				i++;
				continue;
			}

			/* Trailing backslash = a plain char. */

			if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
				if (pos >= (int)ln.sz)
					resize_buf(&ln, 256);
				ln.buf[pos++] = blk.buf[i++];
				continue;
			}

			/*
			 * Found escape and at least one other character.
			 * When it's a newline character, skip it.
			 * When there is a carriage return in between,
			 * skip that one as well.
			 */

			if ('\r' == blk.buf[i + 1] && i + 2 < (int)blk.sz &&
			    '\n' == blk.buf[i + 2])
				++i;
			if ('\n' == blk.buf[i + 1]) {
				i += 2;
				++lnn;
				continue;
			}

			if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
				i += 2;
				/* Comment, skip to end of line */
				for (; i < (int)blk.sz; ++i) {
					if ('\n' == blk.buf[i]) {
						++i;
						++lnn;
						break;
					}
				}

				/* Backout trailing whitespaces */
				for (; pos > 0; --pos) {
					if (ln.buf[pos - 1] != ' ')
						break;
					if (pos > 2 && ln.buf[pos - 2] == '\\')
						break;
				}
				break;
			}

			/* Some other escape sequence, copy & cont. */

			if (pos + 1 >= (int)ln.sz)
				resize_buf(&ln, 256);

			ln.buf[pos++] = blk.buf[i++];
			ln.buf[pos++] = blk.buf[i++];
		}

		if (pos >= (int)ln.sz)
			resize_buf(&ln, 256);

		ln.buf[pos] = '\0';

		/*
		 * A significant amount of complexity is contained by
		 * the roff preprocessor.  It's line-oriented but can be
		 * expressed on one line, so we need at times to
		 * readjust our starting point and re-run it.  The roff
		 * preprocessor can also readjust the buffers with new
		 * data, so we pass them in wholesale.
		 */

		of = 0;

		/*
		 * Maintain a lookaside buffer of all parsed lines.  We
		 * only do this if mparse_keep() has been invoked (the
		 * buffer may be accessed with mparse_getkeep()).
		 */

		if (curp->secondary) {
			curp->secondary->buf =
				mandoc_realloc
				(curp->secondary->buf,
				 curp->secondary->sz + pos + 2);
			memcpy(curp->secondary->buf +
					curp->secondary->sz,
					ln.buf, pos);
			curp->secondary->sz += pos;
			curp->secondary->buf
				[curp->secondary->sz] = '\n';
			curp->secondary->sz++;
			curp->secondary->buf
				[curp->secondary->sz] = '\0';
		}
rerun:
		rr = roff_parseln
			(curp->roff, curp->line,
			 &ln.buf, &ln.sz, of, &of);

		switch (rr) {
		case (ROFF_REPARSE):
			/* Parse the line buffer itself, within limits. */
			if (REPARSE_LIMIT >= ++curp->reparse_count)
				mparse_buf_r(curp, ln, 0);
			else
				mandoc_msg(MANDOCERR_ROFFLOOP, curp,
					curp->line, pos, NULL);
			pos = 0;
			continue;
		case (ROFF_APPEND):
			/* Keep the line and add the next one to it. */
			pos = (int)strlen(ln.buf);
			continue;
		case (ROFF_RERUN):
			goto rerun;
		case (ROFF_IGN):
			pos = 0;
			continue;
		case (ROFF_ERR):
			assert(MANDOCLEVEL_FATAL <= curp->file_status);
			break;
		case (ROFF_SO):
			/*
			 * We remove `so' clauses from our lookaside
			 * buffer because we're going to descend into
			 * the file recursively.
			 */
			if (curp->secondary)
				curp->secondary->sz -= pos + 1;
			mparse_readfd_r(curp, -1, ln.buf + of, 1);
			if (MANDOCLEVEL_FATAL <= curp->file_status)
				break;
			pos = 0;
			continue;
		default:
			break;
		}

		/*
		 * If we encounter errors in the recursive parse, make
		 * sure we don't continue parsing.
		 */

		if (MANDOCLEVEL_FATAL <= curp->file_status)
			break;

		/*
		 * If input parsers have not been allocated, do so now.
		 * We keep these instanced between parsers, but set them
		 * locally per parse routine since we can use different
		 * parsers with each one.
		 */

		if ( ! (curp->man || curp->mdoc))
			pset(ln.buf + of, pos - of, curp);

		/*
		 * Lastly, push down into the parsers themselves.  One
		 * of these will have already been set in the pset()
		 * routine.
		 * If libroff returns ROFF_TBL, then add it to the
		 * currently open parse.  Since we only get here if
		 * there does exist data (see tbl_data.c), we're
		 * guaranteed that something's been allocated.
		 * Do the same for ROFF_EQN.
		 * A result of 0 from any of them ends the parse;
		 * rc stays at -1 if none of them was called.
		 */

		rc = -1;

		if (ROFF_TBL == rr)
			while (NULL != (span = roff_span(curp->roff))) {
				rc = curp->man ?
					man_addspan(curp->man, span) :
					mdoc_addspan(curp->mdoc, span);
				if (0 == rc)
					break;
			}
		else if (ROFF_EQN == rr)
			rc = curp->mdoc ?
				mdoc_addeqn(curp->mdoc,
					roff_eqn(curp->roff)) :
				man_addeqn(curp->man,
					roff_eqn(curp->roff));
		else if (curp->man || curp->mdoc)
			rc = curp->man ?
				man_parseln(curp->man,
					curp->line, ln.buf, of) :
				mdoc_parseln(curp->mdoc,
					curp->line, ln.buf, of);

		if (0 == rc) {
			assert(MANDOCLEVEL_FATAL <= curp->file_status);
			break;
		}

		/* Temporary buffers typically are not full. */

		if (0 == start && '\0' == blk.buf[i])
			break;

		/* Start the next input line. */

		pos = 0;
	}

	free(ln.buf);
}
static int post_bl_block_tag(POST_ARGS) { struct mdoc_node *n, *nn; size_t sz, ssz; int i; char buf[NUMSIZ]; /* * Calculate the -width for a `Bl -tag' list if it hasn't been * provided. Uses the first head macro. NOTE AGAIN: this is * ONLY if the -width argument has NOT been provided. See * post_bl_block_width() for converting the -width string. */ sz = 10; n = mdoc->last; for (nn = n->body->child; nn; nn = nn->next) { if (MDOC_It != nn->tok) continue; assert(MDOC_BLOCK == nn->type); nn = nn->head->child; if (nn == NULL) break; if (MDOC_TEXT == nn->type) { sz = strlen(nn->string) + 1; break; } if (0 != (ssz = mdoc_macro2len(nn->tok))) sz = ssz; break; } /* Defaults to ten ens. */ snprintf(buf, NUMSIZ, "%zun", sz); /* * We have to dynamically add this to the macro's argument list. * We're guaranteed that a MDOC_Width doesn't already exist. */ assert(n->args); i = (int)(n->args->argc)++; n->args->argv = mandoc_realloc(n->args->argv, n->args->argc * sizeof(struct mdoc_argv)); n->args->argv[i].arg = MDOC_Width; n->args->argv[i].line = n->line; n->args->argv[i].pos = n->pos; n->args->argv[i].sz = 1; n->args->argv[i].value = mandoc_malloc(sizeof(char *)); n->args->argv[i].value[0] = mandoc_strdup(buf); /* Set our width! */ n->norm->Bl.width = n->args->argv[i].value[0]; return(1); }
static void pg_search(const struct req *req, char *path) { size_t tt, ressz; struct manpaths ps; int i, sz, rc; const char *ep, *start; struct res *res; char **cp; struct opts opt; struct expr *expr; if (req->q.manroot < 0 || 0 == req->psz) { resp_search(NULL, 0, (void *)req); return; } memset(&opt, 0, sizeof(struct opts)); ep = req->q.expr; opt.arch = req->q.arch; opt.cat = req->q.sec; rc = -1; sz = 0; cp = NULL; ressz = 0; res = NULL; /* * Begin by chdir()ing into the root of the manpath. * This way we can pick up the database files, which are * relative to the manpath root. */ assert(req->q.manroot < (int)req->psz); if (-1 == (chdir(req->p[req->q.manroot].path))) { perror(req->p[req->q.manroot].path); resp_search(NULL, 0, (void *)req); return; } memset(&ps, 0, sizeof(struct manpaths)); manpath_manconf(&ps, "etc/catman.conf"); /* * Poor man's tokenisation: just break apart by spaces. * Yes, this is half-ass. But it works for now. */ while (ep && isspace((unsigned char)*ep)) ep++; while (ep && '\0' != *ep) { cp = mandoc_realloc(cp, (sz + 1) * sizeof(char *)); start = ep; while ('\0' != *ep && ! isspace((unsigned char)*ep)) ep++; cp[sz] = mandoc_malloc((ep - start) + 1); memcpy(cp[sz], start, ep - start); cp[sz++][ep - start] = '\0'; while (isspace((unsigned char)*ep)) ep++; } /* * Pump down into apropos backend. * The resp_search() function is called with the results. */ expr = req->q.legacy ? termcomp(sz, cp, &tt) : exprcomp(sz, cp, &tt); if (NULL != expr) rc = apropos_search (ps.sz, ps.paths, &opt, expr, tt, (void *)req, &ressz, &res, resp_search); /* ...unless errors occured. */ if (0 == rc) resp_baddb(); else if (-1 == rc) resp_search(NULL, 0, NULL); for (i = 0; i < sz; i++) free(cp[i]); free(cp); resfree(res, ressz); exprfree(expr); manpath_free(&ps); }
/* * Parse an argument from line text. This comes in the form of -key * [value0...], which may either have a single mandatory value, at least * one mandatory value, an optional single value, or no value. */ enum margverr mdoc_argv(struct mdoc *mdoc, int line, enum mdoct tok, struct mdoc_arg **v, int *pos, char *buf) { char *p, sv; struct mdoc_argv tmp; struct mdoc_arg *arg; const enum mdocargt *ap; if ('\0' == buf[*pos]) return(ARGV_EOLN); else if (NULL == (ap = mdocargs[tok].argvs)) return(ARGV_WORD); else if ('-' != buf[*pos]) return(ARGV_WORD); /* Seek to the first unescaped space. */ p = &buf[++(*pos)]; assert(*pos > 0); for ( ; buf[*pos] ; (*pos)++) if (' ' == buf[*pos] && '\\' != buf[*pos - 1]) break; /* * We want to nil-terminate the word to look it up (it's easier * that way). But we may not have a flag, in which case we need * to restore the line as-is. So keep around the stray byte, * which we'll reset upon exiting (if necessary). */ if ('\0' != (sv = buf[*pos])) buf[(*pos)++] = '\0'; /* * Now look up the word as a flag. Use temporary storage that * we'll copy into the node's flags, if necessary. */ memset(&tmp, 0, sizeof(struct mdoc_argv)); tmp.line = line; tmp.pos = *pos; tmp.arg = MDOC_ARG_MAX; while (MDOC_ARG_MAX != (tmp.arg = *ap++)) if (0 == strcmp(p, mdoc_argnames[tmp.arg])) break; if (MDOC_ARG_MAX == tmp.arg) { /* * The flag was not found. * Restore saved zeroed byte and return as a word. */ if (sv) buf[*pos - 1] = sv; return(ARGV_WORD); } /* Read to the next word (the argument). */ while (buf[*pos] && ' ' == buf[*pos]) (*pos)++; switch (argvflags[tmp.arg]) { case (ARGV_SINGLE): if ( ! argv_single(mdoc, line, &tmp, pos, buf)) return(ARGV_ERROR); break; case (ARGV_MULTI): if ( ! 
argv_multi(mdoc, line, &tmp, pos, buf)) return(ARGV_ERROR); break; case (ARGV_NONE): break; } if (NULL == (arg = *v)) arg = *v = mandoc_calloc(1, sizeof(struct mdoc_arg)); arg->argc++; arg->argv = mandoc_realloc (arg->argv, arg->argc * sizeof(struct mdoc_argv)); memcpy(&arg->argv[(int)arg->argc - 1], &tmp, sizeof(struct mdoc_argv)); return(ARGV_ARG); }
/* * Get the next token from the input stream using the given quote * character. * Optionally make any replacements. */ static const char * eqn_next(struct eqn_node *ep, char quote, size_t *sz, int repl) { char *start, *next; int q, diff, lim; size_t ssz, dummy; struct eqn_def *def; if (NULL == sz) sz = &dummy; lim = 0; ep->rew = ep->cur; again: /* Prevent self-definitions. */ if (lim >= EQN_NEST_MAX) { mandoc_msg(MANDOCERR_ROFFLOOP, ep->parse, ep->eqn.ln, ep->eqn.pos, NULL); return(NULL); } ep->cur = ep->rew; start = &ep->data[(int)ep->cur]; q = 0; if ('\0' == *start) return(NULL); if (quote == *start) { ep->cur++; q = 1; } start = &ep->data[(int)ep->cur]; if ( ! q) { if ('{' == *start || '}' == *start) ssz = 1; else ssz = strcspn(start + 1, " ^~\"{}\t") + 1; next = start + (int)ssz; if ('\0' == *next) next = NULL; } else next = strchr(start, quote); if (NULL != next) { *sz = (size_t)(next - start); ep->cur += *sz; if (q) ep->cur++; while (' ' == ep->data[(int)ep->cur] || '\t' == ep->data[(int)ep->cur] || '^' == ep->data[(int)ep->cur] || '~' == ep->data[(int)ep->cur]) ep->cur++; } else { if (q) mandoc_msg(MANDOCERR_ARG_QUOTE, ep->parse, ep->eqn.ln, ep->eqn.pos, NULL); next = strchr(start, '\0'); *sz = (size_t)(next - start); ep->cur += *sz; } /* Quotes aren't expanded for values. */ if (q || ! repl) return(start); if (NULL != (def = eqn_def_find(ep, start, *sz))) { diff = def->valsz - *sz; if (def->valsz > *sz) { ep->sz += diff; ep->data = mandoc_realloc(ep->data, ep->sz + 1); ep->data[ep->sz] = '\0'; start = &ep->data[(int)ep->rew]; } diff = def->valsz - *sz; memmove(start + *sz + diff, start + *sz, (strlen(start) - *sz) + 1); memcpy(start, def->val, def->valsz); lim++; goto again; } return(start); }