Пример #1
0
/* Merge accession indexes */
int access_merge(char *dbase, uint64_t nb, indix_t *ind,char * new_index_dir) {
  int i;
  char *file;

  file = index_file(new_index_dir, dbase, ACCSUF);
  i = index_merge(file, nb, ind);
  free(file);

  return i; }
Пример #2
0
int
main(int argc, char *argv[])
{
	struct mparse	*mp; /* parse sequence */
	struct manpaths	 dirs;
	struct mdb	 mdb;
	struct recs	 recs;
	enum op		 op; /* current operation */
	const char	*dir;
	int		 ch, i, flags;
	char		 dirbuf[MAXPATHLEN];
	DB		*hash; /* temporary keyword hashtable */
	BTREEINFO	 info; /* btree configuration */
	size_t		 sz1, sz2;
	struct buf	 buf, /* keyword buffer */
			 dbuf; /* description buffer */
	struct of	*of; /* list of files for processing */
	extern int	 optind;
	extern char	*optarg;

	progname = strrchr(argv[0], '/');
	if (progname == NULL)
		progname = argv[0];
	else
		++progname;

	memset(&dirs, 0, sizeof(struct manpaths));
	memset(&mdb, 0, sizeof(struct mdb));
	memset(&recs, 0, sizeof(struct recs));

	of = NULL;
	mp = NULL;
	hash = NULL;
	op = OP_DEFAULT;
	dir = NULL;

	while (-1 != (ch = getopt(argc, argv, "aC:d:tu:vW")))
		switch (ch) {
		case ('a'):
			use_all = 1;
			break;
		case ('C'):
			if (op) {
				fprintf(stderr,
				    "-C: conflicting options\n");
				goto usage;
			}
			dir = optarg;
			op = OP_CONFFILE;
			break;
		case ('d'):
			if (op) {
				fprintf(stderr,
				    "-d: conflicting options\n");
				goto usage;
			}
			dir = optarg;
			op = OP_UPDATE;
			break;
		case ('t'):
			dup2(STDOUT_FILENO, STDERR_FILENO);
			if (op) {
				fprintf(stderr,
				    "-t: conflicting options\n");
				goto usage;
			}
			op = OP_TEST;
			use_all = 1;
			warnings = 1;
			break;
		case ('u'):
			if (op) {
				fprintf(stderr,
				    "-u: conflicting options\n");
				goto usage;
			}
			dir = optarg;
			op = OP_DELETE;
			break;
		case ('v'):
			verb++;
			break;
		case ('W'):
			warnings = 1;
			break;
		default:
			goto usage;
		}

	argc -= optind;
	argv += optind;

	if (OP_CONFFILE == op && argc > 0) {
		fprintf(stderr, "-C: too many arguments\n");
		goto usage;
	}

	memset(&info, 0, sizeof(BTREEINFO));
	info.lorder = 4321;
	info.flags = R_DUP;

	mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL);

	memset(&buf, 0, sizeof(struct buf));
	memset(&dbuf, 0, sizeof(struct buf));

	buf.size = dbuf.size = MANDOC_BUFSZ;

	buf.cp = mandoc_malloc(buf.size);
	dbuf.cp = mandoc_malloc(dbuf.size);

	if (OP_TEST == op) {
		ofile_argbuild(argc, argv, &of, ".");
		if (NULL == of)
			goto out;
		index_merge(of, mp, &dbuf, &buf, 
				hash, &mdb, &recs, ".");
		goto out;
	}

	if (OP_UPDATE == op || OP_DELETE == op) {
		strlcat(mdb.dbn, dir, MAXPATHLEN);
		strlcat(mdb.dbn, "/", MAXPATHLEN);
		sz1 = strlcat(mdb.dbn, MANDOC_DB, MAXPATHLEN);

		strlcat(mdb.idxn, dir, MAXPATHLEN);
		strlcat(mdb.idxn, "/", MAXPATHLEN);
		sz2 = strlcat(mdb.idxn, MANDOC_IDX, MAXPATHLEN);

		if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) {
			fprintf(stderr, "%s: path too long\n", dir);
			exit((int)MANDOCLEVEL_BADARG);
		}

		flags = O_CREAT | O_RDWR;
		mdb.db = dbopen(mdb.dbn, flags, 0644, DB_BTREE, &info);
		mdb.idx = dbopen(mdb.idxn, flags, 0644, DB_RECNO, NULL);

		if (NULL == mdb.db) {
			perror(mdb.dbn);
			exit((int)MANDOCLEVEL_SYSERR);
		} else if (NULL == mdb.idx) {
			perror(mdb.idxn);
			exit((int)MANDOCLEVEL_SYSERR);
		}

		ofile_argbuild(argc, argv, &of, dir);

		if (NULL == of)
			goto out;

		index_prune(of, &mdb, &recs, dir);

		/*
		 * Go to the root of the respective manual tree.
		 * This must work or no manuals may be found (they're
		 * indexed relative to the root).
		 */

		if (OP_UPDATE == op) {
			if (-1 == chdir(dir)) {
				perror(dir);
				exit((int)MANDOCLEVEL_SYSERR);
			}
			index_merge(of, mp, &dbuf, &buf, hash,
					&mdb, &recs, dir);
		}

		goto out;
	}

	/*
	 * Configure the directories we're going to scan.
	 * If we have command-line arguments, use them.
	 * If not, we use man(1)'s method (see mandocdb.8).
	 */

	if (argc > 0) {
		dirs.paths = mandoc_calloc(argc, sizeof(char *));
		dirs.sz = argc;
		for (i = 0; i < argc; i++) 
			dirs.paths[i] = mandoc_strdup(argv[i]);
	} else
		manpath_parse(&dirs, dir, NULL, NULL);

	for (i = 0; i < dirs.sz; i++) {
		/*
		 * Go to the root of the respective manual tree.
		 * This must work or no manuals may be found:
		 * They are indexed relative to the root.
		 */

		if (-1 == chdir(dirs.paths[i])) {
			perror(dirs.paths[i]);
			exit((int)MANDOCLEVEL_SYSERR);
		}

		strlcpy(mdb.dbn, MANDOC_DB, MAXPATHLEN);
		strlcpy(mdb.idxn, MANDOC_IDX, MAXPATHLEN);

		flags = O_CREAT | O_TRUNC | O_RDWR;
		mdb.db = dbopen(mdb.dbn, flags, 0644, DB_BTREE, &info);
		mdb.idx = dbopen(mdb.idxn, flags, 0644, DB_RECNO, NULL);

		if (NULL == mdb.db) {
			perror(mdb.dbn);
			exit((int)MANDOCLEVEL_SYSERR);
		} else if (NULL == mdb.idx) {
			perror(mdb.idxn);
			exit((int)MANDOCLEVEL_SYSERR);
		}

		/*
		 * Search for manuals and fill the new database.
		 */

		strlcpy(dirbuf, dirs.paths[i], MAXPATHLEN);
	       	ofile_dirbuild(".", "", "", 0, &of, dirbuf);

		if (NULL != of) {
			index_merge(of, mp, &dbuf, &buf, hash,
			     &mdb, &recs, dirs.paths[i]);
			ofile_free(of);
			of = NULL;
		}

		(*mdb.db->close)(mdb.db);
		(*mdb.idx->close)(mdb.idx);
		mdb.db = NULL;
		mdb.idx = NULL;
	}

out:
	if (mdb.db)
		(*mdb.db->close)(mdb.db);
	if (mdb.idx)
		(*mdb.idx->close)(mdb.idx);
	if (hash)
		(*hash->close)(hash);
	if (mp)
		mparse_free(mp);

	manpath_free(&dirs);
	ofile_free(of);
	free(buf.cp);
	free(dbuf.cp);
	free(recs.stack);

	return(MANDOCLEVEL_OK);

usage:
	fprintf(stderr,
		"usage: %s [-av] [-C file] | dir ... | -t file ...\n"
		"                        -d dir [file ...] | "
		"-u dir [file ...]\n",
		progname);

	return((int)MANDOCLEVEL_BADARG);
}
Пример #3
0
/*
 * Reads a single file (ie until get() returns EOF)
 */
static void read_file(paragraph *** ret, input * in, indexdata * idx)
{
  token t;
  paragraph par;
  word wd, **whptr, **idximplicit;
  tree234 *macros;
  wchar_t utext[2], *wdtext;
  int style, spcstyle;
  int already;
  int iswhite, seenwhite;
  int type;
  struct stack_item {
    enum {
      stack_nop = 0,            /* do nothing (for error recovery) */
      stack_ualt = 1,           /* \u alternative */
      stack_style = 2,          /* \e, \c, \cw */
      stack_idx = 4,            /* \I, \i, \ii */
      stack_hyper = 8,          /* \W */
      stack_quote = 16,         /* \q */
    } type;
    word **whptr;               /* to restore from \u alternatives */
    word **idximplicit;         /* to restore from \u alternatives */
  } *sitem;
  stack parsestk;
  word *indexword=NULL, *uword=NULL, *iword=NULL;
  word *idxwordlist;
  rdstring indexstr;
  int index_downcase=0, index_visible=0, indexing=0;
  const rdstring nullrs = { 0, 0, NULL };
  wchar_t uchr;

  t.text = NULL;
  macros = newtree234(macrocmp);
  already = FALSE;

  /*
   * Loop on each paragraph.
   */
  while (1)
  {
    int start_cmd = c__invalid;
    par.words = NULL;
    par.keyword = NULL;
    whptr = &par.words;

    /*
     * Get a token.
     */
    if (!already)
    {
      dtor(t), t = get_token(in);
    }
    already = FALSE;
    if (t.type == tok_eof)
      break;

    /*
     * Parse code paragraphs separately.
     */
    if (t.type == tok_cmd && t.cmd == c_c && !isbrace(in))
    {
      par.type = para_Code;
      par.fpos = t.pos;
      while (1)
      {
        dtor(t), t = get_codepar_token(in);
        wd.type = word_WeakCode;
        wd.breaks = FALSE;      /* shouldn't need this... */
        wd.text = ustrdup(t.text);
        wd.alt = NULL;
        wd.fpos = t.pos;
        addword(wd, &whptr);
        dtor(t), t = get_token(in);
        if (t.type == tok_white)
        {
          /*
           * The newline after a code-paragraph line
           */
          dtor(t), t = get_token(in);
        }
        if (t.type == tok_eop || t.type == tok_eof)
          break;
        else if (t.type != tok_cmd || t.cmd != c_c)
        {
          error(err_brokencodepara, &t.pos);
          addpara(par, ret);
          while (t.type != tok_eop)     /* error recovery: */
            dtor(t), t = get_token(in); /* eat rest of paragraph */
          goto codeparabroken;  /* ick, but such is life */
        }
      }
      addpara(par, ret);
    codeparabroken:
      continue;
    }

    while (t.type == tok_cmd && macrolookup(macros, in, t.text, &t.pos))
    {
      dtor(t), t = get_token(in);
    }


    /*
     * This token begins a paragraph. See if it's one of the
     * special commands that define a paragraph type.
     *
     * (note that \# is special in a way, and \nocite takes no
     * text)
     */
    par.type = para_Normal;
    if (t.type == tok_cmd)
    {
      int needkw=0;
      int is_macro = FALSE;

      par.fpos = t.pos;
      switch (t.cmd)
      {
      default:
        needkw = -1;
        break;
      case c__invalid:
        error(err_badparatype, t.text, &t.pos);
        needkw = 4;
        break;
      case c__comment:
        if (isbrace(in))
          break;                /* `\#{': isn't a comment para */
        do
        {
          dtor(t), t = get_token(in);
        }
        while (t.type != tok_eop && t.type != tok_eof);
        continue;               /* next paragraph */
        /*
         * `needkw' values:
         *
         *   1 -- exactly one keyword
         *   2 -- at least one keyword
         *   4 -- any number of keywords including zero
         *   8 -- at least one keyword and then nothing else
         *  16 -- nothing at all! no keywords, no body
         *  32 -- no keywords at all
         */
      case c_A:
        needkw = 2;
        par.type = para_Appendix;
        break;
      case c_B:
        needkw = 2;
        par.type = para_Biblio;
        break;
      case c_BR:
        needkw = 1;
        par.type = para_BR;
        start_cmd = c_BR;
        break;
      case c_C:
        needkw = 2;
        par.type = para_Chapter;
        break;
      case c_H:
        needkw = 2;
        par.type = para_Heading;
        par.aux = 0;
        break;
      case c_IM:
        needkw = 2;
        par.type = para_IM;
        start_cmd = c_IM;
        break;
      case c_S:
        needkw = 2;
        par.type = para_Subsect;
        par.aux = t.aux;
        break;
      case c_U:
        needkw = 32;
        par.type = para_UnnumberedChapter;
        break;
        /* For \b and \n the keyword is optional */
      case c_b:
        needkw = 4;
        par.type = para_Bullet;
        break;
      case c_n:
        needkw = 4;
        par.type = para_NumberedList;
        break;
      case c_cfg:
        needkw = 8;
        par.type = para_Config;
        start_cmd = c_cfg;
        break;
      case c_copyright:
        needkw = 32;
        par.type = para_Copyright;
        break;
      case c_define:
        is_macro = TRUE;
        needkw = 1;
        break;
        /* For \nocite the keyword is _everything_ */
      case c_nocite:
        needkw = 8;
        par.type = para_NoCite;
        break;
      case c_preamble:
        needkw = 32;
        par.type = para_Preamble;
        break;
      case c_rule:
        needkw = 16;
        par.type = para_Rule;
        break;
      case c_title:
        needkw = 32;
        par.type = para_Title;
        break;
      case c_versionid:
        needkw = 32;
        par.type = para_VersionID;
        break;
      }

      if (needkw > 0)
      {
        rdstring rs = { 0, 0, NULL };
        int nkeys = 0;
        filepos fp;

        /* Get keywords. */
        dtor(t), t = get_token(in);
        fp = t.pos;
        while (t.type == tok_lbrace)
        {
          /* This is a keyword. */
          nkeys++;
          /* FIXME: there will be bugs if anyone specifies an
           * empty keyword (\foo{}), so trap this case. */
          while (dtor(t), t = get_token(in),
                 t.type == tok_word ||
                 t.type == tok_white ||
                 (t.type == tok_cmd && t.cmd == c__nbsp) ||
                 (t.type == tok_cmd && t.cmd == c__escaped))
          {
            if (t.type == tok_white ||
                (t.type == tok_cmd && t.cmd == c__nbsp))
              rdadd(&rs, ' ');
            else
              rdadds(&rs, t.text);
          }
          if (t.type != tok_rbrace)
          {
            error(err_kwunclosed, &t.pos);
            continue;
          }
          rdadd(&rs, 0);        /* add string terminator */
          dtor(t), t = get_token(in);   /* eat right brace */
        }

        rdadd(&rs, 0);          /* add string terminator */

        /* See whether we have the right number of keywords. */
        if ((needkw & 48) && nkeys > 0)
          error(err_kwillegal, &fp);
        if ((needkw & 11) && nkeys == 0)
          error(err_kwexpected, &fp);
        if ((needkw & 5) && nkeys > 1)
          error(err_kwtoomany, &fp);

        if (is_macro)
        {
          /*
           * Macro definition. Get the rest of the line
           * as a code-paragraph token, repeatedly until
           * there's nothing more left of it. Separate
           * with newlines.
           */
          rdstring macrotext = { 0, 0, NULL };
          while (1)
          {
            dtor(t), t = get_codepar_token(in);
            if (macrotext.pos > 0)
              rdadd(&macrotext, L'\n');
            rdadds(&macrotext, t.text);
            dtor(t), t = get_token(in);
            if (t.type == tok_eop)
              break;
          }
          macrodef(macros, rs.text, macrotext.text, fp);
          continue;             /* next paragraph */
        }

        par.keyword = rdtrim(&rs);

        /* Move to EOP in case of needkw==8 or 16 (no body) */
        if (needkw & 24)
        {
          /* We allow whitespace even when we expect no para body */
          while (t.type == tok_white)
            dtor(t), t = get_token(in);
          if (t.type != tok_eop && t.type != tok_eof &&
              (start_cmd == c__invalid ||
               t.type != tok_cmd || t.cmd != start_cmd))
          {
            error(err_bodyillegal, &t.pos);
            /* Error recovery: eat the rest of the paragraph */
            while (t.type != tok_eop && t.type != tok_eof &&
                   (start_cmd == c__invalid ||
                    t.type != tok_cmd || t.cmd != start_cmd))
              dtor(t), t = get_token(in);
          }
          if (t.type == tok_cmd)
            already = TRUE;     /* inhibit get_token at top of loop */
          addpara(par, ret);
          continue;             /* next paragraph */
        }
      }
    }

    /*
     * Now read the actual paragraph, word by word, adding to
     * the paragraph list.
     *
     * Mid-paragraph commands:
     *
     *  \K \k
     *  \c \cw
     *  \e
     *  \i \ii
     *  \I
     *  \u
     *  \W
     *  \date
     *  \\ \{ \}
     */
    parsestk = stk_new();
    style = word_Normal;
    spcstyle = word_WhiteSpace;
    indexing = FALSE;
    seenwhite = TRUE;
    while (t.type != tok_eop && t.type != tok_eof)
    {
      iswhite = FALSE;
      already = FALSE;

      /* Handle implicit paragraph breaks after \IM, \BR etc */
      if (start_cmd != c__invalid &&
          t.type == tok_cmd && t.cmd == start_cmd)
      {
        already = TRUE;         /* inhibit get_token at top of loop */
        break;
      }

      if (t.type == tok_cmd && t.cmd == c__escaped)
      {
        t.type = tok_word;      /* nice and simple */
        t.aux = 0;              /* even if `\-' - nonbreaking! */
      }
      if (t.type == tok_cmd && t.cmd == c__nbsp)
      {
        t.type = tok_word;      /* nice and simple */
        sfree(t.text);
        t.text = ustrdup(L" "); /* text is ` ' not `_' */
        t.aux = 0;              /* (nonbreaking) */
      }
      switch (t.type)
      {
      case tok_white:
        if (whptr == &par.words)
          break;                /* strip whitespace at start of para */
        wd.text = NULL;
        wd.type = spcstyle;
        wd.alt = NULL;
        wd.aux = 0;
        wd.fpos = t.pos;
        wd.breaks = FALSE;

        /*
         * Inhibit use of whitespace if it's (probably the
         * newline) before a repeat \IM / \BR type
         * directive.
         */
        if (start_cmd != c__invalid)
        {
          dtor(t), t = get_token(in);
          already = TRUE;
          if (t.type == tok_cmd && t.cmd == start_cmd)
            break;
        }

        if (indexing)
          rdadd(&indexstr, ' ');
        if (!indexing || index_visible)
          addword(wd, &whptr);
        if (indexing)
          addword(wd, &idximplicit);
        iswhite = TRUE;
        break;
      case tok_word:
        if (indexing)
          rdadds(&indexstr, t.text);
        wd.type = style;
        wd.alt = NULL;
        wd.aux = 0;
        wd.fpos = t.pos;
        wd.breaks = t.aux;
        if (!indexing || index_visible)
        {
          wd.text = ustrdup(t.text);
          addword(wd, &whptr);
        }
        if (indexing)
        {
          wd.text = ustrdup(t.text);
          addword(wd, &idximplicit);
        }
        break;
      case tok_lbrace:
        error(err_unexbrace, &t.pos);
        /* Error recovery: push nop */
        sitem = mknew(struct stack_item);
        sitem->type = stack_nop;
        stk_push(parsestk, sitem);
        break;
      case tok_rbrace:
        sitem = stk_pop(parsestk);
        if (!sitem)
          error(err_unexbrace, &t.pos);
        else
        {
          if (sitem->type & stack_ualt)
          {
            whptr = sitem->whptr;
            idximplicit = sitem->idximplicit;
          }
          if (sitem->type & stack_style)
          {
            style = word_Normal;
            spcstyle = word_WhiteSpace;
          }
          if (sitem->type & stack_idx )          {
            indexword->text = ustrdup(indexstr.text);
            if (index_downcase)
              ustrlow(indexword->text);
            indexing = FALSE;
            rdadd(&indexstr, L'\0');
            index_merge(idx, FALSE, indexstr.text, idxwordlist);
            sfree(indexstr.text);
          }
          if (sitem->type & stack_hyper)
          {
            wd.text = NULL;
            wd.type = word_HyperEnd;
            wd.alt = NULL;
            wd.aux = 0;
            wd.fpos = t.pos;
            wd.breaks = FALSE;
            if (!indexing || index_visible)
              addword(wd, &whptr);
            if (indexing)
              addword(wd, &idximplicit);
          }
          if (sitem->type & stack_quote)
          {
            wd.text = NULL;
            wd.type = toquotestyle(style);
            wd.alt = NULL;
            wd.aux = quote_Close;
            wd.fpos = t.pos;
            wd.breaks = FALSE;
            if (!indexing || index_visible)
              addword(wd, &whptr);
            if (indexing)
            {
              rdadd(&indexstr, L'"');
              addword(wd, &idximplicit);
            }
          }
        }
        sfree(sitem);
        break;
      case tok_cmd:
        switch (t.cmd)
        {
        case c__comment:
          /*
           * In-paragraph comment: \#{ balanced braces }
           *
           * Anything goes here; even tok_eop. We should
           * eat whitespace after the close brace _if_
           * there was whitespace before the \#.
           */
          dtor(t), t = get_token(in);
          if (t.type != tok_lbrace)
          {
            error(err_explbr, &t.pos);
          } else
          {
            int braces = 1;
            while (braces > 0)
            {
              dtor(t), t = get_token(in);
              if (t.type == tok_lbrace)
                braces++;
              else if (t.type == tok_rbrace)
                braces--;
              else if (t.type == tok_eof)
              {
                error(err_commenteof, &t.pos);
                break;
              }
            }
          }
          if (seenwhite)
          {
            already = TRUE;
            dtor(t), t = get_token(in);
            if (t.type == tok_white)
            {
              iswhite = TRUE;
              already = FALSE;
            }
          }
          break;
        case c_q:
          dtor(t), t = get_token(in);
          if (t.type != tok_lbrace)
          {
            error(err_explbr, &t.pos);
          } else
          {
            wd.text = NULL;
            wd.type = toquotestyle(style);
            wd.alt = NULL;
            wd.aux = quote_Open;
            wd.fpos = t.pos;
            wd.breaks = FALSE;
            if (!indexing || index_visible)
              addword(wd, &whptr);
            if (indexing)
            {
              rdadd(&indexstr, L'"');
              addword(wd, &idximplicit);
            }
            sitem = mknew(struct stack_item);
            sitem->type = stack_quote;
            stk_push(parsestk, sitem);
          }
          break;
        case c_K:
        case c_k:
        case c_R:
        case c_W:
        case c_L:
        case c_date:
          /*
           * Keyword, hyperlink, or \date. We expect a
           * left brace, some text, and then a right
           * brace. No nesting; no arguments.
           */
          wd.fpos = t.pos;
          wd.breaks = FALSE;
          if (t.cmd == c_K)
            wd.type = word_UpperXref;
          else if (t.cmd == c_k)
            wd.type = word_LowerXref;
          else if (t.cmd == c_R)
            wd.type = word_FreeTextXref;
          else if (t.cmd == c_W)
            wd.type = word_HyperLink;
          else if (t.cmd == c_L)
            wd.type = word_LocalHyperLink;
          else
            wd.type = word_Normal;
          dtor(t), t = get_token(in);
          if (t.type != tok_lbrace)
          {
            if (wd.type == word_Normal)
            {
              time_t thetime = time(NULL);
              struct tm *broken = localtime(&thetime);
              already = TRUE;
              wdtext = ustrftime(NULL, broken);
              wd.type = style;
            } else
            {
              error(err_explbr, &t.pos);
              wdtext = NULL;
            }
          } else
          {
            rdstring rs = { 0, 0, NULL };
            while (dtor(t), t = get_token(in),
                   t.type == tok_word || t.type == tok_white)
            {
              if (t.type == tok_white)
                rdadd(&rs, ' ');
              else
                rdadds(&rs, t.text);
            }
            if (wd.type == word_Normal)
            {
              time_t thetime = time(NULL);
              struct tm *broken = localtime(&thetime);
              wdtext = ustrftime(rs.text, broken);
              wd.type = style;
            } else
            {
              wdtext = ustrdup(rs.text);
            }
            sfree(rs.text);
            if (t.type != tok_rbrace)
            {
              error(err_kwexprbr, &t.pos);
            }
          }
          wd.alt = NULL;
          wd.aux = 0;
          if (!indexing || index_visible)
          {
            wd.text = ustrdup(wdtext);
            addword(wd, &whptr);
          }
          if (indexing)
          {
            wd.text = ustrdup(wdtext);
            addword(wd, &idximplicit);
          }
          sfree(wdtext);
          if (wd.type == word_FreeTextXref || wd.type == word_HyperLink || wd.type == word_LocalHyperLink)
          {
            /*
             * Hyperlinks are different: they then
             * expect another left brace, to begin
             * delimiting the text marked by the link.
             */
            dtor(t), t = get_token(in);
            /*
             * Special cases: \W{}\c, \W{}\e, \W{}\cw
             */
            sitem = mknew(struct stack_item);
            sitem->type = stack_hyper;
            if (t.type == tok_cmd &&
                (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw))
            {
              if (style != word_Normal)
                error(err_nestedstyles, &t.pos);
              else
              {
                style = (t.cmd == c_c ? word_Code :
                         t.cmd == c_cw ? word_WeakCode : word_Emph);
                spcstyle = tospacestyle(style);
                sitem->type |= stack_style;
              }
              dtor(t), t = get_token(in);
            }
            if (t.type != tok_lbrace)
            {
              error(err_explbr, &t.pos);
              sfree(sitem);
            } else
            {
              stk_push(parsestk, sitem);
            }
          }
          break;
        case c_c:
        case c_cw:
        case c_e:
          type = t.cmd;
          if (style != word_Normal)
          {
            error(err_nestedstyles, &t.pos);
            /* Error recovery: eat lbrace, push nop. */
            dtor(t), t = get_token(in);
            sitem = mknew(struct stack_item);
            sitem->type = stack_nop;
            stk_push(parsestk, sitem);
          }
          dtor(t), t = get_token(in);
          if (t.type != tok_lbrace)
          {
            error(err_explbr, &t.pos);
          } else
          {
            style = (type == c_c ? word_Code :
                     type == c_cw ? word_WeakCode : word_Emph);
            spcstyle = tospacestyle(style);
            sitem = mknew(struct stack_item);
            sitem->type = stack_style;
            stk_push(parsestk, sitem);
          }
          break;
        case c_i:
        case c_ii:
        case c_I:
          type = t.cmd;
          if (indexing)
          {
            error(err_nestedindex, &t.pos);
            /* Error recovery: eat lbrace, push nop. */
            dtor(t), t = get_token(in);
            sitem = mknew(struct stack_item);
            sitem->type = stack_nop;
            stk_push(parsestk, sitem);
          }
          sitem = mknew(struct stack_item);
          sitem->type = stack_idx;
          dtor(t), t = get_token(in);
          /*
           * Special cases: \i\c, \i\e, \i\cw
           */
          wd.fpos = t.pos;
          if (t.type == tok_cmd &&
              (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw))
          {
            if (style != word_Normal)
              error(err_nestedstyles, &t.pos);
            else
            {
              style = (t.cmd == c_c ? word_Code :
                       t.cmd == c_cw ? word_WeakCode : word_Emph);
              spcstyle = tospacestyle(style);
              sitem->type |= stack_style;
            }
            dtor(t), t = get_token(in);
          }
          if (t.type != tok_lbrace)
          {
            sfree(sitem);
            error(err_explbr, &t.pos);
          } else
          {
            /* Add an index-reference word with no text as yet */
            wd.type = word_IndexRef;
            wd.text = NULL;
            wd.alt = NULL;
            wd.aux = 0;
            wd.breaks = FALSE;
            indexword = addword(wd, &whptr);
            /* Set up a rdstring to read the index text */
            indexstr = nullrs;
            /* Flags so that we do the Right Things with text */
            index_visible = (type != c_I);
            index_downcase = (type == c_ii);
            indexing = TRUE;
            idxwordlist = NULL;
            idximplicit = &idxwordlist;
            /* Stack item to close the indexing on exit */
            stk_push(parsestk, sitem);
          }
          break;
        case c_u:
          uchr = t.aux;
          utext[0] = uchr;
          utext[1] = 0;
          wd.type = style;
          wd.breaks = FALSE;
          wd.alt = NULL;
          wd.aux = 0;
          wd.fpos = t.pos;
          if (!indexing || index_visible)
          {
            wd.text = ustrdup(utext);
            uword = addword(wd, &whptr);
          } else
            uword = NULL;
          if (indexing)
          {
            wd.text = ustrdup(utext);
            iword = addword(wd, &idximplicit);
          } else
            iword = NULL;
          dtor(t), t = get_token(in);
          if (t.type == tok_lbrace)
          {
            /*
             * \u with a left brace. Until the brace
             * closes, all further words go on a
             * sidetrack from the main thread of the
             * paragraph.
             */
            sitem = mknew(struct stack_item);
            sitem->type = stack_ualt;
            sitem->whptr = whptr;
            sitem->idximplicit = idximplicit;
            stk_push(parsestk, sitem);
            whptr = uword ? &uword->alt : NULL;
            idximplicit = iword ? &iword->alt : NULL;
          } else
          {
            if (indexing)
              rdadd(&indexstr, uchr);
            already = TRUE;
          }
          break;
        default:
          if (!macrolookup(macros, in, t.text, &t.pos))
            error(err_badmidcmd, t.text, &t.pos);
          break;
        }
Пример #4
0
int main(int argc, char **argv)
{
  char **infiles;
  char *outfile;
  int nfiles;
  int nogo;
  int errs;
  int reportcols;
  int debug;

  /*
   * Set up initial (default) parameters.
   */
  infiles = mknewa(char *, argc);
  outfile = NULL;
  nfiles = 0;
  nogo = errs = FALSE;
  reportcols = 0;
  debug = 0;

  if (argc == 1)
  {
    usage();
    exit(EXIT_SUCCESS);
  }

  /*
   * Parse command line arguments.
   */
  while (--argc)
  {
    char *p = *++argv;
    if (*p == '-')
    {
      /*
       * An option.
       */
      while (p && *++p)
      {
        char c = *p;
        switch (c)
        {
        case '-':
          /*
           * Long option.
           */
          {
            char *opt, *val;
            opt = p++;          /* opt will have _one_ leading - */
            while (*p && *p != '=')
              p++;              /* find end of option */
            if (*p == '=')
            {
              *p++ = '\0';
              val = p;
            } else
              val = NULL;
            if (!strcmp(opt, "-version"))
            {
              showversion();
              nogo = TRUE;
            } else if (!strcmp(opt, "-licence") ||
                       !strcmp(opt, "-license"))
            {
              licence();
              nogo = TRUE;
            } else if (!strcmp(opt, "-output"))
            {
              if (!val)
                errs = TRUE, error(err_optnoarg, opt);
              else
                outfile = val;
            } else if (!strcmp(opt, "-precise"))
            {
              reportcols = 1;
            } else
            {
              errs = TRUE, error(err_nosuchopt, opt);
            }
          }
          p = NULL;
          break;
        case 'V':
        case 'L':
        case 'P':
        case 'd':
          /*
           * Option requiring no parameter.
           */
          switch (c)
          {
          case 'V':
            showversion();
            nogo = TRUE;
            break;
          case 'L':
            licence();
            nogo = TRUE;
            break;
          case 'P':
            reportcols = 1;
            break;
          case 'd':
            debug = TRUE;
            break;
          }
          break;
        case 'o':
          /*
           * Option requiring parameter.
           */
          p++;
          if (!*p && argc > 1)
            --argc, p = *++argv;
          else if (!*p)
          {
            char opt[2];
            opt[0] = c;
            opt[1] = '\0';
            errs = TRUE, error(err_optnoarg, opt);
          }
          /*
           * Now c is the option and p is the parameter.
           */
          switch (c)
          {
          case 'o':
            outfile = p;
            break;
          }
          p = NULL;             /* prevent continued processing */
          break;
        default:
          /*
           * Unrecognised option.
           */
          {
            char opt[2];
            opt[0] = c;
            opt[1] = '\0';
            errs = TRUE, error(err_nosuchopt, opt);
          }
        }
      }
    } else
    {
      /*
       * A non-option argument.
       */
      infiles[nfiles++] = p;
    }
  }

  if (errs)
    exit(EXIT_FAILURE);
  if (nogo)
    exit(EXIT_SUCCESS);

  /*
   * Do the work.
   */
  if (nfiles == 0)
  {
    error(err_noinput);
    usage();
    exit(EXIT_FAILURE);
  }

  {
    input in;
    paragraph *sourceform, *p;
    indexdata *idx;
    keywordlist *keywords;

    in.filenames = infiles;
    in.nfiles = nfiles;
    in.currfp = NULL;
    in.currindex = 0;
    in.npushback = in.pushbacksize = 0;
    in.pushback = NULL;
    in.reportcols = reportcols;
    in.stack = NULL;

    idx = make_index();

    sourceform = read_input(&in, idx);
    if (!sourceform)
      exit(EXIT_FAILURE);

    sfree(in.pushback);

    mark_attr_ends(sourceform);

    sfree(infiles);

    keywords = get_keywords(sourceform);
    if (!keywords)
      exit(EXIT_FAILURE);
    gen_citations(sourceform, keywords);
    subst_keywords(sourceform, keywords);

    for (p = sourceform; p; p = p->next)
      if (p->type == para_IM)
        index_merge(idx, TRUE, p->keyword, p->words);

    build_index(idx);


    if (debug)
    {
      index_debug(idx);
      dbg_prtkws(keywords);
      dbg_prtsource(sourceform);
    }

    xhtml_backend(sourceform, keywords, idx);

    free_para_list(sourceform);
    free_keywords(keywords);
    cleanup_index(idx);
  }

  return 0;
}