示例#1
0
文件: bk_text.c 项目: mloar/halibut
/*
 * Write one line to `tf' consisting of the words of `text', as
 * rendered by text_rdaddw(), enclosed in square brackets and
 * terminated by a newline.
 */
static void text_versionid(textfile *tf, word *text, textconfig *cfg) {
    rdstring line = { 0, 0, NULL };

    /* Assemble "[", the rendered words, then "]" and a newline. */
    rdadd(&line, L'[');
    text_rdaddw(&line, text, NULL, cfg);
    rdadd(&line, L']');
    rdadd(&line, L'\n');

    /* Hand the finished line to the output, then drop the scratch buffer. */
    text_output(tf, line.text);
    sfree(line.text);
}
示例#2
0
/*
 * Fetch the remainder of a line that starts `\c' and return it as
 * a tok_word token. The remainder may be nothing at all (tok_word
 * with empty text).
 *
 * At most one leading space is skipped. Characters are then
 * collected up to, but not including, the next newline or EOF;
 * that terminating character is pushed back onto the input. A
 * carriage return immediately before a newline is discarded.
 *
 * Only the `type', `pos' and `text' fields of the returned token
 * are assigned here.
 */
token get_codepar_token(input * in)
{
  token result;
  rdstring line = { 0, 0, NULL };
  filepos where;
  int ch, prev;

  result.type = tok_word;

  ch = get(in, &where);
  if (ch == ' ')
    ch = get(in, &where);       /* expect (and discard) one space */
  result.pos = where;           /* position of the last character read */

  for (;;)
  {
    if (isnl(ch) || ch == EOF)
      break;
    prev = ch;
    ch = get(in, &where);
    /* 13 is carriage return: drop it when a newline follows directly. */
    if (prev == 13 && isnl(ch))
      continue;
    rdadd(&line, (wchar_t)prev);
  }

  /* Leave the newline / EOF for the next reader. */
  unget(in, ch, &where);

  result.text = ustrdup(line.text);
  sfree(line.text);
  return result;
}
示例#3
0
文件: bk_text.c 项目: rdebath/sgt
/*
 * Append a plain-text rendering of a word list to `rs'.
 *
 * The list is walked from `text' until either `end' is reached or
 * the list runs out. Only the text-bearing word types (normal,
 * emphasised, code, weak code, and their whitespace and quote
 * variants) produce output; every other word type, including
 * hyperlink, cross-reference and index markers, is skipped.
 *
 * For emphasised words the configured start/end emphasis strings
 * are emitted around the first and last word of a run; for code
 * words the configured left/right quote strings are used instead.
 * A normal word whose text fails cvt_ok() for the configured
 * charset, and which has an alternative, is replaced by the
 * rendering of that alternative.
 */
static void text_rdaddw(rdstring *rs, word *text, word *end, textconfig *cfg) {
    word *w;

    for (w = text; w && w != end; w = w->next) {
        int style, kind;

        /* Filter: only the word types listed here generate any text. */
        switch (w->type) {
          case word_Normal:
          case word_Emph:
          case word_Code:
          case word_WeakCode:
          case word_WhiteSpace:
          case word_EmphSpace:
          case word_CodeSpace:
          case word_WkCodeSpace:
          case word_Quote:
          case word_EmphQuote:
          case word_CodeQuote:
          case word_WkCodeQuote:
            break;
          default:
            continue;
        }

        assert(w->type != word_CodeQuote &&
               w->type != word_WkCodeQuote);

        style = towordstyle(w->type);
        kind = removeattr(w->type);

        /* Opening decoration, at the first (or only) word of a run. */
        if (style == word_Emph || style == word_Code) {
            int attr = attraux(w->aux);
            if (attr == attr_First || attr == attr_Only) {
                if (style == word_Emph)
                    rdadds(rs, cfg->startemph);
                else
                    rdadds(rs, cfg->lquote);
            }
        }

        /* The word itself. */
        if (kind == word_Normal) {
            if (!cvt_ok(cfg->charset, w->text) && w->alt)
                text_rdaddw(rs, w->alt, NULL, cfg);
            else
                rdadds(rs, w->text);
        } else if (kind == word_WhiteSpace) {
            rdadd(rs, L' ');
        } else if (kind == word_Quote) {
            if (quoteaux(w->aux) == quote_Open)
                rdadds(rs, cfg->lquote);
            else
                rdadds(rs, cfg->rquote);
        }

        /* Closing decoration, at the last (or only) word of a run. */
        if (style == word_Emph || style == word_Code) {
            int attr = attraux(w->aux);
            if (attr == attr_Last || attr == attr_Only) {
                if (style == word_Emph)
                    rdadds(rs, cfg->endemph);
                else
                    rdadds(rs, cfg->rquote);
            }
        }
    }
}
示例#4
0
/*
 * Reads a single file (ie until get() returns EOF)
 */
static void read_file(paragraph *** ret, input * in, indexdata * idx)
{
  token t;
  paragraph par;
  word wd, **whptr, **idximplicit;
  tree234 *macros;
  wchar_t utext[2], *wdtext;
  int style, spcstyle;
  int already;
  int iswhite, seenwhite;
  int type;
  struct stack_item {
    enum {
      stack_nop = 0,            /* do nothing (for error recovery) */
      stack_ualt = 1,           /* \u alternative */
      stack_style = 2,          /* \e, \c, \cw */
      stack_idx = 4,            /* \I, \i, \ii */
      stack_hyper = 8,          /* \W */
      stack_quote = 16,         /* \q */
    } type;
    word **whptr;               /* to restore from \u alternatives */
    word **idximplicit;         /* to restore from \u alternatives */
  } *sitem;
  stack parsestk;
  word *indexword=NULL, *uword=NULL, *iword=NULL;
  word *idxwordlist;
  rdstring indexstr;
  int index_downcase=0, index_visible=0, indexing=0;
  const rdstring nullrs = { 0, 0, NULL };
  wchar_t uchr;

  t.text = NULL;
  macros = newtree234(macrocmp);
  already = FALSE;

  /*
   * Loop on each paragraph.
   */
  while (1)
  {
    int start_cmd = c__invalid;
    par.words = NULL;
    par.keyword = NULL;
    whptr = &par.words;

    /*
     * Get a token.
     */
    if (!already)
    {
      dtor(t), t = get_token(in);
    }
    already = FALSE;
    if (t.type == tok_eof)
      break;

    /*
     * Parse code paragraphs separately.
     */
    if (t.type == tok_cmd && t.cmd == c_c && !isbrace(in))
    {
      par.type = para_Code;
      par.fpos = t.pos;
      while (1)
      {
        dtor(t), t = get_codepar_token(in);
        wd.type = word_WeakCode;
        wd.breaks = FALSE;      /* shouldn't need this... */
        wd.text = ustrdup(t.text);
        wd.alt = NULL;
        wd.fpos = t.pos;
        addword(wd, &whptr);
        dtor(t), t = get_token(in);
        if (t.type == tok_white)
        {
          /*
           * The newline after a code-paragraph line
           */
          dtor(t), t = get_token(in);
        }
        if (t.type == tok_eop || t.type == tok_eof)
          break;
        else if (t.type != tok_cmd || t.cmd != c_c)
        {
          error(err_brokencodepara, &t.pos);
          addpara(par, ret);
          while (t.type != tok_eop)     /* error recovery: */
            dtor(t), t = get_token(in); /* eat rest of paragraph */
          goto codeparabroken;  /* ick, but such is life */
        }
      }
      addpara(par, ret);
    codeparabroken:
      continue;
    }

    while (t.type == tok_cmd && macrolookup(macros, in, t.text, &t.pos))
    {
      dtor(t), t = get_token(in);
    }


    /*
     * This token begins a paragraph. See if it's one of the
     * special commands that define a paragraph type.
     *
     * (note that \# is special in a way, and \nocite takes no
     * text)
     */
    par.type = para_Normal;
    if (t.type == tok_cmd)
    {
      int needkw=0;
      int is_macro = FALSE;

      par.fpos = t.pos;
      switch (t.cmd)
      {
      default:
        needkw = -1;
        break;
      case c__invalid:
        error(err_badparatype, t.text, &t.pos);
        needkw = 4;
        break;
      case c__comment:
        if (isbrace(in))
          break;                /* `\#{': isn't a comment para */
        do
        {
          dtor(t), t = get_token(in);
        }
        while (t.type != tok_eop && t.type != tok_eof);
        continue;               /* next paragraph */
        /*
         * `needkw' values:
         *
         *   1 -- exactly one keyword
         *   2 -- at least one keyword
         *   4 -- any number of keywords including zero
         *   8 -- at least one keyword and then nothing else
         *  16 -- nothing at all! no keywords, no body
         *  32 -- no keywords at all
         */
      case c_A:
        needkw = 2;
        par.type = para_Appendix;
        break;
      case c_B:
        needkw = 2;
        par.type = para_Biblio;
        break;
      case c_BR:
        needkw = 1;
        par.type = para_BR;
        start_cmd = c_BR;
        break;
      case c_C:
        needkw = 2;
        par.type = para_Chapter;
        break;
      case c_H:
        needkw = 2;
        par.type = para_Heading;
        par.aux = 0;
        break;
      case c_IM:
        needkw = 2;
        par.type = para_IM;
        start_cmd = c_IM;
        break;
      case c_S:
        needkw = 2;
        par.type = para_Subsect;
        par.aux = t.aux;
        break;
      case c_U:
        needkw = 32;
        par.type = para_UnnumberedChapter;
        break;
        /* For \b and \n the keyword is optional */
      case c_b:
        needkw = 4;
        par.type = para_Bullet;
        break;
      case c_n:
        needkw = 4;
        par.type = para_NumberedList;
        break;
      case c_cfg:
        needkw = 8;
        par.type = para_Config;
        start_cmd = c_cfg;
        break;
      case c_copyright:
        needkw = 32;
        par.type = para_Copyright;
        break;
      case c_define:
        is_macro = TRUE;
        needkw = 1;
        break;
        /* For \nocite the keyword is _everything_ */
      case c_nocite:
        needkw = 8;
        par.type = para_NoCite;
        break;
      case c_preamble:
        needkw = 32;
        par.type = para_Preamble;
        break;
      case c_rule:
        needkw = 16;
        par.type = para_Rule;
        break;
      case c_title:
        needkw = 32;
        par.type = para_Title;
        break;
      case c_versionid:
        needkw = 32;
        par.type = para_VersionID;
        break;
      }

      if (needkw > 0)
      {
        rdstring rs = { 0, 0, NULL };
        int nkeys = 0;
        filepos fp;

        /* Get keywords. */
        dtor(t), t = get_token(in);
        fp = t.pos;
        while (t.type == tok_lbrace)
        {
          /* This is a keyword. */
          nkeys++;
          /* FIXME: there will be bugs if anyone specifies an
           * empty keyword (\foo{}), so trap this case. */
          while (dtor(t), t = get_token(in),
                 t.type == tok_word ||
                 t.type == tok_white ||
                 (t.type == tok_cmd && t.cmd == c__nbsp) ||
                 (t.type == tok_cmd && t.cmd == c__escaped))
          {
            if (t.type == tok_white ||
                (t.type == tok_cmd && t.cmd == c__nbsp))
              rdadd(&rs, ' ');
            else
              rdadds(&rs, t.text);
          }
          if (t.type != tok_rbrace)
          {
            error(err_kwunclosed, &t.pos);
            continue;
          }
          rdadd(&rs, 0);        /* add string terminator */
          dtor(t), t = get_token(in);   /* eat right brace */
        }

        rdadd(&rs, 0);          /* add string terminator */

        /* See whether we have the right number of keywords. */
        if ((needkw & 48) && nkeys > 0)
          error(err_kwillegal, &fp);
        if ((needkw & 11) && nkeys == 0)
          error(err_kwexpected, &fp);
        if ((needkw & 5) && nkeys > 1)
          error(err_kwtoomany, &fp);

        if (is_macro)
        {
          /*
           * Macro definition. Get the rest of the line
           * as a code-paragraph token, repeatedly until
           * there's nothing more left of it. Separate
           * with newlines.
           */
          rdstring macrotext = { 0, 0, NULL };
          while (1)
          {
            dtor(t), t = get_codepar_token(in);
            if (macrotext.pos > 0)
              rdadd(&macrotext, L'\n');
            rdadds(&macrotext, t.text);
            dtor(t), t = get_token(in);
            if (t.type == tok_eop)
              break;
          }
          macrodef(macros, rs.text, macrotext.text, fp);
          continue;             /* next paragraph */
        }

        par.keyword = rdtrim(&rs);

        /* Move to EOP in case of needkw==8 or 16 (no body) */
        if (needkw & 24)
        {
          /* We allow whitespace even when we expect no para body */
          while (t.type == tok_white)
            dtor(t), t = get_token(in);
          if (t.type != tok_eop && t.type != tok_eof &&
              (start_cmd == c__invalid ||
               t.type != tok_cmd || t.cmd != start_cmd))
          {
            error(err_bodyillegal, &t.pos);
            /* Error recovery: eat the rest of the paragraph */
            while (t.type != tok_eop && t.type != tok_eof &&
                   (start_cmd == c__invalid ||
                    t.type != tok_cmd || t.cmd != start_cmd))
              dtor(t), t = get_token(in);
          }
          if (t.type == tok_cmd)
            already = TRUE;     /* inhibit get_token at top of loop */
          addpara(par, ret);
          continue;             /* next paragraph */
        }
      }
    }

    /*
     * Now read the actual paragraph, word by word, adding to
     * the paragraph list.
     *
     * Mid-paragraph commands:
     *
     *  \K \k
     *  \c \cw
     *  \e
     *  \i \ii
     *  \I
     *  \u
     *  \W
     *  \date
     *  \\ \{ \}
     */
    parsestk = stk_new();
    style = word_Normal;
    spcstyle = word_WhiteSpace;
    indexing = FALSE;
    seenwhite = TRUE;
    while (t.type != tok_eop && t.type != tok_eof)
    {
      iswhite = FALSE;
      already = FALSE;

      /* Handle implicit paragraph breaks after \IM, \BR etc */
      if (start_cmd != c__invalid &&
          t.type == tok_cmd && t.cmd == start_cmd)
      {
        already = TRUE;         /* inhibit get_token at top of loop */
        break;
      }

      if (t.type == tok_cmd && t.cmd == c__escaped)
      {
        t.type = tok_word;      /* nice and simple */
        t.aux = 0;              /* even if `\-' - nonbreaking! */
      }
      if (t.type == tok_cmd && t.cmd == c__nbsp)
      {
        t.type = tok_word;      /* nice and simple */
        sfree(t.text);
        t.text = ustrdup(L" "); /* text is ` ' not `_' */
        t.aux = 0;              /* (nonbreaking) */
      }
      switch (t.type)
      {
      case tok_white:
        if (whptr == &par.words)
          break;                /* strip whitespace at start of para */
        wd.text = NULL;
        wd.type = spcstyle;
        wd.alt = NULL;
        wd.aux = 0;
        wd.fpos = t.pos;
        wd.breaks = FALSE;

        /*
         * Inhibit use of whitespace if it's (probably the
         * newline) before a repeat \IM / \BR type
         * directive.
         */
        if (start_cmd != c__invalid)
        {
          dtor(t), t = get_token(in);
          already = TRUE;
          if (t.type == tok_cmd && t.cmd == start_cmd)
            break;
        }

        if (indexing)
          rdadd(&indexstr, ' ');
        if (!indexing || index_visible)
          addword(wd, &whptr);
        if (indexing)
          addword(wd, &idximplicit);
        iswhite = TRUE;
        break;
      case tok_word:
        if (indexing)
          rdadds(&indexstr, t.text);
        wd.type = style;
        wd.alt = NULL;
        wd.aux = 0;
        wd.fpos = t.pos;
        wd.breaks = t.aux;
        if (!indexing || index_visible)
        {
          wd.text = ustrdup(t.text);
          addword(wd, &whptr);
        }
        if (indexing)
        {
          wd.text = ustrdup(t.text);
          addword(wd, &idximplicit);
        }
        break;
      case tok_lbrace:
        error(err_unexbrace, &t.pos);
        /* Error recovery: push nop */
        sitem = mknew(struct stack_item);
        sitem->type = stack_nop;
        stk_push(parsestk, sitem);
        break;
      case tok_rbrace:
        sitem = stk_pop(parsestk);
        if (!sitem)
          error(err_unexbrace, &t.pos);
        else
        {
          if (sitem->type & stack_ualt)
          {
            whptr = sitem->whptr;
            idximplicit = sitem->idximplicit;
          }
          if (sitem->type & stack_style)
          {
            style = word_Normal;
            spcstyle = word_WhiteSpace;
          }
          if (sitem->type & stack_idx )          {
            indexword->text = ustrdup(indexstr.text);
            if (index_downcase)
              ustrlow(indexword->text);
            indexing = FALSE;
            rdadd(&indexstr, L'\0');
            index_merge(idx, FALSE, indexstr.text, idxwordlist);
            sfree(indexstr.text);
          }
          if (sitem->type & stack_hyper)
          {
            wd.text = NULL;
            wd.type = word_HyperEnd;
            wd.alt = NULL;
            wd.aux = 0;
            wd.fpos = t.pos;
            wd.breaks = FALSE;
            if (!indexing || index_visible)
              addword(wd, &whptr);
            if (indexing)
              addword(wd, &idximplicit);
          }
          if (sitem->type & stack_quote)
          {
            wd.text = NULL;
            wd.type = toquotestyle(style);
            wd.alt = NULL;
            wd.aux = quote_Close;
            wd.fpos = t.pos;
            wd.breaks = FALSE;
            if (!indexing || index_visible)
              addword(wd, &whptr);
            if (indexing)
            {
              rdadd(&indexstr, L'"');
              addword(wd, &idximplicit);
            }
          }
        }
        sfree(sitem);
        break;
      case tok_cmd:
        switch (t.cmd)
        {
        case c__comment:
          /*
           * In-paragraph comment: \#{ balanced braces }
           *
           * Anything goes here; even tok_eop. We should
           * eat whitespace after the close brace _if_
           * there was whitespace before the \#.
           */
          dtor(t), t = get_token(in);
          if (t.type != tok_lbrace)
          {
            error(err_explbr, &t.pos);
          } else
          {
            int braces = 1;
            while (braces > 0)
            {
              dtor(t), t = get_token(in);
              if (t.type == tok_lbrace)
                braces++;
              else if (t.type == tok_rbrace)
                braces--;
              else if (t.type == tok_eof)
              {
                error(err_commenteof, &t.pos);
                break;
              }
            }
          }
          if (seenwhite)
          {
            already = TRUE;
            dtor(t), t = get_token(in);
            if (t.type == tok_white)
            {
              iswhite = TRUE;
              already = FALSE;
            }
          }
          break;
        case c_q:
          dtor(t), t = get_token(in);
          if (t.type != tok_lbrace)
          {
            error(err_explbr, &t.pos);
          } else
          {
            wd.text = NULL;
            wd.type = toquotestyle(style);
            wd.alt = NULL;
            wd.aux = quote_Open;
            wd.fpos = t.pos;
            wd.breaks = FALSE;
            if (!indexing || index_visible)
              addword(wd, &whptr);
            if (indexing)
            {
              rdadd(&indexstr, L'"');
              addword(wd, &idximplicit);
            }
            sitem = mknew(struct stack_item);
            sitem->type = stack_quote;
            stk_push(parsestk, sitem);
          }
          break;
        case c_K:
        case c_k:
        case c_R:
        case c_W:
        case c_L:
        case c_date:
          /*
           * Keyword, hyperlink, or \date. We expect a
           * left brace, some text, and then a right
           * brace. No nesting; no arguments.
           */
          wd.fpos = t.pos;
          wd.breaks = FALSE;
          if (t.cmd == c_K)
            wd.type = word_UpperXref;
          else if (t.cmd == c_k)
            wd.type = word_LowerXref;
          else if (t.cmd == c_R)
            wd.type = word_FreeTextXref;
          else if (t.cmd == c_W)
            wd.type = word_HyperLink;
          else if (t.cmd == c_L)
            wd.type = word_LocalHyperLink;
          else
            wd.type = word_Normal;
          dtor(t), t = get_token(in);
          if (t.type != tok_lbrace)
          {
            if (wd.type == word_Normal)
            {
              time_t thetime = time(NULL);
              struct tm *broken = localtime(&thetime);
              already = TRUE;
              wdtext = ustrftime(NULL, broken);
              wd.type = style;
            } else
            {
              error(err_explbr, &t.pos);
              wdtext = NULL;
            }
          } else
          {
            rdstring rs = { 0, 0, NULL };
            while (dtor(t), t = get_token(in),
                   t.type == tok_word || t.type == tok_white)
            {
              if (t.type == tok_white)
                rdadd(&rs, ' ');
              else
                rdadds(&rs, t.text);
            }
            if (wd.type == word_Normal)
            {
              time_t thetime = time(NULL);
              struct tm *broken = localtime(&thetime);
              wdtext = ustrftime(rs.text, broken);
              wd.type = style;
            } else
            {
              wdtext = ustrdup(rs.text);
            }
            sfree(rs.text);
            if (t.type != tok_rbrace)
            {
              error(err_kwexprbr, &t.pos);
            }
          }
          wd.alt = NULL;
          wd.aux = 0;
          if (!indexing || index_visible)
          {
            wd.text = ustrdup(wdtext);
            addword(wd, &whptr);
          }
          if (indexing)
          {
            wd.text = ustrdup(wdtext);
            addword(wd, &idximplicit);
          }
          sfree(wdtext);
          if (wd.type == word_FreeTextXref || wd.type == word_HyperLink || wd.type == word_LocalHyperLink)
          {
            /*
             * Hyperlinks are different: they then
             * expect another left brace, to begin
             * delimiting the text marked by the link.
             */
            dtor(t), t = get_token(in);
            /*
             * Special cases: \W{}\c, \W{}\e, \W{}\cw
             */
            sitem = mknew(struct stack_item);
            sitem->type = stack_hyper;
            if (t.type == tok_cmd &&
                (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw))
            {
              if (style != word_Normal)
                error(err_nestedstyles, &t.pos);
              else
              {
                style = (t.cmd == c_c ? word_Code :
                         t.cmd == c_cw ? word_WeakCode : word_Emph);
                spcstyle = tospacestyle(style);
                sitem->type |= stack_style;
              }
              dtor(t), t = get_token(in);
            }
            if (t.type != tok_lbrace)
            {
              error(err_explbr, &t.pos);
              sfree(sitem);
            } else
            {
              stk_push(parsestk, sitem);
            }
          }
          break;
        case c_c:
        case c_cw:
        case c_e:
          type = t.cmd;
          if (style != word_Normal)
          {
            error(err_nestedstyles, &t.pos);
            /* Error recovery: eat lbrace, push nop. */
            dtor(t), t = get_token(in);
            sitem = mknew(struct stack_item);
            sitem->type = stack_nop;
            stk_push(parsestk, sitem);
          }
          dtor(t), t = get_token(in);
          if (t.type != tok_lbrace)
          {
            error(err_explbr, &t.pos);
          } else
          {
            style = (type == c_c ? word_Code :
                     type == c_cw ? word_WeakCode : word_Emph);
            spcstyle = tospacestyle(style);
            sitem = mknew(struct stack_item);
            sitem->type = stack_style;
            stk_push(parsestk, sitem);
          }
          break;
        case c_i:
        case c_ii:
        case c_I:
          type = t.cmd;
          if (indexing)
          {
            error(err_nestedindex, &t.pos);
            /* Error recovery: eat lbrace, push nop. */
            dtor(t), t = get_token(in);
            sitem = mknew(struct stack_item);
            sitem->type = stack_nop;
            stk_push(parsestk, sitem);
          }
          sitem = mknew(struct stack_item);
          sitem->type = stack_idx;
          dtor(t), t = get_token(in);
          /*
           * Special cases: \i\c, \i\e, \i\cw
           */
          wd.fpos = t.pos;
          if (t.type == tok_cmd &&
              (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw))
          {
            if (style != word_Normal)
              error(err_nestedstyles, &t.pos);
            else
            {
              style = (t.cmd == c_c ? word_Code :
                       t.cmd == c_cw ? word_WeakCode : word_Emph);
              spcstyle = tospacestyle(style);
              sitem->type |= stack_style;
            }
            dtor(t), t = get_token(in);
          }
          if (t.type != tok_lbrace)
          {
            sfree(sitem);
            error(err_explbr, &t.pos);
          } else
          {
            /* Add an index-reference word with no text as yet */
            wd.type = word_IndexRef;
            wd.text = NULL;
            wd.alt = NULL;
            wd.aux = 0;
            wd.breaks = FALSE;
            indexword = addword(wd, &whptr);
            /* Set up a rdstring to read the index text */
            indexstr = nullrs;
            /* Flags so that we do the Right Things with text */
            index_visible = (type != c_I);
            index_downcase = (type == c_ii);
            indexing = TRUE;
            idxwordlist = NULL;
            idximplicit = &idxwordlist;
            /* Stack item to close the indexing on exit */
            stk_push(parsestk, sitem);
          }
          break;
        case c_u:
          uchr = t.aux;
          utext[0] = uchr;
          utext[1] = 0;
          wd.type = style;
          wd.breaks = FALSE;
          wd.alt = NULL;
          wd.aux = 0;
          wd.fpos = t.pos;
          if (!indexing || index_visible)
          {
            wd.text = ustrdup(utext);
            uword = addword(wd, &whptr);
          } else
            uword = NULL;
          if (indexing)
          {
            wd.text = ustrdup(utext);
            iword = addword(wd, &idximplicit);
          } else
            iword = NULL;
          dtor(t), t = get_token(in);
          if (t.type == tok_lbrace)
          {
            /*
             * \u with a left brace. Until the brace
             * closes, all further words go on a
             * sidetrack from the main thread of the
             * paragraph.
             */
            sitem = mknew(struct stack_item);
            sitem->type = stack_ualt;
            sitem->whptr = whptr;
            sitem->idximplicit = idximplicit;
            stk_push(parsestk, sitem);
            whptr = uword ? &uword->alt : NULL;
            idximplicit = iword ? &iword->alt : NULL;
          } else
          {
            if (indexing)
              rdadd(&indexstr, uchr);
            already = TRUE;
          }
          break;
        default:
          if (!macrolookup(macros, in, t.text, &t.pos))
            error(err_badmidcmd, t.text, &t.pos);
          break;
        }
示例#5
0
/*
 * Read the next token from the input file in the normal way
 * (`normal' in the sense that code paragraphs are read a different
 * way).
 *
 * The token starts out with cmd = c__invalid, aux = FALSE and
 * text = NULL; pos is the position of its first character. The
 * result is one of:
 *
 *   tok_white / tok_eop  a run of whitespace; tok_eop if the run
 *                        held more than one newline
 *   tok_eof              end of input, possibly after whitespace
 *   tok_cmd              a backslash followed by a single special
 *                        character, by `u' and up to four hex
 *                        digits, or by a run of iscmd() characters;
 *                        text holds what was collected and
 *                        match_kw() is applied to the token
 *   tok_lbrace           `{'
 *   tok_rbrace           `}'
 *   tok_word             anything else (see below)
 */
token get_token(input * in)
{
  token tk;
  rdstring buf = { 0, 0, NULL };
  filepos where;
  int ch;

  tk.cmd = c__invalid;
  tk.aux = FALSE;
  tk.text = NULL;               /* default */
  ch = get(in, &where);
  tk.pos = where;

  if (iswhite(ch))
  {
    /* Swallow the whole whitespace run, counting newlines. */
    int newlines = 0;
    do
    {
      if (isnl(ch))
        newlines++;
      ch = get(in, &where);
    }
    while (ch != EOF && iswhite(ch));

    if (ch == EOF)
      tk.type = tok_eof;
    else
    {
      unget(in, ch, &where);
      tk.type = (newlines > 1 ? tok_eop : tok_white);
    }
    return tk;
  }

  switch (ch)
  {
  case EOF:
    tk.type = tok_eof;
    return tk;

  case '{':
    tk.type = tok_lbrace;
    return tk;

  case '}':
    tk.type = tok_rbrace;
    return tk;

  case '\\':
    ch = get(in, &where);
    if (ch == '-' || ch == '\\' || ch == '_' ||
        ch == '#' || ch == '{' || ch == '}')
    {
      /* single-char command */
      rdadd(&buf, (wchar_t)ch);
    }
    else if (ch == 'u')
    {
      /* `u' itself plus at most four hex digits */
      int taken = 0;
      do
      {
        rdadd(&buf, (wchar_t)ch);
        taken++;
        ch = get(in, &where);
      }
      while (ishex(ch) && taken < 5);
      unget(in, ch, &where);
    }
    else if (iscmd(ch))
    {
      do
      {
        rdadd(&buf, (wchar_t)ch);
        ch = get(in, &where);
      }
      while (iscmd(ch));
      unget(in, ch, &where);
    }
    /* Now match the command against the list of available ones. */
    tk.type = tok_cmd;
    tk.text = ustrdup(buf.text);
    match_kw(&tk);
    sfree(buf.text);
    return tk;

  default:
    break;
  }

  /*
   * A word: the longest possible contiguous sequence of things
   * other than whitespace, backslash, braces and hyphen. A hyphen
   * ends the word but is kept as part of it; any other terminator
   * is pushed back for the next token. `aux' (FALSE so far)
   * becomes TRUE if the word ends in a hyphen.
   */
  for (;;)
  {
    if (iswhite(ch) || ch == '{' || ch == '}' || ch == '\\' || ch == EOF)
    {
      /* Put back the character that caused termination */
      unget(in, ch, &where);
      break;
    }
    rdadd(&buf, (wchar_t)ch);
    if (ch == '-')
    {
      tk.aux = TRUE;
      break;                    /* hyphen terminates word */
    }
    ch = get(in, &where);
  }
  tk.type = tok_word;
  tk.text = ustrdup(buf.text);
  sfree(buf.text);
  return tk;
}