Esempio n. 1
0
/**
 * Read one word from the input and classify it.
 * The first character is already known to be valid for a keyword, so it is
 * consumed without being checked.
 *
 * @param ctx        The input context the characters are read from
 * @param pc         The structure to update: pc.str is replaced by the word
 *                   and pc.type is set
 * @param skipcheck  If true, the word stays a CT_WORD and no macro,
 *                   annotation or keyword detection is done
 * @return           Whether a word was parsed (always true)
 */
bool parse_word(tok_ctx& ctx, chunk_t& pc, bool skipcheck)
{
   static unc_text intr_txt("@interface");

   /* Start over with the first character, which needs no validation */
   pc.str.clear();
   pc.str.append(ctx.get());

   /* Collect every following character that may appear inside a keyword */
   while (ctx.more() && CharTable::IsKw2(ctx.peek()))
   {
      const int cur = ctx.get();
      pc.str.append(cur);

      /* HACK: Non-ASCII characters are only allowed in identifiers, so the
       * word cannot be a keyword and the classification below is skipped */
      skipcheck = skipcheck || (cur > 0x7f);
   }

   /* Default classification; refined below unless the check is skipped */
   pc.type = CT_WORD;
   if (skipcheck)
   {
      return(true);
   }

   /* Detect pre-processor functions now: the first token after '#define' */
   const bool is_define_name = (cpd.in_preproc == CT_PP_DEFINE) &&
                               (cpd.preproc_ncnl_count == 1);
   if (is_define_name)
   {
      /* A '(' directly after the name makes it a function-like macro */
      pc.type = (ctx.peek() == '(') ? CT_MACRO_FUNC : CT_MACRO;
      return(true);
   }

   /* '@interface' is reserved, not an annotation itself */
   const bool is_annotation = (cpd.lang_flags & LANG_JAVA) &&
                              pc.str.startswith("@") &&
                              !pc.str.equals(intr_txt);
   if (is_annotation)
   {
      pc.type = CT_ANNOTATION;
   }
   else
   {
      /* Turn it into a keyword now */
      pc.type = find_keyword_type(pc.text(), pc.str.size());
   }
   return(true);
} // parse_word
Esempio n. 2
0
/**
 * Parse a numeric literal at the current input position.
 * The text must start with a digit, or with a '.' followed by a digit;
 * otherwise nothing is consumed and false is returned.
 *
 * This should cover all number formats for all languages.
 * Note that this is not a strict parser. It will happily parse numbers in
 * an invalid format.
 *
 * For example, only D allows underscores in the numbers, but they are
 * allowed in all formats.
 *
 * @param ctx  The input context the characters are read from
 * @param pc   The structure to update: the literal is appended to pc.str and
 *             pc.type is set to CT_NUMBER or CT_NUMBER_FP
 * @return     Whether a number was parsed
 */
static bool parse_number(tok_ctx &ctx, chunk_t &pc)
{
   bool did_hex = false;

   /* A number must start with a digit or a dot, followed by a digit */
   if (!is_dec(ctx.peek()) &&
       ((ctx.peek() != '.') || !is_dec(ctx.peek(1))))
   {
      return(false);
   }

   /* A leading '.' is guaranteed to be followed by a digit at this point, so
    * a '..' sequence cannot occur here and needs no separate check.
    */
   bool is_float = (ctx.peek() == '.');

   /* Check for Hex, Octal, or Binary
    * Note that only D and Pawn support binary, but who cares?
    */
   if (ctx.peek() == '0')
   {
      pc.str.append(ctx.get());  /* store the '0' */

      /* MS constant might have an "h" at the end. Look for it by scanning
       * ahead over the whole word and then rewinding the input.
       */
      chunk_t pc_temp;
      pc_temp.str.append('0');
      ctx.save();
      while (ctx.more() && CharTable::IsKw2(ctx.peek()))
      {
         pc_temp.str.append(ctx.get());
      }
      /* pc_temp holds at least the '0', so the index below is valid */
      const size_t pc_length = pc_temp.len();
      const int    ch        = pc_temp.str[pc_length - 1];
      ctx.restore();
      LOG_FMT(LGUY, "%s(%d): pc_temp:%s\n", __func__, __LINE__, pc_temp.text());

      if (ch == 'h')
      {
         // we have an MS hexadecimal number with "h" at the end
         LOG_FMT(LGUY, "%s(%d): MS hexadecimal number\n", __func__, __LINE__);
         did_hex = true;
         do
         {
            pc.str.append(ctx.get()); /* store the rest */
         } while (is_hex_(ctx.peek()));

         /* Only take the terminating 'h' if it is still pending. For a
          * literal such as "0h" the loop above has already stored it, and
          * reading once more would swallow the character that follows the
          * number (or read past the end of the input).
          */
         if (ctx.peek() == 'h')
         {
            pc.str.append(ctx.get());    /* store the h */
         }
         LOG_FMT(LGUY, "%s(%d): pc:%s\n", __func__, __LINE__, pc.text());
      }
      else
      {
         switch (unc_toupper(ctx.peek()))
         {
         case 'X':               /* hex */
            did_hex = true;
            do
            {
               pc.str.append(ctx.get());  /* store the 'x' and then the rest */
            } while (is_hex_(ctx.peek()));
            break;

         case 'B':               /* binary */
            do
            {
               pc.str.append(ctx.get());  /* store the 'b' and then the rest */
            } while (is_bin_(ctx.peek()));
            break;

         case '0':                /* octal or decimal */
         case '1':
         case '2':
         case '3':
         case '4':
         case '5':
         case '6':
         case '7':
         case '8':
         case '9':
            /* The first digit is stored unchecked, the rest must be octal */
            do
            {
               pc.str.append(ctx.get());
            } while (is_oct_(ctx.peek()));
            break;

         default:
            /* either just 0 or 0.1 or 0UL, etc */
            break;
         }
      }
   }
   else
   {
      /* Regular int or float */
      while (is_dec_(ctx.peek()))
      {
         pc.str.append(ctx.get());
      }
   }

   /* Check if we stopped on a decimal point & make sure it isn't '..' */
   if ((ctx.peek() == '.') && (ctx.peek(1) != '.'))
   {
      pc.str.append(ctx.get());
      is_float = true;
      /* The fraction uses the same digit set as the integer part */
      if (did_hex)
      {
         while (is_hex_(ctx.peek()))
         {
            pc.str.append(ctx.get());
         }
      }
      else
      {
         while (is_dec_(ctx.peek()))
         {
            pc.str.append(ctx.get());
         }
      }
   }

   /* Check exponent
    * Valid exponents per language (not that it matters):
    * C/C++/D/Java: eEpP
    * C#/Pawn:      eE
    */
   int tmp = unc_toupper(ctx.peek());
   if ((tmp == 'E') || (tmp == 'P'))
   {
      is_float = true;
      pc.str.append(ctx.get());
      if ((ctx.peek() == '+') || (ctx.peek() == '-'))
      {
         pc.str.append(ctx.get());
      }
      while (is_dec_(ctx.peek()))
      {
         pc.str.append(ctx.get());
      }
   }

   /* Check the suffixes
    * Valid suffixes per language (not that it matters):
    *        Integer       Float
    * C/C++: uUlL64        lLfF
    * C#:    uUlL          fFdDMm
    * D:     uUL           ifFL
    * Java:  lL            fFdD
    * Pawn:  (none)        (none)
    *
    * Note that i, f, d, and m only appear in floats.
    */
   while (1)
   {
      tmp = unc_toupper(ctx.peek());
      if ((tmp == 'I') || (tmp == 'F') || (tmp == 'D') || (tmp == 'M'))
      {
         is_float = true;
      }
      else if ((tmp != 'L') && (tmp != 'U'))
      {
         break;
      }
      pc.str.append(ctx.get());
   }

   /* skip the Microsoft-specific '64' suffix */
   if ((ctx.peek() == '6') && (ctx.peek(1) == '4'))
   {
      pc.str.append(ctx.get());
      pc.str.append(ctx.get());
   }

   pc.type = is_float ? CT_NUMBER_FP : CT_NUMBER;

   /* If there is anything left, then we are probably dealing with garbage or
    * some sick macro junk. Eat it.
    */
   parse_suffix(ctx, pc);

   return(true);
} // parse_number