示例#1
0
/**
 * Parses all legal D string constants.
 *
 * Quoted strings:
 *   r"Wysiwyg"      # WYSIWYG string
 *   x"hexstring"    # Hexadecimal array
 *   `Wysiwyg`       # WYSIWYG string
 *   'char'          # single character
 *   "reg_string"    # regular string
 *
 * Non-quoted strings:
 * \x12              # 1-byte hex constant
 * \u1234            # 2-byte hex constant
 * \U12345678        # 4-byte hex constant
 * \123              # octal constant
 * \&            # named entity
 * \n                # single character
 *
 * @param pc   The structure to update, str is an input.
 * @return     Whether a string was parsed
 */
static bool d_parse_string(tok_ctx& ctx, chunk_t& pc)
{
   int ch = ctx.peek();

   if ((ch == '"') || (ch == '\'') || (ch == '`'))
   {
      return(parse_string(ctx, pc, 0, true));
   }
   else if (ch == '\\')
   {
      ctx.save();
      int cnt;
      pc.str.clear();
      while (ctx.peek() == '\\')
      {
         pc.str.append(ctx.get());
         /* Check for end of file */
         switch (ctx.peek())
         {
         case 'x':
            /* \x HexDigit HexDigit */
            cnt = 3;
            while (cnt--)
            {
               pc.str.append(ctx.get());
            }
            break;

         case 'u':
            /* \u HexDigit HexDigit HexDigit HexDigit */
            cnt = 5;
            while (cnt--)
            {
               pc.str.append(ctx.get());
            }
            break;

         case 'U':
            /* \U HexDigit (x8) */
            cnt = 9;
            while (cnt--)
            {
               pc.str.append(ctx.get());
            }
            break;

         case '0':
         case '1':
         case '2':
         case '3':
         case '4':
         case '5':
         case '6':
         case '7':
            /* handle up to 3 octal digits */
            pc.str.append(ctx.get());
            ch = ctx.peek();
            if ((ch >= '0') && (ch <= '7'))
            {
               pc.str.append(ctx.get());
               ch = ctx.peek();
               if ((ch >= '0') && (ch <= '7'))
               {
                  pc.str.append(ctx.get());
               }
            }
            break;

         case '&':
            /* \& NamedCharacterEntity ; */
            pc.str.append(ctx.get());
            while (unc_isalpha(ctx.peek()))
            {
               pc.str.append(ctx.get());
            }
            if (ctx.peek() == ';')
            {
               pc.str.append(ctx.get());
            }
            break;

         default:
            /* Everything else is a single character */
            pc.str.append(ctx.get());
            break;
         } // switch
      }

      if (pc.str.size() > 1)
      {
         pc.type = CT_STRING;
         return(true);
      }
      ctx.restore();
   }
   else if (((ch == 'r') || (ch == 'x')) && (ctx.peek(1) == '"'))
   {
      return(parse_string(ctx, pc, 1, false));
   }
   return(false);
} // d_parse_string
示例#2
0
void tokenize_cleanup(void)
{
   LOG_FUNC_ENTRY();

   chunk_t *pc   = chunk_get_head();
   chunk_t *prev = NULL;
   chunk_t *next;
   chunk_t *tmp;
   chunk_t *tmp2;
   bool    in_type_cast = false;

   cpd.unc_stage = US_TOKENIZE_CLEANUP;

   /* Since [] is expected to be TSQUARE for the 'operator', we need to make
    * this change in the first pass.
    */
   for (pc = chunk_get_head(); pc != NULL; pc = chunk_get_next_ncnl(pc))
   {
      if (pc->type == CT_SQUARE_OPEN)
      {
         next = chunk_get_next_ncnl(pc);
         if (chunk_is_token(next, CT_SQUARE_CLOSE))
         {
            /* Change '[' + ']' into '[]' */
            set_chunk_type(pc, CT_TSQUARE);
            pc->str = "[]";
            // bug # 664
            // The original orig_col_end of CT_SQUARE_CLOSE is stored at orig_col_end of CT_TSQUARE.
            // pc->orig_col_end += 1;
            pc->orig_col_end = next->orig_col_end;
            chunk_del(next);
         }
      }
      if ((pc->type == CT_SEMICOLON) &&
          (pc->flags & PCF_IN_PREPROC) &&
          !chunk_get_next_ncnl(pc, CNAV_PREPROC))
      {
         LOG_FMT(LNOTE, "%s:%d Detected a macro that ends with a semicolon. Possible failures if used.\n",
                 cpd.filename, pc->orig_line);
      }
   }

   /* We can handle everything else in the second pass */
   pc   = chunk_get_head();
   next = chunk_get_next_ncnl(pc);
   while ((pc != NULL) && (next != NULL))
   {
      if ((pc->type == CT_DOT) && (cpd.lang_flags & LANG_ALLC))
      {
         set_chunk_type(pc, CT_MEMBER);
      }

      if ((pc->type == CT_NULLCOND) && (cpd.lang_flags & LANG_CS))
      {
         set_chunk_type(pc, CT_MEMBER);
      }

      /* Determine the version stuff (D only) */
      if (pc->type == CT_D_VERSION)
      {
         if (next->type == CT_PAREN_OPEN)
         {
            set_chunk_type(pc, CT_D_VERSION_IF);
         }
         else
         {
            if (next->type != CT_ASSIGN)
            {
               LOG_FMT(LERR, "%s:%d %s: version: Unexpected token %s\n",
                       cpd.filename, pc->orig_line, __func__, get_token_name(next->type));
               cpd.error_count++;
            }
            set_chunk_type(pc, CT_WORD);
         }
      }

      /* Determine the scope stuff (D only) */
      if (pc->type == CT_D_SCOPE)
      {
         if (next->type == CT_PAREN_OPEN)
         {
            set_chunk_type(pc, CT_D_SCOPE_IF);
         }
         else
         {
            set_chunk_type(pc, CT_TYPE);
         }
      }

      /**
       * Change CT_BASE before CT_PAREN_OPEN to CT_WORD.
       * public myclass() : base() {
       * }
       */
      if ((pc->type == CT_BASE) && (next->type == CT_PAREN_OPEN))
      {
         set_chunk_type(pc, CT_WORD);
      }

      if ((pc->type == CT_ENUM) && (next->type == CT_CLASS))
      {
         set_chunk_type(next, CT_ENUM_CLASS);
      }

      /**
       * Change CT_WORD after CT_ENUM, CT_UNION, or CT_STRUCT to CT_TYPE
       * Change CT_WORD before CT_WORD to CT_TYPE
       */
      if (next->type == CT_WORD)
      {
         if ((pc->type == CT_ENUM) ||
             (pc->type == CT_ENUM_CLASS) ||
             (pc->type == CT_UNION) ||
             (pc->type == CT_STRUCT))
         {
            set_chunk_type(next, CT_TYPE);
         }
         if (pc->type == CT_WORD)
         {
            set_chunk_type(pc, CT_TYPE);
         }
      }

      /* change extern to qualifier if extern isn't followed by a string or
       * an open paren
       */
      if (pc->type == CT_EXTERN)
      {
         if (next->type == CT_STRING)
         {
            /* Probably 'extern "C"' */
         }
         else if (next->type == CT_PAREN_OPEN)
         {
            /* Probably 'extern (C)' */
         }
         else
         {
            /* Something else followed by a open brace */
            tmp = chunk_get_next_ncnl(next);
            if ((tmp == NULL) || (tmp->type != CT_BRACE_OPEN))
            {
               set_chunk_type(pc, CT_QUALIFIER);
            }
         }
      }

      /**
       * Change CT_STAR to CT_PTR_TYPE if preceded by CT_TYPE,
       * CT_QUALIFIER, or CT_PTR_TYPE.
       */
      if ((next->type == CT_STAR) &&
          ((pc->type == CT_TYPE) ||
           (pc->type == CT_QUALIFIER) ||
           (pc->type == CT_PTR_TYPE)))
      {
         set_chunk_type(next, CT_PTR_TYPE);
      }

      if ((pc->type == CT_TYPE_CAST) &&
          (next->type == CT_ANGLE_OPEN))
      {
         set_chunk_parent(next, CT_TYPE_CAST);
         in_type_cast = true;
      }

      /**
       * Change angle open/close to CT_COMPARE, if not a template thingy
       */
      if ((pc->type == CT_ANGLE_OPEN) && (pc->parent_type != CT_TYPE_CAST))
      {
         /* pretty much all languages except C use <> for something other than
          * comparisons.  "#include<xxx>" is handled elsewhere.
          */
         if (cpd.lang_flags & (LANG_CPP | LANG_CS | LANG_JAVA | LANG_VALA | LANG_OC))
         {
            // bug #663
            check_template(pc);
         }
         else
         {
            /* convert CT_ANGLE_OPEN to CT_COMPARE */
            set_chunk_type(pc, CT_COMPARE);
         }
      }
      if ((pc->type == CT_ANGLE_CLOSE) && (pc->parent_type != CT_TEMPLATE))
      {
         if (in_type_cast)
         {
            in_type_cast = false;
            set_chunk_parent(pc, CT_TYPE_CAST);
         }
         else
         {
            next = handle_double_angle_close(pc);
         }
      }

      if (cpd.lang_flags & LANG_D)
      {
         /* Check for the D string concat symbol '~' */
         if ((pc->type == CT_INV) &&
             ((prev->type == CT_STRING) ||
              (prev->type == CT_WORD) ||
              (next->type == CT_STRING)))
         {
            set_chunk_type(pc, CT_CONCAT);
         }

         /* Check for the D template symbol '!' (word + '!' + word or '(') */
         if ((pc->type == CT_NOT) &&
             (prev->type == CT_WORD) &&
             ((next->type == CT_PAREN_OPEN) ||
              (next->type == CT_WORD) ||
              (next->type == CT_TYPE)))
         {
            set_chunk_type(pc, CT_D_TEMPLATE);
         }

         /* handle "version(unittest) { }" vs "unittest { }" */
         if (prev && (pc->type == CT_UNITTEST) && (prev->type == CT_PAREN_OPEN))
         {
            set_chunk_type(pc, CT_WORD);
         }

         /* handle 'static if' and merge the tokens */
         if (prev && (pc->type == CT_IF) && chunk_is_str(prev, "static", 6))
         {
            /* delete PREV and merge with IF */
            pc->str.insert(0, ' ');
            pc->str.insert(0, prev->str);
            pc->orig_col  = prev->orig_col;
            pc->orig_line = prev->orig_line;
            chunk_t *to_be_deleted = prev;
            prev = chunk_get_prev_ncnl(prev);
            chunk_del(to_be_deleted);
         }
      }

      if (cpd.lang_flags & LANG_CPP)
      {
         /* Change Word before '::' into a type */
         if ((pc->type == CT_WORD) && (next->type == CT_DC_MEMBER))
         {
            set_chunk_type(pc, CT_TYPE);
         }
      }

      /* Change get/set to CT_WORD if not followed by a brace open */
      if ((pc->type == CT_GETSET) && (next->type != CT_BRACE_OPEN))
      {
         if ((next->type == CT_SEMICOLON) &&
             ((prev->type == CT_BRACE_CLOSE) ||
              (prev->type == CT_BRACE_OPEN) ||
              (prev->type == CT_SEMICOLON)))
         {
            set_chunk_type(pc, CT_GETSET_EMPTY);
            set_chunk_parent(next, CT_GETSET);
         }
         else
         {
            set_chunk_type(pc, CT_WORD);
         }
      }

      /* Interface is only a keyword in MS land if followed by 'class' or 'struct'
       * likewise, 'class' may be a member name in Java.
       */
      if ((pc->type == CT_CLASS) && !CharTable::IsKw1(next->str[0]))
      {
         set_chunk_type(pc, CT_WORD);
      }

      /* Change item after operator (>=, ==, etc) to a CT_OPERATOR_VAL
       * Usually the next item is part of the operator.
       * In a few cases the next few tokens are part of it:
       *  operator +       - common case
       *  operator >>      - need to combine '>' and '>'
       *  operator ()
       *  operator []      - already converted to TSQUARE
       *  operator new []
       *  operator delete []
       *  operator const char *
       *  operator const B&
       *  operator std::allocator<U>
       *
       * In all cases except the last, this will put the entire operator value
       * in one chunk.
       */
      if (pc->type == CT_OPERATOR)
      {
         tmp2 = chunk_get_next(next);
         /* Handle special case of () operator -- [] already handled */
         if (next->type == CT_PAREN_OPEN)
         {
            tmp = chunk_get_next(next);
            if ((tmp != NULL) && (tmp->type == CT_PAREN_CLOSE))
            {
               next->str = "()";
               set_chunk_type(next, CT_OPERATOR_VAL);
               chunk_del(tmp);
               next->orig_col_end += 1;
            }
         }
         else if ((next->type == CT_ANGLE_CLOSE) &&
                  tmp2 && (tmp2->type == CT_ANGLE_CLOSE) &&
                  (tmp2->orig_col == next->orig_col_end))
         {
            next->str.append('>');
            next->orig_col_end++;
            set_chunk_type(next, CT_OPERATOR_VAL);
            chunk_del(tmp2);
         }
         else if (next->flags & PCF_PUNCTUATOR)
         {
            set_chunk_type(next, CT_OPERATOR_VAL);
         }
         else
         {
            set_chunk_type(next, CT_TYPE);

            /* Replace next with a collection of all tokens that are part of
             * the type.
             */
            tmp2 = next;
            while ((tmp = chunk_get_next(tmp2)) != NULL)
            {
               if ((tmp->type != CT_WORD) &&
                   (tmp->type != CT_TYPE) &&
                   (tmp->type != CT_QUALIFIER) &&
                   (tmp->type != CT_STAR) &&
                   (tmp->type != CT_CARET) &&
                   (tmp->type != CT_AMP) &&
                   (tmp->type != CT_TSQUARE))
               {
                  break;
               }
               /* Change tmp into a type so that space_needed() works right */
               make_type(tmp);
               int num_sp = space_needed(tmp2, tmp);
               while (num_sp-- > 0)
               {
                  next->str.append(" ");
               }
               next->str.append(tmp->str);
               tmp2 = tmp;
            }

            while ((tmp2 = chunk_get_next(next)) != tmp)
            {
               chunk_del(tmp2);
            }

            set_chunk_type(next, CT_OPERATOR_VAL);

            next->orig_col_end = next->orig_col + next->len();
         }
         set_chunk_parent(next, CT_OPERATOR);

         LOG_FMT(LOPERATOR, "%s: %d:%d operator '%s'\n",
                 __func__, pc->orig_line, pc->orig_col, next->text());
      }

      /* Change private, public, protected into either a qualifier or label */
      if (pc->type == CT_PRIVATE)
      {
         /* Handle Qt slots - maybe should just check for a CT_WORD? */
         if (chunk_is_str(next, "slots", 5) || chunk_is_str(next, "Q_SLOTS", 7))
         {
            tmp = chunk_get_next(next);
            if ((tmp != NULL) && (tmp->type == CT_COLON))
            {
               next = tmp;
            }
         }
         if (next->type == CT_COLON)
         {
            set_chunk_type(next, CT_PRIVATE_COLON);
            if ((tmp = chunk_get_next_ncnl(next)) != NULL)
            {
               chunk_flags_set(tmp, PCF_STMT_START | PCF_EXPR_START);
            }
         }
         else
         {
            set_chunk_type(pc, (chunk_is_str(pc, "signals", 7) || chunk_is_str(pc, "Q_SIGNALS", 9)) ? CT_WORD : CT_QUALIFIER);
         }
      }

      /* Look for <newline> 'EXEC' 'SQL' */
      if ((chunk_is_str(pc, "EXEC", 4) && chunk_is_str(next, "SQL", 3)) ||
          ((*pc->str == '$') && (pc->type != CT_SQL_WORD)))
      {
         tmp = chunk_get_prev(pc);
         if (chunk_is_newline(tmp))
         {
            if (*pc->str == '$')
            {
               set_chunk_type(pc, CT_SQL_EXEC);
               if (pc->len() > 1)
               {
                  /* SPLIT OFF '$' */
                  chunk_t nc;

                  nc = *pc;
                  pc->str.resize(1);
                  pc->orig_col_end = pc->orig_col + 1;

                  nc.type = CT_SQL_WORD;
                  nc.str.pop_front();
                  nc.orig_col++;
                  nc.column++;
                  chunk_add_after(&nc, pc);

                  next = chunk_get_next(pc);
               }
            }
            tmp = chunk_get_next(next);
            if (chunk_is_str_case(tmp, "BEGIN", 5))
            {
               set_chunk_type(pc, CT_SQL_BEGIN);
            }
            else if (chunk_is_str_case(tmp, "END", 3))
            {
               set_chunk_type(pc, CT_SQL_END);
            }
            else
            {
               set_chunk_type(pc, CT_SQL_EXEC);
            }

            /* Change words into CT_SQL_WORD until CT_SEMICOLON */
            while (tmp != NULL)
            {
               if (tmp->type == CT_SEMICOLON)
               {
                  break;
               }
               if ((tmp->len() > 0) && (unc_isalpha(*tmp->str) || (*tmp->str == '$')))
               {
                  set_chunk_type(tmp, CT_SQL_WORD);
               }
               tmp = chunk_get_next_ncnl(tmp);
            }
         }
      }

      /* handle MS abomination 'for each' */
      if ((pc->type == CT_FOR) && chunk_is_str(next, "each", 4) &&
          (next == chunk_get_next(pc)))
      {
         /* merge the two with a space between */
         pc->str.append(' ');
         pc->str         += next->str;
         pc->orig_col_end = next->orig_col_end;
         chunk_del(next);
         next = chunk_get_next_ncnl(pc);
         /* label the 'in' */
         if (next && (next->type == CT_PAREN_OPEN))
         {
            tmp = chunk_get_next_ncnl(next);
            while (tmp && (tmp->type != CT_PAREN_CLOSE))
            {
               if (chunk_is_str(tmp, "in", 2))
               {
                  set_chunk_type(tmp, CT_IN);
                  break;
               }
               tmp = chunk_get_next_ncnl(tmp);
            }
         }
      }

      /* ObjectiveC allows keywords to be used as identifiers in some situations
       * This is a dirty hack to allow some of the more common situations.
       */
      if (cpd.lang_flags & LANG_OC)
      {
         if (((pc->type == CT_IF) ||
              (pc->type == CT_FOR) ||
              (pc->type == CT_WHILE)) &&
             !chunk_is_token(next, CT_PAREN_OPEN))
         {
            set_chunk_type(pc, CT_WORD);
         }
         if ((pc->type == CT_DO) &&
             (chunk_is_token(prev, CT_MINUS) ||
              chunk_is_token(next, CT_SQUARE_CLOSE)))
         {
            set_chunk_type(pc, CT_WORD);
         }
      }

      /* Another hack to clean up more keyword abuse */
      if ((pc->type == CT_CLASS) &&
          (chunk_is_token(prev, CT_DOT) ||
           chunk_is_token(next, CT_DOT)))
      {
         set_chunk_type(pc, CT_WORD);
      }

      /* Detect Objective C class name */
      if ((pc->type == CT_OC_IMPL) ||
          (pc->type == CT_OC_INTF) ||
          (pc->type == CT_OC_PROTOCOL))
      {
         if (next->type != CT_PAREN_OPEN)
         {
            set_chunk_type(next, CT_OC_CLASS);
         }
         set_chunk_parent(next, pc->type);

         tmp = chunk_get_next_ncnl(next);
         if (tmp != NULL)
         {
            chunk_flags_set(tmp, PCF_STMT_START | PCF_EXPR_START);
         }

         tmp = chunk_get_next_type(pc, CT_OC_END, pc->level);
         if (tmp != NULL)
         {
            set_chunk_parent(tmp, pc->type);
         }
      }

      if (pc->type == CT_OC_INTF)
      {
         tmp = chunk_get_next_ncnl(pc, CNAV_PREPROC);
         while ((tmp != NULL) && (tmp->type != CT_OC_END))
         {
            if (get_token_pattern_class(tmp->type) != PATCLS_NONE)
            {
               LOG_FMT(LOBJCWORD, "@interface %d:%d change '%s' (%s) to CT_WORD\n",
                       pc->orig_line, pc->orig_col, tmp->text(),
                       get_token_name(tmp->type));
               set_chunk_type(tmp, CT_WORD);
            }
            tmp = chunk_get_next_ncnl(tmp, CNAV_PREPROC);
         }
      }

      /* Detect Objective-C categories and class extensions */
      /* @interface ClassName (CategoryName) */
      /* @implementation ClassName (CategoryName) */
      /* @interface ClassName () */
      /* @implementation ClassName () */
      if (((pc->parent_type == CT_OC_IMPL) ||
           (pc->parent_type == CT_OC_INTF) ||
           (pc->type == CT_OC_CLASS)) &&
          (next->type == CT_PAREN_OPEN))
      {
         set_chunk_parent(next, pc->parent_type);

         tmp = chunk_get_next(next);
         if ((tmp != NULL) && (tmp->next != NULL))
         {
            if (tmp->type == CT_PAREN_CLOSE)
            {
               //set_chunk_type(tmp, CT_OC_CLASS_EXT);
               set_chunk_parent(tmp, pc->parent_type);
            }
            else
            {
               set_chunk_type(tmp, CT_OC_CATEGORY);
               set_chunk_parent(tmp, pc->parent_type);
            }
         }

         tmp = chunk_get_next_type(pc, CT_PAREN_CLOSE, pc->level);
         if (tmp != NULL)
         {
            set_chunk_parent(tmp, pc->parent_type);
         }
      }

      /* Detect Objective C @property
       *  @property NSString *stringProperty;
       *  @property(nonatomic, retain) NSMutableDictionary *shareWith;
       */
      if (pc->type == CT_OC_PROPERTY)
      {
         if (next->type != CT_PAREN_OPEN)
         {
            chunk_flags_set(next, PCF_STMT_START | PCF_EXPR_START);
         }
         else
         {
            set_chunk_parent(next, pc->type);

            tmp = chunk_get_next_type(pc, CT_PAREN_CLOSE, pc->level);
            if (tmp != NULL)
            {
               set_chunk_parent(tmp, pc->type);
               tmp = chunk_get_next_ncnl(tmp);
               if (tmp != NULL)
               {
                  chunk_flags_set(tmp, PCF_STMT_START | PCF_EXPR_START);

                  tmp = chunk_get_next_type(tmp, CT_SEMICOLON, pc->level);
                  if (tmp != NULL)
                  {
                     set_chunk_parent(tmp, pc->type);
                  }
               }
            }
         }
      }

      /* Detect Objective C @selector
       *  @selector(msgNameWithNoArg)
       *  @selector(msgNameWith1Arg:)
       *  @selector(msgNameWith2Args:arg2Name:)
       */
      if ((pc->type == CT_OC_SEL) && (next->type == CT_PAREN_OPEN))
      {
         set_chunk_parent(next, pc->type);

         tmp = chunk_get_next(next);
         if (tmp != NULL)
         {
            set_chunk_type(tmp, CT_OC_SEL_NAME);
            set_chunk_parent(tmp, pc->type);

            while ((tmp = chunk_get_next_ncnl(tmp)) != NULL)
            {
               if (tmp->type == CT_PAREN_CLOSE)
               {
                  set_chunk_parent(tmp, CT_OC_SEL);
                  break;
               }
               set_chunk_type(tmp, CT_OC_SEL_NAME);
               set_chunk_parent(tmp, pc->type);
            }
         }
      }

      /* Handle special preprocessor junk */
      if (pc->type == CT_PREPROC)
      {
         set_chunk_parent(pc, next->type);
      }

      /* Detect "pragma region" and "pragma endregion" */
      if ((pc->type == CT_PP_PRAGMA) && (next->type == CT_PREPROC_BODY))
      {
         if ((memcmp(next->str, "region", 6) == 0) ||
             (memcmp(next->str, "endregion", 9) == 0))
         {
            set_chunk_type(pc, (*next->str == 'r') ? CT_PP_REGION : CT_PP_ENDREGION);

            set_chunk_parent(prev, pc->type);
         }
      }

      /* Check for C# nullable types '?' is in next */
      if ((cpd.lang_flags & LANG_CS) &&
          (next->type == CT_QUESTION) &&
          (next->orig_col == (pc->orig_col + pc->len())))
      {
         tmp = chunk_get_next_ncnl(next);
         if (tmp != NULL)
         {
            bool doit = ((tmp->type == CT_PAREN_CLOSE) ||
                         (tmp->type == CT_ANGLE_CLOSE));

            if (tmp->type == CT_WORD)
            {
               tmp2 = chunk_get_next_ncnl(tmp);
               if ((tmp2 != NULL) &&
                   ((tmp2->type == CT_SEMICOLON) ||
                    (tmp2->type == CT_ASSIGN) ||
                    (tmp2->type == CT_COMMA) ||
                    (tmp2->type == CT_BRACE_OPEN)))
               {
                  doit = true;
               }
            }

            if (doit)
            {
               pc->str         += next->str;
               pc->orig_col_end = next->orig_col_end;
               chunk_del(next);
               next = tmp;
            }
         }
      }

      /* Change 'default(' into a sizeof-like statement */
      if ((cpd.lang_flags & LANG_CS) &&
          (pc->type == CT_DEFAULT) &&
          (next->type == CT_PAREN_OPEN))
      {
         set_chunk_type(pc, CT_SIZEOF);
      }

      if ((pc->type == CT_UNSAFE) && (next->type != CT_BRACE_OPEN))
      {
         set_chunk_type(pc, CT_QUALIFIER);
      }

      if (((pc->type == CT_USING) ||
           ((pc->type == CT_TRY) && (cpd.lang_flags & LANG_JAVA))) &&
          (next->type == CT_PAREN_OPEN))
      {
         set_chunk_type(pc, CT_USING_STMT);
      }

      /* Add minimal support for C++0x rvalue references */
      if ((pc->type == CT_BOOL) && chunk_is_str(pc, "&&", 2))
      {
         if (prev->type == CT_TYPE)
         {
            set_chunk_type(pc, CT_BYREF);
         }
      }

      /* HACK: treat try followed by a colon as a qualifier to handle this:
       *   A::A(int) try : B() { } catch (...) { }
       */
      if ((pc->type == CT_TRY) && chunk_is_str(pc, "try", 3) &&
          (next != NULL) && (next->type == CT_COLON))
      {
         set_chunk_type(pc, CT_QUALIFIER);
      }

      /* If Java's 'synchronized' is in a method declaration, it should be
       * a qualifier. */
      if ((cpd.lang_flags & LANG_JAVA) &&
          (pc->type == CT_SYNCHRONIZED) &&
          (next->type != CT_PAREN_OPEN))
      {
         set_chunk_type(pc, CT_QUALIFIER);
      }

      // guy 2015-11-05
      // change CT_DC_MEMBER + CT_FOR into CT_DC_MEMBER + CT_FUNC_CALL
      if ((pc->type == CT_FOR) &&
          (pc->prev->type == CT_DC_MEMBER))
      {
         set_chunk_type(pc, CT_FUNC_CALL);
      }
      /* TODO: determine other stuff here */

      prev = pc;
      pc   = next;
      next = chunk_get_next_ncnl(pc);
   }
} // tokenize_cleanup
示例#3
0
/**
 * Skips the next bit of whatever and returns the type of block.
 *
 * pc.str is the input text.
 * pc.len in the output length.
 * pc.type is the output type
 * pc.column is output column
 *
 * @param pc      The structure to update, str is an input.
 * @return        true/false - whether anything was parsed
 */
static bool parse_next(tok_ctx& ctx, chunk_t& pc)
{
   const chunk_tag_t *punc;
   int               ch, ch1;

   if (!ctx.more())
   {
      //fprintf(stderr, "All done!\n");
      return(false);
   }

   /* Save off the current column */
   pc.orig_line = ctx.c.row;
   pc.column    = ctx.c.col;
   pc.orig_col  = ctx.c.col;
   pc.type      = CT_NONE;
   pc.nl_count  = 0;
   pc.flags     = 0;

   /* If it is turned off, we put everything except newlines into CT_UNKNOWN */
   if (cpd.unc_off)
   {
      if (parse_ignored(ctx, pc))
      {
         return(true);
      }
   }

   /**
    * Parse whitespace
    */
   if (parse_whitespace(ctx, pc))
   {
      return(true);
   }

   /**
    * Handle unknown/unhandled preprocessors
    */
   if ((cpd.in_preproc > CT_PP_BODYCHUNK) &&
       (cpd.in_preproc <= CT_PP_OTHER))
   {
      pc.str.clear();
      tok_info ss;
      ctx.save(ss);
      /* Chunk to a newline or comment */
      pc.type = CT_PREPROC_BODY;
      int last = 0;
      while (ctx.more())
      {
         int ch = ctx.peek();

         if ((ch == '\n') || (ch == '\r'))
         {
            /* Back off if this is an escaped newline */
            if (last == '\\')
            {
               ctx.restore(ss);
               pc.str.pop_back();
            }
            break;
         }

         /* Quit on a C++ comment start */
         if ((ch == '/') && (ctx.peek(1) == '/'))
         {
            break;
         }
         last = ch;
         ctx.save(ss);

         pc.str.append(ctx.get());
      }
      if (pc.str.size() > 0)
      {
         return(true);
      }
   }

   /**
    * Detect backslash-newline
    */
   if ((ctx.peek() == '\\') && parse_bs_newline(ctx, pc))
   {
      return(true);
   }

   /**
    * Parse comments
    */
   if (parse_comment(ctx, pc))
   {
      return(true);
   }

   /* Parse code placeholders */
   if (parse_code_placeholder(ctx, pc))
   {
      return(true);
   }

   /* Check for C# literal strings, ie @"hello" and identifiers @for*/
   if ((cpd.lang_flags & LANG_CS) && (ctx.peek() == '@'))
   {
      if (ctx.peek(1) == '"')
      {
         parse_cs_string(ctx, pc);
         return(true);
      }
      /* check for non-keyword identifiers such as @if @switch, etc */
      if (CharTable::IsKw1(ctx.peek(1)))
      {
         parse_word(ctx, pc, true);
         return(true);
      }
   }

   /* Check for C# Interpolated strings */
   if ((cpd.lang_flags & LANG_CS) && (ctx.peek() == '$') && (ctx.peek(1) == '"'))
   {
      parse_cs_interpolated_string(ctx, pc);
      return(true);
   }

   /* handle VALA """ strings """ */
   if ((cpd.lang_flags & LANG_VALA) &&
       (ctx.peek() == '"') &&
       (ctx.peek(1) == '"') &&
       (ctx.peek(2) == '"'))
   {
      parse_verbatim_string(ctx, pc);
      return(true);
   }

   /* handle C++0x strings u8"x" u"x" U"x" R"x" u8R"XXX(I'm a "raw UTF-8" string.)XXX" */
   ch = ctx.peek();
   if ((cpd.lang_flags & LANG_CPP) &&
       ((ch == 'u') || (ch == 'U') || (ch == 'R')))
   {
      int  idx     = 0;
      bool is_real = false;

      if ((ch == 'u') && (ctx.peek(1) == '8'))
      {
         idx = 2;
      }
      else if (unc_tolower(ch) == 'u')
      {
         idx++;
      }

      if (ctx.peek(idx) == 'R')
      {
         idx++;
         is_real = true;
      }
      if (ctx.peek(idx) == '"')
      {
         if (is_real)
         {
            if (parse_cr_string(ctx, pc, idx))
            {
               return(true);
            }
         }
         else
         {
            if (parse_string(ctx, pc, idx, true))
            {
               parse_suffix(ctx, pc, true);
               return(true);
            }
         }
      }
   }

   /* PAWN specific stuff */
   if (cpd.lang_flags & LANG_PAWN)
   {
      if ((cpd.preproc_ncnl_count == 1) &&
          ((cpd.in_preproc == CT_PP_DEFINE) ||
           (cpd.in_preproc == CT_PP_EMIT)))
      {
         parse_pawn_pattern(ctx, pc, CT_MACRO);
         return(true);
      }
      /* Check for PAWN strings: \"hi" or !"hi" or !\"hi" or \!"hi" */
      if ((ctx.peek() == '\\') || (ctx.peek() == '!'))
      {
         if (ctx.peek(1) == '"')
         {
            parse_string(ctx, pc, 1, (ctx.peek() == '!'));
            return(true);
         }
         else if (((ctx.peek(1) == '\\') || (ctx.peek(1) == '!')) &&
                  (ctx.peek(2) == '"'))
         {
            parse_string(ctx, pc, 2, false);
            return(true);
         }
      }

      /* handle PAWN preprocessor args %0 .. %9 */
      if ((cpd.in_preproc == CT_PP_DEFINE) &&
          (ctx.peek() == '%') &&
          unc_isdigit(ctx.peek(1)))
      {
         pc.str.clear();
         pc.str.append(ctx.get());
         pc.str.append(ctx.get());
         pc.type = CT_WORD;
         return(true);
      }
   }

   /**
    * Parse strings and character constants
    */

   if (parse_number(ctx, pc))
   {
      return(true);
   }

   if (cpd.lang_flags & LANG_D)
   {
      /* D specific stuff */
      if (d_parse_string(ctx, pc))
      {
         return(true);
      }
   }
   else
   {
      /* Not D stuff */

      /* Check for L'a', L"abc", 'a', "abc", <abc> strings */
      ch  = ctx.peek();
      ch1 = ctx.peek(1);
      if ((((ch == 'L') || (ch == 'S')) &&
           ((ch1 == '"') || (ch1 == '\''))) ||
          (ch == '"') ||
          (ch == '\'') ||
          ((ch == '<') && (cpd.in_preproc == CT_PP_INCLUDE)))
      {
         parse_string(ctx, pc, unc_isalpha(ch) ? 1 : 0, true);
         return(true);
      }

      if ((ch == '<') && (cpd.in_preproc == CT_PP_DEFINE))
      {
         if (chunk_get_tail()->type == CT_MACRO)
         {
            /* We have "#define XXX <", assume '<' starts an include string */
            parse_string(ctx, pc, 0, false);
            return(true);
         }
      }
   }

   /* Check for Objective C literals and VALA identifiers ('@1', '@if')*/
   if ((cpd.lang_flags & (LANG_OC | LANG_VALA)) && (ctx.peek() == '@'))
   {
      int nc = ctx.peek(1);
      if ((nc == '"') || (nc == '\''))
      {
         /* literal string */
         parse_string(ctx, pc, 1, true);
         return(true);
      }
      else if ((nc >= '0') && (nc <= '9'))
      {
         /* literal number */
         pc.str.append(ctx.get());  /* store the '@' */
         parse_number(ctx, pc);
         return(true);
      }
   }

   /* Check for pawn/ObjectiveC/Java and normal identifiers */
   if (CharTable::IsKw1(ctx.peek()) ||
       ((ctx.peek() == '@') && CharTable::IsKw1(ctx.peek(1))))
   {
      parse_word(ctx, pc, false);
      return(true);
   }

   /* see if we have a punctuator */
   char punc_txt[4];
   punc_txt[0] = ctx.peek();
   punc_txt[1] = ctx.peek(1);
   punc_txt[2] = ctx.peek(2);
   punc_txt[3] = ctx.peek(3);
   if ((punc = find_punctuator(punc_txt, cpd.lang_flags)) != NULL)
   {
      int cnt = strlen(punc->tag);
      while (cnt--)
      {
         pc.str.append(ctx.get());
      }
      pc.type   = punc->type;
      pc.flags |= PCF_PUNCTUATOR;
      return(true);
   }

   /* throw away this character */
   pc.type = CT_UNKNOWN;
   pc.str.append(ctx.get());

   LOG_FMT(LWARN, "%s:%d Garbage in col %d: %x\n",
           cpd.filename, pc.orig_line, (int)ctx.c.col, pc.str[0]);
   cpd.error_count++;
   return(true);
} // parse_next
示例#4
0
文件: output.cpp 项目: Limsik/e17
/**
 * A multiline comment -- woopeee!
 * The only trick here is that we have to trim out whitespace characters
 * to get the comment to line up.
 */
static void output_comment_multi(chunk_t *pc)
{
   int        cmt_col;
   int        cmt_idx;
   int        ch;
   unc_text   line;
   int        line_count = 0;
   int        ccol; /* the col of subsequent comment lines */
   int        col_diff = 0;
   bool       nl_end = false;
   cmt_reflow cmt;

   //LOG_FMT(LSYS, "%s: line %d\n", __func__, pc->orig_line);

   output_cmt_start(cmt, pc);
   cmt.reflow = (cpd.settings[UO_cmt_reflow_mode].n != 1);

   cmt_col = cmt.base_col;
   col_diff = pc->orig_col - cmt.base_col;

   calculate_comment_body_indent(cmt, pc->str);

   cmt.cont_text = !cpd.settings[UO_cmt_indent_multi].b ? "" :
                   (cpd.settings[UO_cmt_star_cont].b ? "* " : "  ");
   LOG_CONTTEXT();

   //LOG_FMT(LSYS, "Indenting1 line %d to col %d (orig=%d) col_diff=%d xtra=%d cont='%s'\n",
   //        pc->orig_line, cmt_col, pc->orig_col, col_diff, cmt.xtra_indent, cmt.cont_text.c_str());

   ccol    = pc->column;
   cmt_idx = 0;
   line.clear();
   while (cmt_idx < pc->len())
   {
      ch = pc->str[cmt_idx++];

      /* handle the CRLF and CR endings. convert both to LF */
      if (ch == '\r')
      {
         ch = '\n';
         if ((cmt_idx < pc->len()) && (pc->str[cmt_idx] == '\n'))
         {
            cmt_idx++;
         }
      }

      /* Find the start column */
      if (line.size() == 0)
      {
         nl_end = false;
         if (ch == ' ')
         {
            ccol++;
            continue;
         }
         else if (ch == '\t')
         {
            ccol = calc_next_tab_column(ccol, cpd.settings[UO_input_tab_size].n);
            continue;
         }
         else
         {
            //LOG_FMT(LSYS, "%d] Text starts in col %d\n", line_count, ccol);
         }
      }

      /*
       * Now see if we need/must fold the next line with the current to enable
       * full reflow
       */
      if ((cpd.settings[UO_cmt_reflow_mode].n == 2) &&
          (ch == '\n') &&
          (cmt_idx < pc->len()))
      {
         int  nxt_len            = 0;
         int  next_nonempty_line = -1;
         int  prev_nonempty_line = -1;
         int  nwidx          = line.size();
         bool star_is_bullet = false;

         /* strip trailing whitespace from the line collected so far */
         while (nwidx > 0)
         {
            nwidx--;
            if ((prev_nonempty_line < 0) &&
                !unc_isspace(line[nwidx]) &&
                (line[nwidx] != '*') && // block comment: skip '*' at end of line
                ((pc->flags & PCF_IN_PREPROC)
                 ? (line[nwidx] != '\\') ||
                 ((line[nwidx + 1] != 'r') &&
                  (line[nwidx + 1] != '\n'))
                 : true))
            {
               prev_nonempty_line = nwidx; // last nonwhitespace char in the previous line
            }
         }

         int remaining = pc->len() - cmt_idx;
         for (nxt_len = 0;
              (nxt_len <= remaining) &&
              (pc->str[nxt_len] != 'r') &&
              (pc->str[nxt_len] != '\n');
              nxt_len++)
         {
            if ((next_nonempty_line < 0) &&
                !unc_isspace(pc->str[nxt_len]) &&
                (pc->str[nxt_len] != '*') &&
                ((nxt_len == remaining) ||
                 ((pc->flags & PCF_IN_PREPROC)
                  ? (pc->str[nxt_len] != '\\') ||
                  ((pc->str[nxt_len + 1] != 'r') &&
                   (pc->str[nxt_len + 1] != '\n'))
                  : true)))
            {
               next_nonempty_line = nxt_len; // first nonwhitespace char in the next line
            }
         }

         /*
          * see if we should fold up; usually that'd be a YES, but there are a few
          * situations where folding/reflowing by merging lines is frowned upon:
          *
          * - ASCII art in the comments (most often, these are drawings done in +-\/|.,*)
          *
          * - Doxygen/JavaDoc/etc. parameters: these often start with \ or @, at least
          *   something clearly non-alphanumeric (you see where we're going with this?)
          *
          * - bullet lists that are closely spaced: bullets are always non-alphanumeric
          *   characters, such as '-' or '+' (or, oh horor, '*' - that's bloody ambiguous
          *   to parse :-( ... with or without '*' comment start prefix, that's the
          *   question, then.)
          *
          * - semi-HTML formatted code, e.g. <pre>...</pre> comment sections (NDoc, etc.)
          *
          * - New lines which form a new paragraph without there having been added an
          *   extra empty line between the last sentence and the new one.
          *   A bit like this, really; so it is opportune to check if the last line ended
          *   in a terminal (that would be the set '.:;!?') and the new line starts with
          *   a capital.
          *   Though new lines starting with comment delimiters, such as '(', should be
          *   pulled up.
          *
          * So it bores down to this: the only folding (& reflowing) that's going to happen
          * is when the next line starts with an alphanumeric character AND the last
          * line didn't end with an non-alphanumeric character, except: ',' AND the next
          * line didn't start with a '*' all of a sudden while the previous one didn't
          * (the ambiguous '*'-for-bullet case!)
          */
         if ((prev_nonempty_line >= 0) && (next_nonempty_line >= 0) &&
             (((unc_isalnum(line[prev_nonempty_line]) ||
                strchr(",)]", line[prev_nonempty_line])) &&
               (unc_isalnum(pc->str[next_nonempty_line]) ||
                strchr("([", pc->str[next_nonempty_line]))) ||
              (('.' == line[prev_nonempty_line]) &&    // dot followed by non-capital is NOT a new sentence start
               unc_isupper(pc->str[next_nonempty_line]))) &&
             !star_is_bullet)
         {
            // rewind the line to the last non-alpha:
            line.resize(prev_nonempty_line + 1);
            // roll the current line forward to the first non-alpha:
            cmt_idx += next_nonempty_line;
            // override the NL and make it a single whitespace:
            ch = ' ';
         }
      }

      line.append(ch);

      /* If we just hit an end of line OR we just hit end-of-comment... */
      if ((ch == '\n') || (cmt_idx == pc->len()))
      {
         line_count++;

         /* strip trailing tabs and spaces before the newline */
         if (ch == '\n')
         {
            nl_end = true;
            line.pop_back();
            cmt_trim_whitespace(line, pc->flags & PCF_IN_PREPROC);
         }

         //LOG_FMT(LSYS, "[%3d]%s\n", ccol, line);

         if (line_count == 1)
         {
            /* this is the first line - add unchanged */
            add_comment_text(line, cmt, false);
            if (nl_end)
            {
               add_char('\n');
            }
         }
         else
         {
            /* This is not the first line, so we need to indent to the
             * correct column. Each line is indented 0 or more spaces.
             */
            ccol -= col_diff;
            if (ccol < (cmt_col + 3))
            {
               ccol = cmt_col + 3;
            }

            if (line.size() == 0)
            {
               /* Empty line - just a '\n' */
               if (cpd.settings[UO_cmt_star_cont].b)
               {
                  cmt.column = cmt_col + cpd.settings[UO_cmt_sp_before_star_cont].n;
                  cmt_output_indent(cmt.brace_col, cmt.base_col, cmt.column);
                  if (cmt.xtra_indent)
                  {
                     add_char(' ');
                  }
                  add_text(cmt.cont_text);
               }
               add_char('\n');
            }
            else
            {
               /* If this doesn't start with a '*' or '|'.
                * '\name' is a common parameter documentation thing.
                */
               if (cpd.settings[UO_cmt_indent_multi].b &&
                   (line[0] != '*') && (line[0] != '|') && (line[0] != '#') &&
                   ((line[0] != '\\') || unc_isalpha(line[1])) && (line[0] != '+'))
               {
                  int start_col = cmt_col + cpd.settings[UO_cmt_sp_before_star_cont].n;

                  if (cpd.settings[UO_cmt_star_cont].b)
                  {
                     cmt.column = start_col;
                     cmt_output_indent(cmt.brace_col, cmt.base_col, cmt.column);
                     if (cmt.xtra_indent)
                     {
                        add_char(' ');
                     }
                     add_text(cmt.cont_text);
                     output_to_column(ccol + cpd.settings[UO_cmt_sp_after_star_cont].n,
                                      false);
                  }
                  else
                  {
                     cmt.column = ccol;
                     cmt_output_indent(cmt.brace_col, cmt.base_col, cmt.column);
                  }
               }
               else
               {
                  cmt.column = cmt_col + cpd.settings[UO_cmt_sp_before_star_cont].n;
                  cmt_output_indent(cmt.brace_col, cmt.base_col, cmt.column);
                  if (cmt.xtra_indent)
                  {
                     add_char(' ');
                  }

                  int idx;

                  idx = cmt_parse_lead(line, (cmt_idx == pc->len()));
                  if (idx > 0)
                  {
                     cmt.cont_text.set(line, 0, idx);
                     LOG_CONTTEXT();
                     if ((line.size() >= 2) && (line[0] == '*') && unc_isalnum(line[1]))
                     {
                        line.insert(1, ' ');
                     }
                  }
                  else
                  {
                     add_text(cmt.cont_text);
                  }
               }

               add_comment_text(line, cmt, false);
               if (nl_end)
               {
                  add_text("\n");
               }
            }
         }
         line.clear();
         ccol = 1;
      }
   }
}