예제 #1
0
파일: charset.c 프로젝트: ashleyh/neovim
/// Helper for init_chartab: (re)build the character class tables.
///
/// When "global" is non-zero the shared chartab[] is first reset to its
/// defaults and the 'isident', 'isprint' and 'isfname' options are applied
/// to it. The keyword table buf->b_chartab[] is always cleared and rebuilt
/// from the buffer's 'iskeyword' value.
///
/// Each option is a comma-separated list of characters, character numbers
/// or ranges, e.g. "200-210,x,#-178,-". An entry starting with '^' clears
/// the flag instead of setting it. A lone "@" covers bytes 1-255 but only
/// touches those accepted by MB_ISLOWER()/MB_ISUPPER() (and, when
/// p_altkeymap is set, by F_isalpha()/F_isdigit()).
///
/// @param buf    buffer whose b_chartab[] is rebuilt
/// @param global FALSE: only set buf->b_chartab[]
///
/// @return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has
///         an error, OK otherwise. Entries parsed before the error have
///         already been applied when FAIL is returned.
int buf_init_chartab(buf_T *buf, int global)
{
  int ch;
  int last;
  char_u *opt;
  int round;
  int negate;
  int alpha_only;

  if (global) {
    // Defaults: bytes below <Space> are unprintable, <Space> through '~'
    // take one cell and are printable. Storing whole entries also resets
    // every 'isident' and 'isfname' flag. Unprintable bytes get width 4
    // when DY_UHEX is set in dy_flags, 2 otherwise.
    for (ch = 0; ch < ' '; ++ch) {
      chartab[ch] = (dy_flags & DY_UHEX) ? 4 : 2;
    }

    for (; ch <= '~'; ++ch) {
      chartab[ch] = 1 + CT_PRINT_CHAR;
    }

    if (p_altkeymap) {
      // The printable run continues up to (but excluding) YE.
      for (; ch < YE; ++ch) {
        chartab[ch] = 1 + CT_PRINT_CHAR;
      }
    }

    // Remaining bytes up to 255.
    for (; ch < 256; ++ch) {
      if ((enc_utf8 && (ch >= 0xa0))
          || ((enc_dbcs == DBCS_JPNU) && (ch == 0x8e))) {
        // UTF-8: bytes 0xa0 - 0xff are printable (latin1).
        // euc-jp: characters starting with 0x8e are single width.
        chartab[ch] = CT_PRINT_CHAR + 1;
      } else if ((enc_dbcs != 0) && (MB_BYTE2LEN(ch) == 2)) {
        // Other double-byte chars can be printable AND double-width.
        chartab[ch] = CT_PRINT_CHAR + 2;
      } else {
        // Everything else is unprintable by default.
        chartab[ch] = (dy_flags & DY_UHEX) ? 4 : 2;
      }
    }

    // Every multi-byte char is assumed to be a filename character.
    for (ch = 1; ch < 256; ++ch) {
      int multibyte = ((enc_dbcs != 0) && (MB_BYTE2LEN(ch) > 1))
                      || ((enc_dbcs == DBCS_JPNU) && (ch == 0x8e))
                      || (enc_utf8 && (ch >= 0xa0));

      if (multibyte) {
        chartab[ch] |= CT_FNAME_CHAR;
      }
    }
  }

  // Start from an empty keyword table.
  memset(buf->b_chartab, 0, (size_t)32);

  if (enc_dbcs != 0) {
    // Double-byte characters are probably word characters.
    for (ch = 0; ch < 256; ++ch) {
      if (MB_BYTE2LEN(ch) == 2) {
        SET_CHARTAB(buf, ch);
      }
    }
  }

  // Lisp mode counts '-' as a keyword character.
  if (buf->b_p_lisp) {
    SET_CHARTAB(buf, '-');
  }

  // Process the options in the order 'isident', 'isprint', 'isfname',
  // 'iskeyword'. Without "global" only the last one is handled.
  for (round = global ? 0 : 3; round <= 3; ++round) {
    switch (round) {
      case 0:
        opt = p_isi;          // 'isident'
        break;
      case 1:
        opt = p_isp;          // 'isprint'
        break;
      case 2:
        opt = p_isf;          // 'isfname'
        break;
      default:
        opt = buf->b_p_isk;   // 'iskeyword'
        break;
    }

    while (*opt) {
      negate = FALSE;
      alpha_only = FALSE;

      // A leading '^' negates the entry, unless it is the very last
      // character of the option, in which case it is a literal '^'.
      if ((*opt == '^') && (opt[1] != NUL)) {
        negate = TRUE;
        ++opt;
      }

      // First (or only) character: a decimal number or a literal character.
      if (VIM_ISDIGIT(*opt)) {
        ch = getdigits(&opt);
      } else if (has_mbyte) {
        ch = mb_ptr2char_adv(&opt);
      } else {
        ch = *opt++;
      }

      // Optional "-end" part; -1 means "no range given".
      last = -1;
      if ((*opt == '-') && (opt[1] != NUL)) {
        ++opt;

        if (VIM_ISDIGIT(*opt)) {
          last = getdigits(&opt);
        } else if (has_mbyte) {
          last = mb_ptr2char_adv(&opt);
        } else {
          last = *opt++;
        }
      }

      // Reject values outside 1..255, reversed ranges and entries that are
      // not followed by a comma or the end of the option.
      if ((ch <= 0) || (ch >= 256)) {
        return FAIL;
      }
      if ((last != -1) && (last < ch)) {
        return FAIL;
      }
      if (last >= 256) {
        return FAIL;
      }
      if ((*opt != NUL) && (*opt != ',')) {
        return FAIL;
      }

      if (last == -1) {
        // Not a range: a single character, or a single '@' (not "@-@")
        // which walks 1..255 and keeps only alphabetic characters.
        last = ch;
        if (ch == '@') {
          alpha_only = TRUE;
          ch = 1;
          last = 255;
        }
      }

      for (; ch <= last; ++ch) {
        // The MB_ macros are used because isalpha() doesn't work properly
        // when 'encoding' is "latin1" and the locale is "C".
        if (alpha_only
            && !MB_ISLOWER(ch)
            && !MB_ISUPPER(ch)
            && !(p_altkeymap && (F_isalpha(ch) || F_isdigit(ch)))) {
          continue;
        }

        switch (round) {
          case 0:
            // (re)set ID flag
            if (negate) {
              chartab[ch] &= ~CT_ID_CHAR;
            } else {
              chartab[ch] |= CT_ID_CHAR;
            }
            break;

          case 1:
            // (re)set printable.
            // <Space>..'~' stay as they are, unless p_altkeymap is set and
            // F_isalpha()/F_isdigit() accepts the character.
            if ((ch >= ' ')
                && (ch <= '~')
                && !(p_altkeymap && (F_isalpha(ch) || F_isdigit(ch)))) {
              break;
            }
            // For double-byte the cell width is kept, so that it can be
            // detected from the first byte.
            if (enc_dbcs && (MB_BYTE2LEN(ch) == 2)) {
              break;
            }

            if (negate) {
              chartab[ch] = (chartab[ch] & ~CT_CELL_MASK)
                            + ((dy_flags & DY_UHEX) ? 4 : 2);
              chartab[ch] &= ~CT_PRINT_CHAR;
            } else {
              chartab[ch] = (chartab[ch] & ~CT_CELL_MASK) + 1;
              chartab[ch] |= CT_PRINT_CHAR;
            }
            break;

          case 2:
            // (re)set fname flag
            if (negate) {
              chartab[ch] &= ~CT_FNAME_CHAR;
            } else {
              chartab[ch] |= CT_FNAME_CHAR;
            }
            break;

          default:  // round == 3
            // (re)set keyword flag
            if (negate) {
              RESET_CHARTAB(buf, ch);
            } else {
              SET_CHARTAB(buf, ch);
            }
            break;
        }
      }

      // Note whether the entry ended in a comma before moving on; a comma
      // with nothing after it is an error.
      int ended_on_comma = (*opt == ',');
      opt = skip_to_option_part(opt);

      if (ended_on_comma && (*opt == NUL)) {
        // Trailing comma is not allowed.
        return FAIL;
      }
    }
  }

  chartab_initialized = TRUE;
  return OK;
}
예제 #2
0
파일: charset.c 프로젝트: Gaelan/neovim
/*
 * Rebuild the character class tables.
 *
 * When "global" is non-zero the shared chartab[] is first reset to its
 * defaults and the 'isident', 'isprint' and 'isfname' options are applied to
 * it.  The keyword table buf->b_chartab[] is always cleared and rebuilt from
 * the buffer's 'iskeyword' value.
 *
 * Each option is a comma-separated list of characters, character numbers or
 * ranges, e.g.: "200-210,x,#-178,-".  An entry starting with '^' clears the
 * flag instead of setting it.  A lone "@" covers bytes 1-255 but only touches
 * those accepted by MB_ISLOWER()/MB_ISUPPER() (and, when p_altkeymap is set,
 * by F_isalpha()/F_isdigit()).
 *
 * "global" FALSE: only set buf->b_chartab[].
 *
 * Returns FAIL when one of the options has an error, OK otherwise.  Entries
 * parsed before the error have already been applied when FAIL is returned.
 */
int buf_init_chartab(buf_T *buf, int global)
{
  int lo;
  int hi;
  char_u *s;
  int pass;
  int invert;
  int letters_only;

  if (global) {
    /*
     * Default cell widths: bytes below <Space> are unprintable, <Space>
     * through '~' take one cell and are printable.  Storing whole entries
     * also resets every 'isident' and 'isfname' flag.  Unprintable bytes
     * get width 4 when DY_UHEX is set in dy_flags, 2 otherwise.
     */
    for (lo = 0; lo < ' '; ++lo) {
      chartab[lo] = (dy_flags & DY_UHEX) ? 4 : 2;
    }
    for (; lo <= '~'; ++lo) {
      chartab[lo] = 1 + CT_PRINT_CHAR;
    }
    if (p_altkeymap) {
      /* The printable run continues up to (but excluding) YE. */
      for (; lo < YE; ++lo) {
        chartab[lo] = 1 + CT_PRINT_CHAR;
      }
    }

    /* Remaining bytes up to 255. */
    for (; lo < 256; ++lo) {
      if ((enc_utf8 && lo >= 0xa0)
          || (enc_dbcs == DBCS_JPNU && lo == 0x8e)) {
        /* UTF-8: bytes 0xa0 - 0xff are printable (latin1). */
        /* euc-jp: characters starting with 0x8e are single width. */
        chartab[lo] = CT_PRINT_CHAR + 1;
      } else if (enc_dbcs != 0 && MB_BYTE2LEN(lo) == 2) {
        /* Other double-byte chars can be printable AND double-width. */
        chartab[lo] = CT_PRINT_CHAR + 2;
      } else {
        /* Everything else is unprintable by default. */
        chartab[lo] = (dy_flags & DY_UHEX) ? 4 : 2;
      }
    }

    /* Every multi-byte char is assumed to be a filename character. */
    for (lo = 1; lo < 256; ++lo) {
      if ((enc_dbcs != 0 && MB_BYTE2LEN(lo) > 1)
          || (enc_dbcs == DBCS_JPNU && lo == 0x8e)
          || (enc_utf8 && lo >= 0xa0)) {
        chartab[lo] |= CT_FNAME_CHAR;
      }
    }
  }

  /* Start from an empty keyword table. */
  vim_memset(buf->b_chartab, 0, (size_t)32);

  if (enc_dbcs != 0) {
    /* Double-byte characters are probably word characters. */
    for (lo = 0; lo < 256; ++lo) {
      if (MB_BYTE2LEN(lo) == 2) {
        SET_CHARTAB(buf, lo);
      }
    }
  }

  /* Lisp mode counts '-' as a keyword character. */
  if (buf->b_p_lisp) {
    SET_CHARTAB(buf, '-');
  }

  /*
   * Process the options in the order 'isident', 'isprint', 'isfname',
   * 'iskeyword'.  Without "global" only the last one is handled.
   */
  for (pass = global ? 0 : 3; pass <= 3; ++pass) {
    s = (pass == 0) ? p_isi             /* 'isident' */
        : (pass == 1) ? p_isp           /* 'isprint' */
        : (pass == 2) ? p_isf           /* 'isfname' */
        : buf->b_p_isk;                 /* 'iskeyword' */

    while (*s) {
      invert = FALSE;
      letters_only = FALSE;

      /* A leading '^' inverts the entry, unless it is the very last
       * character of the option, in which case it is a literal '^'. */
      if (*s == '^' && s[1] != NUL) {
        invert = TRUE;
        ++s;
      }

      /* First (or only) character: a decimal number or a literal. */
      if (VIM_ISDIGIT(*s)) {
        lo = getdigits(&s);
      } else if (has_mbyte) {
        lo = mb_ptr2char_adv(&s);
      } else {
        lo = *s++;
      }

      /* Optional "-end" part; -1 means "no range given". */
      hi = -1;
      if (*s == '-' && s[1] != NUL) {
        ++s;
        if (VIM_ISDIGIT(*s)) {
          hi = getdigits(&s);
        } else if (has_mbyte) {
          hi = mb_ptr2char_adv(&s);
        } else {
          hi = *s++;
        }
      }

      /* Reject values outside 1..255, reversed ranges and entries that
       * are not followed by a comma or the end of the option. */
      if (lo <= 0 || lo >= 256) {
        return FAIL;
      }
      if (hi != -1 && hi < lo) {
        return FAIL;
      }
      if (hi >= 256) {
        return FAIL;
      }
      if (*s != NUL && *s != ',') {
        return FAIL;
      }

      if (hi == -1) {
        /*
         * Not a range: a single character, or a single '@' (not "@-@")
         * which walks 1..255 and keeps only alphabetic characters.
         */
        hi = lo;
        if (lo == '@') {
          letters_only = TRUE;
          lo = 1;
          hi = 255;
        }
      }

      for (; lo <= hi; ++lo) {
        /* The MB_ macros are used because isalpha() doesn't work
         * properly when 'encoding' is "latin1" and the locale is "C". */
        if (letters_only
            && !MB_ISLOWER(lo)
            && !MB_ISUPPER(lo)
            && !(p_altkeymap && (F_isalpha(lo) || F_isdigit(lo)))) {
          continue;
        }

        switch (pass) {
        case 0:
          /* (re)set ID flag */
          if (invert) {
            chartab[lo] &= ~CT_ID_CHAR;
          } else {
            chartab[lo] |= CT_ID_CHAR;
          }
          break;

        case 1:
          /* (re)set printable */
          /* <Space>..'~' stay as they are, unless p_altkeymap is set and
           * F_isalpha()/F_isdigit() accepts the character. */
          if (lo >= ' '
              && lo <= '~'
              && !(p_altkeymap && (F_isalpha(lo) || F_isdigit(lo)))) {
            break;
          }
          /* For double-byte we keep the cell width, so that we can
           * detect it from the first byte. */
          if (enc_dbcs && MB_BYTE2LEN(lo) == 2) {
            break;
          }

          if (invert) {
            chartab[lo] = (chartab[lo] & ~CT_CELL_MASK)
                          + ((dy_flags & DY_UHEX) ? 4 : 2);
            chartab[lo] &= ~CT_PRINT_CHAR;
          } else {
            chartab[lo] = (chartab[lo] & ~CT_CELL_MASK) + 1;
            chartab[lo] |= CT_PRINT_CHAR;
          }
          break;

        case 2:
          /* (re)set fname flag */
          if (invert) {
            chartab[lo] &= ~CT_FNAME_CHAR;
          } else {
            chartab[lo] |= CT_FNAME_CHAR;
          }
          break;

        default:        /* pass == 3 */
          /* (re)set keyword flag */
          if (invert) {
            RESET_CHARTAB(buf, lo);
          } else {
            SET_CHARTAB(buf, lo);
          }
          break;
        }
      }

      /* Note whether the entry ended in a comma before moving on; a
       * comma with nothing after it is an error. */
      int ended_on_comma = (*s == ',');
      s = skip_to_option_part(s);
      if (ended_on_comma && *s == NUL) {
        /* Trailing comma is not allowed. */
        return FAIL;
      }
    }
  }

  chartab_initialized = TRUE;
  return OK;
}