Exemple #1
0
// Constructs a CSVMap from a file path and a header flag.
//
// file_path is copied into file_path_ and the current value of header is
// copied into header_ (NOTE(review): header is taken by non-const reference
// but is only read here -- confirm no caller relies on it being writable).
// ConstructMap() is invoked once both members have been assigned.
CSVMap::CSVMap(const string &file_path, bool& header)
{
    header_ = header;
    file_path_ = file_path;

    // Runs last so that both members above are already set when it executes.
    ConstructMap();
}
Exemple #2
0
/* Parses a character class body and fills in this RE_Class.

   SOURCE/LENGTH describe the pattern text; INDEX is the position of the
   first character inside the class (the recursive call below passes the
   position just past the '[').  A leading '^' marks the class as inverted.
   DEPTH counts nesting of intersected classes ("&&[...]") and is only
   checked when RE_FEATURE__CHARACTER_CLASS_INTERSECTION is defined.
   EXTENDED is not referenced directly here (NOTE(review): presumably
   consumed by the RE_SKIP_WHITESPACE macro -- confirm).

   Characters below BITMAP_RANGE are recorded through SetCharacter/SetRange;
   characters at or above it are additionally accumulated in an
   RE_ExcludeIncludeRange list that is handed to ConstructMap at the end.

   Returns true on success, leaving INDEX at the closing ']'.  Returns false
   when the nesting depth limit is reached, when the pattern ends right
   after a backslash, when a range is reversed, when a nested class fails
   to parse, or when LENGTH is reached before a closing ']' is found.
   NOTE(review): OP_NEW_L presumably leaves on allocation failure, as the
   "L" suffix suggests -- confirm.  */
bool
RE_Class::ConstructL (unsigned depth, const uni_char *source, unsigned &index, unsigned length, bool extended)
{
  /* -1: no range pending; otherwise the first endpoint of a "a-b" range
     whose '-' has already been consumed. */
  int range_first = -1;

  for (unsigned i = 0; i < BITMAP_ELEMENTS; ++i)
    bitmap[i] = 0;

  /* Bounds-checked so an INDEX equal to LENGTH never reads SOURCE. */
  if (index < length && source[index] == '^')
    {
      inverted = true;
      ++index;
    }
  else
    inverted = false;

#ifdef RE_FEATURE__CHARACTER_CLASS_INTERSECTION
  if (depth++ >= RE_FEATURE__CHARACTER_CLASS_INTERSECTION_MAX_DEPTH)
    return false;
#endif // RE_FEATURE__CHARACTER_CLASS_INTERSECTION

#ifdef ES_FEATURE__ERROR_MESSAGES
  /* Positions of the two range endpoints, used to point INDEX at the
     offending component when an error is reported. */
  unsigned range_first_index, range_second_index;
#endif /* ES_FEATURE__ERROR_MESSAGES */

  RE_SKIP_WHITESPACE ();

  RE_ExcludeIncludeRange *eir = OP_NEW_L (RE_ExcludeIncludeRange, (false, 0, INT_MAX));
  OpStackAutoPtr<RE_ExcludeIncludeRange> eir_anchor (eir);

  /* Local wrappers around the member functions of the same name: besides
     updating the class they also add every character (or the part of a
     range) at or above BITMAP_RANGE to EIR.  A macro's own name is not
     re-expanded inside its replacement, so the inner calls reach the member
     functions.  Note that the arguments are evaluated more than once. */
#define SetCharacter(ch) do { SetCharacter (ch); if (!(ch < BITMAP_RANGE)) eir = RE_ExcludeIncludeRange::Include (eir, ch, ch); } while (0)
#define SetRange(first, last) do { SetRange (first, last); if (last >= BITMAP_RANGE) eir = RE_ExcludeIncludeRange::Include (eir, MAX (first, BITMAP_RANGE), last); } while (0)

  while (index < length)
    {
#ifdef ES_FEATURE__ERROR_MESSAGES
      if (range_first == -1)
        range_first_index = index;
#endif /* ES_FEATURE__ERROR_MESSAGES */

      int character = source[index++];

      if (character == ']')
        {
          /* A pending "x-" directly before ']' means both 'x' and '-' are
             literal members. */
          if (range_first >= 0)
            {
              SetCharacter (range_first);
              SetCharacter ('-');
            }

          /* Leave INDEX on the ']' itself. */
          --index;
          break;
        }
      else if (character == '\\')
        {
          if (index == length)
            return false;

          /* -2 means "this escape did not produce a single literal
             character"; the literal escapes below overwrite it, the
             built-in class escapes (\d, \s, \w, ...) leave it in place. */
          character = -2;

          bool builtin_class_handled = false;

          switch (source[index++])
            {
            case '0':
              if (index == length || !RE_Compiler::IsOctalDigit (source[index]))
                {
                  character = 0;
                  break;
                }
              ++index;
              /* Fall through: the following case steps INDEX back by one so
                 the digits after the leading '0' are parsed as octal. */

            case '1': case '2': case '3': case '4': case '5': case '6': case '7':
              character = 0;
              --index;
              /* Accumulate octal digits while the value stays within 255. */
              while (index < length && RE_Compiler::IsOctalDigit (source[index]))
                {
                  int next_character = character * 8 + (source[index] - '0');
                  if (next_character > 255)
                    break;
                  character = next_character;
                  ++index;
                }
              break;

            case 'b':
              character = 8;
              break;

            case 't':
              character = 9;
              break;

            case 'n':
              character = 10;
              break;

            case 'v':
              character = 11;
              break;

            case 'f':
              character = 12;
              break;

            case 'r':
              character = 13;
              break;

            case 'd':
              if (builtin_class == BUILTIN_EMPTY || builtin_class == BUILTIN_DIGIT)
                {
                  builtin_class = BUILTIN_DIGIT;
                  builtin_class_handled = true;
                }

              SetRange ('0', '9');
              break;

            case 'D':
              if (builtin_class == BUILTIN_EMPTY || builtin_class == BUILTIN_NON_DIGIT)
                {
                  builtin_class = BUILTIN_NON_DIGIT;
                  builtin_class_handled = true;
                }

              SetRange (0, '0' - 1);
              SetRange ('9' + 1, INT_MAX - 1);
              break;

            case 's':
              if (builtin_class == BUILTIN_EMPTY || builtin_class == BUILTIN_WHITESPACE)
                {
                  builtin_class = BUILTIN_WHITESPACE;
                  builtin_class_handled = true;
                }

              SetRange (9, 13);
              SetCharacter (32);
              SetCharacter (160);
              SetCharacter (0x1680);
              SetCharacter (0x180e);
              SetRange (0x2000, 0x200b);
              SetCharacter (0x2028);
              SetCharacter (0x2029);
              SetCharacter (0x202f);
              SetCharacter (0x205f);
              SetCharacter (0x3000);
              SetCharacter (0xfeff);
              break;

            case 'S':
              if (builtin_class == BUILTIN_EMPTY || builtin_class == BUILTIN_NON_WHITESPACE)
                {
                  builtin_class = BUILTIN_NON_WHITESPACE;
                  builtin_class_handled = true;
                }

              /* The gaps between the characters and ranges listed for
                 '\s' above. */
              SetRange (0, 8);
              SetRange (14, 31);
              SetRange (33, 159);
              SetRange (161, 0x1680 - 1);
              SetRange (0x1680 + 1, 0x180e - 1);
              SetRange (0x180e + 1, 0x2000 - 1);
              SetRange (0x200b + 1, 0x2028 - 1);
              SetRange (0x2029 + 1, 0x202f - 1);
              SetRange (0x202f + 1, 0x205f - 1);
              SetRange (0x205f + 1, 0x3000 - 1);
              SetRange (0x3000 + 1, 0xfeff - 1);
              SetRange (0xfeff + 1, INT_MAX - 1);
              break;

            case 'w':
              if (builtin_class == BUILTIN_EMPTY || builtin_class == BUILTIN_WORD_CHARACTER)
                {
                  builtin_class = BUILTIN_WORD_CHARACTER;
                  builtin_class_handled = true;
                }

              SetRange ('0', '9');
              SetRange ('A', 'Z');
              SetCharacter ('_');
              SetRange ('a', 'z');
              break;

            case 'W':
              if (builtin_class == BUILTIN_EMPTY || builtin_class == BUILTIN_NON_WORD_CHARACTER)
                {
                  builtin_class = BUILTIN_NON_WORD_CHARACTER;
                  builtin_class_handled = true;
                }

              SetRange (0, '0' - 1);
              SetRange ('9' + 1, 'A' - 1);
              SetRange ('Z' + 1, '_' - 1);
              /* The single code point between '_' and 'a'. */
              SetCharacter ('_' + 1);
              SetRange ('z' + 1, INT_MAX - 1);
              break;

            case 'c':
              /* Need the control letter plus at least one more character.
                 The operands were previously reversed ("index - length"),
                 which wraps around for unsigned values and only triggered
                 when INDEX == LENGTH.  "continue" drops the escape and
                 resumes scanning at the current position. */
              if (length - index < 2)
                continue;

              if (!RE_Compiler::IsLetter (source[index]))
                continue;

              character = source[index++] % 32;
              break;

            case 'x':
            case 'u':
              /* Need two hex digits plus at least one more character; this
                 also keeps the source[index + 1] look-aheads below inside
                 the buffer (same reversed-operand fix as for '\c'). */
              if (length - index < 3)
                continue;

#ifdef RE_FEATURE__BRACED_HEXADECIMAL_ESCAPES
              if (source[index] == '{' && RE_Compiler::IsHexDigit (source[index + 1]))
                {
                  unsigned index0 = index + 1;
                  character = 0;

                  /* NOTE(review): the number of digits is not limited here,
                     so a long digit run can overflow CHARACTER -- confirm
                     whether an upper bound is enforced elsewhere. */
                  while (index0 < length && source[index0] != '}' && RE_Compiler::IsHexDigit (source[index0]))
                    character = (character << 4) | RE_Compiler::HexToCharacter (source[index0++]);

                  if (index0 < length && source[index0] == '}')
                    {
                      index = index0 + 1;
                      break;
                    }

                  /* Not a well-formed "{...}": fall back to the plain
                     two/four digit forms below. */
                  character = -2;
                }
#endif // RE_FEATURE__BRACED_HEXADECIMAL_ESCAPES

              if (!RE_Compiler::IsHexDigit (source[index]) || !RE_Compiler::IsHexDigit (source[index + 1]))
                continue;

              if (source[index - 1] == 'x')
                {
                  character = RE_Compiler::HexToCharacter (source[index + 1], source[index]);
                  index += 2;
                }
              else
                {
                  /* '\u' needs four hex digits plus at least one more
                     character (same reversed-operand fix as above). */
                  if (length - index < 5)
                    continue;

                  if (!RE_Compiler::IsHexDigit (source[index + 2]) || !RE_Compiler::IsHexDigit (source[index + 3]))
                    continue;

                  character = RE_Compiler::HexToCharacter (source[index + 3], source[index + 2], source[index + 1], source[index]);
                  index += 4;
                }
              break;

            default:
              /* Any other escaped character stands for itself. */
              character = source[index - 1];
              break;

#if 0
#ifdef ES_FEATURE__ERROR_MESSAGES
              error_string = ES_ERROR ("invalid escape sequence.");
              --index;
#endif /* ES_FEATURE__ERROR_MESSAGES */

              return false;
#endif // 0
            }

          /* Any escape other than a repeat of the single built-in class
             seen so far means the class is no longer a pure built-in. */
          if (!builtin_class_handled)
            builtin_class = BUILTIN_NONE;
        }
#ifdef RE_FEATURE__CHARACTER_CLASS_INTERSECTION
      else if (character == '&' && length - index >= 3 && source[index] == '&' && source[index + 1] == '[')
        {
          /* "&&[": parse the nested class, starting just past its '['. */
          unsigned index0 = index + 2;

          RE_Class *iw = OP_NEW_L (RE_Class, (case_insensitive));

          /* Linked in before parsing, so the object is reachable from this
             class even if the nested parse fails. */
          iw->intersect_next = intersect_with;
          intersect_with = iw;

          if (!iw->ConstructL (depth, source, index0, length, extended))
            return false;

          /* The nested call leaves INDEX0 on its ']'; step past it. */
          index = index0 + 1;

          /* NOTE(review): CHARACTER is still '&' when control reaches the
             range handling below, so '&' itself appears to be added to this
             class (or used as a range endpoint) after a nested class has
             been parsed -- confirm whether that is intended. */
        }
#endif // RE_FEATURE__CHARACTER_CLASS_INTERSECTION

      if (character == -2)
        /* A built-in class escape cannot take part in a range. */
        range_first = -1;
      else if (range_first == -1)
        {
          RE_SKIP_WHITESPACE ();

          if (index < length)
            {
              int dash = source[index];

              if (dash == '-')
                {
                  /* Possible start of a range: remember the first endpoint
                     and fetch the second on the next iteration. */
                  range_first = character;
                  ++index;

#ifdef ES_FEATURE__ERROR_MESSAGES
                  range_second_index = index;
#endif /* ES_FEATURE__ERROR_MESSAGES */

                  continue;
                }
            }

          if (character != -2)
            {
              SetCharacter (character);
              builtin_class = BUILTIN_NONE;
            }
        }
      else if (range_first == -2 || range_first > character)
        {
#ifdef ES_FEATURE__ERROR_MESSAGES
          if (range_first == -2 || character == -2)
            {
              error_string = ES_ERROR ("invalid component in range.");

              if (range_first == -2)
                index = range_first_index;
              else
                index = range_second_index;
            }
          else
            {
              error_string = ES_ERROR ("negative range.");
              index = range_first_index;
            }
#endif /* ES_FEATURE__ERROR_MESSAGES */

          return false;
        }
      else
        {
          SetRange (range_first, character);
          builtin_class = BUILTIN_NONE;

          range_first = -1;
        }

      RE_SKIP_WHITESPACE ();
    }

  /* The loop ran off the end without meeting ']': unterminated class. */
  if (index == length)
    return false;

#ifdef RE_FEATURE__CHARACTER_CLASS_INTERSECTION
  RE_Class **iwp = &intersect_with;

  while (RE_Class *iw = *iwp)
    {
      /* Both bitmap representations intersect with the same element-wise
         AND. */
      for (unsigned i = 0; i < BITMAP_ELEMENTS; ++i)
        bitmap[i] &= iw->bitmap[i];

      if (!iw->map)
        {
          /* Discard ranges that only use the bitmap; they are fully
             merged with the main set in the step above. */
          *iwp = iw->intersect_next;
          OP_DELETE(iw);
        }
      else
        iwp = &iw->intersect_next;
    }
#endif // RE_FEATURE__CHARACTER_CLASS_INTERSECTION

  if (case_insensitive)
    {
#ifdef RE_COMPACT_CLASS_BITMAP
      /* Mirror bits between elements 2 and 3 so that a letter present in
         one case is also present in the other.
         NOTE(review): 0x7ffffe covers bits 1..22 only.  If each element
         holds 32 code points with code point CH stored at bit (CH & 31),
         'A'..'Z' occupy bits 1..26 and the mask would need to be 0x7fffffe
         -- confirm against SetCharacter's bit layout. */
      bitmap[3] |= bitmap[2] & 0x7ffffe;
      bitmap[2] |= bitmap[3] & 0x7ffffe;
#else // RE_COMPACT_CLASS_BITMAP
      unsigned b;

      for (b = 'A'; b <= 'Z'; ++b)
        if (bitmap[b])
          bitmap[b + 32] = 1;
        else if (bitmap[b + 32])
          bitmap[b] = 1;
#endif // RE_COMPACT_CLASS_BITMAP

      /* For matched characters in 128..255, also add the opposite-case
         counterpart. */
      for (int character = 128; character < 256; ++character)
        if (Match (character))
          {
            if (uni_islower (character))
              SetCharacter (uni_toupper (character));
            else if (uni_isupper (character))
              SetCharacter (uni_tolower (character));
          }
    }

  /* Only the bitmap is flipped here; EIR is passed on unchanged. */
  if (inverted)
    for (unsigned idx = 0; idx < BITMAP_ELEMENTS; ++idx)
#ifdef RE_COMPACT_CLASS_BITMAP
      bitmap[idx] ^= ~0u;
#else // RE_COMPACT_CLASS_BITMAP
      bitmap[idx] = !bitmap[idx];
#endif // RE_COMPACT_CLASS_BITMAP

#undef SetCharacter
#undef SetRange

  ConstructMap (eir);

  return true;
}
	/* Initializes storage_ with the result of ConstructMap(begin, end, value)
	   and default_value_ from default_value.  The constructor body is empty.
	   NOTE(review): begin/end presumably delimit an iterator range -- confirm
	   against ConstructMap. */
	MapParameterStorage(It begin, It end, const Value& value, const string& default_value)
		: storage_(ConstructMap(begin, end, value)),
		  default_value_(default_value)
	{
	}