예제 #1
0
파일: util.hpp 프로젝트: kspangsege/archon
    /// Convert the specified string to lower case, in place, with a fast
    /// path for strings that consist entirely of ASCII characters.
    ///
    /// Code units in the range 0x41..0x5A ('A'..'Z') are shifted up by 0x20;
    /// all other code units at or below 0x7F are kept unchanged. As soon as
    /// a code unit above 0x7F is encountered, any partial result is
    /// discarded and the entire original string is passed to
    /// dom::to_lower_case() instead (presumably a full Unicode lower casing
    /// routine; it is defined elsewhere).
    ///
    /// If the string contains neither upper case ASCII letters nor code
    /// units above 0x7F, it is left untouched and nothing is allocated.
    ///
    /// \param s The string to convert. It is overwritten with the result.
    inline void to_lower_case_ascii(dom::DOMString &s)
    {
      typedef dom::DOMString::const_iterator iter;
      typedef dom::DOMString::value_type char_type;
      typedef dom::DOMString::traits_type traits;
      iter const b = s.begin();
      iter const e = s.end();

      // Phase 1: scan for the first character that requires any work. Until
      // one is found, no new string is allocated.
      iter i = b;
      traits::int_type v;
      for (;;) {
        if (i == e) return; // Nothing needs to be done
        v = traits::to_int_type(*i);
        if (0x7F < v) {
          s = dom::to_lower_case(s); // Non-ASCII: delegate lower casing
          return;
        }
        if (0x41 <= v && v <= 0x5A) break; // Detect upper case ASCII
        ++i;
      }

      // Phase 2: build the converted string. The prefix [b, i) is known to
      // need no conversion, and *i is the first upper case ASCII letter.
      dom::DOMString s2;
      s2.reserve(s.size());
      s2.append(b, i);
      s2.append(1, traits::to_char_type(v + 0x20)); // Convert detected character
      while (++i != e) {
        char_type c = *i;
        v = traits::to_int_type(c);
        if (0x7F < v) {
          // Non-ASCII: drop the partial result and delegate lower casing of
          // the original string.
          s = dom::to_lower_case(s);
          return;
        }
        if (0x41 <= v && v <= 0x5A) c = traits::to_char_type(v + 0x20);
        s2.append(1, c);
      }

      // Adopt the result without copying it; the old contents of `s` are
      // released when `s2` goes out of scope.
      s.swap(s2);
    }
예제 #2
0
파일: util.hpp 프로젝트: kspangsege/archon
    /// Check whether the specified string of UTF-16 code units consists
    /// only of characters that this function accepts in a name.
    ///
    /// Every character falls into one of three classes:
    ///
    ///  - accepted anywhere: ':', '_', 'A'-'Z', 'a'-'z', 0xC0-0x2FF except
    ///    0xD7 and 0xF7, 0x370-0x1FFF except 0x37E, 0x200C-0x200D,
    ///    0x2070-0x218F, 0x2C00-0x2FEF, 0x3001-0xD7FF, 0xF900-0xFDCF,
    ///    0xFDF0-0xFFFD, and 0x10000-0xEFFFF (encoded as a surrogate pair);
    ///
    ///  - accepted anywhere except as the first character: '-', '.',
    ///    '0'-'9', 0xB7, 0x300-0x36F, and 0x203F-0x2040;
    ///
    ///  - rejected: everything else, including unpaired surrogates.
    ///
    /// NOTE(review): these ranges appear to correspond to the Name
    /// production of XML 1.0 -- confirm against the specification.
    ///
    /// An empty string yields true, because the loop body never runs.
    /// Callers that require at least one character must check that
    /// themselves.
    ///
    /// \param name The candidate name.
    /// \return True if every character is acceptable at its position.
    inline bool validate_xml_1_0_name(dom::DOMString const &name)
    {
      typedef dom::DOMString::const_iterator iter;
      typedef dom::DOMString::traits_type traits;
      typedef traits::int_type int_type;
      iter const first = name.begin(), last = name.end();
      for (iter pos = first; pos != last; ++pos) {
        int_type const u = traits::to_int_type(*pos);
        bool const leading = (pos == first);

        // The ranges below are visited in ascending order, so each test only
        // needs to state the upper bound of the range under consideration.

        if (u < 0xC0) {
          // Accepted anywhere: letters, ':' and '_'
          if (0x41 <= u && u <= 0x5A) continue; // 'A' - 'Z'
          if (0x61 <= u && u <= 0x7A) continue; // 'a' - 'z'
          if (u == 0x3A || u == 0x5F) continue; // ':' or '_'
          // Accepted unless first: '-', '.', digits and 0xB7
          bool const non_leading_ok =
            u == 0x2D || u == 0x2E || (0x30 <= u && u <= 0x39) || u == 0xB7;
          if (!non_leading_ok || leading) return false;
          continue;
        }

        if (u < 0x300) { // 0xC0 - 0x2FF: good, except 0xD7 and 0xF7
          if (u == 0xD7 || u == 0xF7) return false;
          continue;
        }

        if (u < 0x370) { // 0x300 - 0x36F: good unless first char
          if (leading) return false;
          continue;
        }

        if (u < 0x2000) { // 0x370 - 0x1FFF: good, except 0x37E
          if (u == 0x37E) return false;
          continue;
        }

        if (u < 0x2070) { // 0x2000 - 0x206F: mostly bad
          if (u == 0x200C || u == 0x200D) continue; // Good
          if (u == 0x203F || u == 0x2040) { // Good unless first char
            if (leading) return false;
            continue;
          }
          return false;
        }

        if (u < 0x2190) continue;      // 0x2070 - 0x218F: good
        if (u < 0x2C00) return false;  // 0x2190 - 0x2BFF: bad
        if (u < 0x2FF0) continue;      // 0x2C00 - 0x2FEF: good
        if (u <= 0x3000) return false; // 0x2FF0 - 0x3000: bad
        if (u < 0xD800) continue;      // 0x3001 - 0xD7FF: good

        if (u < 0xDC00) { // 0xD800 - 0xDBFF: leading half of a surrogate pair
          if (++pos == last) return false; // Incomplete surrogate pair
          int_type const lo = traits::to_int_type(*pos);
          // The second unit must be a trailing (low) surrogate
          if (lo < 0xDC00 || 0xE000 <= lo) return false;
          core::UIntFast32 const cp = 0x10000 + (core::UIntFast32(u-0xD800)<<10) + (lo-0xDC00);
          if (0xF0000 <= cp) return false; // 0xF0000 and above: bad
          continue; // 0x10000 - 0xEFFFF: good
        }

        if (u < 0xF900) return false;  // 0xDC00 - 0xF8FF: bad (incl. stray low surrogates)
        if (u < 0xFDD0) continue;      // 0xF900 - 0xFDCF: good
        if (u < 0xFDF0) return false;  // 0xFDD0 - 0xFDEF: bad
        if (0xFFFE <= u) return false; // 0xFFFE and above: bad
        // 0xFDF0 - 0xFFFD: good
      }

      return true;
    }