Ejemplo n.º 1
0
NumberingSystem* U_EXPORT2
NumberingSystem::createInstance(int32_t radix_in, UBool isAlgorithmic_in, const UnicodeString & desc_in, UErrorCode &status) {

    if (U_FAILURE(status)) {
        return NULL;
    }

    if ( radix_in < 2 ) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    if ( !isAlgorithmic_in ) {
       if ( desc_in.countChar32() != radix_in ) {
           status = U_ILLEGAL_ARGUMENT_ERROR;
           return NULL;
       }
    }

    NumberingSystem *ns = new NumberingSystem();

    ns->setRadix(radix_in);
    ns->setDesc(desc_in);
    ns->setAlgorithmic(isAlgorithmic_in);
    ns->setName(NULL);
    return ns;
    
}
Ejemplo n.º 2
0
void StaticUnicodeSetsTest::assertInSet(const UnicodeString &localeName, const UnicodeString &setName,
                              const UnicodeSet &set, const UnicodeString &str) {
    if (str.countChar32(0, str.length()) != 1) {
        // Ignore locale strings with more than one code point (usually a bidi mark)
        return;
    }
    assertInSet(localeName, setName, set, str.char32At(0));
}
Ejemplo n.º 3
0
Variant f_icu_match(CStrRef pattern, CStrRef subject,
                    VRefParam matches /* = null */, int64_t flags /* = 0 */) {
  UErrorCode status = U_ZERO_ERROR;

  if (matches.isReferenced()) {
    matches = Array();
  }

  // Create hash map key by concatenating pattern and flags.
  StringBuffer bpattern;
  bpattern.append(pattern);
  bpattern.append(':');
  bpattern.append(flags);
  String spattern = bpattern.detach();

  // Find compiled pattern matcher in hash map or add it.
  PatternStringMap::accessor accessor;
  const RegexPattern* rpattern;
  if (s_patternCacheMap.find(accessor, spattern.get())) {
    rpattern = accessor->second;
  } else {
    // First 32 bits are reserved for ICU-specific flags.
    rpattern = RegexPattern::compile(
      UnicodeString::fromUTF8(pattern.data()), (flags & 0xFFFFFFFF), status);
    if (U_FAILURE(status)) {
      return false;
    }

    if (s_patternCacheMap.insert(
      accessor, StringData::GetStaticString(spattern.get()))) {
      accessor->second = rpattern;
    } else {
      delete rpattern;
      rpattern = accessor->second;
    }
  }

  // Build regex matcher from compiled pattern and passed-in subject.
  UnicodeString usubject = UnicodeString::fromUTF8(subject.data());
  boost::scoped_ptr<RegexMatcher> matcher(rpattern->matcher(usubject, status));
  if (U_FAILURE(status)) {
    return false;
  }

  // Return 0 or 1 depending on whether or not a match was found and
  // (optionally), set matched (sub-)patterns for passed-in reference.
  int matched = 0;
  if (matcher->find()) {
    matched = 1;

    if (matches.isReferenced()) {
      int32_t count = matcher->groupCount();

      for (int32_t i = 0; i <= count; i++) {
        UnicodeString ustring = matcher->group(i, status);
        if (U_FAILURE(status)) {
          return false;
        }

        // Convert UnicodeString back to UTF-8.
        std::string string;
        ustring.toUTF8String(string);
        String match = String(string);

        if (flags & k_UREGEX_OFFSET_CAPTURE) {
          // start() returns the index in UnicodeString, which
          // normally means the index into an array of 16-bit
          // code "units" (not "points").
          int32_t start = matcher->start(i, status);
          if (U_FAILURE(status)) {
            return false;
          }

          start = usubject.countChar32(0, start);
          matches->append(CREATE_VECTOR2(match, start));
        } else {
          matches->append(match);
        }
      }
    }
  }

  return matched;
}
Ejemplo n.º 4
0
seec::Maybe<IndexedString>
IndexedString::from(UnicodeString const &String)
{
  if (String.isBogus())
    return seec::Maybe<IndexedString>();
  
  UnicodeString const NeedleStart("@[");
  UnicodeString const NeedleEscape("@[[");
  UnicodeString const NeedleEnd("]");
  
  UnicodeString CleanedString; // String with index indicators removed.
  std::multimap<UnicodeString, Needle> Needles;
  
  std::vector<std::pair<UnicodeString, int32_t>> IndexStack;
  
  int32_t SearchFrom = 0; // Current offset in String.
  int32_t FoundStart; // Position of matched index indicator.
  
  while ((FoundStart = String.indexOf(NeedleStart, SearchFrom)) != -1) {
    // Copy all the literal string data.
    CleanedString.append(String, SearchFrom, FoundStart - SearchFrom);
    
    // Check if this is an escape sequence.
    if (String.compare(FoundStart, NeedleEscape.length(), NeedleEscape) == 0) {
      CleanedString.append(NeedleStart);
      SearchFrom = FoundStart + NeedleEscape.length();
      continue;
    }
    
    // Find the end of this sequence.
    int32_t FoundEnd = String.indexOf(NeedleEnd, SearchFrom);
    if (FoundEnd == -1)
      return seec::Maybe<IndexedString>();
    
    if (FoundEnd == FoundStart + NeedleStart.length()) {
      // This is a closing sequence.
      if (IndexStack.size() == 0)
        return seec::Maybe<IndexedString>();
      
      // Pop the starting details of the last-opened sequence.
      auto const Start = IndexStack.back();
      IndexStack.pop_back();
      
      // Store the needle for this sequence.
      Needles.insert(std::make_pair(Start.first,
                                    Needle(Start.second,
                                           CleanedString.countChar32())));
    }
    else {
      // This is an opening sequence.
      int32_t const NameStart = FoundStart + NeedleStart.length();
      int32_t const NameLength = FoundEnd - NameStart;
      
      IndexStack.emplace_back(UnicodeString(String, NameStart, NameLength),
                              CleanedString.countChar32());
    }
    
    SearchFrom = FoundEnd + NeedleEnd.length();
  }
  
  // Copy all remaining literal data.
  CleanedString.append(String, SearchFrom, String.length() - SearchFrom);
  
  return IndexedString(std::move(CleanedString), std::move(Needles));
}
Ejemplo n.º 5
0
/**
 * Dumb recursive implementation of permutation.
 * TODO: optimize
 * @param source the string to find permutations for
 * @return the results in a set.
 */
void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status) {
    if(U_FAILURE(status)) {
        return;
    }
    //if (PROGRESS) printf("Permute: %s\n", UToS(Tr(source)));
    int32_t i = 0;

    // optimization:
    // if zero or one character, just return a set with it
    // we check for length < 2 to keep from counting code points all the time
    if (source.length() <= 2 && source.countChar32() <= 1) {
        UnicodeString *toPut = new UnicodeString(source);
        /* test for NULL */
        if (toPut == 0) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        result->put(source, toPut, status);
        return;
    }

    // otherwise iterate through the string, and recursively permute all the other characters
    UChar32 cp;
    Hashtable subpermute(status);
    if(U_FAILURE(status)) {
        return;
    }
    subpermute.setValueDeleter(uprv_deleteUObject);

    for (i = 0; i < source.length(); i += U16_LENGTH(cp)) {
        cp = source.char32At(i);
        const UHashElement *ne = NULL;
        int32_t el = UHASH_FIRST;
        UnicodeString subPermuteString = source;

        // optimization:
        // if the character is canonical combining class zero,
        // don't permute it
        if (skipZeros && i != 0 && u_getCombiningClass(cp) == 0) {
            //System.out.println("Skipping " + Utility.hex(UTF16.valueOf(source, i)));
            continue;
        }

        subpermute.removeAll();

        // see what the permutations of the characters before and after this one are
        //Hashtable *subpermute = permute(source.substring(0,i) + source.substring(i + UTF16.getCharCount(cp)));
        permute(subPermuteString.replace(i, U16_LENGTH(cp), NULL, 0), skipZeros, &subpermute, status);
        /* Test for buffer overflows */
        if(U_FAILURE(status)) {
            return;
        }
        // The upper replace is destructive. The question is do we have to make a copy, or we don't care about the contents 
        // of source at this point.

        // prefix this character to all of them
        ne = subpermute.nextElement(el);
        while (ne != NULL) {
            UnicodeString *permRes = (UnicodeString *)(ne->value.pointer);
            UnicodeString *chStr = new UnicodeString(cp);
            //test for  NULL
            if (chStr == NULL) {
                status = U_MEMORY_ALLOCATION_ERROR;
                return;
            }
            chStr->append(*permRes); //*((UnicodeString *)(ne->value.pointer));
            //if (PROGRESS) printf("  Piece: %s\n", UToS(*chStr));
            result->put(*chStr, chStr, status);
            ne = subpermute.nextElement(el);
        }
    }
    //return result;
}