// Native helper that answers whether codePoint is a titlecase letter by
// delegating to ICU's u_istitle(). The JNIEnv and jclass arguments are
// required by the JNI calling convention but are not used.
static jboolean Character_isTitleCaseImpl(JNIEnv*, jclass, jint codePoint) {
    const jboolean isTitleCase = u_istitle(codePoint);
    return isTitleCase;
}
Beispiel #2
0
//static jboolean Character_isTitleCaseImpl(JNIEnv*, jclass, jint codePoint) {
// Exported JNI entry point: forwards codePoint to ICU's u_istitle() and
// hands the answer back as a jboolean. The JNIEnv and jclass arguments are
// part of the JNI signature and are ignored.
JNIEXPORT jboolean JNICALL
Java_java_lang_Character_isTitleCaseImpl(JNIEnv*, jclass, jint codePoint) {
    return static_cast<jboolean>(u_istitle(codePoint));
}
Beispiel #3
0
      // Builds an unknown-word signature ("<unk...>") from the surface shape
      // of a word, following the English rules of Stanford parser's
      // getSignature5.
      //
      // The word is decoded from UTF-8 and scanned once, code point by code
      // point, to collect capitalization, digit, dash, punctuation and symbol
      // evidence.  Suffix tests run on a lowercase copy that is built with
      // u_tolower() per code point, so the result does not depend on the
      // process default locale (UnicodeString::toLower() without a Locale
      // does: e.g. under Turkish "ING" lowercases to "\u0131ng" and the
      // "-ing" test would fail).
      //
      // symbol: the word to classify; must be convertible to std::string.
      // Returns the signature, e.g. "<unk-caps-num>" for "A1".
      symbol_type operator()(const symbol_type& symbol) const
      {
	const std::string& word = static_cast<const std::string&>(symbol);
	const icu::UnicodeString uword = icu::UnicodeString::fromUTF8(icu::StringPiece(word.data(), word.size()));
	
	std::string signature = "<unk";
	
	// signature for English, taken from Stanford parser's getSignature5
	int num_caps = 0;
	bool has_digit  = false;
	bool has_dash   = false;
	bool has_lower  = false;
	bool has_punct  = false;
	bool has_symbol = false;
	
	size_t length = 0;  // number of code points consumed so far
	UChar32 ch0 = 0;    // first code point, original case
	UChar32 ch_1 = 0;   // last code point, lowercased
	UChar32 ch_2 = 0;   // second to last code point, lowercased
	
	// lowercase copy of the word (simple, locale-independent case mapping)
	icu::UnicodeString lowered;
	
	icu::StringCharacterIterator iter(uword);
	for (iter.setToStart(); iter.hasNext(); ++ length) {
	  const UChar32 ch = iter.next32PostInc();
	  const UChar32 lower = u_tolower(ch);
	  
	  // keep initial char; keyed on the position rather than on ch0 == 0,
	  // so that a leading U+0000 is not replaced by a later code point
	  if (length == 0)
	    ch0 = ch;
	  
	  ch_2 = ch_1;
	  ch_1 = lower;
	  lowered.append(lower);
	  
	  const int32_t gc = u_getIntPropertyValue(ch, UCHAR_GENERAL_CATEGORY_MASK);
	  
	  // dash punctuation (Pd) is part of the P mask, so a dash sets both
	  // has_dash and has_punct
	  has_dash   |= ((gc & U_GC_PD_MASK) != 0);
	  has_punct  |= ((gc & U_GC_P_MASK) != 0);
	  has_symbol |= ((gc & U_GC_S_MASK) != 0);
	  
	  // anything carrying a Unicode numeric value counts as a digit
	  has_digit  |= (u_getNumericValue(ch) != U_NO_NUMERIC_VALUE);
	  
	  if (u_isUAlphabetic(ch)) {
	    if (u_isULowercase(ch))
	      has_lower = true;
	    else if (u_istitle(ch)) {
	      // titlecase counts both as lowercase evidence and as a capital
	      has_lower = true;
	      ++ num_caps;
	    } else
	      // every other alphabetic code point is counted as a capital
	      ++ num_caps;
	  }
	}
	
	// NOTE: "loc" (presumably the sentence position used by the Stanford
	// code) is not checked here.
	if (u_isUUppercase(ch0) || u_istitle(ch0))
	  signature += "-caps";
	else if (! u_isUAlphabetic(ch0) && num_caps)
	  signature += "-caps";
	else if (has_lower)
	  signature += "-lc";
      
	if (has_digit)
	  signature += "-num";
	if (has_dash)
	  signature += "-dash";
	if (has_punct)
	  signature += "-punct";
	if (has_symbol)
	  signature += "-sym";
      
	if (length >= 3 && ch_1 == 's') {
	  // plural-like ending, but not "-ss", "-is" or "-us"
	  if (ch_2 != 's' && ch_2 != 'i' && ch_2 != 'u')
	    signature += "-s";
	} else if (length >= 5 && ! has_dash && ! (has_digit && num_caps > 0)) {
	  // first matching suffix wins; the order is significant
	  if (lowered.endsWith("ed"))
	    signature += "-ed";
	  else if (lowered.endsWith("ing"))
	    signature += "-ing";
	  else if (lowered.endsWith("ion"))
	    signature += "-ion";
	  else if (lowered.endsWith("er"))
	    signature += "-er";
	  else if (lowered.endsWith("est"))
	    signature += "-est";
	  else if (lowered.endsWith("ly"))
	    signature += "-ly";
	  else if (lowered.endsWith("ity"))
	    signature += "-ity";
	  else if (lowered.endsWith("y"))
	    signature += "-y";
	  else if (lowered.endsWith("al"))
	    signature += "-al";
	}
      
	signature += '>';
	
	return signature;
      }
Beispiel #4
0
// Answers whether codePoint is a titlecase letter by asking ICU's
// u_istitle() and returning its verdict as a jboolean.
jboolean fastiva_vm_Character_C$__isTitleCaseImpl(jint codePoint) {
    const jboolean titleCase = static_cast<jboolean>(u_istitle(codePoint));
    return titleCase;
}
Beispiel #5
0
// Reports whether the given code point is a titlecase letter, i.e. whether
// its general category is "Lt". The answer comes from ICU's u_istitle().
bool
BUnicodeChar::IsTitle(uint32 c)
{
	// The temporary is created only for whatever the BUnicodeChar
	// constructor does (not visible here; presumably one-time setup).
	// It has to stay ahead of the ICU call.
	BUnicodeChar();
	const bool isTitleCase = u_istitle(c);
	return isTitleCase;
}