Ejemplo n.º 1
0
/**
 * Parse a string as a base-10 integer.
 *
 * Characters in the U+FF00-U+FFFF range (Halfwidth and Fullwidth Forms)
 * are first replaced by their standard equivalents using a lookup table,
 * so that e.g. fullwidth digits are accepted. Characters in that range
 * that have no table entry are left unchanged; they cause the integer
 * conversion to fail.
 *
 * @param str String.
 * @return Integer value; 0 if the converted string is not a valid integer
 *         (the conversion status is not reported separately).
 */
int VarReplace::strToInt(const QString &str)
{
	// TODO: Qt should have a way to do this itself...

	// Fullwidth/Halfwidth to Standard table.
	// Index is the low byte of the U+FFxx code point; value is the
	// standard code point.
	// NOTE: Entries with value 0 have no mapping and are not converted.
	static const uint16_t fwhwToStd[256] = {
		   0, L'!', L'"', L'#', L'$', L'%', L'&', L'\'',
		L'(', L')', L'*', L'+', L',', L'-', L'.', L'/',
		L'0', L'1', L'2', L'3', L'4', L'5', L'6', L'7',
		L'8', L'9', L':', L';', L'<', L'=', L'>', L'?',
		L'@', L'A', L'B', L'C', L'D', L'E', L'F', L'G',
		L'H', L'I', L'J', L'K', L'L', L'M', L'N', L'O',
		L'P', L'Q', L'R', L'S', L'T', L'U', L'V', L'W',
		L'X', L'Y', L'Z', L'[', L'\\', L']', L'^', L'_',
		L'`', L'a', L'b', L'c', L'd', L'e', L'f', L'g',
		L'h', L'i', L'j', L'k', L'l', L'm', L'n', L'o',
		L'p', L'q', L'r', L's', L't', L'u', L'v', L'w',
		L'x', L'y', L'z', L'{', L'|', L'}', L'~', 0x2985,
		0x2986, 0x3002, 0x300C, 0x300D, 0x3001, 0x30FB, 0x30F2, 0x30A1,
		0x30A3, 0x30A5, 0x30A7, 0x30A9, 0x30E3, 0x30E5, 0x30E7, 0x30C3,
		0x30FC, 0x30A2, 0x30A4, 0x30A6, 0x30A8, 0x30AA, 0x30AB, 0x30AD,
		0x30AF, 0x30B1, 0x30B3, 0x30B5, 0x30B7, 0x30B9, 0x30BB, 0x30BD,
		0x30BF, 0x30C1, 0x30C4, 0x30C6, 0x30C8, 0x30CA, 0x30CB, 0x30CC,
		0x30CD, 0x30CE, 0x30CF, 0x30D2, 0x30D5, 0x30D8, 0x30DB, 0x30DE,
		0x30DF, 0x30E0, 0x30E1, 0x30E2, 0x30E4, 0x30E6, 0x30E8, 0x30E9,
		0x30EA, 0x30EB, 0x30EC, 0x30ED, 0x30EF, 0x30F3, 0x3099, 0x309A,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0x00A2, 0x00A3, 0x00AC, 0x00AF, 0x00A6, 0x00A5, 0x20A9, 0,
		0x2502, 0x2190, 0x2191, 0x2192, 0x2193, 0x25A0, 0x25CB, 0,

		// U+FFF0-U+FFFF - not assigned
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	};

	// Convert the string from fullwidth first.
	QString std_str(str);
	for (int i = 0; i < std_str.size(); i++) {
		const uint16_t chr = std_str[i].unicode();
		if ((chr & 0xFF00) != 0xFF00) {
			// Not in the fullwidth/halfwidth range.
			continue;
		}

		const uint16_t mapped = fwhwToStd[chr & 0xFF];
		if (mapped != 0) {
			// Only replace characters that have a mapping, so that
			// no U+0000 characters are written into the string.
			std_str[i] = QChar(mapped);
		}
	}

	// Convert to integer.
	return std_str.toInt(nullptr, 10);
}
Ejemplo n.º 2
0
// Returns a UTF-8 copy of `x` in which every 3-byte character accepted by
// one of the is_between() range checks is collapsed to a single byte.
// NA input is returned unchanged.
// NOTE(review): the range bounds are written as one-byte literals here;
// presumably they are meant to be the fullwidth forms of these characters
// (which are 3-byte sequences in UTF-8) -- confirm against is_between().
String halfwidthen_one(const String & x) {
  String input(x);

  // NA has nothing to convert.
  if (input == NA_STRING) {
    return input;
  }

  // Work on the UTF-8 byte representation.
  input.set_encoding(CE_UTF8);
  std::string bytes(input);

  unsigned int cursor = 0;
  while (cursor < bytes.size()) {
    int16_t lead = (int16_t) bytes[cursor] & 0xFF;
    unsigned int width = utf8char_bytes(lead);

    // Only 3-byte characters are candidates; step over everything else.
    if (width != 3) {
      cursor += width;
      continue;
    }

    // Amount to subtract from the trailing byte; 0 means "leave as is".
    int shift = 0;
    if (is_between(&bytes[cursor], "0", "9") ||
        is_between(&bytes[cursor], "A", "Z")) {
      shift = 0x60;
    } else if (is_between(&bytes[cursor], "a", "z")) {
      shift = 0x20;
    }

    if (shift != 0) {
      // Drop the two leading bytes; the former third byte now sits at
      // `cursor` and is adjusted in place.
      bytes.erase(cursor, 2);
      bytes[cursor] = bytes[cursor] - shift;
    }

    // Advance by a single byte whether or not a replacement happened.
    cursor += 1;
  }

  return String(bytes, CE_UTF8);
}
Ejemplo n.º 3
0
// Construct a word from a character buffer.
// The member `wd` is set to std_str(wd, len); presumably std_str is a
// std::string-like type, in which case this copies the first `len` bytes
// of the buffer -- confirm against the std_str definition.
cn_word::cn_word(char *wd, int len)
{
	const std_str text(wd, len);
	this->wd = text;
}
Ejemplo n.º 4
0
/**
 * Load a word list file into a forward and a backward dictionary.
 *
 * The file is read line by line. Empty lines and lines starting with '#'
 * are skipped. Every other line is passed through a lang_conv configured
 * as ("UTF-8", "UCS-2LE"), split into words by a word_splitter, and each
 * word is added twice: unchanged to p_fdic, and with its 2-byte units in
 * reverse order to p_bdic.
 *
 * @param p_fdic Dictionary receiving the words as read.
 * @param p_bdic Dictionary receiving the words with reversed unit order.
 * @param file   Path of the file to read.
 * @param p_attr Attribute passed through to wordic_add_word() for every word.
 * @return 0 once reading has finished (this includes the case where the
 *         file could not be opened and nothing was read); -1 if the line
 *         buffer could not be allocated.
 */
static int load_file(wordic_p p_fdic, wordic_p p_bdic, char *file, wd_attr_p p_attr)
{
    std_inf inf;
    char * line;
    std_str revert_word;
    lang_conv lc("UTF-8", "UCS-2LE");
#if defined(_CONV_TEST_)
    lang_conv lc2("UCS-2LE", "UTF-8");
#endif // _CONV_TEST_
    size_t len0;
    size_t len;
    // The separator is two bytes: ' ' followed by '\0' (presumably a
    // UCS-2LE space, matching the converted data).
    word_splitter words("", std_str(" ",2));

    line = (char*)malloc(IN_BUF_SZ);
    if (!line) {
        // Without a buffer nothing can be read.
        return -1;
    }

    inf.open(file, std_inf::in|std_inf::binary);

    while (inf.good()) {
        inf.getline(line, IN_BUF_SZ);

        // Measure once. getline() may leave an empty string in the buffer
        // (blank line, or the read that hits end-of-file), so the last
        // character must only be inspected when there is one.
        len0 = strlen(line);
        if (len0 > 0 && line[len0-1] == 0x0a) {
            line[len0-1] = '\0';
            len0--;
        }
        if (len0 == 0) continue;
        if ('#' == line[0]) continue;
        len = lc.conv(line, len0);

        words.work_on(std_str(lc.data(), lc.len()));
        int i = 0;
        // each element is 2bytes, so when revert the code, it should go with 2 bytes everytime.
        while (word_splitter::err != words[i]) {
            revert_word.clear();
            // Walk the word from its last 2-byte unit to its first and
            // append each unit to revert_word.
            int k = words[i].length() - sizeof(short);
            int j = 0;
            while (k >= 0) {
                revert_word.insert(j, words[i], k, sizeof(short));
                k -= sizeof(short);
                j += sizeof(short);
            }
            // Lengths are passed as a count of 2-byte units.
            wordic_add_word(p_fdic, (unsigned short*)words[i].data(), words[i].length()/2, p_attr);
            wordic_add_word(p_bdic, (unsigned short*)revert_word.data(), revert_word.length()/2, p_attr);
            i++;
        }
        words.debug();
        words.clear();

#if defined(_CONV_TEST_)
        // Round-trip check: convert the line back and split it again.
        len = lc2.conv(lc.data(), lc.len());
        if (len != lc.len()+1) {
            std::cout << "error or incomplete input" << std::endl;
        }
        // NOTE(review): the separator is changed here and not restored
        // before the next line is split -- confirm this is intended.
        words.chg_sep(" ");
        words.work_on(std_str(lc2.data(), lc2.len()));
        words.debug();
        words.clear();
#endif // _CONV_TEST_
    }

    inf.close();
    free(line);

    std::cout << "end" << std::endl;

    return 0;
}