void StringUtilTest::testConvertStringToNumber(){ StringUtil sUtil; //check passing in null returns -1 CPPUNIT_ASSERT(sUtil.convertStringToNumber(NULL) == -1); //check passing string with a number in it CPPUNIT_ASSERT(sUtil.convertStringToNumber("2343") == 2343); //check passing string with % at end CPPUNIT_ASSERT(sUtil.convertStringToNumber("1233%") == 1233); CPPUNIT_ASSERT(sUtil.convertStringToNumber("1233 %") == 1233); //check passing string with % at front CPPUNIT_ASSERT(sUtil.convertStringToNumber("%22") == 0); //check passing non numeric string CPPUNIT_ASSERT(sUtil.convertStringToNumber("apple") == 0); //check passing in NA CPPUNIT_ASSERT(sUtil.convertStringToNumber("NA") == -2); //check passing in number with characters in it CPPUNIT_ASSERT(sUtil.convertStringToNumber("234A234") == 234); //check passing in number too large CPPUNIT_ASSERT(sUtil.convertStringToNumber("99999999999999999999999999999999999999999999") == -3); }
void StringUtilTest::testSplitNullArgs(){ StringUtil sUtil; //test null arguments std::list<std::string> sList = sUtil.split(NULL,NULL); CPPUNIT_ASSERT(sList.empty() == true); sList = sUtil.split("blah",NULL); CPPUNIT_ASSERT(sList.empty() == true); sList = sUtil.split(NULL,"sdf"); CPPUNIT_ASSERT(sList.empty() == true); }
int main(int argc, char** argv) { // Create an instance of the Kytea program Kytea kytea; // Load a KyTea model from a model file // this can be a binary or text model in any character encoding, // it will be detected automatically kytea.readModel("../../data/model.bin"); // Get the string utility class. This allows you to convert from // the appropriate string encoding to Kytea's internal format StringUtil* util = kytea.getStringUtil(); // Get the configuration class, this allows you to read or set the // configuration for the analysis KyteaConfig* config = kytea.getConfig(); // Map a plain text string to a KyteaString, and create a sentence object KyteaString surface_string = util->mapString("これはテストです。"); KyteaSentence sentence(surface_string, util->normalize(surface_string)); // Find the word boundaries kytea.calculateWS(sentence); // Find the pronunciations for each tag level for(int i = 0; i < config->getNumTags(); i++) kytea.calculateTags(sentence,i); // For each word in the sentence const KyteaSentence::Words & words = sentence.words; for(int i = 0; i < (int)words.size(); i++) { // Print the word cout << util->showString(words[i].surface); // For each tag level for(int j = 0; j < (int)words[i].tags.size(); j++) { cout << "\t"; // Print each of its tags for(int k = 0; k < (int)words[i].tags[j].size(); k++) { cout << " " << util->showString(words[i].tags[j][k].first) << "/" << words[i].tags[j][k].second; } } cout << endl; } cout << endl; }
static void handing_text(std::string &line, std::map<std::string, std::size_t> &word_count) //文本处理,将大写转换成小写,去除标点符号 { std::string word ; std::istringstream sin(line); StringUtil stringutil ; while(sin >> word) //构成词库 { stringutil.upperTolower(word); //大写转小写,去除标点符号 //统计单词的词频,去除重复词的存储 std::pair<std::map<std::string, std::size_t>::iterator, bool> ret = word_count.insert(std::make_pair(word, 1)); if (!ret.second) { ++ret.first->second; } } sin.clear(); }
void CsvUtil::addFileData(const std::string &rSCsvFilePath) { if(std::string("") == rSCsvFilePath) return; FileLoadUtil tFileLoadUtil; auto linesVec = tFileLoadUtil.getDataLines(rSCsvFilePath); StrVec strsVec; StrDict dict; StringUtil tStringUtil; for(const auto &linesVecIter : linesVec) { strsVec = tStringUtil.split(linesVecIter.c_str(), ","); dict.push_back(strsVec); } _pCsvMap->insert(std::make_pair(std::string(rSCsvFilePath), dict)); }
void StringUtilTest::testGetIntAsPaddedString() { int cmp; StringUtil sUtil; //check that 0 or negative size string will return empty string for (int i = 0; i > -2; i--) { const char *val = sUtil.getIntAsPaddedString(0, 123, i); CPPUNIT_ASSERT(val != NULL); CPPUNIT_ASSERT(strcmp(val, "") == 0); } //check that value is larger the 10 width is set to width of 10. cmp = strcmp(sUtil.getIntAsPaddedString(4, 3, 20), "##########"); CPPUNIT_ASSERT(cmp == 0); //check that value is 10 width its set to width 10 cmp = strcmp(sUtil.getIntAsPaddedString(4, 3, 10), "##########"); CPPUNIT_ASSERT(cmp == 0); //check that value of 9 width is set to 9 cmp = strcmp(sUtil.getIntAsPaddedString(4, 3, 9), "#########"); CPPUNIT_ASSERT(cmp == 0); //check a negative value cmp = strcmp(sUtil.getIntAsPaddedString(-1, 3, 9), " -1"); CPPUNIT_ASSERT(cmp == 0); //check a 2 digit number no padding cmp = strcmp(sUtil.getIntAsPaddedString(32, 32, 2), "32"); CPPUNIT_ASSERT(cmp == 0); //check a 2 digit number 1 padding cmp = strcmp(sUtil.getIntAsPaddedString(32, 32, 3), " 32"); CPPUNIT_ASSERT(cmp == 0); //check a 2 digit number beyond threshold cmp = strcmp(sUtil.getIntAsPaddedString(32, 4, 2), "##"); CPPUNIT_ASSERT(cmp == 0); }
void StringUtilTest::testSplitValidArgs(){ StringUtil sUtil; //test 3 in list std::list<std::string> sList = sUtil.split("codonis,trestles,steele",","); CPPUNIT_ASSERT(sList.empty() == false); CPPUNIT_ASSERT(sList.size() == 3); CPPUNIT_ASSERT(sList.front() == "codonis"); sList.pop_front(); CPPUNIT_ASSERT(sList.front() == "trestles"); sList.pop_front(); CPPUNIT_ASSERT(sList.front() == "steele"); //test 2 in list sList = sUtil.split("codonis,steele",","); CPPUNIT_ASSERT(sList.empty() == false); CPPUNIT_ASSERT(sList.size() == 2); CPPUNIT_ASSERT(sList.front() == "codonis"); sList.pop_front(); CPPUNIT_ASSERT(sList.front() == "steele"); //test 1 in list sList = sUtil.split("codonis",","); CPPUNIT_ASSERT(sList.empty() == false); CPPUNIT_ASSERT(sList.size() == 1); CPPUNIT_ASSERT(sList.front() == "codonis"); //test 7 in list sList = sUtil.split("codonis,trestles,steele,a,b,c,d",","); CPPUNIT_ASSERT(sList.empty() == false); CPPUNIT_ASSERT(sList.size() == 7); CPPUNIT_ASSERT(sList.front() == "codonis"); sList.pop_front(); CPPUNIT_ASSERT(sList.front() == "trestles"); sList.pop_front(); CPPUNIT_ASSERT(sList.front() == "steele"); sList.pop_front(); CPPUNIT_ASSERT(sList.front() == "a"); sList.pop_front(); CPPUNIT_ASSERT(sList.front() == "b"); sList.pop_front(); CPPUNIT_ASSERT(sList.front() == "c"); sList.pop_front(); CPPUNIT_ASSERT(sList.front() == "d"); sList.pop_front(); }