/**
 * Checks that is_abbreviation() yields 0 for the pair
 * ("abbreviation", "abbr.").
 *
 * The test passes against the current implementation. Caveat: that
 * implementation does not follow any standard, accepted definition of
 * an abbreviation, so a different name (for example "custom_stemming")
 * might describe it better.
 */
void test_is_not_abbrev()
{
    const char *full_word = "abbreviation";
    const char *short_form = "abbr.";

    CPPUNIT_ASSERT(is_abbreviation(full_word, short_form) == 0);
}
/**
 * Expands a list of tokens into a list of words.
 *
 * Each token is handled by the first rule that applies:
 *   1. is_abbreviation() returns a positive value: the entries it left in
 *      the scratch array are appended to the result.
 *   2. is_word() accepts the token: it is appended unchanged.
 *   3. Otherwise the token is split with make_ctype_array() and every piece
 *      is handled according to ctype() of its first character:
 *        1 (letters) - appended as is when is_word() accepts the piece,
 *                      otherwise exploded with an empty separator and each
 *                      part passed through replace_schar();
 *        2 (symbols) - exploded and passed through replace_schar() likewise;
 *        3 (numbers) - converted with int_to_words(), exploded on `sp`, and
 *                      only the non-empty parts appended.
 *      Pieces of any other class contribute nothing.
 *
 * @param a tokens to expand
 * @return the words produced, in the order the tokens were processed
 */
CFSArray<CFSWString> tokens2words(CFSArray<CFSWString> a)
{
    // Scratch buffer shared by is_abbreviation() and explode(). It is kept at
    // function scope: whether those helpers clear it before filling it is not
    // visible here, so its lifetime must span all tokens.
    CFSArray<CFSWString> scratch;
    CFSArray<CFSWString> words;

    // One pass per token.
    for (INTPTR tok = 0; tok < a.GetSize(); tok++) {
        CFSWString token = a[tok];

        if (is_abbreviation(token, scratch) > 0) {
            for (INTPTR n = 0; n < scratch.GetSize(); n++) {
                words.AddItem(scratch[n]);
            }
            continue;
        }

        if (is_word(token)) {
            words.AddItem(token);
            continue;
        }

        // Neither an abbreviation nor a plain word: break the token into
        // pieces and treat each piece by the class of its first character.
        CFSArray<CFSWString> pieces;
        make_ctype_array(token, pieces);

        for (INTPTR p = 0; p < pieces.GetSize(); p++) {
            CFSWString piece = pieces[p];
            INTPTR kind = ctype(piece.GetAt(0));
            bool letters = (kind == 1);

            if (letters && is_word(piece)) {
                // Letters forming a recognised word.
                words.AddItem(piece);
            } else if (letters || kind == 2) {
                // Letters that are gibberish, or symbols: explode with an
                // empty separator (presumably one part per character -
                // confirm against explode()) and map each part through
                // replace_schar().
                explode(piece, L"", scratch);
                for (INTPTR n = 0; n < scratch.GetSize(); n++) {
                    words.AddItem(replace_schar(scratch[n]));
                }
            } else if (kind == 3) {
                // Numbers: convert to words, then split on the separator.
                CFSWString spoken = int_to_words(piece);
                explode(spoken, sp, scratch);
                for (INTPTR n = 0; n < scratch.GetSize(); n++) {
                    if (scratch[n].GetLength() > 0) {
                        words.AddItem(scratch[n]);
                    }
                }
            }
        }
    }

    return words;
}