/**
 * Cleans raw input text before further processing.
 *
 * The text is trimmed, "\n\n" is replaced by "\n", and several typographic
 * quote characters are replaced by a plain apostrophe. The text is then
 * scanned character by character and each character is kept, replaced or
 * dropped according to the first matching rule in the chain below.
 * Characters that match no rule are dropped.
 *
 * @param text input text (taken by value, modified locally)
 * @return the cleaned text, trimmed
 */
CFSWString DealWithText(CFSWString text) {
    /* Try to shovel out all of the junk. */
    CFSWString res;
    text.Trim();
    text.Replace(L"\n\n", L"\n", 1);
    text.Replace(L"‘", L"'", 1);
    text.Replace(L"`", L"'", 1);
    text.Replace(L"´", L"'", 1);
    text.Replace(L"’", L"'", 1);
    for (INTPTR i = 0; i < text.GetLength(); i++) {
        CFSWString c = text.GetAt(i);
        // Last character kept so far. While nothing has been kept yet, use an
        // empty string instead of calling GetAt with index -1.
        CFSWString pc = (res.GetLength() > 0)
                ? CFSWString(res.GetAt(res.GetLength() - 1))
                : CFSWString();
        // Index i + 1 is at most GetLength(), i.e. the terminator position.
        CFSWString nc = text.GetAt(i + 1);
        if (c == L"'") {
            // An apostrophe right after a vowel is rewritten as 'q'.
            if (is_vowel(pc))
                res += L"q";
            else
                res += c;
        } else if (is_char(c))
            res += c;
        else if (is_digit(c))
            res += c;
        else if (is_hyphen(c) && is_char(pc) && is_char(nc))
            res += sp; // hyphen between two letters is replaced by sp
        else if (is_symbol(c))
            res += c;
        else if (is_colon(c) && !is_colon(pc))
            res += c;
        else if (is_bbracket(c) && !is_bbracket(pc))
            res += c;
        else if (is_ebracket(c) && is_ending(nc)) {
            // Closing bracket directly before an ending mark: dropped on purpose.
            // The branch must stay so that the next rule does not keep it.
        } else if (is_ebracket(c) && !is_ebracket(pc))
            res += c;
        else if (is_comma(c) && !is_comma(pc))
            res += c;
        else if (is_fchar(c))
            res += replace_fchar(c);
        else if (is_space(c) && !is_whitespace(pc))
            res += c;
        else if (is_break(c) && !is_break(pc)) {
            res += c;
        } // dubious (original author's remark)
        else if (is_tab(c) && !is_whitespace(pc))
            res += c;
        else if (is_ending(c) && !is_ending(pc) && !is_whitespace(pc))
            res += c;
    }
    res.Trim();
    return res;
}
/**
 * First stage of the letter-to-phone conversion.
 *
 * Walks over s and builds the result string:
 *  - ']' is not copied; it upper-cases the previous character in the result
 *    and up to two further neighbours on each side for which can_palat()
 *    holds (the following ones are upper-cased in s itself before they are
 *    reached);
 *  - '<' is not copied; it upper-cases the following character in s;
 *  - '?' is not copied;
 *  - x, y, w, z, c are rewritten as ks, i, v, ts, k;
 *  - 'ü' preceded by 'ü' and followed by a vowel is rewritten as 'i';
 *  - everything else is copied unchanged.
 *
 * @param s input string; modified in place by the look-ahead steps
 * @return the converted string
 */
CFSWString chars_to_phones_part_I(CFSWString &s) {
    CFSWString res;
    for (INTPTR i = 0; i < s.GetLength(); i++) {
        CFSWString c = s.GetAt(i);
        if (c == L']') {
            // Look backward (original author was not sure this is needed).
            // Every step is guarded so that neither s nor res is indexed
            // before its first character.
            if (i >= 1 && res.GetLength() >= 1) {
                CFSWString t = CFSWString(s.GetAt(i - 1)).ToUpper();
                res.SetAt(res.GetLength() - 1, t.GetAt(0));
                if (i >= 2 && res.GetLength() >= 2) {
                    t = CFSWString(s.GetAt(i - 2)).ToUpper();
                    if (can_palat(t)) {
                        res.SetAt(res.GetLength() - 2, t.GetAt(0));
                        if (i >= 3 && res.GetLength() >= 3) {
                            t = CFSWString(s.GetAt(i - 3)).ToUpper();
                            if (can_palat(t)) {
                                // Third character back goes to the third
                                // position back; writing it to position -2
                                // would overwrite the phone set just above.
                                res.SetAt(res.GetLength() - 3, t.GetAt(0));
                            }
                        }
                    }
                }
            }
            // Look ahead. i + 1 is at most the terminator position, and
            // i + 2 is only read after s[i + 1] passed can_palat().
            CFSWString t = CFSWString(s.GetAt(i + 1)).ToUpper();
            if (can_palat(t)) {
                s.SetAt(i + 1, t.GetAt(0));
                t = CFSWString(s.GetAt(i + 2)).ToUpper();
                if (can_palat(t)) {
                    s.SetAt(i + 2, t.GetAt(0));
                }
            }
        } else if (c == L'<') {
            // Upper-case the following character, if there is one.
            if (i + 1 < s.GetLength()) {
                CFSWString t = CFSWString(s.GetAt(i + 1)).ToUpper();
                s.SetAt(i + 1, t.GetAt(0));
            }
        } else if (c == L'?') {
            // Abnormal stress. To be decided later what to do with it.
        } else if (c == L'x')
            res += L"ks";
        else if (c == L'y')
            res += L"i";
        else if (c == L'w')
            res += L"v";
        else if (c == L'z')
            res += L"ts";
        else if (c == L'c') {
            res += L"k";
        } else if (c == L'ü' && is_vowel(s.GetAt(i + 1)) && i > 0 && s.GetAt(i - 1) == L'ü')
            res += L"i";
        else
            res += c;
    }
    return res;
}
/**
 * Tells whether s qualifies as a word.
 *
 * All three conditions must hold: has_vowel(s) is true, make_char_string(s)
 * has the same length as s, and s is longer than one character.
 */
bool is_word(CFSWString s) {
    const bool vowel_found = (has_vowel(s) == true);
    if (!vowel_found) {
        return false;
    }
    if (s.GetLength() != make_char_string(s).GetLength()) {
        return false;
    }
    return s.GetLength() > 1;
}
/**
 * Tells whether a syllable counts as stressed.
 *
 * True when the syllable contains the colon marker or two adjacent
 * characters that both satisfy is_vowel().
 */
bool is_stressed_syl(CFSWString syl) {
    const INTPTR len = syl.GetLength();
    for (INTPTR k = 0; k < len; k++) {
        if (syl.GetAt(k) == colon) {
            return true;
        }
        // k + 1 reaches at most the terminator position.
        if (is_vowel(syl.GetAt(k)) && is_vowel(syl.GetAt(k + 1))) {
            return true;
        }
    }
    return false;
}
/**
 * Splits the utterance text u.s into phrases.
 *
 * Characters are accumulated into a buffer; at the break points listed below
 * the trimmed buffer, if non-empty, is handed to push_ph_res():
 *  - comma, colon or semicolon followed by a space and a letter (the
 *    punctuation mark itself is not copied);
 *  - opening bracket (additionally appends a phrase with text "sulgudes");
 *  - closing bracket;
 *  - a space followed by a word for which is_conju() holds;
 *  - a hyphen (is_bhyphen) with a space on one side and a letter nearby.
 * Whatever is left in the buffer after the loop is pushed as well.
 *
 * @param u utterance; u.s is read, phrases are added through push_ph_res()
 *          and u.phr_vector
 * @return u.phr_vector.GetSize() after processing
 */
INTPTR do_phrases(utterance_struct &u) {
    phrase_struct p;
    CFSWString res;
    p.phone_c = 0;
    p.syl_c = 0;
    p.word_c = 0;
    for (INTPTR i = 0; i < u.s.GetLength(); i++) {
        CFSWString c = u.s.GetAt(i);
        // Last buffered character, taken before any Trim() below. Left empty
        // while the buffer is empty instead of calling GetAt with index -1.
        CFSWString pc = (res.GetLength() > 0)
                ? CFSWString(res.GetAt(res.GetLength() - 1))
                : CFSWString();
        // i + 1 is at most the terminator position.
        CFSWString nc = u.s.GetAt(i + 1);
        // i + 2 would pass the terminator on the last iteration; guard it.
        CFSWString nnc = (i + 2 <= u.s.GetLength())
                ? CFSWString(u.s.GetAt(i + 2))
                : CFSWString();
        if ((is_comma(c) || is_colon(c) || is_semicolon(c)) && is_space(nc) && is_char(nnc)) {
            res.Trim();
            if (res.GetLength() > 0) {
                push_ph_res(u, p, res);
            }
        } else if (is_bbracket(c)) {
            res.Trim();
            if (res.GetLength() > 0) {
                push_ph_res(u, p, res);
            }
            p.s = L"sulgudes";
            u.phr_vector.AddItem(p);
        } else if (is_ebracket(c)) {
            res.Trim();
            if (res.GetLength() > 0) {
                push_ph_res(u, p, res);
            }
        } else if (is_space(c)) { // conjunctions that are not preceded by a comma
            CFSWString tempm = u.s.Mid(i + 1, -1);
            res.Trim();
            if (is_conju(tempm.Left(tempm.Find(sp))) && res.GetLength() > 0) {
                push_ph_res(u, p, res);
            } else
                res += c;
        } else if (is_bhyphen(c)) {
            res.Trim();
            if (res.GetLength() > 0 && ((is_char(pc) && is_space(nc)) || (is_space(nc) && is_char(nnc)) || (is_space(pc) && is_char(nc)))) {
                push_ph_res(u, p, res);
            } else
                res += c;
        } else
            res += c;
    }
    if (res.GetLength() > 0) {
        //        if (is_ending(res.GetAt(res.GetLength() - 1))) {
        //            res.Delete(res.GetLength() - 1, 1);
        //        }
        push_ph_res(u, p, res);
    }
    return u.phr_vector.GetSize();
}
/**
 * Fetches values through Rida() until one is non-empty after trimming.
 *
 * @param rida receives the trimmed value
 * @return true when a non-empty trimmed value was stored in rida, false as
 *         soon as Rida() stops returning true
 */
bool VOTAFAILIST::RidaTrimmitud(CFSWString& rida) {
    for (;;) {
        if (!(Rida(rida) == true)) {
            return false;
        }
        rida.Trim();
        if (rida.GetLength() > 0) {
            return true;
        }
    }
}
/**
 * Returns a copy of s in which every character from the set "jhvsLQ" is
 * replaced by "C"; all other characters are copied unchanged.
 */
CFSWString simplify_pattern(CFSWString s) {
    CFSWString simplified;
    INTPTR k = 0;
    while (k < s.GetLength()) {
        CFSWString sym = s.GetAt(k);
        const bool collapses = sym.FindOneOf(L"jhvsLQ") > -1;
        if (collapses) {
            simplified += L"C";
        } else {
            simplified += sym;
        }
        k++;
    }
    return simplified;
}
void CSuggestor::MultiReplace(const CFSWString &szWord, INTPTR ipStartPos) { if (ipStartPos>0) CheckAndAdd(szWord); INTPTR ipLength=szWord.GetLength(); for (; ipStartPos<ipLength; ipStartPos++){ for (INTPTR ip=0; ip<(INTPTR)(sizeof(ChangeStringsMultiple)/sizeof(__CChangeStrings)); ip++){ if (szWord.ContainsAt(ipStartPos, ChangeStringsMultiple[ip].m_lpszFrom)){ MultiReplace(szWord.Left(ipStartPos)+ChangeStringsMultiple[ip].m_lpszTo+szWord.Mid(ipStartPos+FSStrLen(ChangeStringsMultiple[ip].m_lpszFrom)), ipStartPos+FSStrLen(ChangeStringsMultiple[ip].m_lpszTo)); } } } }
/**
 * Inserts the delimiter d into s at positions chosen from the
 * vowel/consonant context of each character.
 *
 * d is placed in front of the current character when
 *  - it is a consonant between two vowels, or
 *  - it is a vowel between two vowels and its lower-case form equals the
 *    next character, or
 *  - it is a consonant preceded by a consonant and followed by a vowel, and
 *    the result built so far already contains a vowel (marked questionable
 *    by the original author).
 *
 * @param s input string
 * @return s with delimiters inserted
 */
CFSWString syllabify2(CFSWString s) {
    CFSWString res;
    for (INTPTR i = 0; i < s.GetLength(); i++) {
        CFSWString c = s.GetAt(i);
        // Previous character; empty for the first character instead of
        // calling GetAt with index -1.
        CFSWString pc = (i > 0) ? CFSWString(s.GetAt(i - 1)) : CFSWString();
        // i + 1 is at most the terminator position.
        CFSWString nc = s.GetAt(i + 1);
        if (is_consonant(c) && is_vowel(pc) && is_vowel(nc))
            res += d;
        if (is_vowel(c) && is_vowel(pc) && is_vowel(nc) && c.ToLower() == s.GetAt(i + 1))
            res += d;
        if (is_consonant(c) && is_consonant(pc) && is_vowel(nc) && has_vowel(res)) // questionable
            res += d;
        res += c;
    }
    return res;
}
void PTWSplitBuffer(const CFSWString &szBuffer, CPTWordArray &Words) { Words.Cleanup(); INTPTR ipStartPos=0; INTPTR ipPos; for (ipPos=0; ipPos<szBuffer.GetLength(); ipPos++) { if (FSIsSpace(szBuffer[ipPos])) { if (ipPos>ipStartPos) { Words.AddItem(CPTWord(szBuffer.Mid(ipStartPos, ipPos-ipStartPos), ipStartPos)); } ipStartPos=ipPos+1; } } if (ipPos>ipStartPos) { Words.AddItem(CPTWord(szBuffer.Mid(ipStartPos, ipPos-ipStartPos), ipStartPos)); } }
/**
 * Rewrites apostrophe markers in s.
 *
 * A fixed list of whole-string forms is mapped directly. Any other input is
 * scanned from the end towards the start: an apostrophe is dropped and
 * switches marking on; a character for which must() holds is copied and
 * also switches marking on; while marking is on, characters accepted by
 * can_palat_vr() are copied upper-cased, and the first character that is not
 * accepted is copied unchanged and switches marking off.
 *
 * @param s input string
 * @return the rewritten string
 */
CFSWString palat_vru (CFSWString s) {
    CFSWString res;

    // Whole-string special cases: { input, output }.
    static const wchar_t *const fixed_forms[][2] = {
        { L"är'", L"ärq" },
        { L"ar'", L"arq" },
        { L"jäl'", L"jälq" },
        { L"jal'", L"jalq" },
        { L"kül'", L"külq" },
        { L"pan'", L"panq" },
        { L"tul'", L"tulq" },
        { L"ol'", L"olq" },
    };
    const INTPTR fixed_count = (INTPTR)(sizeof(fixed_forms) / sizeof(fixed_forms[0]));
    for (INTPTR k = 0; k < fixed_count; k++) {
        if (s == fixed_forms[k][0]) {
            res = fixed_forms[k][1];
            return res;
        }
    }

    bool marking = false;
    for (INTPTR k = s.GetLength() - 1; k >= 0; k--) {
        CFSWString ch = s.GetAt(k);
        if (ch == L"'") {
            marking = true;
            continue;
        }
        if (must(ch)) {
            res = ch + res;
            marking = true;
            continue;
        }
        if (marking) {
            if (can_palat_vr(ch)) {
                ch = ch.ToUpper();
            } else {
                marking = false;
            }
        }
        res = ch + res;
    }
    return res;
}
/**
 * Splits text into utterances.
 *
 * A one-character input is stored as is. Otherwise the text is cut
 *  - at an ending mark that is followed by whitespace and an upper-case
 *    character (the mark itself is not copied), and
 *  - at every tab (only when the buffer collected so far is non-empty).
 * The remainder is trimmed and stripped of trailing ending marks before it
 * is stored. Finally one trailing ending mark is removed from every stored
 * item.
 *
 * @param s input text
 * @return array of utterance strings
 */
CFSArray<CFSWString> do_utterances(CFSWString s) {
    CFSWString res = empty_str;
    CFSArray<CFSWString> res_array;

    if (s.GetLength() == 1) {
        res_array.AddItem(s);
    } else {
        for (INTPTR i = 0; i < s.GetLength(); i++) {
            CFSWString c = s.GetAt(i);
            // i + 1 is at most the terminator position.
            CFSWString nc = s.GetAt(i + 1);
            // i + 2 would pass the terminator on the last iteration; guard it.
            CFSWString nnc = (i + 2 <= s.GetLength())
                    ? CFSWString(s.GetAt(i + 2))
                    : CFSWString();

            if (is_ending(c) && is_whitespace(nc) && is_upper(nnc)) {
                res.Trim();
                res_array.AddItem(res);
                res = empty_str;
            } else if (is_tab(c)) {
                if (res.GetLength() > 0) {
                    res.Trim();
                    res_array.AddItem(res);
                    res = empty_str;
                }
            } else
                res += c;
        }
    }

    res.Trim();

    if (res.GetLength() > 0) {
        // Stop once the string is empty so GetAt/Delete are never called
        // with index -1 (input made only of ending marks).
        while (res.GetLength() > 0 && is_ending(res.GetAt(res.GetLength() - 1))) {
            res.Delete(res.GetLength() - 1, 1);
        }
        res_array.AddItem(res);
    }

    for (INTPTR i = 0; i < res_array.GetSize(); i++) {
        // Stored items may be empty; skip those instead of indexing at -1.
        if (res_array[i].GetLength() > 0 && is_ending(res_array[i].GetAt(res_array[i].GetLength() - 1)))
            res_array[i].Delete(res_array[i].GetLength() - 1, 1);
    }
    return res_array;
}
/**
 * Debug dump of an utterance.
 *
 * For every word the root is written to stderr, followed by each syllable
 * and each phone (padded with sp to six characters). After every phone the
 * counters and position fields of the utterance, phrase, word, syllable and
 * phone plus the syllable's stress value are printed with wprintf().
 */
void print_u(utterance_struct u) {
    for (INTPTR phr_i = 0; phr_i < u.phr_vector.GetSize(); phr_i++) {
        phrase_struct &phr = u.phr_vector[phr_i];

        for (INTPTR word_i = 0; word_i < phr.word_vector.GetSize(); word_i++) {
            fprintf(stderr, "%s\n\n", ccstr(phr.word_vector[word_i].mi.m_szRoot));

            for (INTPTR syl_i = 0; syl_i < phr.word_vector[word_i].syl_vector.GetSize(); syl_i++) {
                fprintf(stderr, "\t%s\n", ccstr(phr.word_vector[word_i].syl_vector[syl_i].syl));

                for (INTPTR phone_i = 0;
                        phone_i < phr.word_vector[word_i].syl_vector[syl_i].phone_vector.GetSize();
                        phone_i++) {
                    phone_struct p = phr.word_vector[word_i].syl_vector[syl_i].phone_vector[phone_i];

                    // Pad the phone text to a fixed width of six characters.
                    CFSWString w = p.phone;
                    while (w.GetLength() < 6) {
                        w += sp;
                    }
                    fprintf(stderr, "\t\t%s", ccstr(w));

                    // Utterance-level counters.
                    wprintf(L"%i %i %i %i\t", u.phra_c, u.word_c, u.syl_c, u.phone_c);
                    // Phrase-level counters and position.
                    wprintf(L"%i %i %i [%i]\t", phr.word_c, phr.syl_c, phr.phone_c, phr.utt_p);
                    // Word-level counters and positions.
                    wprintf(L"%i %i [%i %i]\t",
                            phr.word_vector[word_i].syl_c,
                            phr.word_vector[word_i].phone_c,
                            phr.word_vector[word_i].utt_p,
                            phr.word_vector[word_i].phr_p
                            );
                    // Syllable-level counter and positions.
                    wprintf(L"%i [%i %i %i]\t",
                            phr.word_vector[word_i].syl_vector[syl_i].phone_c,
                            phr.word_vector[word_i].syl_vector[syl_i].utt_p,
                            phr.word_vector[word_i].syl_vector[syl_i].phr_p,
                            phr.word_vector[word_i].syl_vector[syl_i].word_p
                            );
                    // Phone positions.
                    wprintf(L"[%i %i %i %i]\t", p.utt_p, p.phr_p, p.word_p, p.syl_p);
                    wprintf(L"{ %i }", phr.word_vector[word_i].syl_vector[syl_i].stress);
                    wprintf(L"\n");
                }
            }
        }
    }
    wprintf(L"\n");
}
/**
 * Copies s into a std::string one character at a time.
 *
 * Each character is converted to char individually, so no multi-byte
 * encoding is performed.
 * NOTE(review): characters that do not fit into a char are presumably
 * truncated by the conversion - confirm that callers only pass such text
 * where this is acceptable.
 */
std::string to_stdstring(CFSWString s) {
    std::string narrow;
    INTPTR k = 0;
    while (k < s.GetLength()) {
        narrow.push_back(static_cast<char>(s.GetAt(k)));
        k++;
    }
    return narrow;
}
/**
 * First stage of the letter-to-phone conversion.
 *
 * Walks over s and builds the result string:
 *  - ']' is not copied; it upper-cases the previous character in the result
 *    and up to two further neighbours on each side for which can_palat()
 *    holds (the following ones are upper-cased in s itself before they are
 *    reached);
 *  - '<' is not copied; it upper-cases the following character in s;
 *  - '?' is not copied;
 *  - x, y, w, z, c are rewritten as ks, i, v, ts, k;
 *  - 'ü' preceded by 'ü' and followed by a vowel is rewritten as 'i';
 *  - 'q' is rewritten as 'k', and a vowel directly after it is replaced by
 *    'v' in s;
 *  - everything else is copied unchanged.
 *
 * @param s input string; modified in place by the look-ahead steps
 * @return the converted string
 */
CFSWString chars_to_phones_part_I(CFSWString &s) {
    /*  müüa -> müia here?
        Because the rules for shifting the quantity mark are built so that
        phonemes are used in some places and not in others, the
        letter->phoneme conversion has to be done in two parts.
        Palatalisation is based on the idea that a palatalised phoneme cannot
        have palatalisable but non-palatalised phonemes around it :D
     */
    CFSWString res;
    for (INTPTR i = 0; i < s.GetLength(); i++) {
        CFSWString c = s.GetAt(i);
        if (c == L']') {
            // Look backward (original author was not sure this is needed).
            // Every step is guarded so that neither s nor res is indexed
            // before its first character.
            if (i >= 1 && res.GetLength() >= 1) {
                CFSWString t = CFSWString(s.GetAt(i - 1)).ToUpper();
                res.SetAt(res.GetLength() - 1, t.GetAt(0));
                if (i >= 2 && res.GetLength() >= 2) {
                    t = CFSWString(s.GetAt(i - 2)).ToUpper();
                    if (can_palat(t)) {
                        res.SetAt(res.GetLength() - 2, t.GetAt(0));
                        if (i >= 3 && res.GetLength() >= 3) {
                            t = CFSWString(s.GetAt(i - 3)).ToUpper();
                            if (can_palat(t)) {
                                // Third character back goes to the third
                                // position back; writing it to position -2
                                // would overwrite the phone set just above.
                                res.SetAt(res.GetLength() - 3, t.GetAt(0));
                            }
                        }
                    }
                }
            }
            // Look ahead. i + 1 is at most the terminator position, and
            // i + 2 is only read after s[i + 1] passed can_palat().
            CFSWString t = CFSWString(s.GetAt(i + 1)).ToUpper();
            if (can_palat(t)) {
                s.SetAt(i + 1, t.GetAt(0));
                t = CFSWString(s.GetAt(i + 2)).ToUpper();
                if (can_palat(t)) {
                    s.SetAt(i + 2, t.GetAt(0));
                }
            }
        } else if (c == L'<') {
            // Upper-case the following character, if there is one.
            if (i + 1 < s.GetLength()) {
                CFSWString t = CFSWString(s.GetAt(i + 1)).ToUpper();
                s.SetAt(i + 1, t.GetAt(0));
            }
        } else if (c == L'?') {
            // Abnormal stress. To be decided later what to do with it.
        } else if (c == L'x')
            res += L"ks";
        else if (c == L'y')
            res += L"i";
        else if (c == L'w')
            res += L"v";
        else if (c == L'z')
            res += L"ts";
        else if (c == L'c') {
            res += L"k";
        } else if (c == L'ü' && is_vowel(s.GetAt(i + 1)) && i > 0 && s.GetAt(i - 1) == L'ü')
            res += L"i";
        else if (c == L'q') {
            if (is_vowel(s.GetAt(i + 1))) {
                res += L"k";
                s.SetAt(i + 1, L'v');
            } else
                res += L"k";
        } else
            res += c;
    }
    return res;
}
/**
 * Inserts the colon marker into a word according to pattern lookups.
 *
 * The input is consumed from the front one character at a time and copied
 * to the result. A character accepted by is_uvowel() is lower-cased, starts
 * a new pattern code and records its index in the result as pos. Further
 * characters extend the code through shift_pattern(). When the delimiter d
 * is met, or the input ends while a code is open, pattern_lookup() is tried
 * on several variants of the code; a result greater than -1 is used as an
 * offset from pos at which colon is inserted.
 *
 * @param s input word (taken by value, consumed locally)
 * @return the word with colon markers inserted
 */
CFSWString the_shift(CFSWString s) {
    /*
     There is some chance that the simplification has to be done in two
     stages: without LQ and with LQ (see shift_pattern). All of this comes
     from not having fully understood the quantity-shift rules. There is
     Mihkla's version and the official version. Thanks to Mihkla, who hands a
     greenhorn a fishing rod instead of a fish - that teaches one to swim.
     I wrestled with the rod for a long time.
     */

    CFSWString res;
    CFSWString code;
    INTPTR pos;     // index in res of the vowel that opened the current code
    INTPTR i = 0;   // running index; also bumped after each colon insertion in the loop
    INTPTR x;

    while (s.GetLength() > 0) {
        // Take the first remaining character off the input.
        CFSWString c = s.GetAt(0);
        s.Delete(0, 1);
        if (is_uvowel(c)) {
            c = c.ToLower();
            code += shift_pattern(c);
            res += c;
            pos = i;
        } else if (c == d && code.GetLength() > 0) {
            res += c;
            code += c;
            // Extended code: current code plus the pattern of the next input character.
            CFSWString t_code = code;
            t_code += shift_pattern(s.GetAt(0));
            x = pattern_lookup(t_code); // original code, across the syllable boundary
            if (x > -1) {
                x += pos;
                if (x > res.GetLength()) { // if it jumps into the next syllable
                    // The target lies in the not yet consumed input: insert there.
                    x = x - res.GetLength();
                    s.Insert(x, colon);
                } else
                    res.Insert(x, colon);
                i++;
            } else {
                t_code = simplify_pattern(t_code);
                x = pattern_lookup(t_code); // simplified code, across the syllable boundary
                if (x > -1) {
                    x += pos;
                    if (x > res.GetLength()) { // if it jumps into the next syllable
                        x = x - res.GetLength();
                        s.Insert(x, colon);
                    } else
                        res.Insert(x, colon);
                    i++;
                } else {
                    x = pattern_lookup(code); // original code
                    if (x > -1) {
                        x += pos;
                        res.Insert(x, colon);
                        i++;
                    } else {
                        code = simplify_pattern(code);
                        x = pattern_lookup(code); // simplified code
                        if (x > -1) {
                            x += pos;
                            res.Insert(x, colon);
                            i++;
                        }
                    }
                }
            }
            // The code is closed at the delimiter whether or not a pattern matched.
            code = empty_str;
        } else {
            res += c;
            // Extend the code only while one is open.
            if (code.GetLength() > 0) {
                code += shift_pattern(c);
            }
        }
        i++;
    } // while

    // at the end of the word
    if (code.GetLength() > 0) {
        code += L"#";
        // odd special case, present only because of the word "lonksu"
        if ((code.Left(3) == L"VLQ") && ((code.GetAt(3) == L's') || (code.GetAt(3) == L'h') || (code.GetAt(3) == L'v') || (code.GetAt(3) == L'j'))) {
            code = L"VLQC#";
        }
        // Note: this x is a new local that shadows the x declared at the top.
        INTPTR x = pattern_lookup(code);
        if (x > -1) {
            x += pos;
            res.Insert(x, colon);
        } else {
            code = simplify_pattern(code);
            x = pattern_lookup(code);
            if (x > -1) {
                x += pos;
                res.Insert(x, colon);
            }
        }
        code = empty_str;
    }
    return res;
}