/**
 * Cleans up raw input text before it is processed further.
 *
 * The text is trimmed, "\n\n" is replaced by "\n", several apostrophe
 * look-alikes are mapped to the ASCII apostrophe, and then the text is
 * copied character by character into the result. Each character is either
 * kept, replaced or dropped depending on its class and on its neighbours:
 * pc is the last character already written to the result, nc is the next
 * character of the input.
 *
 * @param text  input text (taken by value; the local copy is modified)
 * @return the filtered text, trimmed
 */
CFSWString DealWithText(CFSWString text) {
    /* Attempt to throw out all the junk. */
    CFSWString res;

    text.Trim();
    // NOTE(review): the trailing 1 is presumably a "replace all" flag --
    // confirm against CFSWString::Replace.
    text.Replace(L"\n\n", L"\n", 1);
    // Map apostrophe look-alikes to the plain apostrophe.
    text.Replace(L"‘", L"'", 1);
    text.Replace(L"`", L"'", 1);
    text.Replace(L"´", L"'", 1);
    text.Replace(L"’", L"'", 1);

    // text is not modified inside the loop, so its length is fixed.
    const INTPTR text_len = text.GetLength();
    for (INTPTR i = 0; i < text_len; i++) {
        CFSWString c = text.GetAt(i);

        // Last character emitted so far. While res is still empty there is
        // no such character; use NUL instead of reading index -1.
        const INTPTR res_len = res.GetLength();
        CFSWString pc = (res_len > 0) ? res.GetAt(res_len - 1) : L'\0';

        // On the last iteration this reads the position right after the
        // final character (presumably the terminating NUL -- confirm).
        CFSWString nc = text.GetAt(i + 1);

        if (c == L"'") {
            // An apostrophe directly after a vowel is rewritten as "q".
            if (is_vowel(pc))
                res += L"q";
            else
                res += c;
        }
        else if (is_char(c) || is_digit(c))
            res += c;
        else if (is_hyphen(c) && is_char(pc) && is_char(nc))
            res += sp; // hyphen between two letters becomes sp
        else if (is_symbol(c))
            res += c;
        else if (is_colon(c) && !is_colon(pc))
            res += c; // repeated colons collapse into one
        else if (is_bbracket(c) && !is_bbracket(pc))
            res += c;
        else if (is_ebracket(c) && is_ending(nc)) {
            // Closing bracket right before an ending mark: emit nothing.
        }
        else if (is_ebracket(c) && !is_ebracket(pc))
            res += c;
        else if (is_comma(c) && !is_comma(pc))
            res += c;
        else if (is_fchar(c))
            res += replace_fchar(c);
        else if (is_space(c) && !is_whitespace(pc))
            res += c;
        else if (is_break(c) && !is_break(pc))
            res += c; // original author's note: dubious
        else if (is_tab(c) && !is_whitespace(pc))
            res += c;
        else if (is_ending(c) && !is_ending(pc) && !is_whitespace(pc))
            res += c;
        // Anything that matched no rule above is dropped.
    }

    res.Trim();
    return res;
}
/**
 * Builds the phone list of one syllable.
 *
 * Walks over ss.syl character by character. Ordinary characters become one
 * phone_struct each (after the letter mapping below); a colon character does
 * not create a phone but appends doq to the previously created phone.
 * The finished list is stored in ss.phone_vector.
 *
 * @param ss  syllable to process; ss.syl is read, ss.phone_vector is
 *            overwritten
 */
void do_phones(syl_struct &ss) {
    CFSArray<phone_struct> pv;

    phone_struct p;
    p.utt_p = 0;
    p.phr_p = 0;
    p.word_p = 0;
    p.syl_p = 0;

    // 1-based position of the next phone inside this syllable.
    INTPTR phone_syl_p = 1;

    for (INTPTR i = 0; i < ss.syl.GetLength(); i++) {
        CFSWString c = ss.syl.GetAt(i);

        // Map letters with diacritics to the symbols used for phones.
        if ((c == L'š') || (c == L'ž'))
            c = L"sh";
        else if (c == L'õ')
            c = L"q";
        else if (c == L'ä')
            c = L"x";
        else if (c == L'ö')
            c = L"c";
        else if (c == L'ü')
            c = L"y";

        if (is_colon(c)) {
            // 1. The colon modifies the previous phone, so one must exist.
            //    This protects against shift errors (see "piirkonda").
            //    The check is made on the list itself rather than on i,
            //    because i > 0 does not guarantee that a phone was added
            //    (e.g. when the syllable starts with several colons).
            // 2-3. In the third quantity degree v and j are so rare in the
            //    speech database that an r-like sound is heard instead in
            //    the output. Anyone with a better database should comment
            //    out conditions 2 and 3.
            const INTPTR last = pv.GetSize() - 1;
            if ((last >= 0)
                && (pv[last].phone != L"j")
                && (pv[last].phone != L"v"))
                pv[last].phone += doq;
        } else {
            p.phone = c;
            p.syl_p = phone_syl_p++;
            pv.AddItem(p);
        }
    }

    ss.phone_vector = pv;
}
/**
 * Builds the phone list of one syllable.
 *
 * Walks over ss.syl character by character. Ordinary characters become one
 * phone_struct each (after the letter mapping below); a colon character does
 * not create a phone but appends doq to the previously created phone.
 * The finished list is stored in ss.phone_vector.
 *
 * @param ss  syllable to process; ss.syl is read, ss.phone_vector is
 *            overwritten
 */
void do_phones(syl_struct &ss) {
    CFSArray<phone_struct> pv;

    phone_struct p;
    p.utt_p = 0;
    p.phr_p = 0;
    p.word_p = 0;
    p.syl_p = 0;

    // 1-based position of the next phone inside this syllable.
    INTPTR phone_syl_p = 1;

    for (INTPTR i = 0; i < ss.syl.GetLength(); i++) {
        CFSWString c = ss.syl.GetAt(i);

        // Map letters to the symbols used for phones. The order matters:
        // a literal 'q' must become "kw" before 'õ' is rewritten to "q".
        if ((c == L'š') || (c == L'ž'))
            c = L"sh";
        else if (c == L'q')
            c = L"kw";
        else if (c == L'õ')
            c = L"q";
        else if (c == L'ä')
            c = L"x";
        else if (c == L'ö')
            c = L"c";
        else if (c == L'ü')
            c = L"y";

        if (is_colon(c)) {
            // The colon modifies the previous phone, so one must exist.
            // This protects against shift errors (see "piirkonda"). The
            // check is made on the list itself rather than on i, because
            // i > 0 does not guarantee that a phone was added (e.g. when
            // the syllable starts with several colons).
            const INTPTR last = pv.GetSize() - 1;
            if (last >= 0)
                pv[last].phone += doq;
        } else {
            p.phone = c;
            p.syl_p = phone_syl_p++;
            pv.AddItem(p);
        }
    }

    ss.phone_vector = pv;
}
/**
 * Splits the text of an utterance (u.s) into phrases.
 *
 * Characters are accumulated in a buffer; at each phrase boundary the
 * trimmed, non-empty buffer is handed to push_ph_res(). Boundaries are:
 *   - a comma, colon or semicolon followed by a space and a letter,
 *   - an opening bracket (which additionally adds a phrase with the text
 *     "sulgudes" to u.phr_vector) and a closing bracket,
 *   - a space that is followed by a word accepted by is_conju(),
 *   - a character accepted by is_bhyphen() in one of the space/letter
 *     contexts checked below.
 * Whatever is left in the buffer after the last character is pushed too.
 *
 * NOTE(review): the code relies on push_ph_res() resetting the buffer it is
 * given -- confirm against its definition.
 *
 * @param u  utterance; u.s is read, u.phr_vector receives the phrases
 * @return number of items in u.phr_vector
 */
INTPTR do_phrases(utterance_struct &u) {
    phrase_struct p;
    CFSWString res;

    p.phone_c = 0;
    p.syl_c = 0;
    p.word_c = 0;

    for (INTPTR i = 0; i < u.s.GetLength(); i++) {
        const INTPTR text_len = u.s.GetLength();
        const INTPTR res_len = res.GetLength();

        CFSWString c = u.s.GetAt(i);
        // Last buffered character; NUL while the buffer is empty, instead
        // of reading index -1.
        CFSWString pc = (res_len > 0) ? res.GetAt(res_len - 1) : L'\0';
        // i + 1 is at most the position right after the last character
        // (presumably the terminating NUL -- confirm).
        CFSWString nc = u.s.GetAt(i + 1);
        // i + 2 can lie beyond that position on the last iteration; use NUL
        // there instead of reading past the end.
        CFSWString nnc = (i + 2 <= text_len) ? u.s.GetAt(i + 2) : L'\0';

        if ((is_comma(c) || is_colon(c) || is_semicolon(c))
            && is_space(nc) && is_char(nnc)) {
            res.Trim();
            if (res.GetLength() > 0) {
                push_ph_res(u, p, res);
            }
        }
        else if (is_bbracket(c)) {
            res.Trim();
            if (res.GetLength() > 0) {
                push_ph_res(u, p, res);
            }
            // Insert an extra phrase for the opening bracket.
            p.s = L"sulgudes";
            u.phr_vector.AddItem(p);
        }
        else if (is_ebracket(c)) {
            res.Trim();
            if (res.GetLength() > 0) {
                push_ph_res(u, p, res);
            }
        }
        else if (is_space(c)) {
            // Conjunctions that are not preceded by a comma.
            CFSWString tempm = u.s.Mid(i + 1, -1);
            res.Trim();
            // tempm.Left(...) is the text up to the next sp, i.e. the
            // following word.
            if (is_conju(tempm.Left(tempm.Find(sp))) && res.GetLength() > 0) {
                push_ph_res(u, p, res);
            }
            else
                res += c;
        }
        else if (is_bhyphen(c)) {
            res.Trim();
            if (res.GetLength() > 0
                && ((is_char(pc) && is_space(nc))
                    || (is_space(nc) && is_char(nnc))
                    || (is_space(pc) && is_char(nc)))) {
                push_ph_res(u, p, res);
            }
            else
                res += c;
        }
        else
            res += c;
    }

    if (res.GetLength() > 0) {
        // Disabled in the original: stripping a trailing ending mark.
        // if (is_ending(res.GetAt(res.GetLength() - 1))) {
        //     res.Delete(res.GetLength() - 1, 1);
        // }
        push_ph_res(u, p, res);
    }

    return u.phr_vector.GetSize();
}
// time := hour colon minute [ colon second ] . void time() { hour(); colon(); minute(); if (is_colon(scan.get())) { colon(); second(); } }