/// Splits this string into words and stores them, in order, in WordUStrV.
///
/// Characters are accumulated until a word boundary (or the end of the string)
/// is reached. A single-character token is kept only if it is alphabetic; a
/// token of two or more characters is kept regardless of its content.
/// @param WordUStrV  output vector; cleared first, then filled with the words.
void TUStr::GetWordUStrV(TUStrV& WordUStrV){
  // start from an empty result
  WordUStrV.Clr();
  // boundary flags; asserted below to hold one more entry than there are characters
  TBoolV BoundPV;
  GetWordBoundPV(BoundPV);
  IAssert(Len()==BoundPV.Len()-1);
  IAssert((BoundPV.Len()>0)&&(BoundPV.Last()));
  const int ChCount=Len();
  TIntV CurWordChV; // characters of the word currently being assembled
  // the extra iteration at ChN==ChCount flushes whatever is still pending
  for (int ChN=0; ChN<=ChCount; ChN++){
    const bool AtEnd=(ChN==ChCount);
    if (!AtEnd && !BoundPV[ChN+1]){
      // no boundary after this character: it continues the current word
      CurWordChV.Add(UniChV[ChN]);
      continue;
    }
    // here: end of string, or a word boundary follows character ChN
    if (!AtEnd){
      // keep the character if it closes a word already in progress,
      // or if it forms a one-character alphabetic word by itself
      const bool KeepCh=(!CurWordChV.Empty())||(IsAlphabetic(UniChV[ChN]));
      if (KeepCh){
        CurWordChV.Add(UniChV[ChN]);
      }
    }
    if (CurWordChV.Empty()){
      continue; // nothing collected (e.g. a lone non-alphabetic character)
    }
    // emit the finished word and reset the buffer for the next one
    TUStr WordUStr(CurWordChV);
    WordUStrV.Add(WordUStr);
    CurWordChV.Clr(false);
  }
}
/// Collects the positions at which the boundary flags computed by
/// TUnicodeDef::FindWordBoundaries() for this string are set.
/// @param WordBoundPosV  receives the index of every set flag, in increasing
///   order. Indices are added with Add(); this function does not clear the
///   vector first, so any existing content is presumably kept — TODO confirm
///   whether callers rely on that.
void TUStr::GetWordBoundPV(TIntV& WordBoundPosV){
  // compute one boundary flag per candidate position
  TBoolV WordBoundPV;
  TUnicodeDef::GetDef()->FindWordBoundaries(UniChV, WordBoundPV);
  const int WordBounds = WordBoundPV.Len();
  // at most WordBounds positions can be reported
  WordBoundPosV.Reserve(WordBounds);
  // valid indices are 0..WordBounds-1; iterating up to and including
  // WordBounds would index one element past the end of the flag vector
  for (int BoundN = 0; BoundN < WordBounds; BoundN++){
    if (WordBoundPV[BoundN]){
      WordBoundPosV.Add(BoundN);
    }
  }
}
void TUStr::GetWordUStrLst(TLst<TUStr>& WordUStrV, TLst<TBool> &TerminalV){ //TBoolV& TerminalV){ // clear word vector WordUStrV.Clr(); // create boundaries TBoolV WordBoundPV; GetWordBoundPV(WordBoundPV); //TerminalV.Reserve(WordBoundPV.Len()); IAssert(Len()==WordBoundPV.Len()-1); IAssert((WordBoundPV.Len()>0)&&(WordBoundPV.Last())); // traverse characters and bounds int UniChs=Len(); TIntV WordUniChV; bool terminal = false; for (int UniChN=0; UniChN<=UniChs; UniChN++){ if ((UniChN==UniChs)||(WordBoundPV[UniChN+1])){ // finish or word-boundary if (UniChN<UniChs){ // if not finish // if last-word-char or single-alphabetic-char if ((!WordUniChV.Empty())||(IsAlphabetic(UniChV[UniChN]))){ WordUniChV.Add(UniChV[UniChN]); // add char } else{ if(WordUStrV.Len() > 0){ if(IsTerminal(UniChV[UniChN])) terminal = true; } } } if (!WordUniChV.Empty()){ // add current word to vector TUStr WordUStr(WordUniChV); // construct word from char-vector WordUStrV.AddBack(WordUStr); // add word to word-vector WordUniChV.Clr(false); // clear char-vector if(terminal){ TerminalV.AddBack(true);} else{ TerminalV.AddBack(false);} terminal = false; } } else { // add character to char-vector WordUniChV.Add(UniChV[UniChN]); } } }