void TUStr::GetWordUStrV(TUStrV& WordUStrV){
  // clear word vector
  WordUStrV.Clr();
  // create boundaries
  TBoolV WordBoundPV; GetWordBoundPV(WordBoundPV);
  IAssert(Len()==WordBoundPV.Len()-1);
  IAssert((WordBoundPV.Len()>0)&&(WordBoundPV.Last()));
  // traverse characters and bounds
  int UniChs=Len(); TIntV WordUniChV;
  for (int UniChN=0; UniChN<=UniChs; UniChN++){
    if ((UniChN==UniChs)||(WordBoundPV[UniChN+1])){ // finish or word-boundary
      if (UniChN<UniChs){ // if not finish
        // if last-word-char or single-alphabetic-char
        if ((!WordUniChV.Empty())||(IsAlphabetic(UniChV[UniChN]))){
          WordUniChV.Add(UniChV[UniChN]); // add char
        }
      }
      if (!WordUniChV.Empty()){ // add current word to vector
        TUStr WordUStr(WordUniChV); // construct word from char-vector
        WordUStrV.Add(WordUStr); // add word to word-vector
        WordUniChV.Clr(false); // clear char-vector
      }
    } else {
      // add character to char-vector
      WordUniChV.Add(UniChV[UniChN]);
    }
  }
}
Ejemplo n.º 2
0
void TUStr::GetWordBoundPV(TIntV& WordBoundPosV){
  TBoolV WordBoundPV;
  TUnicodeDef::GetDef()->FindWordBoundaries(UniChV, WordBoundPV);
  int n = WordBoundPV.Len();
  WordBoundPosV.Reserve(n);
  for(int i = 0; i <= n; i++){
	  if(WordBoundPV[i]){
		WordBoundPosV.Add(i);
	  }
  }
}
Ejemplo n.º 3
0
void TUStr::GetWordUStrLst(TLst<TUStr>& WordUStrV, TLst<TBool> &TerminalV){ //TBoolV& TerminalV){
  
// clear word vector
  WordUStrV.Clr();
  // create boundaries
  TBoolV WordBoundPV; GetWordBoundPV(WordBoundPV);
  //TerminalV.Reserve(WordBoundPV.Len());
  IAssert(Len()==WordBoundPV.Len()-1);
  IAssert((WordBoundPV.Len()>0)&&(WordBoundPV.Last()));
  // traverse characters and bounds
  int UniChs=Len(); TIntV WordUniChV;
  bool terminal = false;

  for (int UniChN=0; UniChN<=UniChs; UniChN++){
    if ((UniChN==UniChs)||(WordBoundPV[UniChN+1])){ // finish or word-boundary
      if (UniChN<UniChs){ // if not finish
        // if last-word-char or single-alphabetic-char
        if ((!WordUniChV.Empty())||(IsAlphabetic(UniChV[UniChN]))){
          WordUniChV.Add(UniChV[UniChN]); // add char
        }
		else{
			if(WordUStrV.Len() > 0){
				if(IsTerminal(UniChV[UniChN])) terminal = true;
			}
		}
      }
      if (!WordUniChV.Empty()){ // add current word to vector
        TUStr WordUStr(WordUniChV); // construct word from char-vector
		WordUStrV.AddBack(WordUStr); // add word to word-vector
        WordUniChV.Clr(false); // clear char-vector
		if(terminal){ TerminalV.AddBack(true);}
		else{ TerminalV.AddBack(false);}
		terminal = false;
      }
    } else {
      // add character to char-vector
      WordUniChV.Add(UniChV[UniChN]);
    }
  }
}