// Fills WordUStrV with the words of this string (previous contents are dropped).
// Segments are delimited by the flags returned by GetWordBoundPV: flag N+1 set
// means a segment ends after character N. A segment of two or more characters
// is always emitted; a one-character segment is emitted only when that
// character satisfies IsAlphabetic.
void TUStr::GetWordUStrV(TUStrV& WordUStrV){
  WordUStrV.Clr();
  // boundary flags: one more entry than characters, the last one always set
  TBoolV WordBoundPV; GetWordBoundPV(WordBoundPV);
  IAssert(Len()==WordBoundPV.Len()-1);
  IAssert((WordBoundPV.Len()>0)&&(WordBoundPV.Last()));
  const int UniChs=Len();
  TIntV CurWordChV; // characters of the segment collected so far
  for (int ChN=0; ChN<=UniChs; ChN++){
    const bool AtEnd=(ChN==UniChs);
    // interior character of a segment: just collect it
    if ((!AtEnd)&&(!WordBoundPV[ChN+1])){
      CurWordChV.Add(UniChV[ChN]);
      continue;
    }
    // segment ends here (or the input is exhausted)
    if (!AtEnd){
      // closing character of a longer segment, or a lone alphabetic character
      const bool KeepCh=(!CurWordChV.Empty())||(IsAlphabetic(UniChV[ChN]));
      if (KeepCh){CurWordChV.Add(UniChV[ChN]);}
    }
    if (CurWordChV.Empty()){continue;} // nothing collected, nothing to emit
    TUStr WordUStr(CurWordChV);
    WordUStrV.Add(WordUStr);
    CurWordChV.Clr(false); // start the next segment empty
  }
}
Exemplo n.º 2
0
// Returns true when either IsNumeric() or IsAlphabetic() holds, false otherwise.
// IsAlphabetic() is only consulted when IsNumeric() is false.
bool ScannerA::IsAlphanumeric()
{
	return IsNumeric() || IsAlphabetic();
}
// Builds a bracketed, comma-separated list of the character classes that
// UniCh belongs to, e.g. "[Case,UpperCase,Alphabetic]". When none of the
// checked classes applies the result is "[]".
TStr TUStr::GetChTypeStr(const int& UniCh){
  TChA TypeListChA;
  TypeListChA+='[';
  // each matching class appends its label followed by a comma
  if (IsCase(UniCh)){
    TypeListChA+="Case,";
  }
  if (IsUpperCase(UniCh)){
    TypeListChA+="UpperCase,";
  }
  if (IsLowerCase(UniCh)){
    TypeListChA+="LowerCase,";
  }
  if (IsAlphabetic(UniCh)){
    TypeListChA+="Alphabetic,";
  }
  if (IsMath(UniCh)){
    TypeListChA+="Math,";
  }
  const bool NoLabels=(TypeListChA.LastCh()=='[');
  if (NoLabels){
    // nothing was appended: just close the bracket
    TypeListChA+=']';
  } else {
    // overwrite the trailing comma with the closing bracket
    TypeListChA[TypeListChA.Len()-1]=']';
  }
  return TypeListChA;
}
Exemplo n.º 4
0
// Writes into expr the expression text that refers to a control.
//
// expr           - output; overwritten with the resulting expression.
// control_name   - name of the control, appended after the optional device prefix.
// control_device - device the control belongs to (may be NULL).
// default_device - device assumed when no prefix is given (may be NULL).
//
// The "<device>:" prefix is emitted only when both device pointers are given
// and the two devices compare unequal. If the assembled text does not pass
// IsAlphabetic(), it is wrapped in backticks.
inline void GetExpressionForControl(wxString &expr,
				    wxString &control_name,
				    DeviceQualifier *control_device = NULL,
				    DeviceQualifier *default_device = NULL)
{
	// the result is assembled directly in expr
	expr = "";

	const bool have_both_devices = control_device && default_device;
	if (have_both_devices)
	{
		const bool same_device = (*control_device == *default_device);
		if (!same_device)
		{
			// control lives on a non-default device: qualify it
			expr += control_device->ToString();
			expr += ":";
		}
	}

	expr += control_name;

	if (IsAlphabetic(expr))
		return;

	// anything that is not purely alphabetic gets quoted with backticks
	expr = wxString::Format("`%s`", expr);
}
Exemplo n.º 5
0
// Splits this string into words and, in parallel, records for every word
// whether terminal punctuation was seen just before it.
//
// WordUStrV - output list of words; cleared on entry.
// TerminalV - output list of flags, one per word and in the same order;
//             cleared on entry so that it always stays aligned with WordUStrV.
//
// Segments are delimited by the flags returned by GetWordBoundPV: flag N+1 set
// means a segment ends after character N. A segment of two or more characters
// is always emitted as a word; a one-character segment is emitted only when
// that character satisfies IsAlphabetic. A rejected one-character segment that
// satisfies IsTerminal, seen after at least one word has been emitted, sets
// the flag that is stored with the NEXT emitted word.
void TUStr::GetWordUStrLst(TLst<TUStr>& WordUStrV, TLst<TBool> &TerminalV){
  // reset both outputs; leaving TerminalV untouched would misalign the lists
  // whenever the caller passes in a non-empty list
  WordUStrV.Clr();
  TerminalV.Clr();
  // create boundaries
  TBoolV WordBoundPV; GetWordBoundPV(WordBoundPV);
  IAssert(Len()==WordBoundPV.Len()-1);
  IAssert((WordBoundPV.Len()>0)&&(WordBoundPV.Last()));
  // traverse characters and bounds
  int UniChs=Len(); TIntV WordUniChV;
  bool terminal = false; // terminal char seen since the last emitted word

  for (int UniChN=0; UniChN<=UniChs; UniChN++){
    if ((UniChN==UniChs)||(WordBoundPV[UniChN+1])){ // finish or word-boundary
      if (UniChN<UniChs){ // if not finish
        // if last-word-char or single-alphabetic-char
        if ((!WordUniChV.Empty())||(IsAlphabetic(UniChV[UniChN]))){
          WordUniChV.Add(UniChV[UniChN]); // add char
        } else if ((WordUStrV.Len() > 0)&&(IsTerminal(UniChV[UniChN]))){
          // lone non-word char after at least one word: remember terminals
          terminal = true;
        }
      }
      if (!WordUniChV.Empty()){ // add current word to the list
        TUStr WordUStr(WordUniChV); // construct word from char-vector
        WordUStrV.AddBack(WordUStr); // add word to word-list
        WordUniChV.Clr(false); // clear char-vector
        TerminalV.AddBack(terminal); // flag collected before this word
        terminal = false;
      }
    } else {
      // add character to char-vector
      WordUniChV.Add(UniChV[UniChN]);
    }
  }
}
Exemplo n.º 6
0
// Tokenise() splits the given input into tokens, each represented by a
// string. The output is APPENDED to "output".
//
// There are several types of tokens:
// * single characters like "a", or "{", or single non-ASCII unicode
//   characters
// * alphabetic commands like "\frac"
// * commands like "\," which have a single nonalphabetic character
//   after the backslash
// * commands like "\   " which have their whitespace collapsed,
//   stored as "\ "
// * other consecutive whitespace characters which get collapsed to
//   just " "
// * the sequence "\begin   {  stuff  }" gets stored as the single token
//   "\begin{  stuff  }". Note that whitespace is preserved between the
//   braces but not between "\begin" and "{". Similarly for "\end".
void Tokenise(const wstring& input, vector<wstring>& output)
{
    wstring::const_iterator ptr = input.begin();

    while (ptr != input.end())
    {
        // merge adjacent whitespace
        if (iswspace(*ptr))
        {
            output.push_back(L" ");
            do
                ptr++;
            while (ptr != input.end() && iswspace(*ptr));
        }
        // boring single character tokens
        else if (*ptr != L'\\')
        {
            // Disallow non-printable, non-whitespace ASCII
            if (*ptr < L' ' || *ptr == 0x7F)
                throw Exception(L"IllegalCharacter");
            output.push_back(wstring(1, *ptr++));
        }
        else
        {
            // tokens starting with backslash
            wstring token = L"\\";

            if (++ptr == input.end())
                throw Exception(L"IllegalFinalBackslash");
            if (IsAlphabetic(*ptr))
            {
                // plain alphabetic commands
                do
                    token += *ptr++;
                while (ptr != input.end() && IsAlphabetic(*ptr));

                // Special treatment for "\begin" and "\end"; need to
                // collapse "\begin  {xyz}" to "\begin{xyz}", and store it
                // as a single token.
                if (token == L"\\begin" || token == L"\\end")
                {
                    while (ptr != input.end() && iswspace(*ptr))
                        ptr++;
                    if (ptr == input.end() || *ptr != L'{')
                        throw Exception(L"MissingOpenBraceAfter", token);
                    token += *ptr++;
                    while (ptr != input.end() && *ptr != L'}')
                        token += *ptr++;
                    if (ptr == input.end())
                        throw Exception(L"UnmatchedOpenBrace");
                    token += *ptr++;
                }
            }
            else if (iswspace(*ptr))
            {
                // commands like "\    "
                token += L" ";
                do
                    ptr++;
                while (ptr != input.end() && iswspace(*ptr));
            }
            // commands like "\," and "\;"
            else
                token += *ptr++;

            output.push_back(token);
        }
    }
}