// Splits this string into words and stores them in WordUStrV (previous
// contents of WordUStrV are discarded).
//
// Segments are delimited by the boundary flags produced by GetWordBoundPV():
// flag [i+1] set means there is a boundary right after character i.
//  - a segment of two or more characters is always emitted as a word;
//  - a segment of exactly one character is emitted only if that character
//    is alphabetic.
void TUStr::GetWordUStrV(TUStrV& WordUStrV){
  WordUStrV.Clr();
  // one boundary flag per gap between characters, plus both ends
  TBoolV WordBoundPV; GetWordBoundPV(WordBoundPV);
  IAssert(Len()==WordBoundPV.Len()-1);
  IAssert((WordBoundPV.Len()>0)&&(WordBoundPV.Last()));
  const int UniChs=Len();
  TIntV PendingChV; // characters of the segment currently being collected
  for (int ChN=0; ChN<=UniChs; ChN++){
    const bool AtEnd=(ChN==UniChs);
    // inside a segment: just collect the character
    if (!AtEnd && !WordBoundPV[ChN+1]){
      PendingChV.Add(UniChV[ChN]);
      continue;
    }
    // a boundary follows this character (or we ran past the last character)
    if (!AtEnd){
      // keep the character if it closes a longer segment,
      // or if it is a lone alphabetic character
      if ((!PendingChV.Empty())||(IsAlphabetic(UniChV[ChN]))){
        PendingChV.Add(UniChV[ChN]);
      }
    }
    // nothing collected: the segment was a lone non-alphabetic character
    if (PendingChV.Empty()){
      continue;
    }
    // flush the collected characters as one word
    TUStr FinishedWord(PendingChV);
    WordUStrV.Add(FinishedWord);
    PendingChV.Clr(false);
  }
}
// Returns true when either IsNumeric() or IsAlphabetic() reports true.
// IsNumeric() is consulted first; IsAlphabetic() is only called if it fails.
bool ScannerA::IsAlphanumeric()
{
    return IsNumeric() || IsAlphabetic();
}
// Builds a bracketed, comma-separated list naming every category test that
// UniCh passes, in the fixed order Case, UpperCase, LowerCase, Alphabetic,
// Math -- e.g. "[Case,UpperCase,Alphabetic]". Returns "[]" if none match.
TStr TUStr::GetChTypeStr(const int& UniCh){
  TChA TypeNmChA;
  TypeNmChA+='[';
  // every matching category is appended with a trailing comma
  if (IsCase(UniCh)){
    TypeNmChA+="Case,";
  }
  if (IsUpperCase(UniCh)){
    TypeNmChA+="UpperCase,";
  }
  if (IsLowerCase(UniCh)){
    TypeNmChA+="LowerCase,";
  }
  if (IsAlphabetic(UniCh)){
    TypeNmChA+="Alphabetic,";
  }
  if (IsMath(UniCh)){
    TypeNmChA+="Math,";
  }
  // the buffer now ends either in '[' (nothing matched) or in ','
  if (TypeNmChA.LastCh()==','){
    // turn the dangling comma into the closing bracket
    TypeNmChA[TypeNmChA.Len()-1]=']';
  } else {
    TypeNmChA+=']';
  }
  return TypeNmChA;
}
// Writes into `expr` the expression text that refers to a control.
//
// The result is `control_name`, prefixed by "<device>:" when both device
// pointers are supplied and the two devices compare unequal. If the finished
// text (prefix included) does not satisfy IsAlphabetic(), it is wrapped in
// backticks.
//
// expr            output; any previous contents are overwritten
// control_name    name of the control
// control_device  device owning the control (may be NULL)
// default_device  device assumed when no prefix is given (may be NULL)
inline void GetExpressionForControl(wxString &expr, wxString &control_name, DeviceQualifier *control_device = NULL, DeviceQualifier *default_device = NULL)
{
	// reset the output first, before anything else is read
	expr = "";

	// a device prefix is only needed when the control lives on a device
	// other than the default one
	const bool needs_device_prefix =
		control_device && default_device && !(*control_device == *default_device);
	if (needs_device_prefix)
	{
		expr += control_device->ToString();
		expr += ":";
	}

	expr += control_name;

	// quote anything that is not purely alphabetic
	if (IsAlphabetic(expr))
		return;
	expr = wxString::Format("`%s`", expr);
}
void TUStr::GetWordUStrLst(TLst<TUStr>& WordUStrV, TLst<TBool> &TerminalV){ //TBoolV& TerminalV){ // clear word vector WordUStrV.Clr(); // create boundaries TBoolV WordBoundPV; GetWordBoundPV(WordBoundPV); //TerminalV.Reserve(WordBoundPV.Len()); IAssert(Len()==WordBoundPV.Len()-1); IAssert((WordBoundPV.Len()>0)&&(WordBoundPV.Last())); // traverse characters and bounds int UniChs=Len(); TIntV WordUniChV; bool terminal = false; for (int UniChN=0; UniChN<=UniChs; UniChN++){ if ((UniChN==UniChs)||(WordBoundPV[UniChN+1])){ // finish or word-boundary if (UniChN<UniChs){ // if not finish // if last-word-char or single-alphabetic-char if ((!WordUniChV.Empty())||(IsAlphabetic(UniChV[UniChN]))){ WordUniChV.Add(UniChV[UniChN]); // add char } else{ if(WordUStrV.Len() > 0){ if(IsTerminal(UniChV[UniChN])) terminal = true; } } } if (!WordUniChV.Empty()){ // add current word to vector TUStr WordUStr(WordUniChV); // construct word from char-vector WordUStrV.AddBack(WordUStr); // add word to word-vector WordUniChV.Clr(false); // clear char-vector if(terminal){ TerminalV.AddBack(true);} else{ TerminalV.AddBack(false);} terminal = false; } } else { // add character to char-vector WordUniChV.Add(UniChV[UniChN]); } } }
// Tokenise() splits the given input into tokens, each represented by a // string. The output is APPENDED to "output". // // There are several types of tokens: // * single characters like "a", or "{", or single non-ASCII unicode // characters // * alphabetic commands like "\frac" // * commands like "\," which have a single nonalphabetic character // after the backslash // * commands like "\ " which have their whitespace collapsed, // stored as "\ " // * other consecutive whitespace characters which get collapsed to // just " " // * the sequence "\begin { stuff }" gets stored as the single token // "\begin{ stuff }". Note that whitespace is preserved between the // braces but not between "\begin" and "{". Similarly for "\end". void Tokenise(const wstring& input, vector<wstring>& output) { wstring::const_iterator ptr = input.begin(); while (ptr != input.end()) { // merge adjacent whitespace if (iswspace(*ptr)) { output.push_back(L" "); do ptr++; while (ptr != input.end() && iswspace(*ptr)); } // boring single character tokens else if (*ptr != L'\\') { // Disallow non-printable, non-whitespace ASCII if (*ptr < L' ' || *ptr == 0x7F) throw Exception(L"IllegalCharacter"); output.push_back(wstring(1, *ptr++)); } else { // tokens starting with backslash wstring token = L"\\"; if (++ptr == input.end()) throw Exception(L"IllegalFinalBackslash"); if (IsAlphabetic(*ptr)) { // plain alphabetic commands do token += *ptr++; while (ptr != input.end() && IsAlphabetic(*ptr)); // Special treatment for "\begin" and "\end"; need to // collapse "\begin {xyz}" to "\begin{xyz}", and store it // as a single token. 
if (token == L"\\begin" || token == L"\\end") { while (ptr != input.end() && iswspace(*ptr)) ptr++; if (ptr == input.end() || *ptr != L'{') throw Exception(L"MissingOpenBraceAfter", token); token += *ptr++; while (ptr != input.end() && *ptr != L'}') token += *ptr++; if (ptr == input.end()) throw Exception(L"UnmatchedOpenBrace"); token += *ptr++; } } else if (iswspace(*ptr)) { // commands like "\ " token += L" "; do ptr++; while (ptr != input.end() && iswspace(*ptr)); } // commands like "\," and "\;" else token += *ptr++; output.push_back(token); } } }