/*! Build and return a list holding the `first` member of every entry of
 *  _drawprop._layset, in the container's own iteration order.
 *  The result is a fresh copy; the layer set itself is not modified.
 */
const WordList layprop::ViewProperties::getAllLayers(void)
{
   WordList layerKeys;
   laySetList::const_iterator cur        = _drawprop._layset.begin();
   const laySetList::const_iterator last = _drawprop._layset.end();
   while (cur != last)
   {
      layerKeys.push_back(cur->first);
      ++cur;
   }
   return layerKeys;
}
/*! Expand a textual list of numbers into individual values appended to data.
 *
 *  The expression is consumed from the left as an alternation of numbers and
 *  single-character separators:
 *    - a number that follows ',' (or is the first one) is appended as is;
 *    - a number that follows '-' closes a range: every value after the last
 *      element of data up to and including that number is appended.
 *  For example "1,3-5" appends 1, 3, 4, 5.
 *
 *  Malformed input (a missing number, or a character that is neither ',' nor
 *  '-' where a separator is expected) stops the parsing; everything appended
 *  up to that point is kept. The function has no error channel, so callers
 *  that need strict validation must check the expression beforehand.
 *
 *  \param exp   the expression; taken by value because it is consumed here
 *  \param data  receives the parsed values (existing contents are preserved)
 */
void LayerMapGds::getList(wxString exp, WordList& data)
{
   wxRegEx number_tmpl(wxT("[[:digit:]]*"));
   wxRegEx separ_tmpl(wxT("[\\,\\-]{1,1}"));
   VERIFY(number_tmpl.IsValid());
   VERIFY(separ_tmpl.IsValid());

   unsigned long conversion = 0;
   bool expect_number = true; // the expression must start with a number
   char separator = ',';      // so that the first number is appended as is
   do
   {
      if (expect_number)
      {
         // The template can match an empty string. ToULong() reports that
         // (and any other conversion failure) through its return value;
         // without this check a stale/indeterminate value would be appended
         // and exp would never get shorter.
         if (!number_tmpl.Matches(exp)) return;
         if (!number_tmpl.GetMatch(exp).ToULong(&conversion)) return;
         number_tmpl.ReplaceFirst(&exp, wxT(""));
         if (',' == separator)
            data.push_back((word)conversion);
         else
         {
            // A number is always appended before a '-' can be seen, so data
            // is not empty here. Count in unsigned long: a counter of type
            // word wraps around and never terminates when the upper bound is
            // at (or above) the largest value word can hold.
            // NOTE(review): assumes word is an unsigned type narrower than
            // unsigned long - confirm against its declaration.
            for (unsigned long numi = (unsigned long)data.back() + 1; numi <= conversion; ++numi)
            {
               const word value = (word)numi;
               if ((unsigned long)value != numi) break; // does not fit in word
               data.push_back(value);
            }
         }
      }
      else
      {
         if (!separ_tmpl.Matches(exp))
         {
            // Unexpected character. Returning keeps release builds (where
            // assert is compiled out) from looping forever on an expression
            // that can no longer be consumed.
            assert(false);
            return;
         }
         const wxString found = separ_tmpl.GetMatch(exp);
         if      (wxT("-") == found) separator = '-';
         else if (wxT(",") == found) separator = ',';
         else
         {
            assert(false);
            return;
         }
         separ_tmpl.ReplaceFirst(&exp, wxT(""));
      }
      expect_number = !expect_number;
   } while (!exp.IsEmpty());
}
/**
 * Builds one rule from a string-list language element and adds it to state.
 *
 * All alternatives of the element are collected into a word list. Mixing
 * double quoted and non double quoted alternatives is rejected with a
 * HighlightBuilderException. A word list rule is created only when every
 * alternative is double quoted and is_to_isolate() accepts it; otherwise an
 * ordinary list rule is created.
 *
 * @param elem the language element providing name, alternatives and case
 * sensitivity
 * @param state the state the new rule is added to
 */
void HighlightStateBuilder::build(StringListLangElem *elem, HighlightState *state)
{
    const string &name = elem->getName();
    StringDefs *alternatives = elem->getAlternatives();

    WordList words;
    bool sawDoubleQuoted = false;
    bool sawNonDoubleQuoted = false;
    bool allIsolable = true;

    StringDefs::const_iterator def = alternatives->begin();
    while (def != alternatives->end()) {
        const string &text = (*def)->toString();
        const bool quoted = (*def)->isDoubleQuoted();

        // the previous alternative (if any) must be of the same kind
        if (quoted ? sawNonDoubleQuoted : sawDoubleQuoted) {
            throw HighlightBuilderException(
                    "cannot mix double quoted and non double quoted", elem);
        }

        sawDoubleQuoted = quoted;
        sawNonDoubleQuoted = !quoted;

        words.push_back(text);

        // a single alternative that is not double quoted, or that must not be
        // isolated, rules out the word list (word boundary) variant for good
        allIsolable = allIsolable && quoted && is_to_isolate(text);

        ++def;
    }

    HighlightRulePtr rule;
    if (!allIsolable) {
        rule = HighlightRulePtr(highlightRuleFactory->createListRule(name,
                words, elem->isCaseSensitive()));
    } else {
        rule = HighlightRulePtr(highlightRuleFactory->createWordListRule(name,
                words, elem->isCaseSensitive()));
    }

    rule->setAdditionalInfo(elem->toStringParserInfo());
    state->addRule(rule);
    setExitLevel(elem, rule.get());
}
// Fills allowedWords (cleared first) with the words met while walking the
// map produced by reverseMap() from its last entry towards its first
// (presumably most frequent first - confirm against ReFrequencyMap's ordering).
// The keys of the visited entries are summed up; the walk stops, without
// adding the current word, as soon as that running sum divided by total()
// is greater than ratio.
void FrequencyMap::highPassFilter( WordList& allowedWords, double ratio ) const
{
  allowedWords.clear();

  FrequencyMap::ReFrequencyMap byFrequency;
  reverseMap(byFrequency);

  const int grandTotal = total();
  int runningSum = 0;

  FrequencyMap::ReFrequencyMap::reverse_iterator cur = byFrequency.rbegin();
  while ( cur != byFrequency.rend() )
  {
    runningSum += cur->first;

    if ( static_cast<double>(runningSum) / grandTotal > ratio )
      return;

    allowedWords.push_back(cur->second);
    ++cur;
  }
}
void DictionaryItems::read( std::istream& is ) { clear(); //x std::ofstream logs("vonyokornai.nojoker.reconstructed.txt"); //x std::ostream& os = logs; while (!is.eof()) { WordList hu; WordList en; Word delimiter; bool engPart = true; while (true) { Word w; is >> w; if (w.empty()) break; // We allow vonyo7's "@" delimiter, and vonyokornai's "@V", "@N" etc. delimiters. if ( (w.size()<=2) && (w[0]=='@') ) { engPart = false; delimiter = w; } else if (engPart) { en.push_back(w); } else { hu.push_back(w); } while ( (is.peek()==' ') || (is.peek()=='\r') ) { is.ignore(); } if (is.peek()=='\n') { is.ignore(); break; } } if (en.empty()) break; push_back(std::make_pair(en,hu)); //x WordList::const_iterator it; //x for ( it=en.begin(); it!=en.end(); ++it ) //x { //x os << *it << " "; //x } //x os << delimiter ; //x for ( it=hu.begin(); it!=hu.end(); ++it ) //x { //x os << " " << *it ; //x } //x os << "\n"; } //x os.flush(); }
// Test driver for the regex based highlight rules ("test_regexrules").
//
// Rules and states are created through a RegexRuleFactory and checked in
// sequence with the assertTrue / assertFalse / assertEquals macros and the
// check_regex / check_state / check_compound_regex helpers, all of which are
// defined outside this function. Progress is written to cout.
// The sections share and reuse the variables `rule` and `nextState`, so the
// order of the statements matters.
// `coutPrinter` is not declared in this function; it comes from the
// enclosing scope.
//
// Returns 0 after the last check.
int main() {
  ostringstream printedState;
  // printer writes into printedState, so that the output can be inspected
  HighlightStatePrinter printer(printedState);
  MatchingParameters params;

  cout << "*** test_regexrules" << endl;

  // first of all check the only spaces regular expression
  boost::regex onlySpaces("[[:blank:]]*");

  assertTrue(boost::regex_match(" \t \t ", onlySpaces));
  assertTrue(boost::regex_match("", onlySpaces));
  assertFalse(boost::regex_match(" a\t \t ", onlySpaces));
  assertFalse(boost::regex_match(" a ", onlySpaces));
  assertFalse(boost::regex_match("foo", onlySpaces));

  RegexRuleFactory factory;

  // simple rule: plain alternation, no word boundaries required
  HighlightRulePtr rule = HighlightRulePtr(factory.createSimpleRule("foo", "foo|bar"));

  check_regex(rule.get(), params, "before_foo_after", true, "foo", "before_", "_after");
  check_regex(rule.get(), params, "before_foo", true, "foo", "before_", "");
  check_regex(rule.get(), params, "bar_after", true, "bar", "", "_after");
  check_regex(rule.get(), params, "before__after", false, "foo", "before_", "_after");

  // word list rule (case sensitivity argument left to its default)
  WordList wordList;
  wordList.push_back("foo");
  wordList.push_back("bar");
  wordList.push_back("gogo");

  rule = HighlightRulePtr(factory.createWordListRule("foo", wordList));

  cout << "word list regex: " << rule->toString() << endl;

  assertEquals("\\<(?:foo|bar|gogo)\\>", rule->toString());

  // there's no matching since we required word boundaries
  check_regex(rule.get(), params, "before_foo_after", false, "foo", "before_", "_after");
  check_regex(rule.get(), params, "before_foo", false, "foo", "before_", "");
  check_regex(rule.get(), params, "bar_after", false, "bar", "", "_after");
  // now there's match
  check_regex(rule.get(), params, "before foo after", true, "foo", "before ", " after");
  check_regex(rule.get(), params, "before foo", true, "foo", "before ", "");
  check_regex(rule.get(), params, "bar after", true, "bar", "", " after");
  check_regex(rule.get(), params, "before__after", false, "foo", "before_", "_after");

  // non case sensitive version
  rule = HighlightRulePtr(factory.createWordListRule("foo", wordList, false));

  cout << "word list regex: " << rule->toString() << endl;

  assertEquals("\\<(?:[Ff][Oo][Oo]|[Bb][Aa][Rr]|[Gg][Oo][Gg][Oo])\\>", rule->toString());

  check_regex(rule.get(), params, "before FoO", true, "FoO", "before ", "");
  check_regex(rule.get(), params, "bAr after", true, "bAr", "", " after");

  // now the simple list version (i.e., not word list)
  rule = HighlightRulePtr(factory.createListRule("foo", wordList));

  cout << "word list regex: " << rule->toString() << endl;

  assertEquals("(?:foo|bar|gogo)", rule->toString());

  check_regex(rule.get(), params, "before_foo", true, "foo", "before_", "");
  check_regex(rule.get(), params, "bar_after", true, "bar", "", "_after");

  // search for strings delimited by A
  rule = HighlightRulePtr(factory.createLineRule("foo", "A", "A"));

  cout << "line rule: " << rule->toString() << endl;

  assertEquals("A(?:[^A])*A", rule->toString());

  check_regex(rule.get(), params, "before A foo bar A after", true, "A foo bar A", "before ", " after");

  // search for strings delimited by A and B (the input contains "BC": the
  // match is expected to end at the B, leaving "C after" as suffix)
  rule = HighlightRulePtr(factory.createLineRule("foo", "A", "B"));

  cout << "line rule: " << rule->toString() << endl;

  assertEquals("A(?:[^AB])*B", rule->toString());

  check_regex(rule.get(), params, "before A foo bar A and BC after", true, "A and B", "before A foo bar ", "C after");

  // search for strings delimited by < and > and escape char
  rule = HighlightRulePtr(factory.createLineRule("foo", "<", ">", "\\\\"));

  cout << "line rule: " << rule->toString() << endl;

  assertEquals("<(?:[^\\\\<\\\\>]|\\\\.)*>", rule->toString());

  check_regex(rule.get(), params, "before < foo bar \\> and > after", true, "< foo bar \\> and >", "before ", " after");

  // test for multiline rule without escape and non nested
  rule = HighlightRulePtr(factory.createMultiLineRule("foo", "<", ">", "", false));

  printer.printHighlightRule(rule.get());
  cout << "multi line rule: " << printedState.str() << endl;

  // the rule itself only holds the start delimiter; the end delimiter is
  // expected as rule 0 of its next state
  assertEquals("<", rule->toString());
  const HighlightState *nextState = rule->getNextState().get();
  assertTrue(nextState != 0);
  if (nextState) {
    HighlightRulePtr nextRule = nextState->getRuleList()[0];
    assertEquals(">", nextRule->toString());
  }

  // test for multiline rule with escape and non nested
  rule = HighlightRulePtr(factory.createMultiLineRule("foo", "<", ">", "\\\\", false));

  // empty the buffer, so that only this rule is printed
  printedState.str("");
  printer.printHighlightRule(rule.get());
  cout << "multi line rule: " << printedState.str() << endl;

  assertEquals("<", rule->toString());
  nextState = rule->getNextState().get();
  assertTrue(nextState != 0);
  if (nextState) {
    // rule 1 of the next state is expected to be the escape sequence,
    // rule 0 the end delimiter (with exit level 1)
    HighlightRulePtr nextRule = nextState->getRuleList()[1];
    assertEquals("\\\\.", nextRule->toString());
    nextRule = nextState->getRuleList()[0];
    assertEquals(">", nextRule->toString());
    assertEquals(1, nextRule->getExitLevel());
  }

  // check state betterThan
  HighlightToken t1("", "ab", "");
  HighlightToken t2("", "bcd", "a");
  HighlightToken t3("", "bcde", "a");

  // t2 is not better than t1: although its matching string is longer,
  // its prefix is longer as well
  assertFalse(HighlightState::betterThan(t2, t1));
  assertTrue(HighlightState::betterThan(t1, t2));
  // t2 is not better than t3 since its matching string is smaller
  assertFalse(HighlightState::betterThan(t2, t3));
  assertTrue(HighlightState::betterThan(t3, t2));

  // now test the state matching with the above nextState
  // NOTE(review): token is not referenced again in this function
  HighlightToken token;
  // should match >
  check_state(nextState, params, "ab >", true, ">", "ab ", "");
  // should match \> since it is escaped
  check_state(nextState, params, "ab \\> bc >", true, "\\>", "ab ", " bc >");

  // rules are added to classState one at a time; after each addition the
  // expected winning match for the same input is checked again
  HighlightState classState;

  assertEquals("normal", classState.getDefaultElement());

  classState.addRule( HighlightRulePtr(factory.createSimpleRule("as", "as|at")));

  check_state(&classState, params, "this class foo", true, "as", "this cl", "s foo", "", "as");

  HighlightRulePtr keywordRule = HighlightRulePtr(factory.createSimpleRule( "keyword", "class"));
  classState.addRule(keywordRule);

  // now we should find a better match "class"
  check_state(&classState, params, "this class foo", true, "class", "this ", " foo", "class", "keyword");

  classState.addRule(HighlightRulePtr(factory.createSimpleRule("complex", "class[[:blank:]]+[[:word:]]+")));

  // now we should find an even better match "class foo"
  check_state(&classState, params, "this class foo", true, "class foo", "this ", "", "class[[:blank:]]+[[:word:]]+", "complex");

  // we don't match "class foo" but only "class" since its prefix
  // contains only spaces
  check_state(&classState, params, "\t \t class foo", true, "class", "\t \t ", " foo", "class", "keyword");

  classState.addRule(HighlightRulePtr(factory.createSimpleRule("foo", "bar"))); // (1)
  classState.addRule(HighlightRulePtr(factory.createSimpleRule("foo", "[[:word:]]+(?=[[:blank:]]*\\()"))); // (2)

  // although rule (2) matches "bar" too, rule (1) has already matched
  check_state(&classState, params, "this bar ( foo", true, "bar", "this ", " ( foo", "bar");
  // in this case only rule (2) matches
  check_state(&classState, params, "this myfun ( foo", true, "myfun", "this ", " ( foo", "[[:word:]]+(?=[[:blank:]]*\\()");

  // test for nested
  // NOTE(review): stateWithNestedRule is not referenced again in this
  // function; constructing it may still affect the numbering of state ids -
  // confirm before removing it
  HighlightState stateWithNestedRule;
  HighlightRulePtr startRule = HighlightRulePtr(factory.createMultiLineRule( "foo", "<", ">", "\\\\", true));

  cout << "multi line rule (nested): ";
  coutPrinter.printHighlightRule(startRule.get());
  cout << endl;

  // from here on nextState is dereferenced without a null check
  nextState = startRule->getNextState().get();
  // the 3rd element is expected to be the same start sequence but nested and
  // with empty next state
  assertEquals("<", nextState->getRuleList()[2]->toString());
  assertTrue(nextState->getRuleList()[2]->isNested());
  assertTrue(nextState->getRuleList()[2]->getNextState().get() == 0);

  // single line automatically transformed in multi line (since nested)
  HighlightRulePtr singleNestedToMultiRule = HighlightRulePtr( factory.createLineRule("foo", "<", ">", "\\\\", true));

  cout << "single nested line rule (transformed): ";
  coutPrinter.printHighlightRule(singleNestedToMultiRule.get());
  cout << endl;

  nextState = singleNestedToMultiRule->getNextState().get();
  assertEquals(">", nextState->getRuleList()[0]->toString());
  assertEquals(1, nextState->getRuleList()[0]->getExitLevel());
  assertEquals("<", nextState->getRuleList()[2]->toString());
  assertTrue(nextState->getRuleList()[2]->isNested());

  // single line automatically transformed in multi line (since
  // one delimiter has more than one char)
  HighlightRulePtr singleToMultiRule = HighlightRulePtr( factory.createLineRule("foo", "<<", ">>", "\\\\"));

  cout << "single line rule (transformed): ";
  coutPrinter.printHighlightRule(singleToMultiRule.get());
  cout << endl;

  nextState = singleToMultiRule->getNextState().get();
  assertEquals(">>", nextState->getRuleList()[0]->toString());
  assertEquals(1, nextState->getRuleList()[0]->getExitLevel());

  // check state copy
  HighlightStatePtr stateCopy = HighlightStatePtr( new HighlightState(*nextState));

  // a new id must have been created for the copy
  assertEquals(nextState->getId()+1, stateCopy->getId());

  rule = HighlightRulePtr(factory.createSimpleRule("foo", "foo"));

  // replace the rule at index 0 of the copy (keeping the replaced rule in
  // oldRule) and append one more rule to the copy
  HighlightRulePtr oldRule = stateCopy->replaceRule(0, rule);
  stateCopy->addRule(HighlightRulePtr(factory.createSimpleRule("foo", "bar")));

  cout << "orig state: ";
  coutPrinter.printHighlightState(nextState);
  cout << endl;
  cout << "copy state: ";
  coutPrinter.printHighlightState(stateCopy.get());
  cout << endl;

  // check that the original state is still the same
  assertEquals(">>", nextState->getRuleList()[0]->toString());
  assertEquals(1, nextState->getRuleList()[0]->getExitLevel());
  assertEquals(">>", oldRule->toString());
  assertEquals(1, oldRule->getExitLevel());
  // and that the new one has changed rules
  assertEquals("foo", stateCopy->getRuleList()[0]->toString());
  assertEquals(0, stateCopy->getRuleList()[0]->getExitLevel());
  assertEquals("bar", stateCopy->getRuleList()[2]->toString());

  // test for MatchingParameters (beginning of line)
  HighlightStatePtr stateWithBOL = HighlightStatePtr(new HighlightState);
  stateWithBOL->addRule(HighlightRulePtr(factory.createSimpleRule("BOL", "^[[:word:]]+")));
  stateWithBOL->addRule(HighlightRulePtr(factory.createSimpleRule("NOTBOL", "foo")));

  cout << "state with BOL: ";
  coutPrinter.printHighlightState(stateWithBOL.get());
  cout << endl;

  // the first rule matches since we're at the beginning of the line
  check_state(stateWithBOL.get(), params, "foo at the beginning", true, "foo", "", " at the beginning", "^[[:word:]]+", "BOL");

  // the second rule matches since we specified that we're not at the beginning of the line
  MatchingParameters paramsNotBOL;
  paramsNotBOL.beginningOfLine = false;
  check_state(stateWithBOL.get(), paramsNotBOL, "foo at the beginning", true, "foo", "", " at the beginning", "foo", "NOTBOL");

  // test for compound rules: one element name per marked subexpression
  ElemNameList nameList;
  nameList.push_back("keyword");
  nameList.push_back("normal");
  nameList.push_back("type");
  HighlightRulePtr compoundRule = HighlightRulePtr( factory.createCompoundRule(nameList, "(class)([[:blank:]]+)([[:word:]]+)"));

  cout << "compound rule: ";
  coutPrinter.printHighlightRule(compoundRule.get());
  cout << endl;

  MatchedElements expectedMatchedElements;
  expectedMatchedElements.push_back(make_pair("keyword", "class"));
  expectedMatchedElements.push_back(make_pair("normal", " "));
  expectedMatchedElements.push_back(make_pair("type", "MyClass"));

  check_compound_regex(compoundRule.get(), params, "class MyClass", expectedMatchedElements, "", "");

  // test for rules with no end (i.e., \z)
  rule = HighlightRulePtr(factory.createLineRule("foo", "#", "", "\\\\"));

  cout << "line rule: ";
  coutPrinter.printHighlightRule(rule.get());
  cout << endl;

  assertEquals("#", rule->toString());
  assertTrue(rule->getNextState().get());
  assertEquals("\\z", rule->getNextState()->getRuleList().front()->toString());

  check_regex(rule.get(), params, "before # after", true, "#", "before ", " after");

  // test for replacement: only the first two entries of rep are set, and
  // the checks below expect @{3} to be replaced by an empty string
  rule = HighlightRulePtr(factory.createSimpleRule("foo", "@{2} @{1} @{3}"));
  ReplacementList rep(9);
  rep[0] = "first";
  rep[1] = "second";
  rule->replaceReferences(rep);

  cout << "replaced rule: " << rule->toString() << endl;

  assertEquals("second first ", rule->toString());

  // test replacement for states
  HighlightRulePtr origRep1 = HighlightRulePtr(factory.createSimpleRule( "foo", "@{2} @{1} @{3}"));
  HighlightRulePtr origNoRep = HighlightRulePtr(factory.createSimpleRule( "foo", "no @{1} replacement"));
  HighlightRulePtr origRep2 = HighlightRulePtr(factory.createSimpleRule( "foo", "@{3} @{1} @{2}"));
  // origNoRep is deliberately not marked as needing replacement
  origRep1->setNeedsReferenceReplacement();
  origRep2->setNeedsReferenceReplacement();

  HighlightStatePtr stateWithRepl = HighlightStatePtr(new HighlightState);
  stateWithRepl->addRule(origRep1);
  stateWithRepl->addRule(origNoRep);
  stateWithRepl->addRule(origRep2);

  stateWithRepl->replaceReferences(rep);

  coutPrinter.printHighlightState(stateWithRepl.get());

  // check that the original rules have not been changed
  assertEquals("@{2} @{1} @{3}", origRep1->toString());
  assertEquals("@{3} @{1} @{2}", origRep2->toString());
  // that the second rule was not changed
  assertEquals("no @{1} replacement", stateWithRepl->getRuleList()[1]->toString());
  // that the other two were changed
  assertEquals("second first ", stateWithRepl->getRuleList()[0]->toString());
  assertEquals(" first second", stateWithRepl->getRuleList()[2]->toString());

  // test for subexpressions
  rule = HighlightRulePtr(factory.createSimpleRule("foo", "(class) ([[:word:]]+) (\\{)"));
  rule->setHasSubexpressions();

  // the expected subexpressions
  MatchedSubExps subexps;
  subexps.push_back("class");
  subexps.push_back("foo");
  subexps.push_back("{");

  check_regex(rule.get(), params, "my class foo {", true, "class foo {", "my ", "", subexps);

  cout << "*** test_regexrules SUCCESS" << endl;

  return 0;
}