// Parses one pattern-group declaration from txtfile and registers it.
//
// The group's options are loaded into a freshly allocated SynPatternOptions.
// The declaration is rejected (message on the error stream, source location
// printed, lem::E_BaseException thrown) when its name
//   - is already a registered pattern group name, or
//   - is reported by the word entry set container as an existing set name.
//
// On success the options object is appended to `options` and indexed by its
// upper-cased name in `patterns` and `name2id`.
void SynPatterns::LoadTxt( Dictionary &dict, lem::Iridium::Macro_Parser & txtfile )
{
    // Remember where the declaration starts so diagnostics can point at it.
    lem::Iridium::BSourceState beg = txtfile.tellp();

    SynPatternOptions *x = new SynPatternOptions();

    try
    {
        x->LoadTxt( dict, txtfile );

        if( IsPatternName(x->GetName()) )
        {
            dict.GetIO().merr().printf( "Patterns group [%us] is already declared\n", x->GetName().c_str() );
            lem::Iridium::Print_Error(beg,txtfile);
            throw lem::E_BaseException();
        }

        if( dict.GetLexAuto().GetWordEntrySet().IsSetName(x->GetName()) )
        {
            dict.GetIO().merr().printf( "%vfC%us%vn is a name of word entry set, word set or collocation set\n", x->GetName().c_str() );
            lem::Iridium::Print_Error(beg,txtfile);
            throw lem::E_BaseException();
        }
    }
    catch( ... )
    {
        // The object has not been stored in any container yet, so nobody else
        // can release it: free it here and let the original exception continue.
        delete x;
        throw;
    }

    const int id = GetNextTreeID();

    // NOTE(review): from here on `options` holds the pointer; presumably it is
    // the owning container - confirm against its declaration.
    options.push_back(x);

    lem::UCString uname( lem::to_upper(x->GetName() ) );
    patterns.insert( std::make_pair( uname, x ) );
    name2id.insert( std::make_pair( uname, id ) );

    return;
}
// Parses the result part of a tree scorer rule.
//
// Three forms are recognized:
//   - B_SUB followed by an integer : constant score, stored negated (type 0);
//   - an integer                   : constant score (type 0);
//   - NAME ( var, var, ... )       : a call resolved through the knowledge
//                                    base's FindFacts, with every argument
//                                    required to be an already bound marker
//                                    (type 1).
//
// Throws lem::E_BaseException after printing a diagnostic when NAME is not
// found or an argument is not bound in `markers`.
void TreeScorerResult::LoadTxt( Dictionary & dict, lem::Iridium::Macro_Parser & txtfile, const TreeScorerMarkers & markers )
{
    // The score may be negative.
    if( txtfile.probe( B_SUB ) )
    {
        type = 0;
        score = -txtfile.read_int();
        return;
    }

    if( lem::is_int( txtfile.pick().string() ) )
    {
        type = 0;
        score = txtfile.read_int();
        return;
    }

    const lem::Iridium::BethToken & fact_token = txtfile.read();
    id_fact = dict.GetLexAuto().GetKnowledgeBase().FindFacts( fact_token.string() );
    if( id_fact==UNKNOWN )
    {
        // todo - other kinds of invoked computations may be possible here.
        lem::Iridium::Print_Error(fact_token,txtfile);
        dict.GetIO().merr().printf( "Unknown scoring expression starts with %us\n", fact_token.string().c_str() );
        throw lem::E_BaseException();
    }

    // Argument list: comma-separated marker names up to the closing parenthesis.
    txtfile.read_it( B_OROUNDPAREN );
    for( ; !txtfile.eof(); )
    {
        if( txtfile.probe( B_CROUNDPAREN ) )
            break;

        // A comma is required before every argument except the first one.
        if( !args.empty() )
            txtfile.read_it( B_COMMA );

        const lem::Iridium::BethToken & arg_token = txtfile.read();
        lem::UCString arg_name = lem::to_upper(arg_token.string());

        if( !markers.IsAlreadyBound(arg_name) )
        {
            lem::Iridium::Print_Error(arg_token,txtfile);
            dict.GetIO().merr().printf( "variable %us not bound\n", arg_token.string().c_str() );
            throw lem::E_BaseException();
        }

        args.push_back( arg_name );
    }

    type = 1;
    return;
}
// Parses a tree scorer group declaration:
//
//   NAME [ { allow_unmatched_children = true|false ... } ]
//
// The upper-cased group name must not be registered in name2id yet. The
// optional parameter block currently understands a single parameter,
// allow_unmatched_children. Any problem is reported on the error stream with
// the source location and raised as lem::E_ParserError. On success the group
// is stored via `storage` and its id is remembered in name2id.
void TreeScorers::LoadGroup( Dictionary & dict, lem::Iridium::Macro_Parser &txtfile )
{
    lem::Iridium::BethToken name_token = txtfile.read();

    // Group names are looked up in upper case.
    lem::UCString group_name = name_token.string();
    group_name.to_upper();

    if( name2id.find(group_name)!=name2id.end() )
    {
        lem::Iridium::Print_Error( name_token, txtfile );
        dict.GetIO().merr().printf( "Tree scorer group [%us] is already declared\n", name_token.string().c_str() );
        throw lem::E_ParserError();
    }

    TreeScorerGroupParams params;

    if( txtfile.probe( B_OFIGPAREN ) )
    {
        for( ; !txtfile.eof(); )
        {
            if( txtfile.probe( B_CFIGPAREN ) )
                break;

            lem::Iridium::BethToken param_token = txtfile.read();

            if( !param_token.string().eqi(L"allow_unmatched_children") )
            {
                lem::Iridium::Print_Error( param_token, txtfile );
                dict.GetIO().merr().printf( "Unknown tree scorer group parameter [%us]\n", param_token.string().c_str() );
                throw lem::E_ParserError();
            }

            txtfile.read_it( B_EQUAL );
            lem::Iridium::BethToken value_token = txtfile.read();

            if( value_token.string().eqi(L"true") )
            {
                params.allow_unmatched_children = true;
            }
            else if( value_token.string().eqi(L"false") )
            {
                params.allow_unmatched_children = false;
            }
            else
            {
                lem::Iridium::Print_Error( value_token, txtfile );
                dict.GetIO().merr().printf( "[%us] is not boolean value\n", value_token.string().c_str() );
                throw lem::E_ParserError();
            }
        }
    }

    const int group_id = storage->StoreTreeScorerGroup( group_name, params );
    name2id.insert( std::make_pair(group_name,group_id) );

    return;
}
// Reads a run of integer tokens into `terms` and checks that exactly
// order+1 values are present afterwards.
//
// Reading stops at the first non-integer token, which is pushed back into the
// stream via seekp. If the resulting count is wrong, a diagnostic plus the
// location where the n-gram started is printed and lem::E_ParserError is
// thrown.
void LEMM_Compiler::LoadNGram( lem::Iridium::Macro_Parser & txtfile, Dictionary & dict, lem::MCollect<int> & terms, int order ) const
{
    lem::Iridium::BSourceState ngram_start = txtfile.tellp();

    for( ; !txtfile.eof(); )
    {
        lem::Iridium::BethToken tok = txtfile.read();

        if( !lem::is_int(tok.string()) )
        {
            // Not part of the n-gram: return the token to the stream.
            txtfile.seekp(tok);
            break;
        }

        terms.push_back( lem::to_int(tok.string()) );
    }

    if( terms.size() == order+1 )
        return;

    dict.GetIO().merr().printf( "%vfDInvalid ngram%vn\n" );
    lem::Iridium::Print_Error( ngram_start, txtfile );
    throw lem::E_ParserError();
}
// Parses a misspelling rule of the shape
//
//   language = NAME { if "old" then "new" }
//
// Both words have their double quotes stripped and are passed through the
// lexical automaton's TranslateLexem for the given language before the pair
// is handed to storage->AddMisspelling.
//
// Throws lem::E_BaseException (after printing a diagnostic) when the language
// name is unknown.
void LA_Recognizer::LoadTxt_Misspelling( lem::Iridium::Macro_Parser &txtfile, Dictionary &dict )
{
    txtfile.read_it(B_LANGUAGE);

    // Next comes the name of the language within which the rule applies.
    txtfile.read_it( B_EQUAL );
    lem::Iridium::BethToken lang_token = txtfile.read();

    int id_language = dict.GetSynGram().Find_Language(lang_token.string());
    if( id_language==UNKNOWN )
    {
        lem::Iridium::Print_Error(lang_token,txtfile);
        dict.GetIO().merr().printf( "Unknown language name %us\n", lang_token.c_str() );
        throw lem::E_BaseException();
    }

    txtfile.read_it( B_OFIGPAREN );

    // if "misspelled word"
    txtfile.read_it( B_IF );
    Solarix::Lexem old_word = txtfile.read().string();
    old_word.strip(L'"');
    dict.GetLexAuto().TranslateLexem( old_word, true, id_language );

    // then "replacement word"
    txtfile.read_it( B_THEN );
    Solarix::Lexem new_word = txtfile.read().string();
    new_word.strip(L'"');
    dict.GetLexAuto().TranslateLexem( new_word, true, id_language );

    txtfile.read_it( B_CFIGPAREN );

    storage->AddMisspelling( id_language, old_word, new_word );
    return;
}
// Parses a parenthesized, comma-separated list of marker names and appends
// their upper-cased forms to `args`.
//
// Every name must already be bound according to `markers`; otherwise the
// offending token's location and a message are printed and
// lem::E_BaseException is thrown.
void TreeScorerResult::LoadBoundVars( Dictionary & dict, lem::Iridium::Macro_Parser & txtfile, const TreeScorerMarkers & markers )
{
    txtfile.read_it( B_OROUNDPAREN );

    for( ; !txtfile.eof(); )
    {
        if( txtfile.probe( B_CROUNDPAREN ) )
            break;

        // A comma is required before every item once `args` is non-empty.
        if( !args.empty() )
            txtfile.read_it( B_COMMA );

        const lem::Iridium::BethToken & name_token = txtfile.read();
        lem::UCString marker_name = lem::to_upper(name_token.string());

        if( !markers.IsAlreadyBound(marker_name) )
        {
            lem::Iridium::Print_Error(name_token,txtfile);
            dict.GetIO().merr().printf( "Variable %us is not bound\n", name_token.string().c_str() );
            throw lem::E_BaseException();
        }

        args.push_back( marker_name );
    }

    return;
}
// Called once the table has been loaded: at debug level 3 or higher prints
// the green "OK" marker.
//
// The marker goes to the echo stream, the same stream that
// SG_DeclensionTable::LoadName uses for the "paradigma [name]->" prefix it
// completes, rather than to the error stream.
void SG_DeclensionTable::Loaded( const Dictionary &dict )
{
    if( dict.GetDebugLevel_ir()>=3 )
        dict.GetIO().mecho().printf( "%vfAOK%vn\n" );

    return;
}
// Called once the table has been loaded: at debug level 3 or higher prints
// the "OK" marker to the echo stream; otherwise does nothing.
void Form_Table::Loaded(const Dictionary &dict)
{
    if (dict.GetDebugLevel_ir() < 3)
        return;

    dict.GetIO().mecho().printf("%vfAOK%vn\n");
}
/*******************************************************************
 Reads the header of a paradigm declaration:

   paradigma Name, Alias1, ... :

 The first token becomes the table's primary name; it and every
 comma-separated alias that follows are appended to `names`. The key
 is reset to ANY_STATE. Tokens are taken verbatim - no check of the
 name's form is made here.
********************************************************************/
void SG_DeclensionTable::LoadName( Macro_Parser &txtfile, Dictionary &dict )
{
    const BethToken& first = txtfile.read();
    key = ANY_STATE;

    // Primary name.
    name = first.string();
    names.push_back(name);

    // Optional aliases, each introduced by a comma.
    for( ; !txtfile.eof(); )
    {
        if( txtfile.pick().GetToken()!=B_COMMA )
            break;

        txtfile.read_it(B_COMMA);
        names.push_back( txtfile.read().string() );
    }

    txtfile.read_it(B_COLON);

    if( dict.GetDebugLevel_ir()>=3 )
    {
        // Progress prefix for the echo stream.
        dict.GetIO().mecho().printf(
            "%us [%vfE%us%vn]->",
            sol_get_token(B_PARADIGMA).c_str(),
            GetName().c_str() );
    }

    return;
}
// Parses one alphabet declaration and registers it.
//
// The first token is the alphabet name; a name that Find() already resolves
// is rejected with a message on the error stream and E_ParserError. Otherwise
// an id is obtained from storage->AddAlphabet, the alphabet body is loaded
// from `txt`, the loaded alphabet is passed to storage->StoreAlphabet, and
// the object is indexed in name2id, id2alphabet and alphabets.
void Alphabets::LoadTxt(lem::Iridium::Macro_Parser &txt, Dictionary &dict)
{
    lem::UCString name(txt.read().string());

    if (Find(name) != UNKNOWN)
    {
        dict.GetIO().merr().printf( "Alphabet [%us] is already declared\n" , name.c_str() );
        throw E_ParserError();
    }

    const int id = storage->AddAlphabet(name);

    Alphabet *a = new Alphabet(id, name);
    try
    {
        a->LoadTxt(txt, dict);
        storage->StoreAlphabet(*a);
        name2id.insert(std::make_pair(name, id));
    }
    catch (...)
    {
        // The pointer has not been placed into id2alphabet/alphabets yet, so
        // it must be released here before the exception propagates.
        delete a;
        throw;
    }

    // NOTE(review): from here on the pointer lives in the member containers;
    // presumably `alphabets` is the owner - confirm against its declaration.
    id2alphabet.insert(std::make_pair(id, a));
    alphabets.push_back(a);

    return;
}
void SG_DeclensionTable::AddForm( const CP_Array &dim, const WordFormName &form_name, Dictionary &dict ) { try { form.push_back( new SG_DeclensionForm(dim,form_name.form_name) ); } catch( ... ) { dict.GetIO().merr().printf( "Error in paradigma [%us] syntax\n", GetName().c_str() ); throw E_BaseException(); } }
// Loads one preprocessor rule whose kind is given by head_token.
//
// Only the "crop" kind (case-insensitive) is handled; any other head token is
// ignored. A crop rule with a non-empty name that storage->FindCropRule
// already knows is rejected: the location of head_token and a message are
// printed and E_ParserError is thrown. Otherwise the rule is passed to
// storage->StorePreprocessorCropRule.
void LA_Preprocessor::LoadTxt( const lem::Iridium::BethToken &head_token, lem::Iridium::Macro_Parser &txtfile, Dictionary &dict )
{
    if( head_token.string().eqi(L"crop") )
    {
        LA_CropRule* r = new LA_CropRule();

        try
        {
            r->LoadTxt( txtfile, dict );

            if( !r->GetName().empty() && storage->FindCropRule(r->GetName())!=UNKNOWN )
            {
                lem::Iridium::Print_Error( head_token, txtfile );
                dict.GetIO().merr().printf( "Rule [%us] redefinition\n", r->GetName().c_str() );
                throw E_ParserError();
            }
        }
        catch( ... )
        {
            // The rule has not been handed to the storage yet: release it
            // here and let the original exception continue.
            delete r;
            throw;
        }

        // NOTE(review): whether the storage takes ownership of the pointer is
        // not visible here - confirm against StorePreprocessorCropRule.
        storage->StorePreprocessorCropRule(r);
    }

    return;
}
// Parses a syllabification rule:
//
//   syllab_rule NAME language=LANG
//   {
//     if context ... then { context ... }
//   }
//
// The rule's source location is registered with the debug symbols, the
// condition and the result are loaded by their own LoadTxt methods (the
// result receives the condition). Throws lem::E_BaseException after a
// diagnostic when the language name is unknown.
void SyllabRule::LoadTxt( lem::Iridium::Macro_Parser &txtfile, Dictionary &dict )
{
    lem::Iridium::BSourceState rule_start = txtfile.tellp();
    id_src = dict.GetDebugSymbols().RegisterLocation( txtfile, rule_start );

    // Header: syllab_rule XXXX language=YYY
    name = txtfile.read().string();

    txtfile.read_it( B_LANGUAGE );
    txtfile.read_it( B_EQUAL );

    lem::Iridium::BethToken lang_token = txtfile.read();
    id_language = dict.GetSynGram().Find_Language(lang_token.string());
    if( id_language==UNKNOWN )
    {
        lem::Iridium::Print_Error(lang_token,txtfile);
        dict.GetIO().merr().printf( "Unknown language name %us\n", lang_token.c_str() );
        throw lem::E_BaseException();
    }

    // Rule body.
    txtfile.read_it( B_OFIGPAREN );

    // Condition part: if context ...
    txtfile.read_it( B_IF );
    txtfile.read_it( B_CONTEXT );
    condition.LoadTxt( txtfile, dict );

    // Result part: then { context ... }
    txtfile.read_it( B_THEN );
    txtfile.read_it( B_OFIGPAREN );
    txtfile.read_it( B_CONTEXT );
    result.LoadTxt( txtfile, dict, condition );

    txtfile.read_it( B_CFIGPAREN ); // closes the then { ... } block
    txtfile.read_it( B_CFIGPAREN ); // closes the rule body

    return;
}
void PatternConstraint::LoadTxt( Dictionary &dict, lem::Iridium::Macro_Parser & txtfile, SynPatternCompilation & compilation_context ) { lem::Iridium::BethToken marker_name = txtfile.read(); if( compilation_context.Find(marker_name)==UNKNOWN ) { dict.GetIO().merr().printf( "Marker [%us] is not declared in this pattern", marker_name.c_str() ); lem::Iridium::Print_Error(marker_name,txtfile); throw lem::E_BaseException(); } // --------------------------- from_marker = marker_name.string(); from_marker.to_upper(); // --------------------------- txtfile.read_it( B_COLON ); lem::Iridium::BethToken coord_name1 = txtfile.read(); //if( for_group ) txtfile.read_it( B_CSPAREN ); Solarix::GramCoordAdr iglob_coord1 = dict.GetSynGram().FindCoord(coord_name1.string()); if( !iglob_coord1.IsDefined() ) { dict.GetSynGram().GetIO().merr().printf( "Unknown coordinate %us\n", coord_name1.c_str() ); lem::Iridium::Print_Error(coord_name1,txtfile); throw lem::E_BaseException(); } from_coord_id = iglob_coord1.GetIndex(); // --------------------------- lem::Iridium::BethToken func = txtfile.read(); if( func.GetToken()==B_EQUAL ) constraint_func = EqualFunc; else if( func.GetToken()==B_LOGNE ) constraint_func = NotEqualFunc; else { dict.GetIO().merr().printf( "Unknown constraint [%us]", func.string().c_str() ); lem::Iridium::Print_Error(func,txtfile); throw lem::E_BaseException(); } marker_name = txtfile.read(); if( compilation_context.Find(marker_name)==UNKNOWN ) { dict.GetIO().merr().printf( "Marker [%us] is not declared in this pattern", marker_name.c_str() ); lem::Iridium::Print_Error(marker_name,txtfile); throw lem::E_BaseException(); } to_marker = marker_name.string(); to_marker.to_upper(); // --------------------------- txtfile.read_it( B_COLON ); lem::Iridium::BethToken coord_name2 = txtfile.read(); //if( for_group ) txtfile.read_it( B_CSPAREN ); Solarix::GramCoordAdr iglob_coord2 = dict.GetSynGram().FindCoord(coord_name2.string()); if( !iglob_coord2.IsDefined() ) { 
dict.GetSynGram().GetIO().merr().printf( "Unknown coordinate %us\n", coord_name2.c_str() ); lem::Iridium::Print_Error(coord_name2,txtfile); throw lem::E_BaseException(); } to_coord_id = iglob_coord2.GetIndex(); return; }
// Parses an export section:
//
//   { item item ... }
//
// where each item is either a coordinate name or `node : NAME`, and may be
// wrapped in round parentheses. A parenthesized item is recorded with flag 1
// in the parallel null_export_* list, a bare one with flag 0.
//
// Node names are stored upper-cased in export_nodes; coordinates are stored
// by index in export_coords. Unknown coordinates and repeated nodes or
// coordinates print a message plus source location and throw
// lem::E_BaseException.
void SynPatternExport::LoadTxt( Dictionary &dict, lem::Iridium::Macro_Parser & txtfile )
{
    txtfile.read_it( B_OFIGPAREN );

    for( ; !txtfile.eof(); )
    {
        // An opening parenthesis marks the item as a "null export".
        const bool in_parens = txtfile.probe( B_OROUNDPAREN );
        const int null_flag = in_parens ? 1 : 0;

        lem::Iridium::BethToken item = txtfile.read();
        if( item.GetToken()==B_CFIGPAREN )
            break;

        if( item.string().eqi( L"node" ) )
        {
            // node : NAME
            txtfile.read_it( B_COLON );
            lem::Iridium::BethToken node_token = txtfile.read();

            lem::UCString node_name = node_token.string();
            node_name.to_upper();

            if( export_nodes.find(node_name)!=UNKNOWN )
            {
                dict.GetIO().merr().printf( "Wordform %us is already mentioned in export section\n", node_token.string().c_str() );
                lem::Iridium::Print_Error(node_token,txtfile);
                throw lem::E_BaseException();
            }

            export_nodes.push_back( node_name );
            null_export_nodes.push_back( null_flag );
        }
        else
        {
            // Plain coordinate name.
            const GramCoordAdr coord = dict.GetSynGram().FindCoord(item.string());

            if( !coord.IsDefined() )
            {
                dict.GetIO().merr().printf( "Unknown coordinate %us\n", item.c_str() );
                lem::Iridium::Print_Error(item,txtfile);
                throw lem::E_BaseException();
            }

            if( export_coords.find( coord.GetIndex() )!=UNKNOWN )
            {
                dict.GetIO().merr().printf( "Coordinate %us is already mentioned in export section\n", item.c_str() );
                lem::Iridium::Print_Error(item,txtfile);
                throw lem::E_BaseException();
            }

            export_coords.push_back(coord.GetIndex());
            null_export_coords.push_back( null_flag );
        }

        // Consume the parenthesis that closes a parenthesized item.
        if( in_parens )
            txtfile.read_it( B_CROUNDPAREN );
    }

    return;
}
void LEMM_Compiler::LoadTxt( lem::Iridium::Macro_Parser & txtfile, Dictionary & dict ) { txtfile.read_it( B_OFIGPAREN ); while( !txtfile.eof() ) { lem::Iridium::BethToken t = txtfile.read(); if( t.eqi( L"suffix" ) ) { int id = txtfile.read_int(); lem::UCString suffix = txtfile.read(); if( suffix[0]==L'"' && suffix.back()==L'"' ) suffix.strip_quotes(); suffices.push_back( std::make_pair(id,suffix) ); } else if( t.eqi( L"word" ) ) { int id = txtfile.read_int(); lem::UCString word = txtfile.read(); if( word[0]==L'"' && word.back()==L'"' ) word.strip_quotes(); words.push_back( std::make_pair(id,word) ); } else if( t.eqi(L"ngram2") ) { lem::MCollect<int> terms; LoadNGram(txtfile,dict,terms,2); LEMM_Ngram2 n2; n2.tags.first = terms[0]; n2.tags.second = terms[1]; n2.freq = terms[2]; ngram2.push_back(n2); } else if( t.eqi(L"ngram2_1") ) { lem::MCollect<int> terms; LoadNGram(txtfile,dict,terms,2); LEMM_Ngram2 n2; n2.tags.first = terms[0]; n2.tags.second = terms[1]; n2.freq = terms[2]; ngram2_1.push_back(n2); } else if( t.eqi(L"ngram3") ) { lem::MCollect<int> terms; LoadNGram(txtfile,dict,terms,3); LEMM_Ngram3 n3; n3.tags.first = terms[0]; n3.tags.second = terms[1]; n3.tags.third = terms[2]; n3.freq = terms[3]; ngram3.push_back(n3); } else if( t.eqi(L"ngram3_1") ) { lem::MCollect<int> terms; LoadNGram(txtfile,dict,terms,3); LEMM_Ngram3 n3; n3.tags.first = terms[0]; n3.tags.second = terms[1]; n3.tags.third = terms[2]; n3.freq = terms[3]; ngram3_1.push_back(n3); } else if( t.eqi(L"ngram3_2") ) { lem::MCollect<int> terms; LoadNGram(txtfile,dict,terms,3); LEMM_Ngram3 n3; n3.tags.first = terms[0]; n3.tags.second = terms[1]; n3.tags.third = terms[2]; n3.freq = terms[3]; ngram3_2.push_back(n3); } else if( t.eqi(L"ngram4") ) { lem::MCollect<int> terms; LoadNGram(txtfile,dict,terms,4); LEMM_Ngram4 n4; n4.tags.first = terms[0]; n4.tags.second = terms[1]; n4.tags.third = terms[2]; n4.tags.fourth = terms[3]; n4.freq = terms[4]; ngram4.push_back(n4); } else if( t.eqi( L"suffix_len" ) ) { 
suffix_len = txtfile.read_int(); } else if( t.GetToken()==B_CFIGPAREN ) break; else { dict.GetIO().merr().printf( "%vfDInvalid statement [%us]%vn\n", t.string().c_str() ); lem::Iridium::Print_Error( t, txtfile ); throw lem::E_ParserError(); } } return; }
void SynPattern::LoadTxt( Dictionary &dict, lem::Iridium::Macro_Parser & txtfile, const SynPatterns &patterns, WordEntrySet &wordentry_set, const TrProcedureDeclaration &procs, TrFunctions &functions ) { lem::Iridium::BSourceState pattern_beginning = txtfile.tellp(); id_src = dict.GetDebugSymbols().RegisterLocation( txtfile, txtfile.tellp() ); if( dict.GetDebugLevel_ir()>=3 ) { dict.GetIO().mecho().printf( "pattern " ); } // ќпционально могут быть заданы целевой ¤зык и опции. while( !txtfile.eof() ) { if( txtfile.probe( B_OFIGPAREN ) ) break; if( txtfile.probe( B_LANGUAGE ) ) { txtfile.read_it( B_EQUAL ); lem::Iridium::BethToken lang = txtfile.read(); id_language = dict.GetSynGram().Find_Language(lang.string()); if( id_language==UNKNOWN ) { lem::Iridium::Print_Error(lang,txtfile); dict.GetIO().merr().printf( "Unknown language name %us\n", lang.c_str() ); throw lem::E_BaseException(); } } else if( txtfile.probe( L"incomplete" ) ) { incomplete=true; } else { lem::Iridium::BethToken tname = txtfile.read(); name = tname.string(); if( dict.GetDebugLevel_ir()>=3 ) { dict.GetIO().mecho().printf( "%vfE%us%vn ", name.c_str() ); } if( !patterns.IsPatternName(name) ) { dict.GetIO().merr().printf( "Patterns group [%us] is not declared\n", name.c_str() ); lem::Iridium::Print_Error(tname,txtfile); throw lem::E_BaseException(); } const SynPatternOptions & group_options = patterns.GetOptions(name); id_language = group_options.GetLanguageId(); // —екци¤ export { ... } содержит объ¤влени¤ координат, которые паттерн выдает наружу // —начала попробуем вз¤ть содержимое экспорта по умолчанию, зарегистрированное в объ¤влении // группы паттернов. 
if( txtfile.probe(L"export") ) { export_info.LoadTxt( dict, txtfile ); } else { const SynPatternOptions & p_options = patterns.GetOptions(name); export_info = p_options.GetExport(); } export_info.RegisterExport( *compilation_context ); if( txtfile.probe( B_LANGUAGE ) ) { txtfile.read_it( B_EQUAL ); lem::Iridium::BethToken lang = txtfile.read(); id_language = dict.GetSynGram().Find_Language(lang.string()); if( id_language==UNKNOWN ) { lem::Iridium::Print_Error(lang,txtfile); dict.GetIO().merr().printf( "Unknown language name %us\n", lang.c_str() ); throw lem::E_BaseException(); } } txtfile.read_it( B_OFIGPAREN ); break; } } // —писок опорных точек в фигурных скобочках lem::Iridium::BSourceState beg = txtfile.tellp(); compilation_context->SetName( name ); compilation_context->Set(&wordentry_set); while( !txtfile.eof() ) { if( txtfile.pick().GetToken()==B_CFIGPAREN ) { txtfile.read(); break; } SlotProperties slot; slot.LoadTxt( dict, txtfile ); SynPatternPoint *p = new SynPatternPoint; p->LoadTxt( dict, txtfile, patterns, *compilation_context, procs, functions ); points.push_back(p); slots.push_back(slot); compilation_context->BeforeNextPointCompilation(); } if( points.empty() ) { lem::Iridium::Print_Error(beg,txtfile); dict.GetIO().merr().printf("Pattern must not be empty\n" ); throw E_Solarix(); } if( !compilation_context->PatternHasBeenCompiled(dict.GetSynGram()) ) { lem::Iridium::Print_Error(pattern_beginning,txtfile); dict.GetIO().merr().printf("Some export items are not actually exported\n" ); throw E_Solarix(); } points.back()->Terminator(); bool links_loaded=false, ngrams_loaded=false, predicates_loaded=false, constraints_loaded=false; while( !txtfile.eof() ) if( txtfile.probe(B_COLON) ) { lem::Iridium::BSourceState section_beg = txtfile.tellp(); if( txtfile.probe( L"links" ) ) { if( links_loaded ) { lem::Iridium::Print_Error(section_beg,txtfile); dict.GetIO().merr().printf("Redefinition of 'links'\n" ); throw E_Solarix(); } LoadLinks( dict, txtfile, 
*compilation_context ); links_loaded=true; } else if( txtfile.probe( L"ngrams" ) ) { if( ngrams_loaded ) { lem::Iridium::Print_Error(section_beg,txtfile); dict.GetIO().merr().printf("Redefinition of 'ngrams'\n" ); throw E_Solarix(); } LoadNGrams( dict, txtfile, *compilation_context ); ngrams_loaded=true; } else if( txtfile.probe( L"predicates" ) ) { if( predicates_loaded ) { lem::Iridium::Print_Error(section_beg,txtfile); dict.GetIO().merr().printf("Redefinition of 'predicates'\n" ); throw E_Solarix(); } LoadPredicates( dict, txtfile, *compilation_context ); predicates_loaded=true; } /* else if( sparse && txtfile.probe( L"constraints" ) ) { if( constraints_loaded ) { lem::Iridium::Print_Error(section_beg,txtfile); dict.GetIO().merr().printf("Redefinition of 'constraints'\n" ); throw E_Solarix(); } LoadConstraints( dict, txtfile, *compilation_context ); constraints_loaded=true; }*/ else { lem::Iridium::Print_Error(txtfile); dict.GetIO().merr().printf("Unexpected token\n" ); throw E_Solarix(); } } else { break; } // ќпорные точки могут теперь выполнить внутренние оптимизации, в частности - учесть использование // маркировок и улучшить эффективность директивы @mark() for( lem::Container::size_type i=0; i<points.size(); ++i ) points[i]->OptimizeAfterCompilation( *compilation_context ); // Ѕезым¤нные паттерны, то есть правила самого верхнего уровн¤, должны быть прив¤заны к ¤зыку с помощью директивы { language=XXX } if( id_language==UNKNOWN && name.empty() ) { lem::Iridium::Print_Error(pattern_beginning,txtfile); dict.GetIO().merr().printf("Pattern must be bound to a language\n" ); throw E_Solarix(); } if( dict.GetDebugLevel_ir()>=3 ) { dict.GetIO().mecho().printf( "%vfAOK%vn\n" ); } return; }