void BinaryFile::OpenDiskFile( const char *amode, const wchar_t *umode, bool do_throw ) { if( lem::System_Config::SupportUnicodeFilenames() ) { #if defined LEM_WINDOWS const UFString n = GetName().GetUnicode(); // У стандартной CRT Borland CBuilder 6.0 проблема - не может // открыть файл "nul" через UNICODE-функцию. if( n==L"nul" ) file=/*std::*/::fopen( "nul", amode ); else #if defined LEM_BORLAND file=/*std::*/::fopen( GetName().GetAscii().c_str(), amode ); #else file=_wfopen( n.c_str(), umode ); #endif #elif defined LEM_UNIX // перекодируем в utf-8 if( GetName().IsAscii() ) { file=::fopen( GetName().GetAscii().c_str(), amode ); } else { file=::fopen( lem::to_utf8(GetName().GetUnicode()).c_str(), amode ); } #endif } else { file=/*std::*/::fopen( GetName().GetAscii().c_str(), amode ); } if( file==NULL ) { if( do_throw ) { //printf( "File error: %s\n", filename.Get_Ascii().c_str() ); lem::UFString fn( GetName().GetUnicode() ); throw E_BaseException( wstring( L"File open error: " )+fn.c_str() ); } } #if LEM_DEBUGGING==1 n_opened++; #endif // check_file(file,filename); closable=true; IFDEBUG(Assert()); return; }
// Reads the optional trailing clauses of a declension form:
//
//   :: flexer regular_expression for regular_expression
//
// Tokens are read until something other than B_FOR / B_FLEXER is met; the
// parser is then repositioned to that token with seekp() and reading stops.
//
//   gram    - grammar used to look up the class and language of the table.
//   table   - declension table that owns this form; its class selects the language.
//   txtfile - token source.
//
// The B_FOR clause fills condition_str / condition / valid_condition, the
// B_FLEXER clause fills flexer_flags_str / flexer_flags / valid_flexer_flags.
// Both regular expressions are compiled case-insensitively.
void SG_DeclensionForm::ReadAdditionalInfo( Grammar &gram, const SG_DeclensionTable &table, Macro_Parser& txtfile )
{
 while( !txtfile.eof() )
  {
   BethToken tok = txtfile.read();

   if( tok==B_FOR )
    {
     UFString pattern = strip_quotes( txtfile.read().string() ).c_str();

     if( table.GetClass()!=UNKNOWN )
      {
       const SG_Class &owner_class = (const SG_Class&)gram.classes()[ table.GetClass() ];
       const int language_id = owner_class.GetLanguage();

       if( language_id!=UNKNOWN )
        {
         // Let the language rewrite the pattern before it is compiled.
         const SG_Language &language = gram.GetDict().GetSynGram().languages()[language_id];
         language.SubstParadigmPattern(pattern);
        }
      }

     condition_str = pattern;
     condition = boost::wregex( pattern.c_str(), boost::basic_regex<wchar_t>::icase );
     valid_condition = true;
     continue;
    }

   if( tok==B_FLEXER )
    {
     UFString pattern = strip_quotes( txtfile.read().string() ).c_str();

     flexer_flags_str = pattern;
     flexer_flags = boost::wregex( pattern.c_str(), boost::basic_regex<wchar_t>::icase );
     valid_flexer_flags = true;
     continue;
    }

   // Not one of our clauses: return to this token and finish.
   txtfile.seekp(tok);
   break;
  }
}
// Converts a wide string to a single-byte FString.
//
//   str - source string; an empty source produces a default-constructed FString.
//   _cp - converter to use; when NULL, the session code page obtained from
//         lem::UI::get_UI() is used instead.
//
// The output buffer is sized by the converter's own estimate for length+1
// characters and is obtained from FString::Alloc.
const FString lem::to_ascii( const UFString &str, const CodeConverter *_cp )
{
 if( str.empty() )
  return FString();

 const CodeConverter *converter = _cp;
 if( !converter )
  converter = &lem::UI::get_UI().GetSessionCp();

 const int wide_len = str.length();
 const int buffer_size = converter->EstimateAsciiLen( wide_len+1 );

 char *buffer = FString::Alloc( buffer_size );
 converter->to_ascii( str.c_str(), buffer );

 // NOTE(review): the `true` flag presumably hands the buffer over to the
 // returned FString - confirm against the FString constructor.
 return FString( buffer, true );
}
bool SG_DeclensionForm::MatchCondition( const UCString &str, const UFString &entry_flexer_flags ) const { #if !defined FAIND_NO_BOOST_REGEX if( (!valid_condition || condition.empty()) && (!valid_flexer_flags || flexer_flags.empty()) ) return true; if( valid_condition && !boost::regex_match( str.c_str(), condition ) ) return false; if( valid_flexer_flags && !boost::regex_search( entry_flexer_flags.c_str(), flexer_flags ) ) return false; return true; #else return false; #endif }
// **************************************** // Entering the command processing loop. // **************************************** void SyntaxShell::main_loop(void) { // Если язык по умолчанию не задан, то запросим его имя с консоли. if( default_language==UNKNOWN && !guess_language ) { lem::MCollect<int> langs; default_language = sol_id->GetLanguages(langs); if( default_language==UNKNOWN || langs.size()>1 ) { while(true) { mout->printf( "Please select the language:\n" ); if( langs.empty() ) { lem::Ptr<LanguageEnumerator> lenum( sol_id->GetSynGram().languages().Enumerate() ); while( lenum->Fetch() ) langs.push_back( lenum->GetId() ); } for( lem::Container::size_type i=0; i<langs.size(); ++i ) { mout->printf( "[%vfA%d%vn] - %vfE%us%vn\n", i, sol_id->GetSynGram().languages()[langs[i]].GetName().c_str() ); } mout->printf( "%vfA-1%vn - do not set default language for syntax analysis\n\n?" ); int ilang = mkey->ask_int(); if( ilang==UNKNOWN ) { default_language = UNKNOWN; break; } if( ilang>=0 ) { default_language = langs[ilang]; break; } } } } mout->eol(); #if LEM_DEBUGGING==1 //_CrtMemState ms1,ms2,ms3; #endif int ipass=0; for(;;ipass++) { UFString str; if( !pre_entered_phrase.empty() ) { str = pre_entered_phrase; pre_entered_phrase.clear(); mout->printf( "\n> %us\n", str.c_str() ); } else { str = enter_cmd( debugger.IsNull() ? ": " : ":> " ); } if( str==L"#exit" ) break; if( TryCommand(str) ) continue; if( run_mode==TokenizerMode ) { Tokenize(str); } else if( run_mode==LemmatizerMode ) { Lemmatize(str); } else if( run_mode==SpeakerMode ) { Speak(str); } else { lem::ElapsedTime total_et; total_et.start(); PerformSyntacticAnalysis(str); total_et.stop(); const int msec_elapsed = total_et.msec(); if( traceon ) lem::mout->printf( "Elapsed time: %d millisec\n", msec_elapsed ); } } return; }
void GraphGram::Save_SQL( OFormatter &out, OFormatter &alters, const SQL_Production &sql_version ) { if (sql_version.type == SQL_Production::MsSql) { out.printf("!! @echo Creating alphabets...\n"); } else if (sql_version.type == SQL_Production::Oracle) { out.printf("HOST echo Creating alphabets...\n"); } out.printf("%s\n", sql_version.BeginTx().c_str()); const wchar_t* NPrefix = sql_version.GetNPrefix(); // ПАРАМЕТРЫ -> SG_CRITERION std::unique_ptr<CriterionEnumerator> crenum(param->Enumerate()); while (crenum->Fetch()) { const int id = crenum->GetId(); const Criterion& x = crenum->GetItem(); lem::UFString name(sql_version.SqlStr(x.GetName())); lem::UFString strval(sql_version.SqlStr(x.GetString())); out.printf( "INSERT INTO sg_criterion( id, name, strval ) VALUES( %d, %us'%us', %us'%us' );\n" , id , NPrefix , name.c_str() , NPrefix , strval.c_str() ); } out.eol(); // АЛФАВИТЫ std::unique_ptr<AlphabetEnumerator> aenum(alphabets().List()); while (aenum->Fetch()) { const Alphabet &alphabet = aenum->GetItem(); const int id = aenum->GetId(); lem::UFString aname(sql_version.SqlStr(alphabet.get_Name())); out.printf( "INSERT INTO abc_alphabet( id, name ) VALUES( %d, %us'%us' );\n" , id , NPrefix , aname.c_str() ); } out.eol(); // КООРДИНАТЫ И СОСТОЯНИЯ std::unique_ptr<CoordEnumerator> cenum(coords().Enumerate()); while (cenum->Fetch()) { const int id_coord = cenum->GetId(); const GramCoord &c = cenum->GetItem(); out.printf( "INSERT INTO abc_coord( id, name, bistable ) VALUES ( %d, %us'%us', %d );\n", id_coord, NPrefix, c.GetName().front().c_str(), c.IsBistable() ? 
1 : 0 ); int istate = 0; if (c.IsBistable()) { // Все-таки объявим в явном виде два состояния для бистабильных координат, чтобы // в реляционной схеме можно было создавать foreight key с таблиц SG_ENTRY_COORD и SG_FORM_COORD out.printf("INSERT INTO abc_state( id, id_coord, name ) VALUES ( 0, %d, '0' );\n", id_coord); out.printf("INSERT INTO abc_state( id, id_coord, name ) VALUES ( 1, %d, '1' );\n", id_coord); } else { for (lem::Container::size_type j = 0; j < c.states().size(); ++j) { const GramCoordState & s = c.states()[j]; const int id_parent = istate; for (lem::Container::size_type k = 0; k < s.size(); ++k, ++istate) { out.printf("INSERT INTO abc_state( id, id_coord, name ) VALUES ( %d, %d, %us'%us' );\n", istate, id_coord, NPrefix, s[k].c_str()); } } } } // КЛАССЫ -> ABC_CLASS std::unique_ptr<SymbolClassEnumerator> class_enum((SymbolClassEnumerator*)classes().Enumerate()); while (class_enum->Fetch()) { const int id = class_enum->GetId(); const GramClass & c = class_enum->GetItem(); out.printf( "INSERT INTO abc_class( id, name ) VALUES( %d, %us'%us' );\n" , id , NPrefix , sql_version.SqlStr(c.GetName()).c_str() ); // Сохраним в базе информацию о привязке координат (атрибутов, измерений, тэгов) к классам. 
for (lem::Container::size_type k = 0; k < c.attrs().size(); ++k) { const GramCoordAdr &atr = c.attrs()[k]; out.printf( "INSERT INTO abc_class_coord( id_class, id_coord, coord_type ) VALUES( %d, %d, 0 );\n" , id , atr.GetIndex() ); } for (lem::Container::size_type k = 0; k < c.dims().size(); ++k) { const GramCoordAdr &dim = c.dims()[k]; out.printf( "INSERT INTO abc_class_coord( id_class, id_coord, coord_type ) VALUES( %d, %d, 1 );\n" , id , dim.GetIndex() ); } } out.eol(); // ********************** // СТАТЬИ -> ABC_ENTRY // ********************** ABC_CoordPairsList coords_ref; std::unique_ptr<SymbolEnumerator> senum(entries().Enumerate()); int id_form = 0; while (senum->Fetch()) { const GG_Entry &e = senum->GetItem(); // некоторые версии MSSQL ругаются на unicode-символы с кодами более 2^16. // поэтому для этой СУБД не будем их реально загружать, но чтобы отсутствие этих // символов не вызывало удивления - впечатаем закомментированные операторы DML. bool wrap_in_comment = false; if (e.GetName() > 0x0000ffffU) { if (sql_version.type == SQL_Production::MsSql || sql_version.type == SQL_Production::Oracle || sql_version.type == SQL_Production::Postgres) { wrap_in_comment = true; } } const int id_pairs = coords_ref.Register(e.attrs()); const int id_entry = senum->GetId(); UFString s = lem::UFString(e.GetNameWide().c_str()); if (wrap_in_comment) out.printf("%s", sql_version.Get_Comment().c_str()); if (sql_version.type == SQL_Production::Oracle) { UFString s2 = SQL_Production::Oracle_UNISTR(s); out.printf( "INSERT INTO abc_entry( id, name, code, id_class, id_alphabet, id_pairs ) VALUES( %d, %us, %d, %d, %d, %d );\n" , id_entry , s2.c_str() , e.GetName() , e.GetClass() , e.GetAlphabet() , id_pairs ); } else { s = sql_version.ClearInvalidChars(s); s = sql_version.SqlStr(s); out.printf( "INSERT INTO abc_entry( id, name, code, id_class, id_alphabet, id_pairs ) VALUES( %d, %us'%us', %d, %d, %d, %d );\n" , id_entry , NPrefix , s.c_str() , e.GetName() , e.GetClass() , 
e.GetAlphabet() , id_pairs ); } for (Container::size_type j = 0; j < e.forms().size(); j++) { const GG_EntryForm &form = e.forms()[j]; lem::UFString fs(form.GetNameWide().c_str()); const int id_dims = coords_ref.Register(form.dims()); if (wrap_in_comment) out.printf("%s", sql_version.Get_Comment().c_str()); if (sql_version.type == SQL_Production::Oracle) { UFString s2 = SQL_Production::Oracle_UNISTR(fs); out.printf( "INSERT INTO abc_form( id, id_entry, ordnum, name, code, id_pairs )" " VALUES( %d, %d, %d, %us, %d, %d );\n" , id_form++ , id_entry , CastSizeToInt(j) , s2.c_str() , form.GetName() , id_dims ); } else { fs = sql_version.ClearInvalidChars(fs); fs = sql_version.SqlStr(fs); out.printf( "INSERT INTO abc_form( id, id_entry, ordnum, name, code, id_pairs )" " VALUES( %d, %d, %d, %us'%us', %d, %d );\n" , id_form++ , id_entry , CastSizeToInt(j) , NPrefix , fs.c_str() , form.GetName() , id_dims ); } } } if (!sql_version.norules) { // Правила слогоделителя std::unique_ptr<LS_ResultSet> rs_slb1(storage->ListSyllabRules()); while (rs_slb1->Fetch()) { const int id = rs_slb1->GetInt(0); lem::UCString name = rs_slb1->GetUCString(1); const int id_src = rs_slb1->GetInt(2); const int id_language = rs_slb1->GetInt(3); const int cursor_shift = rs_slb1->GetInt(4); out.printf("INSERT INTO slb_rule( id, name, id_src, id_language, cursor_shift ) VALUES ( %d, '%us', %d, %d, %d );\n", id, name.c_str(), id_src, id_language, cursor_shift); } rs_slb1.reset(); out.eol(); std::unique_ptr<LS_ResultSet> rs_slb2(storage->ListSyllabConditionPoints()); while (rs_slb2->Fetch()) { const int id = rs_slb2->GetInt(0); const int id_rule = rs_slb2->GetInt(1); const int point_index = rs_slb2->GetInt(2); const int n_char = rs_slb2->GetInt(3); lem::UFString char_text = rs_slb2->GetUFString(4); lem::UFString char_ucs4 = rs_slb2->GetUFString(5); const int id_class = rs_slb2->GetInt(6); const int id_entry = rs_slb2->GetInt(7); const int n_coord = rs_slb2->GetInt(8); const int id_coord0 = 
rs_slb2->GetInt(9); const int id_state0 = rs_slb2->GetInt(10); const int is_left_boundary = rs_slb2->GetInt(11); const int is_right_boundary = rs_slb2->GetInt(12); const int is_positive = rs_slb2->GetInt(13); out.printf("INSERT INTO slb_condition_point( id, id_rule, point_index, n_char, char_text, char_ucs4," " id_class, id_entry, id_coord0, id_state0, is_left_boundary," " is_right_boundary, n_coord, is_positive ) VALUES (" " %d, %d, %d, %d, '%us', '%us'," " %d, %d, %d, %d, %d," " %d, %d, %d );\n", id, id_rule, point_index, n_char, char_text.empty() ? L"" : char_text.c_str(), char_ucs4.empty() ? L"" : char_ucs4.c_str(), id_class, id_entry, id_coord0, id_state0, is_left_boundary, is_right_boundary, n_coord, is_positive ); } rs_slb2.reset(); out.eol(); std::unique_ptr<LS_ResultSet> rs_slb3(storage->ListSyllabResultPoints()); while (rs_slb3->Fetch()) { const int id = rs_slb3->GetInt(0); const int id_rule = rs_slb3->GetInt(1); const int point_index = rs_slb3->GetInt(2); const int copy_index = rs_slb3->GetInt(3); const int merge_index0 = rs_slb3->GetInt(4); const int merge_count = rs_slb3->GetInt(5); out.printf("INSERT INTO slb_result_point( id, id_rule, point_index, copy_index, merge_index0, merge_count )" " VALUES ( %d, %d, %d, %d, %d, %d );\n", id, id_rule, point_index, copy_index, merge_index0, merge_count); } rs_slb3.reset(); out.eol(); out.eol(); out.printf("%s\n", sql_version.CommitTx().c_str()); out.printf("%s\n", sql_version.BeginTx().c_str()); } coords_ref.SaveSQL(out, sql_version); out.eol(); out.printf("%s\n", sql_version.CommitTx().c_str()); if (sql_version.type == SQL_Production::MsSql) { out.printf("!! @echo Alphabets have been loaded.\n"); } else if (sql_version.type == SQL_Production::Oracle) { out.printf("HOST echo Alphabets have been loaded.\n"); } return; }