C++ (Cpp) SG_calibrator::GetWord Examples

Programming Language: C++ (Cpp)

Class/Type: SG_calibrator

Method/Function: GetWord

Examples at hotexamples.com: 2

C++ (Cpp) SG_calibrator::GetWord - 2 examples found. These are the top rated real world C++ (Cpp) examples of SG_calibrator::GetWord extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

GetClass(2)

GetFreq(2)

GetWord(2)

LoadBin(2)

MatchCoords(2)

IsWordEntry(1)

IsWordEntryFreq(1)

IsWordForm(1)

IsWordFormScore(1)

IsWordFormsScore(1)

Example #1

Show file

File: sg_preparing.cpp Project: mcdir/GrammarEngine

/***********************************************************************
   Finishing step that runs once the contents of the Dictionary's text
 file have been loaded; the Dictionary module invokes it automatically.
 It performs the operations that make the fast algorithm for projecting
 multilexems onto the Lexicon possible: the auxiliary information
 structures are created and stored here, so that wordform lookups do
 not have to rebuild them again and again. Only after this procedure
 has completed is the Lexicon ready to be used by the other automata.
***********************************************************************/
void SynGram::Prepare( const lem::Path &outdir, const Binarization_Options &opts )
{
  Grammar::Prepare( outdir, opts );

  word_entries->CommitCompilation( GetIO().merr() );
  storage->CommitCompilation();

  Generate_Links( opts );
  Write_Declarations( outdir );

  // Pass every collected CplxLeft record on to the storage.
  typedef std::map<UCString,CplxLeft*>::iterator CplxIter;
  for( CplxIter cur=cplx_map.begin(); cur!=cplx_map.end(); ++cur )
  {
    const CplxLeft *left = cur->second;
    GetStorage().StoreCplxLeft( left->headword, left->minlen, left->maxlen );
  }

  // The rest of the procedure initialises the frequencies of word entries
  // and wordforms; there is nothing to do when no frequency file is set.
  if( !wordfreq_filename.NotNull() )
    return;

  wordfreq_file.Delete();

  lem::BinaryReader rdr( *wordfreq_filename );

  // Progress reporting switch (always on).
  const bool announce = true;

  if( announce )
  {
    // This is a lengthy operation, so tell the user what is going on.
    GetIO().mecho().printf( "%vfESG%vn: updating word frequencies..." );
    GetIO().mecho().flush();
  }

  // Projection results; the containers are reused for every record.
  MCollect<Word_Coord> hits;
  MCollect<ProjScore> scores;
  PtrCollect<LA_ProjectInfo> infos;
  LexicalAutomat &la = GetDict().GetLexAuto();

  while( !rdr.eof() )
  {
    SG_calibrator rule;
    rule.LoadBin( rdr );

    // If the reader reports end-of-file after the load attempt,
    // the record is not processed.
    if( rdr.eof() )
      break;

    Lexem word( rule.GetWord() );

    // When the rule names a grammatical class, the language id is taken from it.
    int lang = UNKNOWN;
    if( rule.GetClass()!=UNKNOWN )
    {
      const Solarix::SG_Class &part_of_speech = GetClass( rule.GetClass() );
      lang = part_of_speech.GetLanguage();
    }

    la.TranslateLexem( word, true, lang );

    RC_Lexem word_ref( &word, null_deleter() );

    hits.clear();
    scores.clear();
    infos.clear();
    la.ProjectWord( word_ref, hits, scores, infos, LexicalAutomat::Wordforms, 0, lang, NULL );

    if( rule.IsWordEntry() )
    {
      // Each projected entry is visited once, even if several of its forms matched.
      lem::MCollect<int> seen;
      for( lem::Container::size_type k=0; k<hits.size(); ++k )
      {
        const int id_entry = hits[k].GetEntry();
        if( seen.find( id_entry )!=UNKNOWN )
          continue;

        seen.push_back( id_entry );
        SG_Entry &entry = GetEntries().GetEntryForChange( id_entry );

        // Set the frequency for the whole word entry, provided the rule
        // either names no class or names the class of this entry.
        if( rule.GetClass()==UNKNOWN || rule.GetClass()==entry.GetClass() )
          entry.UpdateFreq( rule.GetFreq() );
      }
    }
    else if( rule.IsWordForm() )
    {
      if( hits.empty() )
      {
        GetIO().mecho().printf( "\nUnknown word [%us] in word_frequency rule\n", rule.GetWord().c_str() );
        throw lem::E_BaseException();
      }

      for( lem::Container::size_type k=0; k<hits.size(); ++k )
      {
        const int id_entry = hits[k].GetEntry();
        const SG_Entry &entry = GetEntry( id_entry );

        const bool class_fits = rule.GetClass()==UNKNOWN || rule.GetClass()==entry.GetClass();
        if( !class_fits )
          continue;

        // Only forms whose coordinates satisfy the rule receive the frequency.
        const int id_form = hits[k].GetForm();
        const SG_EntryForm &form = entry.forms()[ id_form ];
        if( rule.MatchCoords( form.coords() ) )
          GetStorage().SetWordformFrequency( id_entry, id_form, rule.GetFreq() );
      }
    }
    else
    {
      // A record that is neither an entry rule nor a wordform rule is not expected.
      LEM_STOPIT;
    }
  }

  rdr.close();
  wordfreq_filename->DoRemove();

  if( announce )
    GetIO().mecho().printf( "%vfAOK%vn\n" );
}

Example #2

Show file

File: sg_preparing.cpp Project: Koziev/GrammarEngine

/***********************************************************************
   Preparation for work after the contents of the Dictionary's text
 file have been loaded. The Dictionary module calls this procedure
 automatically. It performs the operations that make the fast algorithm
 for projecting multilexems onto the Lexicon possible: the auxiliary
 information structures are created and stored here, so that wordform
 lookups do not have to rebuild them again and again. Only after this
 procedure has completed is the Lexicon ready to be used by the other
 automata.

   outdir - forwarded to Grammar::Prepare and Write_Declarations.
   opts   - forwarded to Grammar::Prepare and Generate_Links.

   Throws lem::E_BaseException when a wordform score record names a word
 for which the projection returns no results.
***********************************************************************/
void SynGram::Prepare( const lem::Path &outdir, const Binarization_Options &opts )
{
 Grammar::Prepare(outdir,opts);

 word_entries->CommitCompilation( GetIO().merr() );
 storage->CommitCompilation();

 Generate_Links(opts);


 Write_Declarations(outdir);

 // Pass every collected CplxLeft record on to the storage.
 for( std::map<UCString,CplxLeft*>::iterator it=cplx_map.begin(); it!=cplx_map.end(); ++it )
  {
   const CplxLeft &x = * it->second;
   GetStorage().StoreCplxLeft(x.headword,x.minlen,x.maxlen);
  }


 // Initialise the frequencies of word entries and wordforms.

 if( wordfreq_filename.NotNull() )
  {
   // We need to find word entries quickly by their lemmas. The index is
   // only consulted while frequency records are processed, so it is built
   // here rather than unconditionally: without a frequency file the scan
   // over all entries would be wasted work.
   // NOTE(review): assumes enumerating entries has no side effects that
   // the rest of the compilation relies on - confirm.
   typedef std::multimap<lem::UCString,int> Lemma2Id;
   typedef Lemma2Id::const_iterator L2E_CIT;

   Lemma2Id lemma2id;

   lem::Ptr<WordEntryEnumerator> wenum( word_entries->ListEntries() );
   while( wenum->Fetch() )
    {
     // Keys are upper-cased, matching the upper-casing of the looked-up word below.
     lem::UCString entry_name = lem::to_upper( GetEntry( wenum->GetId() ).GetName() );
     lemma2id.insert( std::make_pair( entry_name, wenum->GetId() ) );
    }

   wordfreq_file.Delete();

   lem::BinaryReader rdr( *wordfreq_filename );

   // This is a lengthy operation, so tell the user what is going on.
   GetIO().mecho().printf( "%vfESG%vn: updating word frequencies..." );
   GetIO().mecho().flush();

   // Projection results; the containers are reused for every record.
   MCollect<Word_Coord> found_list;
   MCollect<ProjScore> val_list;
   PtrCollect<LA_ProjectInfo> inf_list;
   LexicalAutomat &la = GetDict().GetLexAuto();

   int counter=0;

   while( !rdr.eof() )
    {
     SG_calibrator c;
     c.LoadBin(rdr);
     
     // If the reader reports end-of-file after the load attempt,
     // the record is not processed.
     if( rdr.eof() )
      break;

     counter++;

     // Progress tick after every 10000 records (counter is already positive here).
     if( (counter%10000)==0 )
      {
       lem::mout->printf( " %d", counter/10000 );
       lem::mout->flush();
      }

     Lexem uword( lem::to_upper( c.GetWord() ) );


     if( c.IsWordFormsScore() )
      {
       // Scores are given for all forms of the word: every entry with this
       // lemma (and the requested class, if any) is examined form by form.
       std::pair<L2E_CIT,L2E_CIT> r = lemma2id.equal_range( uword );
       for( L2E_CIT it=r.first; it!=r.second; ++it )
        {
         int id_entry = it->second;
         if( c.GetClass()==UNKNOWN || GetEntries().GetEntry( id_entry ).GetClass() == c.GetClass() )
          {
           SG_Entry& e = GetEntries().GetEntryForChange( id_entry );

           // The number of forms is read once per entry; this also avoids
           // comparing a signed index with size() on every iteration.
           const int form_count = static_cast<int>( e.forms().size() );
           for( int iform=0; iform<form_count; ++iform )
            {
             const SG_EntryForm &f = e.forms()[iform];
             if( c.MatchCoords( f.coords() ) )
              {
               GetStorage().SetWordformFrequency( id_entry, iform, c.GetFreq() );
              }
            }
          }
        }
      }
     else if( c.GetClass()!=UNKNOWN && c.IsWordEntryFreq() )
      {
       // Frequency of a whole word entry; applied only to entries of the named class.
       std::pair<L2E_CIT,L2E_CIT> r = lemma2id.equal_range( uword );
       for( L2E_CIT it=r.first; it!=r.second; ++it )
        {
         int id_entry = it->second;
         if( GetEntries().GetEntry( id_entry ).GetClass() == c.GetClass() )
          {
           SG_Entry& e = GetEntries().GetEntryForChange( id_entry );
           e.UpdateFreq( c.GetFreq() );
          }
        }
      }
     else if( c.IsWordFormScore() )
      {
       // A score is given for a single wordform.

       int language_id=UNKNOWN;
       // When a grammatical class is specified, the language id is taken from it.
       if( c.GetClass()!=UNKNOWN )
        {
         const Solarix::SG_Class & pos = GetClass( c.GetClass() );
         language_id = pos.GetLanguage();
        }
       
       la.TranslateLexem( uword, true, language_id );
       
       RC_Lexem rc_name( &uword, null_deleter() );

       found_list.clear();
       val_list.clear();
       inf_list.clear();

       la.ProjectWord( rc_name, found_list, val_list, inf_list, LexicalAutomat::Wordforms, 0, language_id, NULL );
    
       if( found_list.empty() )
        {
         GetIO().mecho().printf( "\nUnknown word [%us] in word_frequency rule\n", c.GetWord().c_str() );
         throw lem::E_BaseException();
        }
    
       for( lem::Container::size_type j=0; j<found_list.size(); ++j )
        {
         const SG_Entry &e = GetEntry(found_list[j].GetEntry());
         
         // Skip projections whose entry belongs to a class other than the requested one.
         if( c.GetClass()!=UNKNOWN && c.GetClass()!=e.GetClass() )
          {
           continue;
          } 
    
         const SG_EntryForm &f = e.forms()[found_list[j].GetForm()];
         if( c.MatchCoords( f.coords() ) )
          {
           GetStorage().SetWordformFrequency( found_list[j].GetEntry(), found_list[j].GetForm(), c.GetFreq() );
          }
        }
      }
     else
      {
       // A record of any other kind is not expected.
       LEM_STOPIT;
      } 
    }


   rdr.close();
   wordfreq_filename->DoRemove();

   GetIO().mecho().printf( "%vfAOK%vn\n" );
  }

 return;
}