Ejemplos de CStringVector en C++ (Cpp)

Ejemplo n.º 1

0

Mostrar archivo

Archivo: tagger.cpp Proyecto: StevenLOL/zpar

/**
 * Trains on one sentence: builds the gold state from the reference
 * segmentation/tagging, refreshes the word/character dictionaries from the
 * reference, then runs the tagger in training mode.
 *
 * @param sentence  the raw input sentence, passed on to buildStateItem() and tag()
 * @param correct   the reference (word, tag) pairs
 * @return the value of m_bTrainingError after the tag() call
 */
bool CTagger::train( const CStringVector * sentence , const CTwoStringVector * correct) {
   ++m_nTrainingRound ;
   buildStateItem( sentence, correct, &m_goldState);

   // Dictionary statistics that are collected from every training example.
   for ( unsigned wordIdx=0; wordIdx<correct->size(); ++wordIdx ) {

      const CWord &word = correct->at(wordIdx).first ;
      const unsigned long tagCode = CTag( correct->at(wordIdx).second ).code() ;

      // Scratch buffer kept static so it is reused between iterations and calls.
      static CStringVector chars;
      chars.clear();
      getCharactersFromUTF8String(correct->at(wordIdx).first, &chars);

      // Word frequency and the running maximum frequency.
      m_weights->m_mapWordFrequency[word]++;
      if (m_weights->m_mapWordFrequency[word]>m_weights->m_nMaxWordFrequency)
         m_weights->m_nMaxWordFrequency = m_weights->m_mapWordFrequency[word];

      // Tags seen for the whole word and for each of its characters.
      m_weights->m_mapTagDictionary.add(word, tagCode);
      for ( unsigned charIdx=0 ; charIdx<chars.size() ; ++charIdx )
         m_weights->m_mapCharTagDictionary.add(chars[charIdx], tagCode) ;

      // For closed-set tags, remember which characters may start such a word.
      if ( PENN_TAG_CLOSED[tagCode] )
         m_weights->m_mapCanStart.add(chars[0], tagCode);

      // The word length is recorded for its tag unless the knowledge base
      // flags the first or the last character via isFWorCD().
      bool bRecordLength = true ;
      if ( m_weights->m_Knowledge ) {
         if ( m_weights->m_Knowledge->isFWorCD(chars[0]) ||
              m_weights->m_Knowledge->isFWorCD(chars[chars.size()-1]) )
            bRecordLength = false ;
      }
      if ( bRecordLength )
         m_weights->setMaxLengthByTag( tagCode , chars.size() ) ;
   }

   tag( sentence, NULL, NULL, 1, NULL );
   return m_bTrainingError;
}

Ejemplo n.º 2

0

Mostrar archivo

Archivo: tagger.cpp Proyecto: xiangyu/zpar-0.6

/**
 * Updates the feature weights from one (tagged, correct) pair and, the first
 * time a given round is seen, refreshes the word/character dictionaries.
 *
 * @param tagged   the tagger output
 * @param correct  the reference output
 * @param round    the training round, forwarded to the feature updates
 */
void CTagger::updateScores(const CTwoStringVector* tagged, const CTwoStringVector* correct, unsigned long round) {

    // Features are only touched when the output differs from the reference:
    // subtract everything from the output, add everything from the reference.
    if ( *tagged != *correct ) {
        for (int t=0; t<tagged->size(); ++t)
            updateLocalFeatureVector(eSubtract, tagged, t, round);
        for (int c=0; c<correct->size(); ++c)
            updateLocalFeatureVector(eAdd, correct, c, round);
    }

    // The dictionary statistics are collected once per round.
    if ( !(round>m_nNumberOfCurrentTrainingExample) )
        return;
    m_nNumberOfCurrentTrainingExample = round;

    for (int w=0; w<correct->size(); ++w) {
        CWord word = correct->at(w).first;
        CTag tag(correct->at(w).second);

        CStringVector chars;
        getCharactersFromUTF8String(correct->at(w).first, &chars);

        m_weights->m_mapWordFrequency[word]++;
        m_weights->m_mapTagDictionary.add(word, tag);
        if (m_weights->m_mapWordFrequency[word]>m_weights->m_nMaxWordFrequency)
            m_weights->m_nMaxWordFrequency = m_weights->m_mapWordFrequency[word];

        for (int c=0; c<chars.size(); ++c)
            m_weights->m_mapCharTagDictionary.add(chars[c], tag) ;
    }
}

Ejemplo n.º 3

0

Mostrar archivo

Archivo: service.cpp Proyecto: Groestlcoin/el

void ServiceController::Start(const CStringVector& ar) {
	LPCTSTR *p = 0;
	if (!ar.empty()) {
		p = (LPCTSTR*)alloca(ar.size()*sizeof(LPCTSTR));
		for (size_t i=0; i<ar.size(); ++i)
			p[i] = ar[i];
	}
	Win32Check(::StartService(m_handle, (DWORD)ar.size(), p));
}

Ejemplo n.º 4

0

Mostrar archivo

Archivo: main.cpp Proyecto: karuiwu/reparse

void process(const std::string &sInputFile, const std::string &sOutputFile, unsigned long nMaxSentSize) {
   CDoc2Snt doc2snt(sInputFile, nMaxSentSize);
   CSentenceWriter writer(sOutputFile);
   CStringVector sent;
   while (doc2snt.getSentence(sent)) {
      if (sent.size()>0 && sent.back()=="\n")
         sent.pop_back();
      writer.writeSentence(&sent, "");
      sent.clear();
   }
}

Ejemplo n.º 5

0

Mostrar archivo

Archivo: acl07.cpp Proyecto: fangyw/meishan-code

/**
 * Adds (eAdd) or subtracts (any other method) one unit from the score of
 * every local feature of the word at position `index`.
 *
 * @param method  eAdd to add 1 to each feature, otherwise -1 is applied
 * @param outout  the segmented sentence, one word per element
 * @param index   position of the word whose features are updated
 * @param round   training round, forwarded to every updateScore() call
 */
void CFeatureHandle::updateLocalFeatureVector(SCORE_UPDATE method, const CStringVector* outout, int index, int round) { 
   // about words
   CWord word = outout->at(index);
   CWord last_word = index>0 ? outout->at(index-1) : g_emptyWord;
   CTwoWords two_word;
   two_word.allocate(word.str(), last_word.str());
   CStringVector chars;
   getCharactersFromUTF8String(word.str(), &chars);
   // about length (both lengths are capped at LENGTH_MAX-1)
   int length = getUTF8StringLength(word.str()); if (length > LENGTH_MAX-1) length = LENGTH_MAX-1;
   int last_length = getUTF8StringLength(last_word.str()); if (last_length > LENGTH_MAX-1) last_length = LENGTH_MAX-1;
   // about chars
   CWord first_char = getFirstCharFromUTF8String(word.str());
   CWord last_char = getLastCharFromUTF8String(word.str());
   CWord first_char_last_word = index>0 ? getFirstCharFromUTF8String(last_word.str()) : g_emptyWord;
   CWord last_char_last_word = index>0 ? getLastCharFromUTF8String(last_word.str()) : g_emptyWord;
   CWord two_char = index>0 ? last_char_last_word.str() + first_char.str() : g_emptyWord;
   CTwoWords first_and_last_char, lastword_firstchar, currentword_lastchar, firstcharlastword_word, lastword_lastchar;
   first_and_last_char.allocate(first_char.str(), last_char.str());
   // The pairs that involve the previous word only exist from the second word on.
   if (index>0) {
      lastword_firstchar.allocate(last_word.str(), first_char.str());
      currentword_lastchar.allocate(word.str(), last_char_last_word.str());
      firstcharlastword_word.allocate(first_char_last_word.str(), first_char.str());
      lastword_lastchar.allocate(last_char_last_word.str(), last_char.str());
   }
   
   SCORE_TYPE amount = ( (method==eAdd) ? 1 : -1 ) ;

   m_weights.m_mapSeenWords.updateScore(word, amount, round);
   m_weights.m_mapLastWordByWord.updateScore(two_word, amount, round);
   if (length==1) m_weights.m_mapOneCharWord.updateScore(first_char, amount, round);
   else {
      m_weights.m_mapFirstAndLastChars.updateScore(first_and_last_char, amount, round);
      // Adjacent character pairs inside the word. The bound is written as
      // j+1<size() because size()-1 wraps around for an empty character list
      // (an empty word reaches this branch with length==0), which used to
      // index far outside `chars`.
      for (unsigned j=0; j+1<chars.size(); j++) {
         m_weights.m_mapConsecutiveChars.updateScore(chars[j]+chars[j+1], amount, round);
      }
      m_weights.m_mapLengthByFirstChar.updateScore(std::make_pair(first_char, length), amount, round);
      m_weights.m_mapLengthByLastChar.updateScore(std::make_pair(last_char, length), amount, round);
   } 
   if (index>0) {
      m_weights.m_mapSeparateChars.updateScore(two_char, amount, round);
      
      m_weights.m_mapLastWordFirstChar.updateScore(lastword_firstchar, amount, round);
      m_weights.m_mapCurrentWordLastChar.updateScore(currentword_lastchar, amount, round);
      
      m_weights.m_mapFirstCharLastWordByWord.updateScore(firstcharlastword_word, amount, round);
      m_weights.m_mapLastWordByLastChar.updateScore(lastword_lastchar, amount, round);

      m_weights.m_mapLengthByLastWord.updateScore(std::make_pair(last_word, length), amount, round);
      m_weights.m_mapLastLengthByWord.updateScore(std::make_pair(word, last_length), amount, round);
   }
}

Ejemplo n.º 6

0

Mostrar archivo

Archivo: tagger.cpp Proyecto: fangyw/meishan-code

/**
 * Trains on one reference sentence.
 *
 * The character sentence used for training is produced by
 * m_weights->m_rules.record() from `correct`; `sentence_input` is not read
 * in this function.
 *
 * @param sentence_input  unused here
 * @param correct         the reference (word, tag) pairs
 * @return the value of m_bTrainingError after the work() call
 */
bool CTagger::train( const CStringVector * sentence_input , const CTwoStringVector * correct) {
   ++m_nTrainingRound ;
   static CStringVector sentence;
   m_weights->m_rules.record( correct, &sentence );
   buildStateItem( &sentence, correct, &m_goldState);
//   for (int i=0; i<sentence.size(); ++i)
//      std::cout << m_weights->m_rules.canSeparate(i) << std::endl;

   // Number of characters that belong to the words already processed.
   unsigned charOffset = 0;

   // Dictionary statistics that are collected from every training example.
   for ( unsigned wordIdx=0; wordIdx<correct->size(); ++wordIdx ) {

      const CWord &word = correct->at(wordIdx).first ;
      const unsigned long tagCode = CTag( correct->at(wordIdx).second ).code() ;

      // Scratch buffer kept static so it is reused between iterations and calls.
      static CStringVector chars;
      chars.clear(); 
      getCharactersFromUTF8String(correct->at(wordIdx).first, &chars);
      const unsigned wordChars = chars.size();

      // Word frequency and the running maximum frequency.
      m_weights->m_mapWordFrequency[word]++;
      if (m_weights->m_mapWordFrequency[word]>m_weights->m_nMaxWordFrequency) 
         m_weights->m_nMaxWordFrequency = m_weights->m_mapWordFrequency[word];

      // Tags seen for the whole word and for each of its characters.
      m_weights->m_mapTagDictionary.add(word, tagCode);
      for ( unsigned c=0 ; c<wordChars; ++c )
         m_weights->m_mapCharTagDictionary.add(chars[c], tagCode) ;

      // Closed-set tags and CD: remember which characters may start such a word.
      if ( PENN_TAG_CLOSED[tagCode] || tagCode==PENN_TAG_CD )
           m_weights->m_mapCanStart.add(chars[0], tagCode);

//      if ( !m_weights->m_Knowledge ||
//          (!m_weights->m_Knowledge->isFWorCD(chars[0])&&
//           !m_weights->m_Knowledge->isFWorCD(chars[chars.size()-1])))
      // The word length is recorded for its tag only when every position
      // strictly inside the word is separable according to the rules.
      bool bAllSeparable = true;
      for ( unsigned pos=charOffset+1; pos<charOffset+wordChars; ++pos )
         if (!m_weights->m_rules.canSeparate(pos)) bAllSeparable = false;
      if (bAllSeparable)
         m_weights->setMaxLengthByTag( tagCode , wordChars ) ;

      charOffset += wordChars;
   }

   work( &sentence, NULL, NULL, 1, NULL );
   return m_bTrainingError;
}

Ejemplo n.º 7

0

Mostrar archivo

Archivo: tools.cpp Proyecto: HackLinux/eMule-IS-Mod

// Splits strString with CString::Tokenize using the delimiter set szSplitter
// and returns the tokens in order. Collection stops at the first empty token
// returned by Tokenize.
CStringVector SplitString( const CString& strString, LPCTSTR szSplitter )
{
    CStringVector tokens;
    int nPos = 0;

    for ( CString strToken = strString.Tokenize(szSplitter, nPos);
          strToken != _T("");
          strToken = strString.Tokenize(szSplitter, nPos) )
    {
        tokens.push_back(strToken);
    }

    return tokens;
}

Ejemplo n.º 8

0

Mostrar archivo

Archivo: tagger.cpp Proyecto: StevenLOL/zpar

/**
 * Sums the local scores of every word of a tagged sentence.
 *
 * @param tagged  the (word, tag) pairs to score
 * @return the sum of getOrUpdateLocalScore() over all word positions
 */
SCORE_TYPE CTagger::getGlobalScore(const CTwoStringVector* tagged) {

   // Scratch objects kept static so they are reused between calls.
   static CStateItem item ;
   static CStringVector raw;

   // Rebuild the character sequence by appending the characters of each word.
   raw.clear();
   for (int w=0; w<tagged->size(); ++w)
      getCharactersFromUTF8String(tagged->at(w).first, &raw);

   buildStateItem( &raw, tagged, &item );

   SCORE_TYPE total = 0;
   for (int w=0; w<tagged->size(); ++w)
      total += getOrUpdateLocalScore(&raw, &item, w);

   return total;
}

Ejemplo n.º 9

0

Mostrar archivo

Archivo: DBStepNC.cpp Proyecto: usnistgov/QIF

// Builds and executes "INSERT INTO <table> (<columns>) VALUES ('<values>')".
//
// table   - name of the target table (inserted into the statement verbatim)
// columns - column names (inserted verbatim)
// values  - one value per column; each is emitted as a quoted SQL string
//           literal with embedded single quotes doubled
//
// Returns E_INVALIDARG when the two lists are empty or differ in length,
// otherwise the result of opening the command on m_session.
//
// NOTE(review): table and column names are still concatenated unescaped, so
// they must come from trusted code. A parameterized command would be the
// safer long-term form.
HRESULT CDBStepNC::InsertRow(CString table, CStringVector & columns, CStringVector & values)
{
    if(values.size() != columns.size())
        return E_INVALIDARG;
    if(values.size() == 0 ||  columns.size()==0)
        return E_INVALIDARG;

    CString tszSQL;
    // The explicit conversion matters: a CString object must not be passed
    // through the variable argument list of Format().
    tszSQL.Format("INSERT INTO %s (", static_cast<LPCTSTR>(table));
    for(size_t i=0; i<columns.size(); i++)
    {
        if(i>0) tszSQL+=" ,";
        tszSQL+=columns[i];
    }
    tszSQL+=") VALUES (";
    for(size_t i=0; i<values.size(); i++)
    {
        if(i>0) tszSQL+=" ,";
        // Double every single quote so that a value containing ' neither
        // breaks the statement nor terminates the literal early.
        CString escaped = values[i];
        escaped.Replace("'", "''");
        tszSQL+="'";
        tszSQL+=escaped;
        tszSQL+="'";
    }
    tszSQL+=")";

    CCommand< CDynamicStringAccessor > sqlInsertCommand;
    return sqlInsertCommand.Open( m_session, tszSQL );
}

Ejemplo n.º 10

0

Mostrar archivo

Archivo: PokerData.cpp Proyecto: zqrtalent/MercuryUI

// Runs SetUserAvatarProc for the given user and reports the resulting avatar.
//
// nUserId          - must be positive, otherwise 0 is returned immediately
// sAvatarFileName  - out: receives GetAvatarFileName() of the resulting avatar
//                    when the procedure succeeds; untouched otherwise
// nAvatarId        - requested avatar id (declared default: -1)
// bIsPrivateAvatar - forwarded to the procedure (declared default: true)
//
// Returns the avatar id reported by the procedure, or 0 when the user id is
// invalid, the procedure could not be executed, or it reported m_nSuccess != 0.
int
PokerData::SetUserAvatar(int nUserId, _String &sAvatarFileName, int nAvatarId /*= -1*/, bool bIsPrivateAvatar /*= true*/){
	if( nUserId <= 0 )
		return 0;

	SetUserAvatarProc proc;
	proc.m_nAvatarId		= nAvatarId;
	proc.m_nUserId			= nUserId;
	proc.m_bIsPrivateAvatar	= bIsPrivateAvatar;

	if( !m_pDataMan->ExecuteProc(&proc) || proc.m_nSuccess != 0 )
		return 0;

	const int nAvatarIdNew = proc.m_nAvatarIdNew;

	// Same avatar as requested: only the file name has to be reported.
	if( nAvatarId == nAvatarIdNew ){
		sAvatarFileName = GetAvatarFileName(nAvatarId);
		return nAvatarIdNew;
	}

	// A different avatar id came back: report its file name and store that
	// name on the avatar record (only AVATAR_FILE_NAME is marked dirty).
	sAvatarFileName = GetAvatarFileName(nAvatarIdNew);

	PokerAvatar avatarNew;
	avatarNew.m_nId				= nAvatarIdNew;
	avatarNew.m_sAvatarFileName = sAvatarFileName;

	CStringVector arrDirtyFields;
	arrDirtyFields.push_back(_T("AVATAR_FILE_NAME"));
	if( m_pDataMan->SaveRecord(&avatarNew, &arrDirtyFields) ){
		/*
		PokerAvatar* pAvatarInfoNew			= new PokerAvatar();
		pAvatarInfoNew->m_nId				= proc.m_nAvatarIdNew;
		pAvatarInfoNew->m_nPrivateUserId	= nUserId;
		pAvatarInfoNew->m_sAvatarFileName	= sAvatarFileName;
		pAvatarInfoNew->m_recStatus			= 0;
		m_listAvatars.Add(pAvatarInfoNew);
		*/
	}

	return nAvatarIdNew;
}

Ejemplo n.º 11

0

Mostrar archivo

Archivo: agenda.cpp Proyecto: desilinguist/zpar-sandbox

void CSegmentor::segment(const CStringVector* sentence_input, CStringVector *vReturn, double *out_scores, int nBest) {
#ifdef DEBUG
   clock_t total_start_time = clock();;
#endif
   TRACE("Starting segmenting a sentence...");

   // turn the spaces in the input sentence into rules that separate corresponding characters
   static CStringVector sentence;
   static CRule rules(m_Feature->m_bRule); 
   rules.segment(sentence_input, &sentence); 
   const unsigned long length = sentence.size();

   assert(length<MAX_SENTENCE_SIZE);
   assert(vReturn!=NULL);
   vReturn->clear();

   // try to work std::cout the best item with the
   // correct outout reference param as NULL
   work(this, sentence, vReturn, out_scores, rules, NULL, nBest, -1);

   TRACE("total time spent: " << double(clock() - total_start_time)/CLOCKS_PER_SEC);
}

Ejemplo n.º 12

0

Mostrar archivo

Archivo: tools.cpp Proyecto: HackLinux/eMule-IS-Mod

// Returns the names of the immediate subkeys of HKEY_LOCAL_MACHINE\<szPath>.
// The names are stored exactly as RegEnumKey reports them (they are not
// prefixed with szPath). An empty vector is returned when the key cannot be
// opened.
CStringVector GetChildRegKeys( LPCTSTR szPath )
{
    HKEY hKey;
    if ( RegOpenKey(HKEY_LOCAL_MACHINE, szPath, &hKey) != ERROR_SUCCESS )
        return CStringVector();

    // Closes the key on every exit path, including an exception thrown while
    // the result vector grows.
    struct KeyCloser
    {
        HKEY m_hKey;
        ~KeyCloser() { RegCloseKey(m_hKey); }
    } keyCloser = { hKey };

    // Ask for the longest subkey name; fall back to a fixed size if the
    // query fails.
    DWORD dwLargestKeySize = 0;
    if ( RegQueryInfoKey(hKey, 0,0,0,0,&dwLargestKeySize,0,0,0,0,0,0) != ERROR_SUCCESS )
        dwLargestKeySize = 1024;

    // Owns the name buffer so it is released on every exit path as well.
    struct BufferOwner
    {
        LPTSTR m_pBuf;
        ~BufferOwner() { delete[] m_pBuf; }
    } keyBuf = { new TCHAR[dwLargestKeySize+2] };

    CStringVector arrKeys;

    // RegEnumKey stops returning ERROR_SUCCESS once the index runs past the
    // last subkey.
    DWORD i=0;
    while ( RegEnumKey(hKey, i++, keyBuf.m_pBuf, dwLargestKeySize+1) == ERROR_SUCCESS )
    {
        arrKeys.push_back(keyBuf.m_pBuf);
    }

    return arrKeys;
}

Ejemplo n.º 13

0

Mostrar archivo

Archivo: agenda.cpp Proyecto: desilinguist/zpar-sandbox

void CSegmentor::train(const CStringVector* sentence_input, const CStringVector* correct, int & round) {
#ifdef DEBUG
   clock_t total_start_time = clock();;
#endif
   TRACE("Starting training using a sentence...");
   static CStringVector sentence;
   static CRule rules(m_Feature->m_bRule);
   rules.segment(sentence_input, &sentence);
   const unsigned long int length = sentence.size();

   assert(length<MAX_SENTENCE_SIZE);

   static std::vector<unsigned> correct_starts;
   static int word_length, word_index, char_length, char_index; // word_xxx are from correct, char_xxx from sentence

   char_index = 0;
   int count = 0; 
   correct_starts.clear();
   correct_starts.push_back(count);
   for (word_index=0; word_index<correct->size(); word_index++) {
      word_length = correct->at(word_index).size();
      char_length = 0; 
      while (char_length<word_length) {
         char_length += sentence[char_index++].size();
         count += 1;
      }
      assert(char_length==word_length);
      correct_starts.push_back(count);
   }

   // the main learning process with update
   work(this, sentence, 0, 0, rules, &correct_starts, 1, round);

   TRACE("Done");
   TRACE("total time spent: " << double(clock() - total_start_time)/CLOCKS_PER_SEC);
}

Ejemplo n.º 14

0

Mostrar archivo

Archivo: tagger.cpp Proyecto: StevenLOL/zpar

/**
 * Updates the feature weights from one (tagged, correct) pair and, the first
 * time a given round is seen, refreshes the word/character dictionaries.
 *
 * @param tagged   the tagger output
 * @param correct  the reference output
 * @param round    the training round, forwarded to the score updates
 */
void CTagger::updateScores(const CTwoStringVector* tagged, const CTwoStringVector* correct, unsigned long round) {

   // Scratch objects kept static so they are reused between calls.
   static CStateItem item ;
   static CStringVector raw;

   if ( *tagged != *correct ) {

      // Rebuild the character sequence from the words of the tagged output.
      raw.clear();
      for (int w=0; w<tagged->size(); ++w)
         getCharactersFromUTF8String(tagged->at(w).first, &raw);

      // Penalise the features of the tagger output ...
      buildStateItem( &raw, tagged, &item );
      for (int w=0; w<tagged->size(); ++w)
         getOrUpdateLocalScore(&raw, &item, w, -1, round);

      // ... and reward those of the reference (same character sequence).
      buildStateItem( &raw, correct, &item );
      for (int w=0; w<correct->size(); ++w)
         getOrUpdateLocalScore(&raw, &item, w, 1, round);
   }

   // The dictionary statistics are collected once per round.
   if ( !( round > m_nNumberOfCurrentTrainingExample ) )
      return;
   m_nNumberOfCurrentTrainingExample = round ;

   for (int w=0; w<correct->size(); ++w) {

      const CWord &word = correct->at(w).first ;
      const unsigned long tagCode = CTag( correct->at(w).second ).code() ;

      CStringVector chars;
      getCharactersFromUTF8String(correct->at(w).first, &chars);

      // Word frequency and the running maximum frequency.
      m_weights->m_mapWordFrequency[word]++;
      if (m_weights->m_mapWordFrequency[word]>m_weights->m_nMaxWordFrequency)
         m_weights->m_nMaxWordFrequency = m_weights->m_mapWordFrequency[word];

      // Tags seen for the whole word and for each of its characters.
      m_weights->m_mapTagDictionary.add(word, tagCode);
      for (int c=0; c<chars.size(); ++c)
         m_weights->m_mapCharTagDictionary.add(chars[c], tagCode) ;

      // The word length is recorded for its tag unless the knowledge base
      // flags the first or the last character via isFWorCD().
      bool bRecordLength = true ;
      if ( m_weights->m_Knowledge ) {
         if ( m_weights->m_Knowledge->isFWorCD(chars[0]) ||
              m_weights->m_Knowledge->isFWorCD(chars[chars.size()-1]) )
            bRecordLength = false ;
      }
      if ( bRecordLength )
         m_weights->setMaxLengthByTag( tagCode , chars.size() ) ;
   }
}

Ejemplo n.º 15

0

Mostrar archivo

Archivo: DBStepNC.cpp Proyecto: usnistgov/QIF

// Imports tool definitions from a MasterCAM tool file into the
// "milling_cutter" table, one row per tool record.
//
// Record layout (one tool occupies 10 consecutive lines):
// line 1 - TOOL (start of tool definition)
// line 2 - tool material (1-HSS, 2-CARBIDE, 3-COATED CAR, 4-CERAMIC, 5-BORZON, 10-UNKNOWN)
// line 3 - tool comment
// line 4 - tool name (geometry reference for backplot)
// line 5 - tool manufacturer
// line 6 - chuck designation
// line 7 - tool_no, tool_type, rad_type, dia, crad, thds, tip_angle,
//         dia_off, len_off, feed, plunge, retract, rpm, coolant, n_flutes
// line 8 - Drilling attributes (see tool_type in line 7 above)
// line 8 - cycle, peck1, peck2, peck_clr, chip_brk, dwell, shldr_angle, root_dia (tap), bore_shift
// line 8 - Milling attributes (see tool_type in line 7 above)
// line 8 - cut_able, rgh_x, rgh_z, fin_x, fin_z, tip_dia, root_dia (thd mill), thd_angle
// line 9 - pilot_dia, flute_len, oa_len, shldr_len, arbor_dia, hldr_dia, hldr_len, spindle_ccw, sfm, fpt, metric
//
// Returns E_INVALIDARG when the file is empty or unreadable, S_OK otherwise.
// Records that are truncated or have too few fields are skipped. Failures of
// individual inserts are not reported (best effort, as before).
HRESULT CDBStepNC::LoadMasterCAMTool(CString filename)
{
    CString contents = ReadAFile(filename);
    if(contents.GetLength()< 1)
        return E_INVALIDARG;
    CStringVector lines = CStringVector::Tokenize(contents, "\n");

    // Target columns, in the order in which `values` is filled below.
    static const char* const kColumnNames[] = {
        "toolid",
        "name",
        "tooltypeid",
        "materialid",
        "number_of_teeth",
        "hand_of_cut",
        "coolant_through_tool",
        "cutting_edge_length",
        "flute_length",
        "overall_length",
        "shoulder_length",
        "tip_diameter",
        "tool_tip_half_angle",
        "sfm",
        "fpt",
        "metric"
    };
    const size_t nColumns = sizeof(kColumnNames)/sizeof(kColumnNames[0]);

    CStringVector columns;
    for(size_t c=0; c<nColumns; c++)
        columns.push_back(kColumnNames[c]);

    CStringVector values;
    values.resize(nColumns);

    // Tool records start at line index 40. The bound i+9 guarantees that the
    // whole 10-line record exists, so a truncated last record is ignored
    // instead of being read past the end of `lines`.
    // j is the ordinal of the record and becomes its toolid; it advances for
    // skipped records too.
    int j=1;
    for(size_t i=40; i+9< lines.size(); i+=10, j++)
    {
        CStringVector items2 = CStringVector::Tokenize(lines[i+2]," ");
        CStringVector items7 = CStringVector::Tokenize(lines[i+7]," ");
        CStringVector items9 = CStringVector::Tokenize(lines[i+9]," ");

        // Highest token indices used below: items2[2], items7[16], items9[12].
        if(items2.size() < 3 || items7.size() < 17 || items9.size() < 13)
            continue;

        values[0] =StrFormat("%d", j); //                  toolid
        values[1]=lines[i+3].Mid(lines[i+3].Find("-")+1).Trim();	// name
        values[2]=items7[3].Trim();	// tooltypeid
        values[3]=items2[2].Trim();	// materialid
        values[4]= items7[16].Trim();             //  number_of_teeth
        // spindle_ccw flag: the token is text, so it is compared with "1".
        // The earlier comparison against the integer 1 did not test for the
        // text "1".
        values[5]= (items9[9].Trim() == "1") ? "LEFT" : "RIGHT";    //  hand_of_cut
        values[6]= items7[15];             //  coolant_through_tool
        // NOTE(review): cutting_edge_length and flute_length both read
        // items9[3], and overall_length reads items7[10], which by the
        // layout above appears to be len_off rather than oa_len. Confirm
        // the intended field mapping.
        values[7]= items9[3];             //  cutting_edge_length
        values[8]= items9[3];             //  flute_length
        values[9]= items7[10];             //  overall_length
        values[10]= items9[5];             //  shoulder_length
        values[11]= items7[5];             //  tip_diameter
        values[12]= items7[8];             //  tool_tip_half_angle
        values[13]= items9[10];             //  sfm
        values[14]= items9[11];             //  fpt
        values[15]= items9[12].Trim();             //  metric
        InsertRow("milling_cutter", columns, values);
    }
    return S_OK;
}

Ejemplo n.º 16

0

Mostrar archivo

Archivo: tagger.cpp Proyecto: xiangyu/zpar-0.6

// Adds (method == eAdd) or subtracts (any other method) one unit from the
// current score of every local feature of the word at position `index`.
//
// method   - eAdd applies +1 to each feature, anything else applies -1
// sentence - the (word, tag) pairs of the sentence
// index    - position of the word whose features are updated
// round    - training round, forwarded to every updateCurrent() call
//
// NOTE(review): chars[0] is read unconditionally, so the word at `index` is
// assumed to be non-empty - confirm against the callers.
void CTagger :: updateLocalFeatureVector( SCORE_UPDATE method , const CTwoStringVector * sentence , unsigned long index , unsigned long round ) {
    // about words
    CWord word = sentence->at( index ).first ;
    CWord last_word = index > 0 ? sentence->at( index - 1 ).first : g_emptyWord ;
    CWord next_word = index < sentence->size() - 1 ? sentence->at( index + 1 ).first : g_emptyWord ;
    CStringVector chars , last_chars ;
    chars.clear() ;
    getCharactersFromUTF8String( sentence->at(index).first , &chars ) ;
    last_chars.clear() ;
    if ( index > 0 ) getCharactersFromUTF8String( sentence->at( index - 1 ).first , &last_chars ) ;
    // about length (the LENGTH_MAX caps are disabled)
    int length = chars.size() ; //if ( length > LENGTH_MAX-1 ) length = LENGTH_MAX-1 ;
    int last_length = last_chars.size() ; //if ( last_length > LENGTH_MAX-1 ) last_length = LENGTH_MAX-1 ;
    // about chars
    CWord first_char = chars[ 0 ];
    CWord last_char = chars[ chars.size() - 1 ];
    CWord first_char_last_word = index > 0 ? last_chars[ 0 ] : g_emptyWord;
    CWord last_char_last_word = index > 0 ? last_chars[ last_chars.size() - 1 ] : g_emptyWord;
    CWord first_char_next_word = index + 1 < sentence->size() ? getFirstCharFromUTF8String( sentence->at( index + 1 ).first ) : g_emptyWord ;
    // Last two characters before the current word: taken from the previous
    // word when it has at least two characters, otherwise (for index > 1) the
    // last character of the word before it plus last_chars[0].
    CWord last_twochar_last_word = last_chars.size() > 1 ? last_chars[ last_chars.size() - 2 ] + last_chars[ last_chars.size() - 1]
                                   : ( index > 1 ? getLastCharFromUTF8String(sentence->at(index-2).first) + last_chars[ 0 ] : g_emptyWord );
    // First two characters from the start of the current word, borrowing the
    // first character of the next word when the current word has only one.
    CWord first_twochar = chars.size() > 1 ? chars[ 0 ] + chars [ 1 ] : ( index + 1 <sentence->size() ? chars[ 0 ] + getFirstCharFromUTF8String( sentence->at( index + 1 ).first ) : g_emptyWord );
    CWord currentword_lasttwochar = index > 1 ? last_twochar_last_word.str() + word.str() : g_emptyWord ;
    CWord lastword_firsttwochar = index > 0 && index+1 < sentence->size() ? last_word.str() + first_twochar.str() : g_emptyWord ;

    CWord two_char = index > 0 ? last_char_last_word.str() + first_char.str() : g_emptyWord ;
    CWord lastword_firstchar = index > 0 ? last_word.str() + first_char.str() : g_emptyWord ;
    CWord currentword_lastchar = index > 0 ? last_char_last_word.str() + word.str() : g_emptyWord ;
    CWord three_char = length == 1 ? last_char_last_word.str() + word.str() + first_char_next_word.str() : g_emptyWord ;

    CTwoWords two_word ;

    // about tags (CTag::SENTENCE_BEGIN stands in for missing left context)
    const CTag tag( sentence->at(index).second ) ;
    const CTag last_tag = index > 0 ? CTag( sentence->at( index-1 ).second) : CTag::SENTENCE_BEGIN ;
    const CTag second_last_tag = index > 1 ? CTag( sentence->at( index-2 ).second) : CTag::SENTENCE_BEGIN ;
    const CTagSet<CTag, 2> tag_bigram(encodeTags(tag, last_tag));
    const CTagSet<CTag, 3> tag_trigram(encodeTags(tag, last_tag, second_last_tag));
    CTaggedWord<CTag, TAG_SEPARATOR> wt1, wt2;
    CTwoTaggedWords wt12;

    // about the char categories: the dictionary lookup result for the
    // character with the bit of the current tag OR-ed in.
    // NOTE(review): the literal 1 has type int, so the shift is evaluated as
    // int; if tag codes can reach 31 or more this overflows - confirm the
    // size of the tag set.
    long int first_char_cat = m_weights->m_mapCharTagDictionary.lookup(first_char) | (1<<tag.code()) ;
    long int last_char_cat = m_weights->m_mapCharTagDictionary.lookup(last_char) | (1<<tag.code()) ;
    SCORE_TYPE amount = method == eAdd ? 1 : -1 ;

    // Features that apply to every word.
    m_weights->m_mapCurrentTag[ std::make_pair(word, tag) ].updateCurrent( amount , round ) ;
    m_weights->m_mapLastTagByTag[ tag_bigram ].updateCurrent( amount , round ) ;
    m_weights->m_mapLastTwoTagsByTag[ tag_trigram ].updateCurrent( amount , round ) ;
    // Features that need a previous word; each is limited to words of at
    // most two characters.
    if ( index > 0 ) {
        if ( last_length <= 2 ) m_weights->m_mapTagByLastWord[ std::make_pair(last_word, tag) ].updateCurrent( amount , round ) ;
        if ( length <= 2 ) m_weights->m_mapLastTagByWord[ std::make_pair(word, last_tag) ].updateCurrent( amount , round ) ;
        if ( length <= 2 ) m_weights->m_mapTagByWordAndPrevChar[ std::make_pair(currentword_lastchar, tag) ].updateCurrent( amount , round ) ;
        if ( last_length <= 2 ) m_weights->m_mapTagByWordAndNextChar[ std::make_pair(lastword_firstchar, last_tag) ].updateCurrent( amount , round ) ;
    }
    // Single-character words get the surrounding-character feature (only when
    // both neighbours exist); longer words get the character-level features.
    if ( length == 1 ) {
        if ( index > 0 && index < sentence->size() - 1 )
            m_weights->m_mapTagOfOneCharWord[ std::make_pair(three_char, tag) ].updateCurrent( amount , round ) ;
    }
    else {
        m_weights->m_mapTagByFirstChar[ std::make_pair(first_char, tag) ].updateCurrent( amount , round ) ;
        m_weights->m_mapTagByLastChar[ std::make_pair(last_char, tag) ].updateCurrent( amount , round ) ;                    //
        m_weights->m_mapTagByFirstCharCat[ std::make_pair(first_char_cat, tag) ].updateCurrent( amount , round ) ;
        m_weights->m_mapTagByLastCharCat[ std::make_pair(last_char_cat, tag) ].updateCurrent( amount , round ) ;
        for ( int j = 0 ; j < chars.size() ; ++ j ) {
            // Strictly interior characters.
            if ( j > 0 && j < chars.size() - 1 )
                m_weights->m_mapTagByChar[ std::make_pair(CWord(chars[j]), tag) ].updateCurrent( amount , round ) ;
            // Every character but the first: paired with the first character,
            // plus the repeated-character feature.
            if ( j > 0 ) {
                wt1.load(chars[j], tag);
                wt2.load(first_char);
                wt12.allocate(wt1, wt2);
                m_weights->m_mapTaggedCharByFirstChar[ wt12 ].updateCurrent( amount , round ) ;
                if ( chars[j] == chars[j-1] ) m_weights->m_mapRepeatedCharByTag[ std::make_pair(CWord(chars[j]), tag) ].updateCurrent( amount , round ) ; //
            }
            // Every character but the last: paired with the last character.
            if (j<chars.size()-1) {
                wt1.load(chars[j], tag);
                wt2.load(last_char);
                wt12.allocate(wt1, wt2);
                m_weights->m_mapTaggedCharByLastChar[ wt12 ].updateCurrent(amount, round);
            }
        }
    }

}

Ejemplo n.º 17

0

Mostrar archivo

Archivo: agendachart.cpp Proyecto: karuiwu/reparse

// Segments one sentence with a chart: m_Chart[n] holds the candidate states
// that cover the first n characters.
//
// sentence_input - the input sentence, converted by rules.segment()
// vReturn        - must not be NULL; indexed as an array of nBest vectors,
//                  vReturn[k] receives the words of the k-th best result
// out_scores     - optional (may be NULL); out_scores[k] receives the score
//                  of the k-th result, or 0 when there is no such result
// nBest          - number of results requested
void CSegmentor::segment(const CStringVector* sentence_input, CStringVector *vReturn, double *out_scores, int nBest) {
   clock_t total_start_time = clock();;
   // NOTE: pCandidate and subtract_score are declared but never used below.
   const CStateItem *pGenerator, *pCandidate;
   CStateItem tempState;
   unsigned index;                              // the index of the current char
   unsigned j, k;                               // temporary index
   int subtract_score;                          // the score to be subtracted (previous item)
   static CStateItem best_bigram;
   int start_index;
   int word_length;
   int generator_index;

   // Function-local statics: `rules` is constructed from m_Feature->m_bRule
   // on the first call only and reused afterwards.
   static CStringVector sentence;
   static CRule rules(m_Feature->m_bRule);
   rules.segment(sentence_input, &sentence);
   const unsigned length = sentence.size();

   assert(length<MAX_SENTENCE_SIZE);
   assert(vReturn!=NULL);

   //clock_t start_time = clock();
   TRACE("Initialising the segmentation process...");
   vReturn->clear();
   clearWordCache(); 
   m_Chart.clear();

   // Seed the chart with an empty state covering zero characters.
   tempState.clear();
   m_Chart[0]->insertItem(&tempState);

   TRACE("Segmenting started");
   for (index=0; index<length; index++) {

      // m_Chart index 1 correspond to the first char
      // (the expression is evaluated only for the effect of operator[];
      // presumably it makes sure the cell exists - TODO confirm)
      m_Chart[index+1];

      // control for the ending character of the candidate 
      if ( index < length-1 && rules.canSeparate(index+1)==false ) 
         continue ; 

      start_index = index-1 ; // the end index of last word
      word_length = 1 ; // current word length

      // enumerating the start index
      // ===========================
      // the start index of the word is actually start_index + 1
      while( start_index >= -1 && word_length <= MAX_WORD_SIZE ) {

         // control for the starting character of the candidate
         // ---------------------------------------------------
         while ( start_index >= 0 && rules.canSeparate(start_index+1)==false )
            start_index-- ; 

         // start the search process
         // ------------------------
         // Extend every state that ends right before the candidate word with
         // a word ending at `index`, and score the extension.
         for ( generator_index = 0 ; generator_index < m_Chart[ start_index+1 ]->size() ; ++ generator_index ) {
            pGenerator = m_Chart[ start_index+1 ]->item( generator_index ) ;
            tempState.copy( pGenerator ) ;
            tempState.append( index ) ;
            tempState.m_nScore += m_Feature->getLocalScore( &sentence, &tempState, tempState.m_nLength-1 ) ;
            if (nBest==1) {
               // 1-best: keep only the best extension for this start index.
               if ( generator_index == 0 || tempState.m_nScore > best_bigram.m_nScore ) {
                  best_bigram.copy(&tempState);                                       //@@@
               }
            }
            else {
               m_Chart[ index+1 ]->insertItem( &tempState );
            }
         }
         // NOTE(review): when the cell at start_index+1 holds no items the
         // loop above does not run and best_bigram still carries the value
         // from an earlier iteration - confirm that this cannot happen.
         if (nBest==1) {
            m_Chart[ index+1 ]->insertItem( &best_bigram );                  //@@@
         }                                                        //@@@

         // control the first character of the candidate
         if ( rules.canAppend(start_index+1)==false ) 
            break ; 

         // update start index and word len
         --start_index ;
         ++word_length ;

      }//start_index
   }
   // now generate output sentence
   // n-best list will be stored in array
   // from the addr vReturn
   TRACE("Outputing sentence");
   for (k=0; k<nBest; ++k) {
      // clear
      vReturn[k].clear();
      if (out_scores!=NULL) 
         out_scores[k] = 0;
      // assign retval
      if (k<m_Chart[length]->size()) {
         pGenerator = m_Chart[length]->bestItem(k);
         // Rebuild each word by concatenating its characters.
         for (j=0; j<pGenerator->m_nLength; j++) {
            std::string temp = "";
            for (unsigned l = pGenerator->getWordStart(j); l <= pGenerator->getWordEnd(j); ++l) {
               assert(sentence.at(l)!=" "); // [SPACE]
               temp += sentence.at(l);
            }
            vReturn[k].push_back(temp);
         }
         if (out_scores!=NULL)
            out_scores[k] = pGenerator->m_nScore;
      }
   }
   // pGenerator here is whatever was assigned last in the loops above.
   TRACE("Done, the best score: " << pGenerator->m_nScore);
   TRACE("total time spent: " << double(clock() - total_start_time)/CLOCKS_PER_SEC);
}

Ejemplo n.º 18

0

Mostrar archivo

Archivo: acl07.cpp Proyecto: fangyw/meishan-code

/**
 * Segments a sentence into words with an agenda-driven search and writes up
 * to nBest segmentations into the array that starts at vReturn.
 *
 * The input is first passed through rules.segment() to obtain the working
 * unit sequence. For every position two kinds of candidates are generated
 * from each generator item: one that starts a new word at that position
 * (append) and, for positions after the first, one that extends the last
 * word of the generator (replace).
 *
 * @param sentence_input  the raw input sequence.
 * @param vReturn         array of output vectors, indexed 0..nBest-1; must
 *                        not be NULL.
 * @param out_scores      optional array receiving one score per output; may
 *                        be NULL. Entries without a matching item are set to 0.
 * @param nBest           number of outputs requested.
 *
 * If the working sequence is longer than MAX_SENTENCE_SIZE, a message is
 * written to std::cerr, only the first output vector is cleared, and the
 * function returns without touching out_scores.
 *
 * Note: `sentence`, `rules` and `doneLastWord` are function-local statics and
 * therefore persist between calls; `rules` is constructed on the first call only.
 */
void CSegmentor::segment(const CStringVector* sentence_input, CStringVector *vReturn, double *out_scores, int nBest) {
   clock_t total_start_time = clock();
   CStateItem *pGenerator, *pCandidate;
   unsigned index;                              // the index of the current char
   unsigned j, k;                               // temporary index
   int subtract_score;                          // the score to be subtracted (previous item)
   static unsigned doneLastWord[MAX_SENTENCE_SIZE];

   // Validate the output pointer before anything below dereferences it
   // (the over-long-sentence branch clears vReturn).
   assert(vReturn!=NULL);

   static CStringVector sentence;
   static CRule rules(m_Feature->m_bRule);
   rules.segment(sentence_input, &sentence);
   const unsigned length = sentence.size();

   if (length > MAX_SENTENCE_SIZE) {
      std::cerr << "The size of the sentence is " << length << " characters, which is larger than the limit of the system (" << MAX_SENTENCE_SIZE << ")" << std::endl;
      vReturn->clear();
      return;
   }

   //clock_t start_time = clock();
   TRACE("Initialising the segmentation process...");
   vReturn->clear();
   clearWordCache(); 
   m_Agenda->clear();
   pCandidate = m_Agenda->candidateItem();      // make the first item
   pCandidate->clear();                         // restore state using clean
   m_Agenda->pushCandidate();                   // and push it back
   m_Agenda->nextRound();                       // as the generator item
   if (nBest == 1)                              // optimization for one best
      for (j=0; j<MAX_SENTENCE_SIZE; ++j) doneLastWord[j] = 0;

   TRACE("Segmenting started");
   //TRACE("initialisation time: " << clock() - start_time);
   for (index=0; index<length; index++) {
      // generate new state items for each character
      pGenerator = m_Agenda->generatorStart();
      for (j=0; j<m_Agenda->generatorSize(); ++j) {
         // 1. generate new items according to each previous item. 
         // k is the start of the generator's last word; it is only read below
         // when the generator is non-empty (short-circuit evaluation).
         if (pGenerator->m_nLength>0) k = pGenerator->getWordStart(pGenerator->m_nLength-1);
         // If we only ask 1-best, then we take only the first generator
         // among those sharing the same last-word start in this round.
         if ( ( nBest > 1 || pGenerator->m_nLength==0 || doneLastWord[k]<index+1 ) && 
              rules.canSeparate( index ) 
            ) {  
            pCandidate = m_Agenda->candidateItem();
            pCandidate->copy(pGenerator);
            pCandidate->append(index);
            pCandidate->m_nScore += m_Feature->getLocalScore(&sentence, pCandidate, pCandidate->m_nLength-1); 
            m_Agenda->pushCandidate();
            // mark this last-word start as done for the current round
            if (nBest == 1 && pGenerator->m_nLength>0) doneLastWord[k] = index+1;
         }
         // 2. generate by replacing items
         if ( index > 0 && rules.canAppend(index) ) {
            pCandidate = m_Agenda->candidateItem();
            pCandidate->copy(pGenerator);
            // remove the score of the old last word before it is extended
            subtract_score = m_Feature->getLocalScore(&sentence, pGenerator, pGenerator->m_nLength-1);
            pCandidate->m_nScore -= subtract_score;
            pCandidate->replace(index);
            pCandidate->m_nScore += m_Feature->getLocalScore(&sentence, pCandidate, pCandidate->m_nLength-1);
            m_Agenda->pushCandidate();
         }
         pGenerator = m_Agenda->generatorNext();  // next generator
      }
      m_Agenda->nextRound(); // move round
   }
   // now generate output sentence
   // n-best list will be stored in array
   // from the addr vReturn
   TRACE("Outputing sentence");
   for (k=0; k<nBest; ++k) {
      // clear
      vReturn[k].clear();
      if (out_scores!=NULL) 
         out_scores[k] = 0;
      // assign retval
      if (k<m_Agenda->generatorSize()) {
         pGenerator = m_Agenda->generator(k);
         for (j=0; j<pGenerator->m_nLength; j++) {
            // concatenate the units of word j into one string
            std::string temp = "";
            for (unsigned l = pGenerator->getWordStart(j); l <= pGenerator->getWordEnd(j); ++l) {
               assert(sentence.at(l)!=" "); // [SPACE]
               temp += sentence.at(l);
            }
            vReturn[k].push_back(temp);
         }
         if (out_scores!=NULL)
            out_scores[k] = pGenerator->m_nScore;
      }
   }
   TRACE("Done, the best score: " << pGenerator->m_nScore);
   TRACE("total time spent: " << double(clock() - total_start_time)/CLOCKS_PER_SEC);
}

Ejemplo n.º 19

0

Mostrar archivo

Archivo: tagger.cpp Proyecto: StevenLOL/zpar

void CTagger::tag( const CStringVector * sentence_input , CTwoStringVector * vReturn , SCORE_TYPE * out_scores , unsigned long nBest , const CBitArray * prunes ) {
   clock_t total_start_time = clock();;
   int temp_index;
   const CSubStateItem *pGenerator;
   CSubStateItem tempState;
   int j, k;
   unsigned tag;
   unsigned index, last_tag;

   static CSubStateItem uniqueItems[AGENDA_SIZE];
   unsigned long uniqueIndex;
   static bool bUnique;
//   unsigned long long uniqueMarkup;
//   assert(CTag::COUNT<=sizeof(unsigned long long)*8);

   static CStringVector sentence;
   static CRule rules(m_weights->m_bSegmentationRules);
   rules.segment(sentence_input, &sentence);
   const unsigned length=sentence.size();

   static CSubStateItem goldState;
   goldState.clear();

   TRACE("Initialising the tagging process...");
   m_WordCache.clear();
   tempState.clear();
   m_Agenda.clear();
   m_Agenda.pushCandidate(&tempState);
   m_Agenda.nextRound();

   TRACE("Tagging started");
   //TRACE("initialisation time: " << clock() - start_time);
   for (index=0; index<length; index++) {

      // decide correction
      if ( m_bTrain ) {
         static bool bAnyCorrect;
         bAnyCorrect = false;
         pGenerator = m_Agenda.generatorStart();
         for (j=0; j<m_Agenda.generatorSize(); ++j) {
            if ( *pGenerator == goldState ) bAnyCorrect = true;
            pGenerator = m_Agenda.generatorNext();  // next generator
         }
         if ( !bAnyCorrect ) {
            TRACE("Training error at character " << index);
            pGenerator = m_Agenda.bestGenerator();
            updateScoreForState(&sentence, pGenerator, -1);
            updateScoreForState(&sentence, &goldState, 1);
            m_bTrainingError = true;
            return;
         }
      }

      // 2. generate by replacing items
      if ( index > 0 ) {
         pGenerator = m_Agenda.generatorStart();
         for (j=0; j<m_Agenda.generatorSize(); ++j) {
            assert(pGenerator->size()>0);
            if ( ( rules.canAppend(index) ) && // ( index > 0 ) &&
                 pGenerator->getWordLength(pGenerator->size()-1) <
                    m_weights->m_maxLengthByTag[pGenerator->getTag(pGenerator->size()-1).code()]
               ) {
               tempState.copy(pGenerator);
               tempState.replaceIndex(index);
               tempState.score += getOrUpdateAppendScore(&sentence, &tempState, tempState.size()-1, index);
               if (index+1==length) tempState.score += getOrUpdateSeparateScore(&sentence, &tempState, tempState.size());
               m_Agenda.pushCandidate(&tempState);
            } // if
            pGenerator = m_Agenda.generatorNext();  // next generator
         }
      }

   //_
   // 1. generate new items according to each previous item.
   // iterate postags
      for (tag=CTag::FIRST; tag<CTag::COUNT; ++tag) {

         pGenerator = m_Agenda.generatorStart();
//         uniqueMarkup=0;
         uniqueIndex=0;

         for (j=0; j<m_Agenda.generatorSize(); ++j) {

            last_tag = pGenerator->size()==0 ? CTag::SENTENCE_BEGIN : pGenerator->getTag(pGenerator->size()-1).code();

            if ( rules.canSeparate( index ) &&
                (index == 0 || canAssignTag( m_WordCache.find( pGenerator->getWordStart(pGenerator->size()-1), index-1, &sentence ), last_tag )) && // last word
                 canStartWord(sentence, tag, index) // word
               ) {

               tempState.copy(pGenerator);
               tempState.append(index, tag);
               tempState.score += getOrUpdateSeparateScore(&sentence, &tempState, tempState.size()-1);
               if (index+1==length) tempState.score += getOrUpdateSeparateScore(&sentence, &tempState, tempState.size());

               if (nBest==1) {
//                  if ( ((uniqueMarkup&(1LL<<last_tag))==0LL) || uniqueItems[last_tag].score < tempState.score ) {
//                     uniqueMarkup |= (1LL<<last_tag);
//                     uniqueItems[last_tag].copy(&tempState);
//                  }
                  bUnique = true;
                  for (temp_index=0; temp_index<uniqueIndex; ++temp_index) {
                     // only one new when index=zero.
                     assert(index>0&&uniqueItems[temp_index].size()>1);
                     if (uniqueItems[temp_index].getTag(uniqueItems[temp_index].size()-2) == tempState.getTag(tempState.size()-2) &&
                         uniqueItems[temp_index].getWordStart(uniqueItems[temp_index].size()-2) == tempState.getWordStart(tempState.size()-2)
                        ) {
                        bUnique = false;
                        if (uniqueItems[temp_index].score < tempState.score )
                           uniqueItems[temp_index].copy(&tempState);
                     }//if
                  }//for
                  if (bUnique) {
                     uniqueItems[uniqueIndex++].copy(&tempState);
                  }//if
               }
               else {
                  m_Agenda.pushCandidate(&tempState);
               }
            }
            pGenerator = m_Agenda.generatorNext();  // next generator
         }
         // push candidates
         if (nBest == 1) {
//            for (last_tag=0; last_tag<CTag::COUNT; ++last_tag) {
//               if ( (uniqueMarkup&(1LL<<last_tag)) )
//                  m_Agenda.pushCandidate(&(uniqueItems[last_tag]));
//            }
           for (temp_index=0; temp_index<uniqueIndex; ++temp_index) {
              m_Agenda.pushCandidate(&(uniqueItems[temp_index]));
           }//for
         }
      }//tag

      m_Agenda.nextRound(); // move round
      if (m_bTrain) goldState.follow(m_goldState);
   }

   if ( m_bTrain && 1 ) {
      pGenerator = m_Agenda.bestGenerator();
      if ( *pGenerator != goldState ) {
         TRACE("Training error at the last word");
         updateScoreForState(&sentence, pGenerator, -1);
         updateScoreForState(&sentence, &goldState, 1);
         m_bTrainingError = true;
      }
      m_bTrainingError = false;
      return;
   }
   TRACE("Outputing sentence");
   vReturn->clear();
   if (nBest == 1) {
      generate( m_Agenda.bestGenerator() , &sentence , this , vReturn ) ;
      if (out_scores) out_scores[ 0 ] = m_Agenda.bestGenerator( )->score ;
   }
   else {
      m_Agenda.sortGenerators();
      for ( temp_index = 0 ; temp_index < nBest ; ++ temp_index ) {
         vReturn[ temp_index ].clear() ;
         if (out_scores) out_scores[ temp_index ] = 0 ;
         if ( temp_index < m_Agenda.generatorSize() ) {
            generate( m_Agenda.generator( temp_index ) , &sentence , this , &(vReturn[ temp_index ]) ) ;
            if (out_scores) out_scores[ temp_index ] = m_Agenda.bestGenerator( )->score ;
         }
      }
   }
   TRACE("Done, the highest score is: " << m_Agenda.bestGenerator()->score) ;
   TRACE("The total time spent: " << double(clock() - total_start_time)/CLOCKS_PER_SEC) ;
}

Ejemplo n.º 20

0

Mostrar archivo

Archivo: tagger.cpp Proyecto: StevenLOL/zpar

void CTagger::tag( const CStringVector * sentence_input , CTwoStringVector * vReturn , SCORE_TYPE * out_scores , unsigned long nBest , const CBitArray * prunes ) {
   clock_t total_start_time = clock();;
   int index , start_index , generator_index , temp_index, word_length;
   const CStateItem * generator_item ;
   CStateItem *candidate_item , tempState , maxState ;
   static CStateItem best_bigram[ 1<<CTag::SIZE ] ;
   unsigned long long best_bigram_mask = 0; // and the count
   unsigned long tag, last_tag ;

   static CStringVector sentence;
   static CRule rules(m_weights->m_bSegmentationRules);
   rules.segment(sentence_input, &sentence);
   const int length = sentence.size() ;

   if (length>=m_nMaxSentSize)
      THROW("the length of the sentence is bigger than the maximum sentence size "<<m_nMaxSentSize<<"; try changing the option");

   assert(vReturn!=NULL);

   TRACE("Initialising the tagging process...");
   m_WordCache.clear() ;
   m_Chart.clear() ;
   // put an empty sentence to the beginning
   tempState.clear() ;
   m_Chart[ 0 ]->insertItem( &tempState ) ;

   TRACE("Tagging started");
   // enumerating the end index
   // =========================
   // index is the word index starting from 0
   for ( index = 0 ; index < length ; ++ index ) {

      // m_Chart index 1 correspond to the first char
      m_Chart[ index + 1 ] ; // this is to make some necessary initialisation for each agenda, when pruning

      // control for the ending character of the candidate
      if ( index < length-1 && rules.canSeparate(index+1)==false )
         continue ;

      // enumerating the possible tags
      // =============================
      // the tag 0 is the NONE tag, and tag 1 is the BEGIN tag
      for ( tag = CTag::FIRST ; tag <= CTag::LAST ; ++ tag ) {

         start_index = index-1 ; // the end index of last word
         word_length = 1 ; // current word length

         // enumerating the start index
         // ===========================
         // the start index of the word is actually start_index + 1
         while( start_index >= -1 && word_length <= m_weights->m_maxLengthByTag[ tag ] ) {

            // control for the starting character of the candidate
            // ---------------------------------------------------
            while ( start_index >= 0 && rules.canSeparate(start_index+1)==false )
               start_index-- ;

            // start the search process
            // ------------------------
            // with pruning
            if (  ( prunes==NULL || prunes->isset( ( start_index+1 ) * m_nMaxSentSize + index ) ) && // not pruned
                  (  (  m_weights->m_mapWordFrequency.find( m_WordCache.find( start_index+1 , index , &sentence ) , 0 ) <
                        m_weights->m_nMaxWordFrequency/5000+5 &&
                        PENN_TAG_CLOSED[ tag ] == false  ) ||
                     m_weights->m_mapTagDictionary.lookup( m_WordCache.find( start_index+1 , index , &sentence ), tag )
                  ) // wordtag match
               ) {

               if (nBest==1) best_bigram_mask=0LL;

               for ( generator_index = 0 ; generator_index < m_Chart[ start_index+1 ]->size() ; ++ generator_index ) {
                  generator_item = m_Chart[ start_index+1 ]->item( generator_index ) ;
                  tempState.copy( generator_item ) ;
                  tempState.append( index , tag ) ;
                  tempState.score += getOrUpdateLocalScore( &sentence , &tempState , tempState.size()-1 ) ;
                  if (nBest==1) {
                     last_tag = tempState.size()>1 ? tempState.getTag(tempState.size()-2).code() : CTag::SENTENCE_BEGIN;
                     if ( ((best_bigram_mask&(1LL<<last_tag))==0LL) || best_bigram[last_tag].score < tempState.score ) {
                        best_bigram_mask|=(1LL<<last_tag);
                        best_bigram[last_tag].copy(&tempState);
                     }
                  }
                  else {
                     m_Chart[ index+1 ]->insertItem( &tempState );
                  }
               }
               if (nBest==1) {
                  for ( last_tag=0; last_tag<CTag::COUNT; ++last_tag ) {
                     if ( (best_bigram_mask&(1LL<<last_tag)) )
                        m_Chart[ index+1 ]->insertItem( &(best_bigram[last_tag]) );
                  }
               }
            }//if

            // control the first character of the candidate
            if ( rules.canAppend(start_index+1)==false )
               break ;

            // update start index and word len
            --start_index ;
            ++word_length ;

         }//start_index
      }//tag
   }//index

   TRACE("Outputing sentence");
   for ( temp_index = 0 ; temp_index < nBest ; ++ temp_index ) {
      vReturn[ temp_index ].clear() ;
         if (out_scores) out_scores[ temp_index ] = 0 ;
      if ( temp_index < m_Chart[length]->size() ) {
         generate( m_Chart[ length ]->bestItem( temp_index ) , &sentence , this , &(vReturn[ temp_index ]) ) ;
         if (out_scores) out_scores[ temp_index ] = m_Chart[ length ]->bestItem( temp_index )->score ;
      }
   }
   TRACE("Done, the highest score is: " << m_Chart[ length ]->bestItem( 0 )->score) ;
   TRACE("The total time spent: " << double(clock() - total_start_time)/CLOCKS_PER_SEC) ;
}

Ejemplo n.º 21

0

Mostrar archivo

Archivo: agenda.cpp Proyecto: desilinguist/zpar-sandbox

/**
 * Shared decoding routine for segmentation: decodes when correct_starts is
 * NULL and performs a training step when it is given.
 *
 * State items are stored in a flat lattice; lattice_index[i]..lattice_index[i+1]
 * delimits the items that cover the first i characters. For each character
 * every item of the previous slice proposes a "separate" action and, after the
 * first character, an "append" action; the actions kept by `beam` are
 * materialised as the next slice.
 *
 * @param segmentor       owner of the word cache and the scores.
 * @param sentence        the character sequence to process.
 * @param vReturn         array of outputs, written only when correct_starts is NULL.
 * @param out_scores      optional array of output scores; may be NULL.
 * @param rules           decides where separating / appending is allowed.
 * @param correct_starts  gold word-start indices, or NULL for plain decoding.
 * @param nBest           number of outputs requested.
 * @param round           passed on to updateScoreVectorForStates.
 * @return false when a score update was performed in training mode
 *         (the search lost or did not rank the gold state first), true otherwise.
 *
 * Note: almost all locals are function-local statics, so their values persist
 * between calls.
 * NOTE(review): no check against MAX_SENTENCE_SIZE is visible here --
 * presumably the caller guarantees sentence.size() fits; confirm.
 */
bool work(CSegmentor *segmentor, const CStringVector &sentence, CStringVector *vReturn, double *out_scores, CRule &rules, std::vector<unsigned> *correct_starts, unsigned nBest, int round) {
   static CStateItem lattice[(MAX_SENTENCE_SIZE+2)*BEAM_SIZE];
   static CStateItem *lattice_index[MAX_SENTENCE_SIZE+2];
   static const CStateItem *pGenerator, *pBestGen;
   static const CStateItem *correct, *temp;
   static int index, temp_index;                       // the index of the current char
   static unsigned long int doneWordRnd[MAX_SENTENCE_SIZE];  // mask whether candidate with the last word has been cached
   static unsigned long int doneWordLink[MAX_SENTENCE_SIZE]; // link to the corresponding cache state item from word_length + 1
   static CScoredAct doneWordItems[BEAM_SIZE]; 
   static int doneItemPointer; 
   static unsigned correct_word;
   static bool correct_append;
   static unsigned long word_length;
   static bool bCompatible; 
   const int length = sentence.size();
   static CAgendaSimple<CScoredAct> beam(BEAM_SIZE);
   static CScoredAct action;
   static const CStateItem *best[BEAM_SIZE];
   static unsigned nBestGen;

   //clock_t start_time = clock();
   TRACE("Initialising the decoding process...");
   segmentor->clearWordCache(); 

   // slice 0 of the lattice holds the single empty start item
   lattice[0].clear();
   lattice_index[0] = lattice;
   lattice_index[1] = lattice+1;

   if (correct_starts) {
      correct = lattice;                             
      correct_word=0;
      correct_append=false;
   }

   if (nBest == 1) // optimization for one best
      memset(doneWordRnd, 0, MAX_SENTENCE_SIZE*sizeof(doneWordRnd[0]));

   TRACE("Decoding started");
   // index is character index and lattice index shifts 1 right
   for (index=0; index<length; ++index) {

      // the new slice starts empty, right after the current one
      lattice_index[index+2] = lattice_index[index+1];
         
      // generate new state items for each character
      beam.clear();

      doneItemPointer = 0;

      for (pGenerator=lattice_index[index]; pGenerator!=lattice_index[index+1]; ++pGenerator) { // for each generator

         // 1. generate new items according to each previous item. 
         if ( rules.canSeparate( index ) ) {  
            action.load(pGenerator, false, getOrUpdateSeparateScore(segmentor, &sentence, pGenerator));
            if ( nBest == 1 ) {
               // keep only the best separate action per last-word length
               word_length = pGenerator->getWordLength();
               if ( doneWordRnd[word_length] < index+1 ) {
                  doneWordLink[word_length] = doneItemPointer;   // doneWordLink[i] caches the last word with length i+1
                  doneWordItems[doneItemPointer]=action; // copy item to cache.
                  ++doneItemPointer;
                  doneWordRnd[word_length] = index+1;
               }
               else {
                  assert(doneWordRnd[word_length] == index+1);
                  if ( action > doneWordItems[doneWordLink[word_length]] )
                     doneWordItems[doneWordLink[word_length]]=action;
               }
            }
            else {
               beam.insertItem(&action);
            }
         }

         // 2. generate by replacing items
         if ( index > 0 && rules.canAppend(index) ) {
            action.load(pGenerator, true, getOrUpdateAppendScore(segmentor, &sentence, pGenerator, index-1));
            beam.insertItem(&action);
         }

      }

      // 3. recollect the items for separate
      if (nBest == 1) {
         for (temp_index = 0; temp_index<doneItemPointer; ++temp_index) {
            beam.insertItem(&doneWordItems[temp_index]);
         }
      }

      // build new items in decode
      if (correct_starts) {
         bCompatible = false;
         // Guard the lookup: once every gold word start has been consumed,
         // correct_word equals correct_starts->size() and at() would throw.
         if (correct_word<correct_starts->size() && index==correct_starts->at(correct_word)) {
            correct_append = false;
            ++correct_word;
         }
         else {
            assert(correct_word==correct_starts->size()||index<correct_starts->at(correct_word));
            correct_append = true;
         }
         pBestGen = 0;
      }
      for (temp_index=0; temp_index<beam.size(); ++temp_index) {
         // materialise the action into the next free lattice cell
         pGenerator = beam.item(temp_index)->item;
         if (beam.item(temp_index)->append)
            pGenerator->append(lattice_index[index+2]);
         else
            pGenerator->separate(lattice_index[index+2]);
         lattice_index[index+2]->score = beam.item(temp_index)->score;
         if (correct_starts) {
            if (pBestGen==0 || lattice_index[index+2]->score > pBestGen->score)
               pBestGen = lattice_index[index+2];
            // track the gold item when it survives with the gold action
            if (correct == pGenerator && correct_append == beam.item(temp_index)->append) {
               bCompatible = true;
               correct = lattice_index[index+2];
            }
         }
         ++lattice_index[index+2];
      }
         
      // update scores if none from the agenda is correct state.
      if (correct_starts && !bCompatible) {
         TRACE("Decoding error, updating the weight std::vector");
         // build the gold successor in the next free cell for the update
         if (correct_append)
            correct->append(lattice_index[index+2]);
         else
            correct->separate(lattice_index[index+2]);
         updateScoreVectorForStates(segmentor, &sentence, pBestGen, lattice_index[index+2], round);
         return false;
      }

   }

   // a final step adding the last separate score for items. 
   beam.clear();
   for (pGenerator=lattice_index[length]; pGenerator!=lattice_index[length+1]; ++pGenerator) { 
      action.load(pGenerator, false, getOrUpdateSeparateScore(segmentor, &sentence, pGenerator));
      beam.insertItem(&action);
   }
   beam.sortItems(); // sort final items
   nBestGen = beam.size();
   for (temp_index=0; temp_index<nBestGen; ++temp_index) {
      best[temp_index] = beam.item(temp_index)->item;
   }

   if (correct_starts) {
      assert(bCompatible);
      if (correct!=best[0]) {
         TRACE("Decoding error, updating the weight std::vector");
         updateScoreVectorForStates(segmentor, &sentence, best[0], correct, round);
         return false;
      }
   }

   TRACE("Decoding finished");

   // now generate output sentence
   // n-best list will be stored in array
   if (!correct_starts){
      TRACE("Outputing sentence");
      for ( index=0; index<std::min(nBest, nBestGen); ++index ) {
         // clear
         vReturn[index].clear();
         if ( out_scores ) out_scores[index] = 0;
         // assign retval
         static unsigned count;
         // first pass: count the words by walking the back-pointers
         count = 0;
         temp = best[index];
         while (!temp->empty()) {
            ++count;
            temp = temp->prev();
         }
         vReturn[index].resize(count);
         // second pass: fill the words from last to first
         --count;
         temp = best[index];
         while (!temp->empty()) {
            for (temp_index=temp->getWordStart(); temp_index<=temp->getWordEnd(); ++temp_index) {
               vReturn[index].at(count) += sentence.at(temp_index);
            }
            --count;
            temp = temp->prev();
         }
         if ( out_scores!=NULL )
            out_scores[index] = best[index]->score;
      }
   }
   return true;
}