Ejemplos de CStateItem en C++ (Cpp), ejemplos de CStateItem en C++ (Cpp)

Ejemplo n.º 1

0

Mostrar archivo

Archivo: depparser.cpp Proyecto: desilinguist/zpar-sandbox

void CDepParser::updateScoresForStates( const CStateItem *outout , const CStateItem *correct , SCORE_TYPE amount_add, SCORE_TYPE amount_subtract ) {

   // do not update those steps where they are correct
   static CStateItem item(&m_lCache);
   static unsigned action, correct_action;
   item.clear();
   while ( item != *outout ) {
      action = item.FollowMove( outout );
      correct_action = item.FollowMove( correct );
      if ( action == correct_action )
         item.Move( action );
      else break;
   }

   // for the necessary information for the correct and outout parsetree
   updateScoreForState( item, correct , amount_add ) ;
   updateScoreForState( item, outout , amount_subtract ) ;

   m_nTotalErrors++;
}

Ejemplo n.º 2

0

Mostrar archivo

Archivo: conparser.cpp Proyecto: SUTDNLP/ZPar

void CConParser::work(const CTwoStringVector &sentence, const CSentenceParsed &correct, CCoNLLOutput *o_conll){
	const int length = sentence.size();
	static int tmp_i,tmp_j;
	static CAction correct_action;
	m_lCache.clear();
	m_lWordLen.clear();
	for (tmp_i=0; tmp_i<length; tmp_i++ ) {
		m_lCache.push_back( CTaggedWord<CTag, TAG_SEPARATOR>(sentence[tmp_i].first , sentence[tmp_i].second) );
		m_lWordLen.push_back( getUTF8StringLength(sentence[tmp_i].first) );
	}
	std::vector<CStateItem> p(MAX_SENTENCE_SIZE*(2+UNARY_MOVES)+2);
	CStateItem *correctState = &p[0];
	//getLabeledBrackets(correct, correctState->gold_lb);
	correctState->clear();
	correctState->words = (&m_lCache);
	tmp_i = 1;
	while(true){
		correctState->StandardMove(correct, correct_action);
		//std::cerr<<correct_action<<std::endl;
		correctState->Move(&p[tmp_i],correct_action);
		correctState = &p[tmp_i];
		tmp_i ++;
		if (correctState == 0 || correctState->IsTerminated()) break; // while
	}

   correctState->GenerateStanford(sentence, o_conll);
	p.clear();
}

Ejemplo n.º 3

0

Mostrar archivo

Archivo: depparser.cpp Proyecto: fangyw/meishan-code

void
CDepParser::extract_features(const CDependencyParse &input0, const CDependencyParse &input1) {
  CStateItem item;
  unsigned action;
  CPackedScoreType<SCORE_TYPE, action::kMax> empty;

  // word and pos
  m_lCache.clear();
#ifdef LABELED
  m_lCacheLabel0.clear();
  m_lCacheLabel1.clear();
#endif

  for (int i = 0; i < input0.size(); ++ i) {
    m_lCache.push_back(CTaggedWord<CTag, TAG_SEPARATOR>(input0[i].word,
                                                        input0[i].tag));

#ifdef LABELED
    m_lCacheLabel0.push_back(CDependencyLabel(input0[i].label));
    m_lCacheLabel1.push_back(CDependencyLabel(input1[i].label));
#endif
  }

  // make standard item
  item.clear(); item.len_ = input0.size();
  for (int i = 0; i < input0.size() * 4; ++ i) {
    unsigned action = item.StandardMove(input0, input1
#ifdef LABELED
        , m_lCacheLabel0, m_lCacheLabel1
#endif
        );

    GetOrUpdateStackScore(&item, empty, action, 1, 1);
    item.Move(item.nextactionindex(), action);
  }
}

Ejemplo n.º 4

0

Mostrar archivo

Archivo: depparser.cpp Proyecto: desilinguist/zpar-sandbox

void CDepParser::work( const bool bTrain , const CTwoStringVector &sentence , CDependencyParse *retval , const CDependencyParse &correct , int nBest , SCORE_TYPE *scores ) {

#ifdef DEBUG
   clock_t total_start_time = clock();
#endif
   static int index;
   const int length = sentence.size() ; 

   const CStateItem *pGenerator ;
   static CStateItem pCandidate(&m_lCache) ;

   // used only for training
   static bool bCorrect ;  // used in learning for early update
   static bool bContradictsRules;
   static CStateItem correctState(&m_lCache) ;
   static CPackedScoreType<SCORE_TYPE, action::MAX> packed_scores;

   ASSERT(length<MAX_SENTENCE_SIZE, "The size of the sentence is larger than the system configuration.");

   TRACE("Initialising the decoding process...") ;
   // initialise word cache
   bContradictsRules = false;
   m_lCache.clear();
   for ( index=0; index<length; ++index ) {
      m_lCache.push_back( CTaggedWord<CTag, TAG_SEPARATOR>(sentence[index].first , sentence[index].second) );
      // filter std::cout training examples with rules
      if (bTrain && m_weights->rules()) {
         // the root
         if ( correct[index].head == DEPENDENCY_LINK_NO_HEAD && canBeRoot(m_lCache[index].tag.code())==false) {
            TRACE("Rule contradiction: " << m_lCache[index].tag.code() << " can be root.");
            bContradictsRules = true;
         }
         // head left
         if ( correct[index].head < index && hasLeftHead(m_lCache[index].tag.code())==false) {
            TRACE("Rule contradiction: " << m_lCache[index].tag.code() << " has left head.");
            bContradictsRules = true;
         }
         // head right
         if ( correct[index].head > index && hasRightHead(m_lCache[index].tag.code())==false) {
            TRACE("Rule contradiction: " << m_lCache[index].tag.code() << " has right head.");
            bContradictsRules = true;
         }
      }
   }

   // initialise agenda
   m_Agenda->clear();
   pCandidate.clear();                          // restore state using clean
   m_Agenda->pushCandidate(&pCandidate);           // and push it back
   m_Agenda->nextRound();                       // as the generator item
   if (bTrain) correctState.clear();

   // verifying supertags
   if (m_supertags) {
      ASSERT(m_supertags->getSentenceSize()==length, "Sentence size does not match supertags size");
   }

#ifdef LABELED
   unsigned long label;
   m_lCacheLabel.clear();
   if (bTrain) {
      for (index=0; index<length; ++index) {
         m_lCacheLabel.push_back(CDependencyLabel(correct[index].label));
         if (m_weights->rules() && !canAssignLabel(m_lCache, correct[index].head, index, m_lCacheLabel[index])) {
            TRACE("Rule contradiction: " << correct[index].label << " on link head " << m_lCache[correct[index].head].tag.code() << " dep " << m_lCache[index].tag.code());
            bContradictsRules = true;
         }
      }
   }
#endif

   // skip the training example if contradicts
   if (bTrain && m_weights->rules() && bContradictsRules) {
      std::cout << "Skipping training example because it contradicts rules..." <<std::endl;
      return;
   }

   TRACE("Decoding started"); 
   // loop with the next word to process in the sentence
   for (index=0; index<length*2; ++index) {
      
      if (bTrain) bCorrect = false ; 

      // none can this find with pruning ???
      if (m_Agenda->generatorSize() == 0) {
         WARNING("parsing failed"); 
         return;
      }

      pGenerator = m_Agenda->generatorStart();
      // iterate generators
      for (int j=0; j<m_Agenda->generatorSize(); ++j) {

         // for the state items that already contain all words
         m_Beam->clear();
         packed_scores.reset();
         getOrUpdateStackScore( pGenerator, packed_scores, action::NO_ACTION );
         if ( pGenerator->size() == length ) {
            assert( pGenerator->stacksize() != 0 );
            if ( pGenerator->stacksize()>1 ) {
#ifdef FRAGMENTED_TREE
               if (pGenerator->head(pGenerator->stacktop()) == DEPENDENCY_LINK_NO_HEAD)
                  poproot(pGenerator, packed_scores);
               else
#endif
               reduce(pGenerator, packed_scores) ;
            }
            else {
               poproot(pGenerator, packed_scores); 
            }
         }
         // for the state items that still need more words
         else {  
            if ( !pGenerator->afterreduce() ) { // there are many ways when there are many arcrighted items on the stack and the root need arcleft. force this.               
               if ( 
#ifndef FRAGMENTED_TREE
                    ( pGenerator->size() < length-1 || pGenerator->stackempty() ) && // keep only one global root
#endif
                    ( pGenerator->stackempty() || m_supertags == 0 || m_supertags->canShift( pGenerator->size() ) ) && // supertags
                    ( pGenerator->stackempty() || !m_weights->rules() || canBeRoot( m_lCache[pGenerator->size()].tag.code() ) || hasRightHead(m_lCache[pGenerator->size()].tag.code()) ) // rules
                  ) {
                  shift(pGenerator, packed_scores) ;
               }
            }
            if ( !pGenerator->stackempty() ) {
               if ( 
#ifndef FRAGMENTED_TREE
                    ( pGenerator->size() < length-1 || pGenerator->headstacksize() == 1 ) && // one root
#endif
                    ( m_supertags == 0 || m_supertags->canArcRight(pGenerator->stacktop(), pGenerator->size()) ) && // supertags conform to this action
                    ( !m_weights->rules() || hasLeftHead(m_lCache[pGenerator->size()].tag.code()) ) // rules
                  ) { 
                  arcright(pGenerator, packed_scores) ;
               }
            }
            if ( (!m_bCoNLL && !pGenerator->stackempty()) ||
                 (m_bCoNLL && pGenerator->stacksize()>1) // make sure that for conll the first item is not popped
               ) {
               if ( pGenerator->head( pGenerator->stacktop() ) != DEPENDENCY_LINK_NO_HEAD ) {
                  reduce(pGenerator, packed_scores) ;
               }
               else {
                  if ( (m_supertags == 0 || m_supertags->canArcLeft(pGenerator->size(), pGenerator->stacktop())) && // supertags
                       (!m_weights->rules() || hasRightHead(m_lCache[pGenerator->stacktop()].tag.code())) // rules
                     ) {
                     arcleft(pGenerator, packed_scores) ;
                  }
               }
            }
         }

         // insert item
         for (unsigned i=0; i<m_Beam->size(); ++i) {
            pCandidate = *pGenerator;
            pCandidate.score = m_Beam->item(i)->score;
            pCandidate.Move( m_Beam->item(i)->action );
            m_Agenda->pushCandidate(&pCandidate);
         }

         if (bTrain && *pGenerator == correctState) {
            bCorrect = true ;
         }
         pGenerator = m_Agenda->generatorNext() ;

      }
      // when we are doing training, we need to consider the standard move and update
      if (bTrain) {
#ifdef EARLY_UPDATE
         if (!bCorrect) {
            TRACE("Error at the "<<correctState.size()<<"th word; total is "<<correct.size())
            updateScoresForStates(m_Agenda->bestGenerator(), &correctState, 1, -1) ; 
#ifndef LOCAL_LEARNING
            return ;
#else
            m_Agenda->clearCandidates();
            m_Agenda->pushCandidate(&correctState);
#endif
         }
#endif

         if (bCorrect) {
#ifdef LABELED
            correctState.StandardMoveStep(correct, m_lCacheLabel);
#else
            correctState.StandardMoveStep(correct);
#endif
         }
#ifdef LOCAL_LEARNING
         ++m_nTrainingRound; // each training round is one transition-action
#endif
      } 
      
      m_Agenda->nextRound(); // move round
   }

   if (bTrain) {
      correctState.StandardFinish(); // pop the root that is left
      // then make sure that the correct item is stack top finally
      if ( *(m_Agenda->bestGenerator()) != correctState ) {
         TRACE("The best item is not the correct one")
         updateScoresForStates(m_Agenda->bestGenerator(), &correctState, 1, -1) ; 
         return ;
      }
   } 

   TRACE("Outputing sentence");
   m_Agenda->sortGenerators();
   for (int i=0; i<std::min(m_Agenda->generatorSize(), nBest); ++i) {
      pGenerator = m_Agenda->generator(i) ; 
      if (pGenerator) {
         pGenerator->GenerateTree( sentence , retval[i] ) ; 
         if (scores) scores[i] = pGenerator->score;
      }
   }
   TRACE("Done, the highest score is: " << m_Agenda->bestGenerator()->score ) ;
   TRACE("The total time spent: " << double(clock() - total_start_time)/CLOCKS_PER_SEC) ;
}

Ejemplo n.º 5

0

Mostrar archivo

Archivo: depparser.cpp Proyecto: fangyw/meishan-code

int
CDepParser::work(const bool is_train,
                 const CTwoStringVector & sentence,
                 CDependencyParse * retval0, CDependencyParse * retval1,
                 const CDependencyParse & oracle_tree0, const CDependencyParse & oracle_tree1,
                 int nbest,
                 SCORE_TYPE *scores) {

#ifdef DEBUG
  clock_t total_start_time = clock();
#endif

  const int length = sentence.size();
  const int max_round = length * 4 + 1;
  const int max_lattice_size = (kAgendaSize + 1) * max_round;

  ASSERT(length < MAX_SENTENCE_SIZE,
         "The size of sentence is too long.");

  CStateItem * lattice = GetLattice(max_lattice_size);
  CStateItem * lattice_wrapper[max_lattice_size];
  CStateItem ** lattice_index[max_round];
  CStateItem * correct_state = lattice;



  for (int i = 0; i < max_lattice_size; ++ i) {
    lattice_wrapper[i] = lattice + i;
    lattice[i].len_ = length;
  }

  lattice[0].clear();
  correct_state = lattice;
  lattice_index[0] = lattice_wrapper;
  lattice_index[1] = lattice_index[0] + 1;

  static CPackedScoreType<SCORE_TYPE, action::kMax> packed_scores;


  TRACE("Initialising the decoding process ...");

  m_lCache.clear();
  for (int i = 0; i < length; ++ i) {
    m_lCache.push_back(CTaggedWord<CTag, TAG_SEPARATOR>(sentence[i].first,
                                                        sentence[i].second));
#ifdef LABELED
    if (is_train) {
      if (i == 0) { m_lCacheLabel0.clear();  m_lCacheLabel1.clear(); }
      m_lCacheLabel0.push_back(CDependencyLabel(oracle_tree0[i].label));
      m_lCacheLabel1.push_back(CDependencyLabel(oracle_tree1[i].label));
    }
#endif
  }

  int num_results = 0;
  int round = 0;
  bool is_correct; // used for training to specify correct state in lattice

  // loop with the next word to process in the sentence,
  // `round` represent the generators, and the condidates should be inserted
  // into the `round + 1`
  for (round = 1; round < max_round; ++ round) {
    if (lattice_index[round - 1] == lattice_index[round]) {
      // there is nothing in generators, the proning has cut all legel
      // generator. actually, in this kind of case, we should raise a
      // exception. however to achieve a parsing tree, an alternative
      // solution is go back to the previous round
      WARNING("Parsing Failed!");
      -- round;
      break;
    }

    int current_beam_size = 0;
    // loop over the generator states
    // std::cout << "round : " << round << std::endl;
    for (CStateItem ** q = lattice_index[round - 1];
        q != lattice_index[round];
        ++ q) {
      const CStateItem * generator = (*q);
      m_Beam->clear(); packed_scores.reset();

      GetOrUpdateStackScore(generator, packed_scores, action::kNoAction);


      Transit(generator, packed_scores);

      for (unsigned i = 0; i < m_Beam->size(); ++ i) {
        CStateItem candidate; candidate = (*generator);
        // generate candidate state according to the states in beam
        int curIndex = candidate.nextactionindex();
        candidate.Move(curIndex, m_Beam->item(i)->action);
        candidate.score = m_Beam->item(i)->score;
        candidate.previous_ = generator;
        current_beam_size += InsertIntoBeam(lattice_index[round],
                                            &candidate,
                                            current_beam_size,
                                            kAgendaSize);
      }
    }


    lattice_index[round + 1] = lattice_index[round] + current_beam_size;

    if (is_train) {
        CStateItem next_correct_state(*correct_state);
      unsigned goldaction = next_correct_state.StandardMoveStep(oracle_tree0, oracle_tree1
#ifdef LABELED
          , m_lCacheLabel0, m_lCacheLabel1
#endif // end for LABELED
          );

      //std::cout << *correct_state << std::endl;
      //std::cout << goldaction << std::endl;

      next_correct_state.previous_ = correct_state;
      is_correct = false;

      for (CStateItem ** q = lattice_index[round];
           q != lattice_index[round + 1];
          ++ q) {

        CStateItem * p = *q;
        if (next_correct_state.last_action_index == p->last_action_index
            && next_correct_state.last_action[next_correct_state.last_action_index] == p->last_action[p->last_action_index]
             && p->previous_ == correct_state) {
          correct_state = p;
          is_correct = true;
          break;
        }
      }



      //std::cout << *correct_state << std::endl;
      //std::cout << goldaction << std::endl;

#ifdef EARLY_UPDATE
      if (!is_correct || round == max_round-1) {
        int curIndex = next_correct_state.nextactionindex();
        TRACE("ERROR at the " << next_correct_state.size() << "th word for schema " << curIndex);
        if(curIndex == 0)
        {
            TRACE(" Total is " << oracle_tree0.size());
        }
        else
        {
            TRACE(" Total is " << oracle_tree1.size());
        }

        CStateItem * best_generator = (*lattice_index[round]);
        for (CStateItem ** q = lattice_index[round];
             q != lattice_index[round + 1];
              ++ q) {
          CStateItem * p = (*q);
          if (best_generator->score < p->score) {
            best_generator = p;
          }
        }
        UpdateScoresForStates(best_generator, &next_correct_state, 1, -1);
        return -1;
      }
#endif // end for EARLY_UPDATE

    }
  }

//   if (is_train) {
//      CStateItem * best_generator = (*lattice_index[round-1]);
//        for (CStateItem ** q = lattice_index[round-1]; q != lattice_index[round]; ++ q) {
//           CStateItem * p = (*q);
//          if (best_generator->score < p->score) {
//               best_generator = p;
//            }
//        }
//        if (best_generator != correct_state) {
//            UpdateScoresForStates(best_generator, correct_state, 1, -1);
//        }
//        return -1;
//    }
  //delete[] sequence_correct_state;


/*
  if (is_train) {
      //correct_state->StandardFinish(); // pop the root that is left
     // then make sure that the correct item is stack top finally
      CStateItem * best_generator = (*lattice_index[round-1]);
      for (CStateItem ** q = lattice_index[round-1];
           q != lattice_index[round ];
           ++ q) {
        CStateItem * p = (*q);
        if (best_generator->score < p->score) {
          best_generator = p;
        }
      }

     {
        //TRACE("The best item is not the correct one")
        UpdateScoresForStates(best_generator, correct_state, 1, -1) ;
     }
  }
*/
  if (!retval0 || !retval1) {
    return -1;
  }

  TRACE("Output sentence");
  std::sort(lattice_index[round - 1], lattice_index[round], StateHeapMore);
  num_results = lattice_index[round] - lattice_index[round - 1];

  for (int i = 0; i < std::min(num_results, nbest); ++ i) {
    assert( (*(lattice_index[round - 1] + i))->size() == m_lCache.size());
    (*(lattice_index[round - 1] + i))->GenerateTree(sentence, retval0[i], retval1[i]);
    if (scores) { scores[i] = (*(lattice_index[round - 1] + i))->score; }
  }
  TRACE("Done, total time spent: " << double(clock() - total_start_time) / CLOCKS_PER_SEC);
  return num_results;
}

Ejemplo n.º 6

0

Mostrar archivo

Archivo: depparser.cpp Proyecto: frcchang/zpar

/*---------------------------------------------------------------
 *
 * work - the working process shared by training and parsing
 *
 * Returns: makes a new instance of CDependencyParse
 *
 *--------------------------------------------------------------*/
int
CDepParser::work(const bool is_train,
                 const CTwoStringVector & sentence,
                 CDependencyParse * retval,
                 const CDependencyParse & oracle_tree,
                 int nbest,
                 SCORE_TYPE *scores) {
#ifdef DEBUG
    clock_t total_start_time = clock();
#endif

    const int length = sentence.size();
    const int max_round = length * 2 + 1;
    const int max_lattice_size = (kAgendaSize + 1) * max_round;

    ASSERT(length < MAX_SENTENCE_SIZE,
           "The size of sentence is too long.");

    CStateItem * lattice = GetLattice(max_lattice_size);
    CStateItem * lattice_index[max_round];
    CStateItem * correct_state = lattice;

    for (int i = 0; i < max_lattice_size; ++ i) {
        lattice[i].len_ = length;
    }

    lattice[0].clear();
    correct_state = lattice;
    lattice_index[0] = lattice;
    lattice_index[1] = lattice_index[0] + 1;

    static CPackedScore packed_scores;
    TRACE("Initialising the decoding process ...");

    m_lCache.clear();
    for (int i = 0; i < length; ++ i) {
        m_lCache.push_back(CTaggedWord<CTag, TAG_SEPARATOR>(sentence[i].first,
                           sentence[i].second));
#ifdef LABELED
        if (is_train) {
            if (i == 0) {
                m_lCacheLabel.clear();
            }
            m_lCacheLabel.push_back(CDependencyLabel(oracle_tree[i].label));
        }
#endif
    }

    int num_results = 0;
    int round = 0;
    bool is_correct; // used for training to specify correct state in lattice

    // loop with the next word to process in the sentence, 'round' represent the
    // generators, and the condidates should be inserted into the 'round + 1'
    for (round = 1; round < max_round; ++ round) {
        if (lattice_index[round - 1] == lattice_index[round]) {
            // There is nothing in generators, the proning has cut all legel
            // generator. Actually, in this kind of case, we should raise a
            // exception. However to achieve a parsing tree, an alternative
            // solution is go back to the previous round
            WARNING("Parsing Failed!");
            -- round;
            break;
        }

        current_beam_size_ = 0;
        // loop over the generator states
        // std::cout << "round : " << round << std::endl;
        for (CStateItem * q = lattice_index[round - 1]; q != lattice_index[round];
                ++ q) {
            const CStateItem * generator = q;
            packed_scores.reset();
            GetOrUpdateStackScore(generator, packed_scores, action::kNoAction);
            Transit(generator, packed_scores);
        }

        for (unsigned i = 0; i < current_beam_size_; ++ i) {
            const CScoredTransition& transition = m_kBestTransitions[i];
            CStateItem* target = lattice_index[round]+ i;
            (*target) = (*transition.source);
            // generate candidate state according to the states in beam
            target->Move(transition.action);
            target->score = transition.score;
            target->previous_ = transition.source;
        }

        lattice_index[round + 1] = lattice_index[round] + current_beam_size_;

        if (is_train) {
            CStateItem next_correct_state(*correct_state);
            next_correct_state.StandardMoveStep(oracle_tree
#ifdef LABELED
                                                , m_lCacheLabel
#endif // end for LABELED
                                               );

            next_correct_state.previous_ = correct_state;
            is_correct = false;

            for (CStateItem *p = lattice_index[round]; p != lattice_index[round + 1];
                    ++ p) {
                if (next_correct_state.last_action == p->last_action
                        && p->previous_ == correct_state) {
                    correct_state = p;
                    is_correct = true;
                    break;
                }
            }

#ifdef EARLY_UPDATE
            if (!is_correct) {
                TRACE("ERROR at the " << next_correct_state.size() << "th word;"
                      << " Total is " << oracle_tree.size());

                CStateItem * best_generator = lattice_index[round];
                for (CStateItem * p = lattice_index[round]; p != lattice_index[round + 1];
                        ++ p) {
                    if (best_generator->score < p->score) {
                        best_generator = p;
                    }
                }
                UpdateScoresForStates(best_generator, &next_correct_state, 1, -1);
                return -1;
            }
#endif // end for EARLY_UPDATE
        }
    }

    if (is_train) {
        CStateItem * best_generator = lattice_index[round-1];

        for (CStateItem * p = lattice_index[round-1]; p != lattice_index[round]; ++ p) {
            if (best_generator->score < p->score) {
                best_generator = p;
            }
        }
        if (best_generator != correct_state) {
            UpdateScoresForStates(best_generator, correct_state, 1, -1);
        }
        return -1;
    }

    if (!retval) {
        return -1;
    }

    TRACE("Output sentence");
    std::sort(lattice_index[round - 1], lattice_index[round], StateMore);
    num_results = lattice_index[round] - lattice_index[round - 1];

    for (int i = 0; i < std::min(num_results, nbest); ++ i) {
        assert( (lattice_index[round - 1] + i)->size() == m_lCache.size());
        (lattice_index[round - 1] + i)->GenerateTree(sentence, retval[i]);
        if (scores) {
            scores[i] = (lattice_index[round - 1] + i)->score;
        }
    }
    TRACE("Done, total time spent: " << double(clock() - total_start_time) / CLOCKS_PER_SEC);
    return num_results;
}

Ejemplo n.º 7

0

Mostrar archivo

Archivo: agendachart.cpp Proyecto: karuiwu/reparse

void CSegmentor::segment(const CStringVector* sentence_input, CStringVector *vReturn, double *out_scores, int nBest) {
   clock_t total_start_time = clock();;
   const CStateItem *pGenerator, *pCandidate;
   CStateItem tempState;
   unsigned index;                              // the index of the current char
   unsigned j, k;                               // temporary index
   int subtract_score;                          // the score to be subtracted (previous item)
   static CStateItem best_bigram;
   int start_index;
   int word_length;
   int generator_index;

   static CStringVector sentence;
   static CRule rules(m_Feature->m_bRule);
   rules.segment(sentence_input, &sentence);
   const unsigned length = sentence.size();

   assert(length<MAX_SENTENCE_SIZE);
   assert(vReturn!=NULL);

   //clock_t start_time = clock();
   TRACE("Initialising the segmentation process...");
   vReturn->clear();
   clearWordCache(); 
   m_Chart.clear();

   tempState.clear();
   m_Chart[0]->insertItem(&tempState);

   TRACE("Segmenting started");
   for (index=0; index<length; index++) {

      // m_Chart index 1 correspond to the first char
      m_Chart[index+1];

      // control for the ending character of the candidate 
      if ( index < length-1 && rules.canSeparate(index+1)==false ) 
         continue ; 

      start_index = index-1 ; // the end index of last word
      word_length = 1 ; // current word length

      // enumerating the start index
      // ===========================
      // the start index of the word is actually start_index + 1
      while( start_index >= -1 && word_length <= MAX_WORD_SIZE ) {

         // control for the starting character of the candidate
         // ---------------------------------------------------
         while ( start_index >= 0 && rules.canSeparate(start_index+1)==false )
            start_index-- ; 

         // start the search process
         // ------------------------
         for ( generator_index = 0 ; generator_index < m_Chart[ start_index+1 ]->size() ; ++ generator_index ) {
            pGenerator = m_Chart[ start_index+1 ]->item( generator_index ) ;
            tempState.copy( pGenerator ) ;
            tempState.append( index ) ;
            tempState.m_nScore += m_Feature->getLocalScore( &sentence, &tempState, tempState.m_nLength-1 ) ;
            if (nBest==1) {
               if ( generator_index == 0 || tempState.m_nScore > best_bigram.m_nScore ) {
                  best_bigram.copy(&tempState);                                       //@@@
               }
            }
            else {
               m_Chart[ index+1 ]->insertItem( &tempState );
            }
         }
         if (nBest==1) {
            m_Chart[ index+1 ]->insertItem( &best_bigram );                  //@@@
         }                                                        //@@@

         // control the first character of the candidate
         if ( rules.canAppend(start_index+1)==false ) 
            break ; 

         // update start index and word len
         --start_index ;
         ++word_length ;

      }//start_index
   }
   // now generate outout sentence
   // n-best list will be stored in array
   // from the addr vReturn
   TRACE("Outputing sentence");
   for (k=0; k<nBest; ++k) {
      // clear
      vReturn[k].clear();
      if (out_scores!=NULL) 
         out_scores[k] = 0;
      // assign retval
      if (k<m_Chart[length]->size()) {
         pGenerator = m_Chart[length]->bestItem(k);
         for (j=0; j<pGenerator->m_nLength; j++) {
            std::string temp = "";
            for (unsigned l = pGenerator->getWordStart(j); l <= pGenerator->getWordEnd(j); ++l) {
               assert(sentence.at(l)!=" "); // [SPACE]
               temp += sentence.at(l);
            }
            vReturn[k].push_back(temp);
         }
         if (out_scores!=NULL)
            out_scores[k] = pGenerator->m_nScore;
      }
   }
   TRACE("Done, the best score: " << pGenerator->m_nScore);
   TRACE("total time spent: " << double(clock() - total_start_time)/CLOCKS_PER_SEC);
}