/*---------------------------------------------------------------
 *
 * work - beam-search decoding of one sentence; in training mode the
 *        gold-standard state is tracked alongside the agenda and
 *        updateScoresForStates is called when the search goes wrong.
 *
 * bTrain   - true when called for training; `correct` is only read
 *            in this mode.
 * sentence - the (word, tag) pairs to parse.
 * retval   - output array; entry i receives the tree of the i-th
 *            generator after sorting, for i < min(generatorSize, nBest).
 * correct  - the reference parse used for training.
 * nBest    - upper bound on the number of outputs written.
 * scores   - optional (may be null) array receiving the score of
 *            each output.
 *
 * The function returns early, without writing any output, when
 *  - the training example contradicts the rules,
 *  - the agenda has no generator left ("parsing failed"),
 *  - a training update has been performed (except under
 *    LOCAL_LEARNING, where decoding continues from the gold state).
 *
 * NOTE(review): most locals are function-level statics, so this
 * routine does not look reentrant - confirm before calling it from
 * more than one thread.
 *
 *--------------------------------------------------------------*/
void CDepParser::work( const bool bTrain , const CTwoStringVector &sentence , CDependencyParse *retval , const CDependencyParse &correct , int nBest , SCORE_TYPE *scores ) {

#ifdef DEBUG
   // NOTE(review): only declared under DEBUG, yet referenced by the last
   // TRACE below - presumably TRACE expands to nothing without DEBUG; confirm.
   clock_t total_start_time = clock();
#endif
   static int index;
   const int length = sentence.size() ;

   const CStateItem *pGenerator ;
   static CStateItem pCandidate(&m_lCache) ;

   // the following are used only for training
   static bool bCorrect ;  // whether the gold state is among the current generators (drives early update)
   static bool bContradictsRules;
   static CStateItem correctState(&m_lCache) ;
   static CPackedScoreType<SCORE_TYPE, action::MAX> packed_scores;

   ASSERT(length<MAX_SENTENCE_SIZE, "The size of the sentence is larger than the system configuration.");

   TRACE("Initialising the decoding process...") ;
   // initialise the word cache
   bContradictsRules = false;
   m_lCache.clear();
   for ( index=0; index<length; ++index ) {
      m_lCache.push_back( CTaggedWord<CTag, TAG_SEPARATOR>(sentence[index].first , sentence[index].second) );
      // filter out training examples that contradict the rules
      if (bTrain && m_weights->rules()) {
         // the root
         if ( correct[index].head == DEPENDENCY_LINK_NO_HEAD && canBeRoot(m_lCache[index].tag.code())==false) {
            TRACE("Rule contradiction: " << m_lCache[index].tag.code() << " can be root.");
            bContradictsRules = true;
         }
         // head left
         // NOTE(review): if DEPENDENCY_LINK_NO_HEAD is negative this test is also
         // true for the root word, which would then be checked against
         // hasLeftHead - confirm that this is intended.
         if ( correct[index].head < index && hasLeftHead(m_lCache[index].tag.code())==false) {
            TRACE("Rule contradiction: " << m_lCache[index].tag.code() << " has left head.");
            bContradictsRules = true;
         }
         // head right
         if ( correct[index].head > index && hasRightHead(m_lCache[index].tag.code())==false) {
            TRACE("Rule contradiction: " << m_lCache[index].tag.code() << " has right head.");
            bContradictsRules = true;
         }
      }
   }

   // initialise the agenda
   m_Agenda->clear();
   pCandidate.clear();                          // reset the candidate to the clean start state
   m_Agenda->pushCandidate(&pCandidate);        // push the start state as a candidate
   m_Agenda->nextRound();                       // and turn it into the generator item
   if (bTrain) correctState.clear();

   // verify that the supertags, when given, cover the whole sentence
   if (m_supertags) {
      ASSERT(m_supertags->getSentenceSize()==length, "Sentence size does not match supertags size");
   }

#ifdef LABELED
   unsigned long label;
   m_lCacheLabel.clear();
   if (bTrain) {
      // cache the gold labels and check them against the rules
      for (index=0; index<length; ++index) {
         m_lCacheLabel.push_back(CDependencyLabel(correct[index].label));
         if (m_weights->rules() && !canAssignLabel(m_lCache, correct[index].head, index, m_lCacheLabel[index])) {
            TRACE("Rule contradiction: " << correct[index].label << " on link head " << m_lCache[correct[index].head].tag.code() << " dep " << m_lCache[index].tag.code());
            bContradictsRules = true;
         }
      }
   }
#endif

   // skip the training example if it contradicts the rules
   if (bTrain && m_weights->rules() && bContradictsRules) {
      std::cout << "Skipping training example because it contradicts rules..." <<std::endl;
      return;
   }

   TRACE("Decoding started");
   // main loop: one agenda round per iteration, 2*length rounds in total
   for (index=0; index<length*2; ++index) {

      if (bTrain) bCorrect = false ;

      // nothing left to expand: give up on this sentence
      if (m_Agenda->generatorSize() == 0) {
         WARNING("parsing failed");
         return;
      }

      pGenerator = m_Agenda->generatorStart();
      // iterate over the generators of this round
      for (int j=0; j<m_Agenda->generatorSize(); ++j) {

         // the beam is rebuilt for every generator
         m_Beam->clear();
         packed_scores.reset();
         getOrUpdateStackScore( pGenerator, packed_scores, action::NO_ACTION );

         // for the state items that already contain all words
         if ( pGenerator->size() == length ) {
            assert( pGenerator->stacksize() != 0 );
            if ( pGenerator->stacksize()>1 ) {
#ifdef FRAGMENTED_TREE
               if (pGenerator->head(pGenerator->stacktop()) == DEPENDENCY_LINK_NO_HEAD)
                  poproot(pGenerator, packed_scores);
               else
#endif
               reduce(pGenerator, packed_scores) ;
            }
            else {
               poproot(pGenerator, packed_scores);
            }
         }
         // for the state items that still need more words
         else {
            if ( !pGenerator->afterreduce() ) { // there are many ways when there are many arcrighted items on the stack and the root need arcleft. force this.
               if (
#ifndef FRAGMENTED_TREE
                    ( pGenerator->size() < length-1 || pGenerator->stackempty() ) && // keep only one global root
#endif
                    ( pGenerator->stackempty() || m_supertags == 0 || m_supertags->canShift( pGenerator->size() ) ) && // supertags
                    ( pGenerator->stackempty() || !m_weights->rules() || canBeRoot( m_lCache[pGenerator->size()].tag.code() ) || hasRightHead(m_lCache[pGenerator->size()].tag.code()) ) // rules
                  ) {
                  shift(pGenerator, packed_scores) ;
               }
            }
            if ( !pGenerator->stackempty() ) {
               if (
#ifndef FRAGMENTED_TREE
                    ( pGenerator->size() < length-1 || pGenerator->headstacksize() == 1 ) && // one root
#endif
                    ( m_supertags == 0 || m_supertags->canArcRight(pGenerator->stacktop(), pGenerator->size()) ) && // supertags conform to this action
                    ( !m_weights->rules() || hasLeftHead(m_lCache[pGenerator->size()].tag.code()) ) // rules
                  ) {
                  arcright(pGenerator, packed_scores) ;
               }
            }
            if ( (!m_bCoNLL && !pGenerator->stackempty()) ||
                 (m_bCoNLL && pGenerator->stacksize()>1) // make sure that for conll the first item is not popped
               ) {
               // a stack top that already has a head is reduced; otherwise it may take the next word as head
               if ( pGenerator->head( pGenerator->stacktop() ) != DEPENDENCY_LINK_NO_HEAD ) {
                  reduce(pGenerator, packed_scores) ;
               }
               else {
                  if ( (m_supertags == 0 || m_supertags->canArcLeft(pGenerator->size(), pGenerator->stacktop())) && // supertags
                       (!m_weights->rules() || hasRightHead(m_lCache[pGenerator->stacktop()].tag.code())) // rules
                     ) {
                     arcleft(pGenerator, packed_scores) ;
                  }
               }
            }
         }

         // turn every beam entry into a candidate state: copy the generator,
         // take the score and apply the action recorded in the beam entry
         for (unsigned i=0; i<m_Beam->size(); ++i) {
            pCandidate = *pGenerator;
            pCandidate.score = m_Beam->item(i)->score;
            pCandidate.Move( m_Beam->item(i)->action );
            m_Agenda->pushCandidate(&pCandidate);
         }

         // remember whether the gold state is still among the generators
         if (bTrain && *pGenerator == correctState) {
            bCorrect = true ;
         }
         pGenerator = m_Agenda->generatorNext() ;

      }
      // when we are doing training, we need to consider the standard move and update
      if (bTrain) {
#ifdef EARLY_UPDATE
         // the gold state has dropped out of the agenda: update against the best generator
         if (!bCorrect) {
            TRACE("Error at the "<<correctState.size()<<"th word; total is "<<correct.size())
            updateScoresForStates(m_Agenda->bestGenerator(), &correctState, 1, -1) ;
#ifndef LOCAL_LEARNING
            return ;
#else
            // continue decoding from the gold state only
            m_Agenda->clearCandidates();
            m_Agenda->pushCandidate(&correctState);
#endif
         }
#endif

         // advance the gold state by one standard move
         if (bCorrect) {
#ifdef LABELED
            correctState.StandardMoveStep(correct, m_lCacheLabel);
#else
            correctState.StandardMoveStep(correct);
#endif
         }
#ifdef LOCAL_LEARNING
         ++m_nTrainingRound; // each training round is one transition-action
#endif
      }

      m_Agenda->nextRound(); // candidates become the generators of the next round
   }

   if (bTrain) {
      correctState.StandardFinish(); // pop the root that is left
      // then make sure that the correct item is the best one in the end
      if ( *(m_Agenda->bestGenerator()) != correctState ) {
         TRACE("The best item is not the correct one")
         updateScoresForStates(m_Agenda->bestGenerator(), &correctState, 1, -1) ;
         return ;
      }
   }

   TRACE("Outputing sentence");
   m_Agenda->sortGenerators();
   // write at most nBest trees; a null generator is skipped, leaving its output slot untouched
   for (int i=0; i<std::min(m_Agenda->generatorSize(), nBest); ++i) {
      pGenerator = m_Agenda->generator(i) ;
      if (pGenerator) {
         pGenerator->GenerateTree( sentence , retval[i] ) ;
         if (scores) scores[i] = pGenerator->score;
      }
   }
   TRACE("Done, the highest score is: " << m_Agenda->bestGenerator()->score ) ;
   TRACE("The total time spent: " << double(clock() - total_start_time)/CLOCKS_PER_SEC) ;
}
int CDepParser::work(const bool is_train, const CTwoStringVector & sentence, CDependencyParse * retval0, CDependencyParse * retval1, const CDependencyParse & oracle_tree0, const CDependencyParse & oracle_tree1, int nbest, SCORE_TYPE *scores) { #ifdef DEBUG clock_t total_start_time = clock(); #endif const int length = sentence.size(); const int max_round = length * 4 + 1; const int max_lattice_size = (kAgendaSize + 1) * max_round; ASSERT(length < MAX_SENTENCE_SIZE, "The size of sentence is too long."); CStateItem * lattice = GetLattice(max_lattice_size); CStateItem * lattice_wrapper[max_lattice_size]; CStateItem ** lattice_index[max_round]; CStateItem * correct_state = lattice; for (int i = 0; i < max_lattice_size; ++ i) { lattice_wrapper[i] = lattice + i; lattice[i].len_ = length; } lattice[0].clear(); correct_state = lattice; lattice_index[0] = lattice_wrapper; lattice_index[1] = lattice_index[0] + 1; static CPackedScoreType<SCORE_TYPE, action::kMax> packed_scores; TRACE("Initialising the decoding process ..."); m_lCache.clear(); for (int i = 0; i < length; ++ i) { m_lCache.push_back(CTaggedWord<CTag, TAG_SEPARATOR>(sentence[i].first, sentence[i].second)); #ifdef LABELED if (is_train) { if (i == 0) { m_lCacheLabel0.clear(); m_lCacheLabel1.clear(); } m_lCacheLabel0.push_back(CDependencyLabel(oracle_tree0[i].label)); m_lCacheLabel1.push_back(CDependencyLabel(oracle_tree1[i].label)); } #endif } int num_results = 0; int round = 0; bool is_correct; // used for training to specify correct state in lattice // loop with the next word to process in the sentence, // `round` represent the generators, and the condidates should be inserted // into the `round + 1` for (round = 1; round < max_round; ++ round) { if (lattice_index[round - 1] == lattice_index[round]) { // there is nothing in generators, the proning has cut all legel // generator. actually, in this kind of case, we should raise a // exception. 
however to achieve a parsing tree, an alternative // solution is go back to the previous round WARNING("Parsing Failed!"); -- round; break; } int current_beam_size = 0; // loop over the generator states // std::cout << "round : " << round << std::endl; for (CStateItem ** q = lattice_index[round - 1]; q != lattice_index[round]; ++ q) { const CStateItem * generator = (*q); m_Beam->clear(); packed_scores.reset(); GetOrUpdateStackScore(generator, packed_scores, action::kNoAction); Transit(generator, packed_scores); for (unsigned i = 0; i < m_Beam->size(); ++ i) { CStateItem candidate; candidate = (*generator); // generate candidate state according to the states in beam int curIndex = candidate.nextactionindex(); candidate.Move(curIndex, m_Beam->item(i)->action); candidate.score = m_Beam->item(i)->score; candidate.previous_ = generator; current_beam_size += InsertIntoBeam(lattice_index[round], &candidate, current_beam_size, kAgendaSize); } } lattice_index[round + 1] = lattice_index[round] + current_beam_size; if (is_train) { CStateItem next_correct_state(*correct_state); unsigned goldaction = next_correct_state.StandardMoveStep(oracle_tree0, oracle_tree1 #ifdef LABELED , m_lCacheLabel0, m_lCacheLabel1 #endif // end for LABELED ); //std::cout << *correct_state << std::endl; //std::cout << goldaction << std::endl; next_correct_state.previous_ = correct_state; is_correct = false; for (CStateItem ** q = lattice_index[round]; q != lattice_index[round + 1]; ++ q) { CStateItem * p = *q; if (next_correct_state.last_action_index == p->last_action_index && next_correct_state.last_action[next_correct_state.last_action_index] == p->last_action[p->last_action_index] && p->previous_ == correct_state) { correct_state = p; is_correct = true; break; } } //std::cout << *correct_state << std::endl; //std::cout << goldaction << std::endl; #ifdef EARLY_UPDATE if (!is_correct || round == max_round-1) { int curIndex = next_correct_state.nextactionindex(); TRACE("ERROR at the " << 
next_correct_state.size() << "th word for schema " << curIndex); if(curIndex == 0) { TRACE(" Total is " << oracle_tree0.size()); } else { TRACE(" Total is " << oracle_tree1.size()); } CStateItem * best_generator = (*lattice_index[round]); for (CStateItem ** q = lattice_index[round]; q != lattice_index[round + 1]; ++ q) { CStateItem * p = (*q); if (best_generator->score < p->score) { best_generator = p; } } UpdateScoresForStates(best_generator, &next_correct_state, 1, -1); return -1; } #endif // end for EARLY_UPDATE } } // if (is_train) { // CStateItem * best_generator = (*lattice_index[round-1]); // for (CStateItem ** q = lattice_index[round-1]; q != lattice_index[round]; ++ q) { // CStateItem * p = (*q); // if (best_generator->score < p->score) { // best_generator = p; // } // } // if (best_generator != correct_state) { // UpdateScoresForStates(best_generator, correct_state, 1, -1); // } // return -1; // } //delete[] sequence_correct_state; /* if (is_train) { //correct_state->StandardFinish(); // pop the root that is left // then make sure that the correct item is stack top finally CStateItem * best_generator = (*lattice_index[round-1]); for (CStateItem ** q = lattice_index[round-1]; q != lattice_index[round ]; ++ q) { CStateItem * p = (*q); if (best_generator->score < p->score) { best_generator = p; } } { //TRACE("The best item is not the correct one") UpdateScoresForStates(best_generator, correct_state, 1, -1) ; } } */ if (!retval0 || !retval1) { return -1; } TRACE("Output sentence"); std::sort(lattice_index[round - 1], lattice_index[round], StateHeapMore); num_results = lattice_index[round] - lattice_index[round - 1]; for (int i = 0; i < std::min(num_results, nbest); ++ i) { assert( (*(lattice_index[round - 1] + i))->size() == m_lCache.size()); (*(lattice_index[round - 1] + i))->GenerateTree(sentence, retval0[i], retval1[i]); if (scores) { scores[i] = (*(lattice_index[round - 1] + i))->score; } } TRACE("Done, total time spent: " << double(clock() - 
total_start_time) / CLOCKS_PER_SEC); return num_results; }
inline void CConParser::getOrUpdateStackScore( CWeight *cast_weights, CPackedScoreType<SCORE_TYPE, CAction::MAX> &retval, const CStateItem *item, const CAction &action, SCORE_TYPE amount , int round ) { retval.reset(); if (m_Context.stacksize==0) return; static unsigned long j; static CCFGSet s0ts1tbt; s0ts1tbt.copy(m_Context.s0ts1tbt); #ifdef _CHINESE_CFG_H // static unsigned long s0c_bracket_action; // static unsigned long s1c_bracket_action; // static unsigned long n0t_bracket_action; // static unsigned long s0cs1c_bracket_action; // static unsigned long s0cn0t_bracket_action; #endif // static unsigned long s0cs1c_distaction; #ifdef _CHINESE_CFG_H // s0c_bracket_action = encodeAction(action, m_Context.s0c_bracket); // s1c_bracket_action = encodeAction(action, m_Context.s1c_bracket); // n0t_bracket_action = encodeAction(action, m_Context.n0t_bracket); // s0cs1c_bracket_action = encodeAction(action, m_Context.s0cs1c_bracket); // s0cn0t_bracket_action = encodeAction(action, m_Context.s0cn0t_bracket); #endif // s0cs1c_distaction = encodeAction(action, m_Context.s0cs1c_dist); static CTuple2<CWord, CConstituent> word_constituent; static CTuple2<CTag, CConstituent> tag_constituent; static CTuple2<CTwoWords, CCFGSet> twoword_cfgset; static CTuple2<CWord, CCFGSet> word_cfgset; static CActionType actionType; actionType.code = action.type(); const CAction &a1 = item->action; const CAction &a2 = item->statePtr->action; static CTuple2<CAction, CAction> tuple_action2; // CWeight* cast_weights = (amount&&(round!=-1)) ? 
m_delta : static_cast<CWeight*>(m_weights); // S0 cast_weights->m_mapS0w.getOrUpdateScore(retval, *(m_Context.s0wt), action.code(), m_nScoreIndex, amount, round); if (!m_Context.s0c.empty()) cast_weights->m_mapS0c.getOrUpdateScore(retval, m_Context.s0c, action.code(), m_nScoreIndex, amount, round); refer_or_allocate_tuple2(tag_constituent, &(m_Context.s0t), &(m_Context.s0c)); cast_weights->m_mapS0tc.getOrUpdateScore(retval, tag_constituent, action.code(), m_nScoreIndex, amount, round); refer_or_allocate_tuple2(word_constituent, m_Context.s0w, &(m_Context.s0c)); cast_weights->m_mapS0wc.getOrUpdateScore(retval, word_constituent, action.code(), m_nScoreIndex, amount, round); // S1 if (m_Context.s1!=0) { cast_weights->m_mapS1w.getOrUpdateScore(retval, *(m_Context.s1wt), action.code(), m_nScoreIndex, amount, round); if (!m_Context.s1c.empty()) cast_weights->m_mapS1c.getOrUpdateScore(retval, m_Context.s1c, action.code(), m_nScoreIndex, amount, round); refer_or_allocate_tuple2(tag_constituent, &(m_Context.s1t), &(m_Context.s1c)); cast_weights->m_mapS1tc.getOrUpdateScore(retval, tag_constituent, action.code(), m_nScoreIndex, amount, round); refer_or_allocate_tuple2(word_constituent, m_Context.s1w, &(m_Context.s1c)); cast_weights->m_mapS1wc.getOrUpdateScore(retval, word_constituent, action.code(), m_nScoreIndex, amount, round); } // S2 if (m_Context.s2!=0) { // cast_weights->m_mapS2w.getOrUpdateScore(retval, *(m_Context.s2w), action.code(), m_nScoreIndex, amount, round); // cast_weights->m_mapS2c.getOrUpdateScore(retval, s2c_action, action.code(), m_nScoreIndex, amount, round); refer_or_allocate_tuple2(tag_constituent, &(m_Context.s2t), &(m_Context.s2c)); cast_weights->m_mapS2tc.getOrUpdateScore(retval, tag_constituent, action.code(), m_nScoreIndex, amount, round); refer_or_allocate_tuple2(word_constituent, m_Context.s2w, &(m_Context.s2c)); cast_weights->m_mapS2wc.getOrUpdateScore(retval, word_constituent, action.code(), m_nScoreIndex, amount, round); } // S3 if 
(m_Context.s3!=0) { // cast_weights->m_mapS3w.getOrUpdateScore(retval, *(m_Context.s3w), action.code(), m_nScoreIndex, amount, round); // cast_weights->m_mapS3c.getOrUpdateScore(retval, s3c_action, action.code(), m_nScoreIndex, amount, round); refer_or_allocate_tuple2(tag_constituent, &(m_Context.s3t), &(m_Context.s3c)); cast_weights->m_mapS3tc.getOrUpdateScore(retval, tag_constituent, action.code(), m_nScoreIndex, amount, round); refer_or_allocate_tuple2(word_constituent, m_Context.s3w, &(m_Context.s3c)); cast_weights->m_mapS3wc.getOrUpdateScore(retval, word_constituent, action.code(), m_nScoreIndex, amount, round); } // N0 if (m_Context.n0!=-1) { // cast_weights->m_mapN0w.getOrUpdateScore(retval, *(m_Context.n0w), action.code(), m_nScoreIndex, amount, round); // cast_weights->m_mapN0t.getOrUpdateScore(retval, n0t_action, action.code(), m_nScoreIndex, amount, round); cast_weights->m_mapN0wt.getOrUpdateScore(retval, *(m_Context.n0wt), action.code(), m_nScoreIndex, amount, round); } // N1 if (m_Context.n1!=-1) { // cast_weights->m_mapN1w.getOrUpdateScore(retval, *(m_Context.n1w), action.code(), m_nScoreIndex, amount, round); // cast_weights->m_mapN1t.getOrUpdateScore(retval, n1t_action, action.code(), m_nScoreIndex, amount, round); cast_weights->m_mapN1wt.getOrUpdateScore(retval, *(m_Context.n1wt), action.code(), m_nScoreIndex, amount, round); } // N2 if (m_Context.n2!=-1) { // cast_weights->m_mapN2w.getOrUpdateScore(retval, *(m_Context.n2w), action.code(), m_nScoreIndex, amount, round); // cast_weights->m_mapN2t.getOrUpdateScore(retval, n2t_action, action.code(), m_nScoreIndex, amount, round); cast_weights->m_mapN2wt.getOrUpdateScore(retval, *(m_Context.n2wt), action.code(), m_nScoreIndex, amount, round); } // N3 if (m_Context.n3!=-1) { // cast_weights->m_mapN3w.getOrUpdateScore(retval, *(m_Context.n3w), action.code(), m_nScoreIndex, amount, round); // cast_weights->m_mapN3t.getOrUpdateScore(retval, n3t_action, action.code(), m_nScoreIndex, amount, round); 
cast_weights->m_mapN3wt.getOrUpdateScore(retval, *(m_Context.n3wt), action.code(), m_nScoreIndex, amount, round); } // S0L if (m_Context.s0l!=0) { refer_or_allocate_tuple2(tag_constituent, &(m_Context.s0lt), &(m_Context.s0lc)); cast_weights->m_mapS0Ltc.getOrUpdateScore(retval, tag_constituent, action.code(), m_nScoreIndex, amount, round); refer_or_allocate_tuple2(word_constituent, m_Context.s0lw, &(m_Context.s0lc)); cast_weights->m_mapS0Lwc.getOrUpdateScore(retval, word_constituent, action.code(), m_nScoreIndex, amount, round); } // S0R if (m_Context.s0r!=0) { refer_or_allocate_tuple2(tag_constituent, &(m_Context.s0rt), &(m_Context.s0rc)); cast_weights->m_mapS0Rtc.getOrUpdateScore(retval, tag_constituent, action.code(), m_nScoreIndex, amount, round); refer_or_allocate_tuple2(word_constituent, m_Context.s0rw, &(m_Context.s0rc)); cast_weights->m_mapS0Rwc.getOrUpdateScore(retval, word_constituent, action.code(), m_nScoreIndex, amount, round); } // S0U if (m_Context.s0u!=0) { refer_or_allocate_tuple2(tag_constituent, &(m_Context.s0ut), &(m_Context.s0uc)); cast_weights->m_mapS0Utc.getOrUpdateScore(retval, tag_constituent, action.code(), m_nScoreIndex, amount, round); refer_or_allocate_tuple2(word_constituent, m_Context.s0uw, &(m_Context.s0uc)); cast_weights->m_mapS0Uwc.getOrUpdateScore(retval, word_constituent, action.code(), m_nScoreIndex, amount, round); } // S1L if (m_Context.s1l!=0) { refer_or_allocate_tuple2(tag_constituent, &(m_Context.s1lt), &(m_Context.s1lc)); cast_weights->m_mapS1Ltc.getOrUpdateScore(retval, tag_constituent, action.code(), m_nScoreIndex, amount, round); refer_or_allocate_tuple2(word_constituent, m_Context.s1lw, &(m_Context.s1lc)); cast_weights->m_mapS1Lwc.getOrUpdateScore(retval, word_constituent, action.code(), m_nScoreIndex, amount, round); } // S1R if (m_Context.s1r!=0) { refer_or_allocate_tuple2(tag_constituent, &(m_Context.s1rt), &(m_Context.s1rc)); cast_weights->m_mapS1Rtc.getOrUpdateScore(retval, tag_constituent, action.code(), 
m_nScoreIndex, amount, round); refer_or_allocate_tuple2(word_constituent, m_Context.s1rw, &(m_Context.s1rc)); cast_weights->m_mapS1Rwc.getOrUpdateScore(retval, word_constituent, action.code(), m_nScoreIndex, amount, round); } // S1U if (m_Context.s1u!=0) { refer_or_allocate_tuple2(tag_constituent, &(m_Context.s1ut), &(m_Context.s1uc)); cast_weights->m_mapS1Utc.getOrUpdateScore(retval, tag_constituent, action.code(), m_nScoreIndex, amount, round); refer_or_allocate_tuple2(word_constituent, m_Context.s1uw, &(m_Context.s1uc)); cast_weights->m_mapS1Uwc.getOrUpdateScore(retval, word_constituent, action.code(), m_nScoreIndex, amount, round); } // S0 S1 if (m_Context.s1!=0) { refer_or_allocate_tuple2(twoword_cfgset, &(m_Context.s0ws1w), &(m_Context.s0cs1c)); cast_weights->m_mapS0wcS1wc.getOrUpdateScore(retval, twoword_cfgset, action.code(), m_nScoreIndex, amount, round); refer_or_allocate_tuple2(word_cfgset, m_Context.s1w, &(m_Context.s0cs1c)); cast_weights->m_mapS0cS1w.getOrUpdateScore(retval, word_cfgset, action.code(), m_nScoreIndex, amount, round); refer_or_allocate_tuple2(word_cfgset, m_Context.s0w, &(m_Context.s0cs1c)); cast_weights->m_mapS0wS1c.getOrUpdateScore(retval, word_cfgset, action.code(), m_nScoreIndex, amount, round); cast_weights->m_mapS0cS1c.getOrUpdateScore(retval, m_Context.s0cs1c, action.code(), m_nScoreIndex, amount, round); } // S0 N0 if (m_Context.n0!=-1) { refer_or_allocate_tuple2(twoword_cfgset, &(m_Context.s0wn0w), &(m_Context.s0cn0t)); cast_weights->m_mapS0wN0w.getOrUpdateScore(retval, twoword_cfgset, action.code(), m_nScoreIndex, amount, round); refer_or_allocate_tuple2(word_cfgset, m_Context.n0w, &(m_Context.s0cn0t)); cast_weights->m_mapS0cN0w.getOrUpdateScore(retval, word_cfgset, action.code(), m_nScoreIndex, amount, round); refer_or_allocate_tuple2(word_cfgset, m_Context.s0w, &(m_Context.s0cn0t)); cast_weights->m_mapS0wN0t.getOrUpdateScore(retval, word_cfgset, action.code(), m_nScoreIndex, amount, round); 
cast_weights->m_mapS0cN0t.getOrUpdateScore(retval, m_Context.s0cn0t, action.code(), m_nScoreIndex, amount, round); } // S1 N0 if (m_Context.s1!=0 && m_Context.n0!=-1) { refer_or_allocate_tuple2(twoword_cfgset, &(m_Context.s1wn0w), &(m_Context.s1cn0t)); cast_weights->m_mapS1wN0w.getOrUpdateScore(retval, twoword_cfgset, action.code(), m_nScoreIndex, amount, round); refer_or_allocate_tuple2(word_cfgset, m_Context.n0w, &(m_Context.s1cn0t)); cast_weights->m_mapS1cN0w.getOrUpdateScore(retval, word_cfgset, action.code(), m_nScoreIndex, amount, round); refer_or_allocate_tuple2(word_cfgset, m_Context.s1w, &(m_Context.s1cn0t)); cast_weights->m_mapS1wN0t.getOrUpdateScore(retval, word_cfgset, action.code(), m_nScoreIndex, amount, round); cast_weights->m_mapS1cN0t.getOrUpdateScore(retval, m_Context.s1cn0t, action.code(), m_nScoreIndex, amount, round); } // N0 N1 if (m_Context.n1!=-1) { refer_or_allocate_tuple2(twoword_cfgset, &(m_Context.n0wn1w), &(m_Context.n0tn1t)); cast_weights->m_mapN0wN1w.getOrUpdateScore(retval, twoword_cfgset, action.code(), m_nScoreIndex, amount, round); refer_or_allocate_tuple2(word_cfgset, m_Context.n1w, &(m_Context.n0tn1t)); cast_weights->m_mapN0tN1w.getOrUpdateScore(retval, word_cfgset, action.code(), m_nScoreIndex, amount, round); refer_or_allocate_tuple2(word_cfgset, m_Context.n0w, &(m_Context.n0tn1t)); cast_weights->m_mapN0wN1t.getOrUpdateScore(retval, word_cfgset, action.code(), m_nScoreIndex, amount, round); cast_weights->m_mapN0tN1t.getOrUpdateScore(retval, m_Context.n0tn1t, action.code(), m_nScoreIndex, amount, round); } #ifdef _CHINESE_CFG_H /* BRACKET PUNC */ #endif // S0 S1 N0 refer_or_allocate_tuple2(word_cfgset, m_Context.s0w, &(m_Context.s0cs1cn0t)); cast_weights->m_mapS0wS1cN0t.getOrUpdateScore(retval, word_cfgset, action.code(), m_nScoreIndex, amount, round); if (m_Context.s1!=0) { refer_or_allocate_tuple2(word_cfgset, m_Context.s1w, &(m_Context.s0cs1cn0t)); cast_weights->m_mapS0cS1wN0t.getOrUpdateScore(retval, word_cfgset, 
action.code(), m_nScoreIndex, amount, round); } if (m_Context.n0!=-1) { refer_or_allocate_tuple2(word_cfgset, m_Context.n0w, &(m_Context.s0cs1cn0t)); cast_weights->m_mapS0cS1cN0w.getOrUpdateScore(retval, word_cfgset, action.code(), m_nScoreIndex, amount, round); } cast_weights->m_mapS0cS1cN0t.getOrUpdateScore(retval, m_Context.s0cs1cn0t, action.code(), m_nScoreIndex, amount, round); cast_weights->m_mapS0tS1tN0t.getOrUpdateScore(retval, m_Context.s0ts1tn0t, action.code(), m_nScoreIndex, amount, round); // S0 N0 N1 if (m_Context.n0!=-1) { refer_or_allocate_tuple2(word_cfgset, m_Context.s0w, &(m_Context.s0cn0tn1t)); cast_weights->m_mapS0wN0tN1t.getOrUpdateScore(retval, word_cfgset, action.code(), m_nScoreIndex, amount, round); refer_or_allocate_tuple2(word_cfgset, m_Context.n0w, &(m_Context.s0cn0tn1t)); cast_weights->m_mapS0cN0wN1t.getOrUpdateScore(retval, word_cfgset, action.code(), m_nScoreIndex, amount, round); if (m_Context.n1!=-1) { refer_or_allocate_tuple2(word_cfgset, m_Context.n1w, &(m_Context.s0cn0tn1t)); cast_weights->m_mapS0cN0tN1w.getOrUpdateScore(retval, word_cfgset, action.code(), m_nScoreIndex, amount, round); } cast_weights->m_mapS0cN0tN1t.getOrUpdateScore(retval, m_Context.s0cn0tn1t, action.code(), m_nScoreIndex, amount, round); // m_Context.n0 cast_weights->m_mapS0tN0tN1t.getOrUpdateScore(retval, m_Context.s0tn0tn1t, action.code(), m_nScoreIndex, amount, round); // m_Context.n0 } // S0 S1 S2 if (m_Context.s1!=0) { refer_or_allocate_tuple2(word_cfgset, m_Context.s0w, &(m_Context.s0cs1cs2c)); cast_weights->m_mapS0wS1cS2c.getOrUpdateScore(retval, word_cfgset, action.code(), m_nScoreIndex, amount, round); refer_or_allocate_tuple2(word_cfgset, m_Context.s1w, &(m_Context.s0cs1cs2c)); cast_weights->m_mapS0cS1wS2c.getOrUpdateScore(retval, word_cfgset, action.code(), m_nScoreIndex, amount, round); if (m_Context.s2!=0) { refer_or_allocate_tuple2(word_cfgset, m_Context.s2w, &(m_Context.s0cs1cs2c)); cast_weights->m_mapS0cS1cS2w.getOrUpdateScore(retval, 
word_cfgset, action.code(), m_nScoreIndex, amount, round); } cast_weights->m_mapS0cS1cS2c.getOrUpdateScore(retval, m_Context.s0cs1cs2c, action.code(), m_nScoreIndex, amount, round); cast_weights->m_mapS0tS1tS2t.getOrUpdateScore(retval, m_Context.s0ts1ts2t, action.code(), m_nScoreIndex, amount, round); } if (m_Context.n0!=-1 && m_Context.s0r!=0) { cast_weights->m_mapS0cS0RcN0t.getOrUpdateScore(retval, m_Context.s0cs0rcn0t, action.code(), m_nScoreIndex, amount, round); cast_weights->m_mapS0cS0RjN0t.getOrUpdateScore(retval, m_Context.s0cs0rjn0t, action.code(), m_nScoreIndex, amount, round); refer_or_allocate_tuple2(word_cfgset, m_Context.n0w, &(m_Context.s0cs0rc)); cast_weights->m_mapS0cS0RcN0w.getOrUpdateScore(retval, word_cfgset, action.code(), m_nScoreIndex, amount, round); } // S0 S0LRUS1 if (m_Context.s1!=0 && m_Context.s0l!=0) { cast_weights->m_mapS0cS0LcS1c.getOrUpdateScore(retval, m_Context.s0cs0lcs1c, action.code(), m_nScoreIndex, amount, round); cast_weights->m_mapS0cS0LjS1j.getOrUpdateScore(retval, m_Context.s0cs0ljs1j, action.code(), m_nScoreIndex, amount, round); refer_or_allocate_tuple2(word_cfgset, m_Context.s1w, &(m_Context.s0cs0lc)); cast_weights->m_mapS0cS0LcS1w.getOrUpdateScore(retval, word_cfgset, action.code(), m_nScoreIndex, amount, round); } if (m_Context.s1 != 0 && m_Context.s1r != 0) { cast_weights->m_mapS0cS1cS1Rc.getOrUpdateScore(retval, m_Context.s0cs1cs1rc, action.code(), m_nScoreIndex, amount, round); cast_weights->m_mapS0jS1cS1Rj.getOrUpdateScore(retval, m_Context.s0js1cs1rj, action.code(), m_nScoreIndex, amount, round); refer_or_allocate_tuple2(word_cfgset, m_Context.s0w, &(m_Context.s1cs1rc)); cast_weights->m_mapS0wS1cS1Rc.getOrUpdateScore(retval, word_cfgset, action.code(), m_nScoreIndex, amount, round); } // cast_weights->m_mapA1.getOrUpdateScore(retval, a1, action.code(), m_nScoreIndex, amount, round); // refer_or_allocate_tuple2(tuple_action2, &a1, &a2); // cast_weights->m_mapA1A2.getOrUpdateScore(retval, tuple_action2, 
action.code(), m_nScoreIndex, amount, round); }
void TARGET_LANGUAGE::CTagger::tag( CStringVector * sentence , CTwoStringVector * vReturn , int nBest , double * out_scores ) { clock_t total_start_time = clock();; // initialise the return value, the agenda and cache TRACE("Initialising the tagging process..."); static int index, temp_index, j; static unsigned tag, last_tag; static CPackedScoreType<SCORE_TYPE, CTag::MAX_COUNT> scores; const CStateItem *pGenerator; static CStateItem best_bigram[1<<CTag::SIZE][1<<CTag::SIZE]; static int done_bigram[1<<CTag::SIZE][1<<CTag::SIZE]; static CStateItem temp; m_CacheSize = sentence->size(); assert(vReturn!=NULL); vReturn->clear(); if (m_CacheSize+3>m_nMaxSentenceSize) { while (m_CacheSize+3>m_nMaxSentenceSize) { m_nMaxSentenceSize *= 2; } delete []stateindice; delete []stateitems; delete []m_possibletags; stateitems = new CStateItem[AGENDA_SIZE*m_nMaxSentenceSize]; stateindice = new unsigned[m_nMaxSentenceSize]; m_possibletags = new unsigned long long[m_nMaxSentenceSize]; } if (m_CacheSize == 0) { TRACE("Empty input."); return; } // init caches; for ( index=0; index<m_CacheSize; ++index ) { m_Cache[index].load(sentence->at(index)); } if (m_TopTags) { // toptags m_CacheTopTags.clear(); for ( index=0; index<m_CacheSize; ++index ) { m_CacheTopTags.push_back(m_TopTags->find(m_Cache[index], CTag::NONE)); } } for (index=0; index<m_CacheSize; ++index) { m_possibletags[index] = getPossibleTagsForWord(m_Cache[index]); if (m_bTrain) m_possibletags[index] |= (1LL<<m_CacheTags[index]); } // start tag TRACE("Tagging started"); m_Agenda->clear(); // the first step stateindice[0] = 0; stateindice[1] = 0; temp.prev = 0; scores.reset(); getLocalScore(scores, sentence, 0, 0); for (tag=0; tag<CTag::COUNT; ++tag) { if ( m_possibletags[0] & (1LL<<tag) ) { temp.tag = tag; temp.m_nScore = scores[tag] ; m_Agenda->insertItem(&temp); } } for (temp_index=0; temp_index<m_Agenda->size(); ++temp_index) { stateitems[stateindice[1]] = *(m_Agenda->item(temp_index)); ++stateindice[1]; } stateindice[2] = 
stateindice[1]; if (nBest == 1) { // for ( tag=0; tag<CTag::COUNT; ++tag ) // for ( last_tag=0; last_tag<CTag::COUNT; ++last_tag ) // done_bigram[last_tag][tag] = -1; memset(done_bigram, 0, (1<<CTag::SIZE)*(1<<CTag::SIZE)*sizeof(int)); } for ( index=1; index<m_CacheSize; index++ ) { m_Agenda->clear(); for ( j=stateindice[index-1]; j<stateindice[index]; ++j ) { pGenerator = &stateitems[j]; last_tag = pGenerator->tag; // lookup dictionary scores.reset(); getLocalScore(scores, sentence, pGenerator, index); for ( tag=CTag::FIRST; tag<CTag::COUNT; ++tag ) { if ( m_possibletags[index] & (1LL<<tag) ) { temp.prev = pGenerator; temp.tag = tag; // temp.m_nScore = pGenerator->m_nScore + getLocalScore(sentence, &temp, index); temp.m_nScore = pGenerator->m_nScore + scores[tag]; if (nBest==1) { if ( done_bigram[last_tag][tag] != index || temp.m_nScore > best_bigram[last_tag][tag].m_nScore ) { done_bigram[last_tag][tag] = index; best_bigram[last_tag][tag] = temp ; } } else { m_Agenda->insertItem(&temp); } } }//assert(bDone); }//for pGenerator if (nBest==1) { for ( tag=CTag::FIRST; tag<CTag::COUNT; ++tag ) { for ( last_tag=0; last_tag<CTag::COUNT; ++last_tag ) { if ( done_bigram[last_tag][tag]==index ) { m_Agenda->insertItem(&best_bigram[last_tag][tag]); } } } } for (temp_index=0; temp_index<m_Agenda->size(); ++temp_index) { stateitems[stateindice[index+1]] = *(m_Agenda->item(temp_index)); ++stateindice[index+1]; } stateindice[index+2] = stateindice[index+1]; // TRACE("The time for iteration" << index << ":was " << double(clock() - total_start_time)/CLOCKS_PER_SEC); } // outout TRACE("Outputing sentence"); m_Agenda->sortItems(); for ( temp_index = 0 ; temp_index < std::min(nBest, m_Agenda->size()) ; ++ temp_index ) { vReturn[temp_index].resize(m_CacheSize); pGenerator = m_Agenda->item(temp_index); for (j=0; j<m_CacheSize; ++j) { vReturn[temp_index][m_CacheSize-j-1].first = sentence->at(m_CacheSize-j-1); vReturn[temp_index][m_CacheSize-j-1].second = CTag(pGenerator->tag).str(); 
pGenerator = pGenerator->prev; } assert(pGenerator==0); if (out_scores) out_scores[temp_index] = m_Agenda->item(temp_index)->m_nScore; } TRACE("Done, the highest score is: " << m_Agenda->item(0)->m_nScore); TRACE("The total time spent: " << double(clock() - total_start_time)/CLOCKS_PER_SEC); }