Exemplo n.º 1
0
/**
 * Extracts the features of a parser state and hands each one to the matching
 * weight map's getOrUpdateScore(), together with `retval`, `action`,
 * m_nScoreIndex, `amount` and `round`.
 *
 * Positions looked at (an index of -1 means "not available"; the word/tag
 * of an unavailable position is taken from g_emptyTaggedWord):
 *   st, st1, st2   top three items of the stack
 *   sth, sthh      head of st, and the head of that head
 *   stld / strd    leftmost / rightmost dependent of st
 *   stl2d / str2d  sibling() of stld / strd
 *   n0, n1, n2     next input position and the two positions after it
 *   n0ld / n0l2d   leftmost dependent of n0, and its sibling()
 *   ht, ht2        top two items of the head stack
 *
 * @param item    state whose features are extracted
 * @param retval  score object forwarded to every getOrUpdateScore() call
 * @param action  action identifier forwarded to every getOrUpdateScore() call
 * @param amount  forwarded unchanged; when it is 0 the composite keys built
 *                here only refer() to their components, otherwise they
 *                allocate() copies (presumably 0 means "read scores only" --
 *                TODO confirm against the weight map implementation)
 * @param round   forwarded unchanged to every getOrUpdateScore() call
 *
 * Behaviour change versus the previous revision: str2d_label used to be read
 * from strd_index (a copy-paste slip) and is now read from str2d_index, so the
 * m_mapSTR2Di feature value differs. Models trained with the old value need
 * to be retrained.
 *
 * NOTE(review): several function-local statics are used as scratch storage,
 * so this function looks unsafe to call concurrently -- confirm with callers.
 */
inline void CDepParser::getOrUpdateStackScore( const CStateItem *item, CPackedScoreType<SCORE_TYPE, action::MAX> &retval, const unsigned &action, SCORE_TYPE amount , int round ) {

   // ---- indices of the positions the features are built from (-1 == absent)
   const int &st_index = item->stackempty() ? -1 : item->stacktop(); // stack top

   const int &st1_index = item->stacksize() <= 1 ? -1 : item->stackitem(item->stacksize()-2); // stack[1] (Miguel)
   const int &st2_index = item->stacksize() <= 2 ? -1 : item->stackitem(item->stacksize()-3); // stack[2] (Miguel)

   const int &sth_index = st_index == -1 ? -1 : item->head(st_index); // head of stack top
   const int &sthh_index = sth_index == -1 ? -1 : item->head(sth_index); // head of the head of stack top
   const int &stld_index = st_index == -1 ? -1 : item->leftdep(st_index); // leftmost dep of stack top
   const int &strd_index = st_index == -1 ? -1 : item->rightdep(st_index); // rightmost dep of stack top
   const int &stl2d_index = stld_index == -1 ? -1 : item->sibling(stld_index); // left 2nd-most dep of stack top
   const int &str2d_index = strd_index == -1 ? -1 : item->sibling(strd_index); // right 2nd-most dep of stack top
   const int &n0_index = item->size()==m_lCache.size() ? -1 : item->size(); // next input position
   assert(n0_index<static_cast<int>(m_lCache.size())); // the next index shouldn't exceed sentence
   const int &n0ld_index = n0_index==-1 ? -1 : item->leftdep(n0_index); // leftmost dep of next
   const int &n0l2d_index = n0ld_index==-1 ? -1 : item->sibling(n0ld_index); // left 2nd-most dep of next
   const int &ht_index = item->headstackempty() ? -1 : item->headstacktop(); // head stack top
   const int &ht2_index = item->headstacksize()<2 ? -1 : item->headstackitem(item->headstacksize()-2); // head stack 2nd
   static int n1_index;
   static int n2_index;
   static int n3_index;
   // n0_index is known to be non-negative on the right-hand side of each &&
   n1_index = (n0_index != -1 && n0_index+1<m_lCache.size()) ? n0_index+1 : -1 ;
   n2_index = (n0_index != -1 && n0_index+2<m_lCache.size()) ? n0_index+2 : -1 ;
   n3_index = (n0_index != -1 && n0_index+3<m_lCache.size()) ? n0_index+3 : -1 ;

   // ---- tagged words at those positions
   const CTaggedWord<CTag, TAG_SEPARATOR> &st_word_tag = st_index==-1 ? g_emptyTaggedWord : m_lCache[st_index];

   const CTaggedWord<CTag, TAG_SEPARATOR> &st1_word_tag = st1_index==-1 ? g_emptyTaggedWord : m_lCache[st1_index]; // stack[1]
   const CTaggedWord<CTag, TAG_SEPARATOR> &st2_word_tag = st2_index==-1 ? g_emptyTaggedWord : m_lCache[st2_index]; // stack[2]

   const CTaggedWord<CTag, TAG_SEPARATOR> &sth_word_tag = sth_index==-1 ? g_emptyTaggedWord : m_lCache[sth_index];
   const CTaggedWord<CTag, TAG_SEPARATOR> &sthh_word_tag = sthh_index==-1 ? g_emptyTaggedWord : m_lCache[sthh_index];
   const CTaggedWord<CTag, TAG_SEPARATOR> &stld_word_tag = stld_index==-1 ? g_emptyTaggedWord : m_lCache[stld_index];
   const CTaggedWord<CTag, TAG_SEPARATOR> &strd_word_tag = strd_index==-1 ? g_emptyTaggedWord : m_lCache[strd_index];
   const CTaggedWord<CTag, TAG_SEPARATOR> &stl2d_word_tag = stl2d_index==-1 ? g_emptyTaggedWord : m_lCache[stl2d_index];
   const CTaggedWord<CTag, TAG_SEPARATOR> &str2d_word_tag = str2d_index==-1 ? g_emptyTaggedWord : m_lCache[str2d_index];
   const CTaggedWord<CTag, TAG_SEPARATOR> &n0_word_tag = n0_index==-1 ? g_emptyTaggedWord : m_lCache[n0_index];
   const CTaggedWord<CTag, TAG_SEPARATOR> &n0ld_word_tag = n0ld_index==-1 ? g_emptyTaggedWord : m_lCache[n0ld_index];
   const CTaggedWord<CTag, TAG_SEPARATOR> &n0l2d_word_tag = n0l2d_index==-1 ? g_emptyTaggedWord : m_lCache[n0l2d_index];
   const CTaggedWord<CTag, TAG_SEPARATOR> &n1_word_tag = n1_index==-1 ? g_emptyTaggedWord : m_lCache[n1_index];
   const CTaggedWord<CTag, TAG_SEPARATOR> &n2_word_tag = n2_index==-1 ? g_emptyTaggedWord : m_lCache[n2_index];
   const CTaggedWord<CTag, TAG_SEPARATOR> &ht_word_tag = ht_index==-1 ? g_emptyTaggedWord : m_lCache[ht_index];
   const CTaggedWord<CTag, TAG_SEPARATOR> &ht2_word_tag = ht2_index==-1 ? g_emptyTaggedWord : m_lCache[ht2_index];

   // ---- word part of each tagged word
   const CWord &st_word = st_word_tag.word;

   const CWord &st1_word = st1_word_tag.word; // stack[1] (Miguel)
   const CWord &st2_word = st2_word_tag.word; // stack[2] (Miguel)

   const CWord &sth_word = sth_word_tag.word;
   const CWord &sthh_word = sthh_word_tag.word;
   const CWord &stld_word = stld_word_tag.word;
   const CWord &strd_word = strd_word_tag.word;
   const CWord &stl2d_word = stl2d_word_tag.word;
   const CWord &str2d_word = str2d_word_tag.word;
   const CWord &n0_word = n0_word_tag.word;
   const CWord &n0ld_word = n0ld_word_tag.word;
   const CWord &n0l2d_word = n0l2d_word_tag.word;
   const CWord &n1_word = n1_word_tag.word;
   const CWord &n2_word = n2_word_tag.word;
   const CWord &ht_word = ht_word_tag.word;
   const CWord &ht2_word = ht2_word_tag.word;

   // ---- tag part of each tagged word
   const CTag &st_tag = st_word_tag.tag;

   const CTag &st1_tag = st1_word_tag.tag; // stack[1] (Miguel)
   const CTag &st2_tag = st2_word_tag.tag; // stack[2] (Miguel)

   const CTag &sth_tag = sth_word_tag.tag;
   const CTag &sthh_tag = sthh_word_tag.tag;
   const CTag &stld_tag = stld_word_tag.tag;
   const CTag &strd_tag = strd_word_tag.tag;
   const CTag &stl2d_tag = stl2d_word_tag.tag;
   const CTag &str2d_tag = str2d_word_tag.tag;
   const CTag &n0_tag = n0_word_tag.tag;
   const CTag &n0ld_tag = n0ld_word_tag.tag;
   const CTag &n0l2d_tag = n0l2d_word_tag.tag;
   const CTag &n1_tag = n1_word_tag.tag;
   const CTag &n2_tag = n2_word_tag.tag;
   const CTag &ht_tag = ht_word_tag.tag;
   const CTag &ht2_tag = ht2_word_tag.tag;

   // ---- dependency labels recorded in the state for those positions
   const int &st_label = st_index==-1 ? CDependencyLabel::NONE : item->label(st_index);

   const int &st1_label = st1_index==-1 ? CDependencyLabel::NONE : item->label(st1_index); // stack[1] (Miguel)
   const int &st2_label = st2_index==-1 ? CDependencyLabel::NONE : item->label(st2_index); // stack[2] (Miguel)

   const int &sth_label = sth_index==-1 ? CDependencyLabel::NONE : item->label(sth_index);
   const int &stld_label = stld_index==-1 ? CDependencyLabel::NONE : item->label(stld_index);
   const int &strd_label = strd_index==-1 ? CDependencyLabel::NONE : item->label(strd_index);
   const int &stl2d_label = stl2d_index==-1 ? CDependencyLabel::NONE : item->label(stl2d_index);
   // FIX: the label must come from str2d_index (it was read from strd_index before)
   const int &str2d_label = str2d_index==-1 ? CDependencyLabel::NONE : item->label(str2d_index);
   const int &n0ld_label = n0ld_index==-1 ? CDependencyLabel::NONE : item->label(n0ld_index);
   const int &n0l2d_label = n0l2d_index==-1 ? CDependencyLabel::NONE : item->label(n0l2d_index);

   // static so that tuples built with refer() can keep pointing at it
   static int st_n0_dist;
   st_n0_dist = encodeLinkDistance(st_index, n0_index);

   const int st_rarity = st_index==-1?0:item->rightarity(st_index);
   const int st_larity = st_index==-1?0:item->leftarity(st_index);
   const int n0_larity = n0_index==-1?0:item->leftarity(n0_index);

   const CSetOfTags<CDependencyLabel> &st_rtagset = st_index==-1?CSetOfTags<CDependencyLabel>():item->righttagset(st_index);
   const CSetOfTags<CDependencyLabel> &st_ltagset = st_index==-1?CSetOfTags<CDependencyLabel>():item->lefttagset(st_index);
   const CSetOfTags<CDependencyLabel> &n0_ltagset = n0_index==-1?CSetOfTags<CDependencyLabel>():item->lefttagset(n0_index);

   // ---- composite keys: refer() when amount == 0, allocate() copies otherwise
   static CTwoTaggedWords st_word_tag_n0_word_tag ;
   static CTwoWords st_word_n0_word ;
   if ( amount == 0 ) {
      st_word_tag_n0_word_tag.refer( &st_word_tag, &n0_word_tag );
      st_word_n0_word.refer( &st_word, &n0_word );
   }
   else {
      st_word_tag_n0_word_tag.allocate( st_word_tag, n0_word_tag );
      st_word_n0_word.allocate( st_word, n0_word );
   }

   // scratch tuples refilled through refer_or_allocate_tuple2/3 before each use
   static CTuple2<CWord, CTag> word_tag;
   static CTuple2<CWord, int> word_int;
   static CTuple2<CTag, int> tag_int;
   static CTuple2<CTag, CDependencyLabel> tag_label;
   static CTuple3<CWord, CTag, CTag> word_tag_tag;
   static CTuple3<CWord, CWord, CTag> word_word_tag;
   static CTuple3<CWord, CWord, int> word_word_int;
   static CTuple3<CTag, CTag, int> tag_tag_int;
   static CTuple2<CWord, CSetOfTags<CDependencyLabel> > word_tagset;
   static CTuple2<CTag, CSetOfTags<CDependencyLabel> > tag_tagset;

   // ---- single-position features
   if (st_index != -1) {
      cast_weights->m_mapSTw.getOrUpdateScore( retval, st_word, action, m_nScoreIndex, amount, round) ;
      cast_weights->m_mapSTt.getOrUpdateScore( retval, st_tag, action, m_nScoreIndex, amount, round ) ;
      cast_weights->m_mapSTwt.getOrUpdateScore( retval, st_word_tag, action, m_nScoreIndex, amount, round) ;
   }

   // stack[1] (Miguel) -- scored through the same maps as the stack top
   if (st1_index != -1) {
      cast_weights->m_mapSTw.getOrUpdateScore( retval, st1_word, action, m_nScoreIndex, amount, round) ;
      cast_weights->m_mapSTt.getOrUpdateScore( retval, st1_tag, action, m_nScoreIndex, amount, round ) ;
      cast_weights->m_mapSTwt.getOrUpdateScore( retval, st1_word_tag, action, m_nScoreIndex, amount, round) ;
   }

   // stack[2] (Miguel) -- scored through the same maps as the stack top
   if (st2_index != -1) {
      cast_weights->m_mapSTw.getOrUpdateScore( retval, st2_word, action, m_nScoreIndex, amount, round) ;
      cast_weights->m_mapSTt.getOrUpdateScore( retval, st2_tag, action, m_nScoreIndex, amount, round ) ;
      cast_weights->m_mapSTwt.getOrUpdateScore( retval, st2_word_tag, action, m_nScoreIndex, amount, round) ;
   }

   if (n0_index != -1) {
      cast_weights->m_mapN0w.getOrUpdateScore( retval, n0_word, action, m_nScoreIndex, amount, round ) ;
      cast_weights->m_mapN0t.getOrUpdateScore( retval, n0_tag, action, m_nScoreIndex, amount, round ) ;
      cast_weights->m_mapN0wt.getOrUpdateScore( retval, n0_word_tag, action, m_nScoreIndex, amount, round) ;
   }

   if (n1_index != -1) {
      cast_weights->m_mapN1w.getOrUpdateScore( retval, n1_word, action, m_nScoreIndex, amount, round ) ;
      cast_weights->m_mapN1t.getOrUpdateScore( retval, n1_tag, action, m_nScoreIndex, amount, round ) ;
      cast_weights->m_mapN1wt.getOrUpdateScore( retval, n1_word_tag, action, m_nScoreIndex, amount, round) ;
   }

   if (n2_index != -1) {
      cast_weights->m_mapN2w.getOrUpdateScore( retval, n2_word, action, m_nScoreIndex, amount, round ) ;
      cast_weights->m_mapN2t.getOrUpdateScore( retval, n2_tag, action, m_nScoreIndex, amount, round ) ;
      cast_weights->m_mapN2wt.getOrUpdateScore( retval, n2_word_tag, action, m_nScoreIndex, amount, round) ;
   }

   if (sth_index != -1) {
      cast_weights->m_mapSTHw.getOrUpdateScore( retval, sth_word, action, m_nScoreIndex, amount, round) ;
      cast_weights->m_mapSTHt.getOrUpdateScore( retval, sth_tag, action, m_nScoreIndex, amount, round ) ;
      // the label of st is only scored when st has a head
      cast_weights->m_mapSTi.getOrUpdateScore( retval, st_label, action, m_nScoreIndex, amount, round) ;
   }

   if (sthh_index != -1) {
      cast_weights->m_mapSTHHw.getOrUpdateScore( retval, sthh_word, action, m_nScoreIndex, amount, round) ;
      cast_weights->m_mapSTHHt.getOrUpdateScore( retval, sthh_tag, action, m_nScoreIndex, amount, round ) ;
      // the label of sth is only scored when sth has a head
      cast_weights->m_mapSTHi.getOrUpdateScore( retval, sth_label, action, m_nScoreIndex, amount, round) ;
   }

   if (stld_index != -1) {
      cast_weights->m_mapSTLDw.getOrUpdateScore( retval, stld_word, action, m_nScoreIndex, amount, round ) ;
      cast_weights->m_mapSTLDt.getOrUpdateScore( retval, stld_tag, action, m_nScoreIndex, amount, round ) ;
      cast_weights->m_mapSTLDi.getOrUpdateScore( retval, stld_label, action, m_nScoreIndex, amount, round) ;
   }

   if (strd_index != -1) {
      cast_weights->m_mapSTRDw.getOrUpdateScore( retval, strd_word, action, m_nScoreIndex, amount, round ) ;
      cast_weights->m_mapSTRDt.getOrUpdateScore( retval, strd_tag, action, m_nScoreIndex, amount, round ) ;
      cast_weights->m_mapSTRDi.getOrUpdateScore( retval, strd_label, action, m_nScoreIndex, amount, round) ;
   }

   if (n0ld_index != -1) {
      cast_weights->m_mapN0LDw.getOrUpdateScore( retval, n0ld_word, action, m_nScoreIndex, amount, round ) ;
      cast_weights->m_mapN0LDt.getOrUpdateScore( retval, n0ld_tag, action, m_nScoreIndex, amount, round ) ;
      cast_weights->m_mapN0LDi.getOrUpdateScore( retval, n0ld_label, action, m_nScoreIndex, amount, round) ;
   }

   if (stl2d_index != -1) {
      cast_weights->m_mapSTL2Dw.getOrUpdateScore( retval, stl2d_word, action, m_nScoreIndex, amount, round ) ;
      cast_weights->m_mapSTL2Dt.getOrUpdateScore( retval, stl2d_tag, action, m_nScoreIndex, amount, round ) ;
      cast_weights->m_mapSTL2Di.getOrUpdateScore( retval, stl2d_label, action, m_nScoreIndex, amount, round) ;
   }

   if (str2d_index != -1) {
      cast_weights->m_mapSTR2Dw.getOrUpdateScore( retval, str2d_word, action, m_nScoreIndex, amount, round ) ;
      cast_weights->m_mapSTR2Dt.getOrUpdateScore( retval, str2d_tag, action, m_nScoreIndex, amount, round ) ;
      cast_weights->m_mapSTR2Di.getOrUpdateScore( retval, str2d_label, action, m_nScoreIndex, amount, round) ;
   }

   if (n0l2d_index != -1) {
      cast_weights->m_mapN0L2Dw.getOrUpdateScore( retval, n0l2d_word, action, m_nScoreIndex, amount, round ) ;
      cast_weights->m_mapN0L2Dt.getOrUpdateScore( retval, n0l2d_tag, action, m_nScoreIndex, amount, round ) ;
      cast_weights->m_mapN0L2Di.getOrUpdateScore( retval, n0l2d_label, action, m_nScoreIndex, amount, round) ;
   }

   // ---- s0 and n0 combined (n0 may be absent here; its word/tag are then the empty ones)
   if (st_index != -1) {
      cast_weights->m_mapSTwtN0wt.getOrUpdateScore( retval, st_word_tag_n0_word_tag, action, m_nScoreIndex, amount, round );
      refer_or_allocate_tuple3(word_word_tag, &st_word, &n0_word, &st_tag);
      cast_weights->m_mapSTwtN0w.getOrUpdateScore( retval, word_word_tag, action, m_nScoreIndex, amount, round ) ;
      refer_or_allocate_tuple3(word_word_tag, &st_word, &n0_word, &n0_tag);
      cast_weights->m_mapSTwN0wt.getOrUpdateScore( retval, word_word_tag, action, m_nScoreIndex, amount, round ) ;
      refer_or_allocate_tuple3(word_tag_tag, &st_word, &st_tag, &n0_tag);
      cast_weights->m_mapSTwtN0t.getOrUpdateScore( retval, word_tag_tag, action, m_nScoreIndex, amount, round ) ;
      refer_or_allocate_tuple3(word_tag_tag, &n0_word, &st_tag, &n0_tag);
      cast_weights->m_mapSTtN0wt.getOrUpdateScore( retval, word_tag_tag, action, m_nScoreIndex, amount, round ) ;
      cast_weights->m_mapSTwN0w.getOrUpdateScore( retval, st_word_n0_word, action, m_nScoreIndex, amount, round ) ;
      cast_weights->m_mapSTtN0t.getOrUpdateScore( retval, CTagSet<CTag, 2>(encodeTags(st_tag,n0_tag)), action, m_nScoreIndex, amount, round ) ;
      //refer_or_allocate_tuple2(tag_label, &st_tag, &sn0_label);
      //cast_weights->m_mapSTtN0l.getOrUpdateScore( retval, tag_label, action, m_nScoreIndex, amount, round ) ;

   }

   // ---- tag n-grams
   if (st_index != -1 && n0_index != -1) {
      cast_weights->m_mapN0tN1t.getOrUpdateScore( retval, CTagSet<CTag, 2>(encodeTags(n0_tag,n1_tag)), action, m_nScoreIndex, amount, round ) ;
      cast_weights->m_mapN0tN1tN2t.getOrUpdateScore( retval, CTagSet<CTag, 3>(encodeTags(n0_tag,n1_tag,n2_tag)), action, m_nScoreIndex, amount, round ) ;
      cast_weights->m_mapSTtN0tN1t.getOrUpdateScore( retval, CTagSet<CTag, 3>(encodeTags(st_tag,n0_tag,n1_tag)), action, m_nScoreIndex, amount, round ) ;
      cast_weights->m_mapSTtN0tN0LDt.getOrUpdateScore( retval, CTagSet<CTag, 3>(encodeTags(st_tag,n0_tag,n0ld_tag)), action, m_nScoreIndex, amount, round ) ;
      cast_weights->m_mapN0tN0LDtN0L2Dt.getOrUpdateScore( retval, CTagSet<CTag, 3>(encodeTags(n0_tag,n0ld_tag,n0l2d_tag)), action, m_nScoreIndex, amount, round ) ;
   }
   if (st_index!=-1) {
      cast_weights->m_mapSTHtSTtN0t.getOrUpdateScore( retval, CTagSet<CTag, 3>(encodeTags(sth_tag,st_tag,n0_tag)), action, m_nScoreIndex, amount, round ) ;
      cast_weights->m_mapSTHHtSTHtSTt.getOrUpdateScore( retval, CTagSet<CTag, 3>(encodeTags(sthh_tag, sth_tag,st_tag)), action, m_nScoreIndex, amount, round ) ;
      cast_weights->m_mapSTtSTLDtN0t.getOrUpdateScore( retval, CTagSet<CTag, 3>(encodeTags(st_tag,stld_tag,n0_tag)), action, m_nScoreIndex, amount, round ) ;
      cast_weights->m_mapSTtSTLDtSTL2Dt.getOrUpdateScore( retval, CTagSet<CTag, 3>(encodeTags(st_tag,stld_tag,stl2d_tag)), action, m_nScoreIndex, amount, round ) ;
      cast_weights->m_mapSTtSTRDtN0t.getOrUpdateScore( retval, CTagSet<CTag, 3>(encodeTags(st_tag,strd_tag,n0_tag)), action, m_nScoreIndex, amount, round ) ;
      cast_weights->m_mapSTtSTRDtSTR2Dt.getOrUpdateScore( retval, CTagSet<CTag, 3>(encodeTags(st_tag,strd_tag,str2d_tag)), action, m_nScoreIndex, amount, round ) ;
   }

   // ---- distance between st and n0
   if (st_index!=-1 && n0_index!=-1) {
      refer_or_allocate_tuple2(word_int, &st_word, &st_n0_dist);
      cast_weights->m_mapSTwd.getOrUpdateScore( retval, word_int, action, m_nScoreIndex, amount, round) ;
      refer_or_allocate_tuple2(tag_int, &st_tag, &st_n0_dist);
      cast_weights->m_mapSTtd.getOrUpdateScore( retval, tag_int, action, m_nScoreIndex, amount, round ) ;
      refer_or_allocate_tuple2(word_int, &n0_word, &st_n0_dist);
      cast_weights->m_mapN0wd.getOrUpdateScore( retval, word_int, action, m_nScoreIndex, amount, round ) ;
      refer_or_allocate_tuple2(tag_int, &n0_tag, &st_n0_dist);
      cast_weights->m_mapN0td.getOrUpdateScore( retval, tag_int, action, m_nScoreIndex, amount, round ) ;
      refer_or_allocate_tuple3(word_word_int, &st_word, &n0_word, &st_n0_dist);
      cast_weights->m_mapSTwN0wd.getOrUpdateScore( retval, word_word_int, action, m_nScoreIndex, amount, round ) ;
      refer_or_allocate_tuple3(tag_tag_int, &st_tag, &n0_tag, &st_n0_dist);
      cast_weights->m_mapSTtN0td.getOrUpdateScore( retval, tag_tag_int, action, m_nScoreIndex, amount, round ) ;
   }

   // ---- st arity
   if (st_index != -1) {
      refer_or_allocate_tuple2(word_int, &st_word, &st_rarity);
      cast_weights->m_mapSTwra.getOrUpdateScore( retval, word_int, action, m_nScoreIndex, amount, round) ;
      refer_or_allocate_tuple2(tag_int, &st_tag, &st_rarity);
      cast_weights->m_mapSTtra.getOrUpdateScore( retval, tag_int, action, m_nScoreIndex, amount, round ) ;
      refer_or_allocate_tuple2(word_int, &st_word, &st_larity);
      cast_weights->m_mapSTwla.getOrUpdateScore( retval, word_int, action, m_nScoreIndex, amount, round) ;
      refer_or_allocate_tuple2(tag_int, &st_tag, &st_larity);
      cast_weights->m_mapSTtla.getOrUpdateScore( retval, tag_int, action, m_nScoreIndex, amount, round ) ;
   }

   // ---- n0 arity
   if (n0_index!=-1) {
      refer_or_allocate_tuple2(word_int, &n0_word, &n0_larity);
      cast_weights->m_mapN0wla.getOrUpdateScore( retval, word_int, action, m_nScoreIndex, amount, round) ;
      refer_or_allocate_tuple2(tag_int, &n0_tag, &n0_larity);
      cast_weights->m_mapN0tla.getOrUpdateScore( retval, tag_int, action, m_nScoreIndex, amount, round ) ;
   }

   // ---- st label sets
   if (st_index != -1){
      refer_or_allocate_tuple2(word_tagset, &st_word, &st_rtagset);
      cast_weights->m_mapSTwrp.getOrUpdateScore( retval, word_tagset, action, m_nScoreIndex, amount, round) ;
      refer_or_allocate_tuple2(tag_tagset, &st_tag, &st_rtagset);
      cast_weights->m_mapSTtrp.getOrUpdateScore( retval, tag_tagset, action, m_nScoreIndex, amount, round ) ;

      refer_or_allocate_tuple2(word_tagset, &st_word, &st_ltagset);
      cast_weights->m_mapSTwlp.getOrUpdateScore( retval, word_tagset, action, m_nScoreIndex, amount, round) ;
      refer_or_allocate_tuple2(tag_tagset, &st_tag, &st_ltagset);
      cast_weights->m_mapSTtlp.getOrUpdateScore( retval, tag_tagset, action, m_nScoreIndex, amount, round ) ;
   }

   // ---- n0 label sets
   if (n0_index != -1){
      refer_or_allocate_tuple2(word_tagset, &n0_word, &n0_ltagset);
      cast_weights->m_mapN0wlp.getOrUpdateScore( retval, word_tagset, action, m_nScoreIndex, amount, round) ;
      refer_or_allocate_tuple2(tag_tagset, &n0_tag, &n0_ltagset);
      cast_weights->m_mapN0tlp.getOrUpdateScore( retval, tag_tagset, action, m_nScoreIndex, amount, round ) ;
   }

   // ---- CoNLL-specific features (lemma and FEATS columns), only when m_bCoNLL is set
   //if (false) {
   if (m_bCoNLL) {

      static unsigned i;

      //std::cout<<"(conll)\n";

      // STACK[0]: lemma, plus every FEATS entry except the first (loop starts at 1)
      if (st_index!=-1) {
         if (!m_lCacheCoNLLLemma[st_index].empty()) cast_weights->m_mapSTl.getOrUpdateScore( retval, m_lCacheCoNLLLemma[st_index], action, m_nScoreIndex, amount, round) ;
         //if (m_lCacheCoNLLCPOS[st_index] != CCoNLLCPOS()) cast_weights->m_mapSTc.getOrUpdateScore( retval, m_lCacheCoNLLCPOS[st_index], action, m_nScoreIndex, amount, round) ;
         /*if (!m_lCacheCoNLLFeats[st_index].empty()) cast_weights->m_mapSTf.getOrUpdateScore( retval, m_lCacheCoNLLFeats[st_index][0], action, m_nScoreIndex, amount, round) ;*/
         for (i=1; i<m_lCacheCoNLLFeats[st_index].size(); ++i)
            cast_weights->m_mapSTf.getOrUpdateScore( retval, m_lCacheCoNLLFeats[st_index][i], action, m_nScoreIndex, amount, round) ;
         //is this possible?
      } // if (st_index!=-1)

      // STACK[1] (Miguel): lemma only, scored through the stack-top lemma map
      if (st1_index!=-1) {
         if (!m_lCacheCoNLLLemma[st1_index].empty()) cast_weights->m_mapSTl.getOrUpdateScore( retval, m_lCacheCoNLLLemma[st1_index], action, m_nScoreIndex, amount, round) ;
         //if (m_lCacheCoNLLCPOS[st1_index] != CCoNLLCPOS()) cast_weights->m_mapSTc.getOrUpdateScore( retval, m_lCacheCoNLLCPOS[st1_index], action, m_nScoreIndex, amount, round) ;
         //for (i=0; i<m_lCacheCoNLLFeats[st1_index].size(); ++i)
         //  cast_weights->m_mapSTf.getOrUpdateScore( retval, m_lCacheCoNLLFeats[st1_index][i], action, m_nScoreIndex, amount, round) ;
         //if (m_lCacheCoNLLFeats[st1_index] != CCoNLLFEATS()) cast_weights->m_mapSTc.getOrUpdateScore( retval, m_lCacheCoNLLFeats[st1_index], action, m_nScoreIndex, amount, round) ;
      } // if (st1_index!=-1)

      // STACK[2] (Miguel): lemma only, scored through the stack-top lemma map
      if (st2_index!=-1) {
         if (!m_lCacheCoNLLLemma[st2_index].empty()) cast_weights->m_mapSTl.getOrUpdateScore( retval, m_lCacheCoNLLLemma[st2_index], action, m_nScoreIndex, amount, round) ;
         //if (m_lCacheCoNLLCPOS[st2_index] != CCoNLLCPOS()) cast_weights->m_mapSTc.getOrUpdateScore( retval, m_lCacheCoNLLCPOS[st2_index], action, m_nScoreIndex, amount, round) ;
         //for (i=0; i<m_lCacheCoNLLFeats[st2_index].size(); ++i)
         //   cast_weights->m_mapSTf.getOrUpdateScore( retval, m_lCacheCoNLLFeats[st2_index][i], action, m_nScoreIndex, amount, round) ;
      } // if (st2_index!=-1)

      // INPUT[0]
      if (n0_index!=-1) {
         if (!m_lCacheCoNLLLemma[n0_index].empty()) cast_weights->m_mapN0l.getOrUpdateScore( retval, m_lCacheCoNLLLemma[n0_index], action, m_nScoreIndex, amount, round) ;
         //if (m_lCacheCoNLLCPOS[n0_index] != CCoNLLCPOS()) cast_weights->m_mapN0c.getOrUpdateScore( retval, m_lCacheCoNLLCPOS[n0_index], action, m_nScoreIndex, amount, round) ;
         for (i=1; i<m_lCacheCoNLLFeats[n0_index].size(); ++i)
            cast_weights->m_mapN0f.getOrUpdateScore( retval, m_lCacheCoNLLFeats[n0_index][i], action, m_nScoreIndex, amount, round) ;
         // NOTE(review): the first FEATS entry of n0 goes through m_mapSTf, not
         // m_mapN0f -- confirm whether this is intended or a copy-paste slip.
         if (!m_lCacheCoNLLFeats[n0_index].empty()) cast_weights->m_mapSTf.getOrUpdateScore( retval, m_lCacheCoNLLFeats[n0_index][0], action, m_nScoreIndex, amount, round) ;
      } // if (n0_index!=-1)

      // INPUT[1]
      if (n1_index!=-1) {
         //if (!m_lCacheCoNLLLemma[n1_index].empty()) cast_weights->m_mapN1l.getOrUpdateScore( retval, m_lCacheCoNLLLemma[n1_index], action, m_nScoreIndex, amount, round) ;
         //if (m_lCacheCoNLLCPOS[n1_index] != CCoNLLCPOS()) cast_weights->m_mapN1c.getOrUpdateScore( retval, m_lCacheCoNLLCPOS[n1_index], action, m_nScoreIndex, amount, round) ;
         for (i=1; i<m_lCacheCoNLLFeats[n1_index].size(); ++i)
            cast_weights->m_mapN1f.getOrUpdateScore( retval, m_lCacheCoNLLFeats[n1_index][i], action, m_nScoreIndex, amount, round) ;
         // NOTE(review): the first FEATS entry of n1 goes through m_mapSTf, not
         // m_mapN1f -- confirm whether this is intended or a copy-paste slip.
         if (!m_lCacheCoNLLFeats[n1_index].empty()) cast_weights->m_mapSTf.getOrUpdateScore( retval, m_lCacheCoNLLFeats[n1_index][0], action, m_nScoreIndex, amount, round) ;
      } // if (n1_index!=-1)

      // INPUT[2] (Miguel) -- disabled
      /*if (n2_index!=-1) {
         if (!m_lCacheCoNLLLemma[n2_index].empty()) cast_weights->m_mapN1l.getOrUpdateScore( retval, m_lCacheCoNLLLemma[n2_index], action, m_nScoreIndex, amount, round) ;
         if (m_lCacheCoNLLCPOS[n2_index] != CCoNLLCPOS()) cast_weights->m_mapN1c.getOrUpdateScore( retval, m_lCacheCoNLLCPOS[n2_index], action, m_nScoreIndex, amount, round) ;
         for (i=0; i<m_lCacheCoNLLFeats[n2_index].size(); ++i)
            cast_weights->m_mapN1f.getOrUpdateScore( retval, m_lCacheCoNLLFeats[n2_index][i], action, m_nScoreIndex, amount, round) ;
         } // if (n2_index!=-1)
         */
   }
}
Exemplo n.º 2
0
SCORE_TYPE CTagger::getOrUpdateSeparateScore( const CStringVector *sentence, const CSubStateItem *item, unsigned long index, SCORE_TYPE amount, unsigned long round ) {
   static SCORE_TYPE nReturn ; 
   static unsigned long start_0; 
   static unsigned long start_1, end_1, length_1; 
   static unsigned long start_2, end_2, length_2; 

   // about the words
   assert(amount!=0||index==item->size()-1||index==item->size());
   start_0 = index==item->size() ? 0 : item->getWordStart( index ) ;

   start_1 = index > 0 ? item->getWordStart( index-1 ) : 0 ;
   end_1 = index > 0 ? item->getWordEnd( index-1 ) : 0 ;
   assert(index==item->size()||index==0 || end_1 == start_0-1);
   length_1 = index > 0 ? item->getWordLength( index-1 ) : 0;

   start_2 = index > 1 ? item->getWordStart( index-2 ) : 0 ;
   end_2 = index > 1 ? item->getWordEnd( index-2 ) : 0 ;
   assert(index<2 || end_2 == start_1-1);
   length_2 = index > 1 ? item->getWordLength( index-2 ) : 0;

   const CWord &word_1 = index>0 ? find_or_replace_word_cache( start_1, end_1 ) : g_emptyWord; 
   const CWord &word_2 = index>1 ? find_or_replace_word_cache( start_2, end_2 ) : g_emptyWord; 

   // about the length
   if (length_1>LENGTH_MAX) length_1 = LENGTH_MAX;
   if (length_2>LENGTH_MAX) length_2 = LENGTH_MAX;

   // about the chars
   const CWord &first_char_0 = index<item->size() ? find_or_replace_word_cache( start_0, start_0 ) : g_emptyWord ;
   const CWord &first_char_1 = index>0 ? find_or_replace_word_cache( start_1, start_1 ) : g_emptyWord;

   const CWord &last_char_1 = index>0 ? find_or_replace_word_cache( end_1, end_1 ) : g_emptyWord;
   const CWord &last_char_2 = index>1 ? find_or_replace_word_cache( end_2, end_2 ) : g_emptyWord;
   const CWord &two_char = index>0&&index<item->size() ? find_or_replace_word_cache( end_1, start_0 ) : g_emptyWord ;
   const CWord &word_1_first_char_0 = index>0&&index<item->size() ? find_or_replace_word_cache( start_1, start_0 ) : g_emptyWord;
   const CWord &word_1_last_char_2 = index>1 ? find_or_replace_word_cache( end_2, end_1 ) : g_emptyWord;
   const CWord &three_char = ( length_1==1 && index>1 && index<item->size() ) ? find_or_replace_word_cache( end_2, start_0 ) : g_emptyWord;

   static CTwoWords word_2_word_1, first_char_1_last_char_1, first_char_0_first_char_1, last_char_1_last_char_2 ;
   if (amount==0&&index>0) {
      word_2_word_1.refer( &word_1 , &word_2 ) ;
      first_char_1_last_char_1.refer( &first_char_1 , &last_char_1 ) ;
      first_char_0_first_char_1.refer( &first_char_0 , &first_char_1 ) ;
      last_char_1_last_char_2.refer( &last_char_1 , &last_char_2 ) ;
   }
   else {
      word_2_word_1.allocate( word_1, word_2 ) ;
      first_char_1_last_char_1.allocate( first_char_1, last_char_1 ) ;
      first_char_0_first_char_1.allocate( first_char_0, first_char_1 ) ;
      last_char_1_last_char_2.allocate( last_char_1, last_char_2 ) ;
   }

   // about the tags 
   const CTag &tag_0 = index<item->size() ? item->getTag( index ) : g_beginTag;
   const CTag &tag_1 = index>0 ? item->getTag(index-1) : g_beginTag;
   const CTag &tag_2 = index>1 ? item->getTag(index-2) : g_beginTag;

   static CTaggedWord<CTag, TAG_SEPARATOR> wt1, wt2;
   static CTwoTaggedWords wt12;

   unsigned long long first_char_cat_0 = m_weights->m_mapCharTagDictionary.lookup(first_char_0) | (static_cast<unsigned long long>(1)<<tag_0.code()) ;
   unsigned long long last_char_cat_1 = m_weights->m_mapCharTagDictionary.lookup(last_char_1) | (static_cast<unsigned long long>(1)<<tag_1.code()) ;

   static CTagSet<CTag, 2> tag_0_tag_1, tag_0_tag_2, tag_1_tag_2;
   static CTagSet<CTag, 3> tag_0_tag_1_tag_2;
   tag_0_tag_1.load( encodeTags(tag_0, tag_1) );
   tag_0_tag_2.load( encodeTags(tag_0, tag_2) );
   tag_1_tag_2.load( encodeTags(tag_1, tag_2) );
   tag_0_tag_1_tag_2.load( encodeTags(tag_0, tag_1, tag_2) );

   static int j ; 

   // adding scores with features for last word
   if (index>0) {
      nReturn = m_weights->m_mapSeenWords.getOrUpdateScore( word_1 , m_nScoreIndex , amount , round ) ; 
      if (index>1) nReturn += m_weights->m_mapLastWordByWord.getOrUpdateScore( word_2_word_1 , m_nScoreIndex , amount , round ) ;

      if ( length_1 == 1 ) {
         nReturn += m_weights->m_mapOneCharWord.getOrUpdateScore( word_1 , m_nScoreIndex , amount , round ) ;
      }
      else {
         nReturn += m_weights->m_mapFirstAndLastChars.getOrUpdateScore( first_char_1_last_char_1 , m_nScoreIndex , amount , round ) ;

         nReturn += m_weights->m_mapLengthByFirstChar.getOrUpdateScore( std::make_pair(first_char_1, length_1) , m_nScoreIndex , amount , round ) ;
         nReturn += m_weights->m_mapLengthByLastChar.getOrUpdateScore( std::make_pair(last_char_1, length_1) , m_nScoreIndex , amount , round ) ;

//         nReturn += m_weights->m_mapLengthByTagAndFirstChar.getOrUpdateScore( std::make_pair(first_char_1, (length_1<<CTag::SIZE)|tag_1.code()) , m_nScoreIndex , amount , round ) ;
//         nReturn += m_weights->m_mapLengthByTagAndLastChar.getOrUpdateScore( std::make_pair(last_char_1, (length_1<<CTag::SIZE)|tag_1.code()) , m_nScoreIndex , amount , round ) ;
      }

      if (index>1) {
         nReturn += m_weights->m_mapCurrentWordLastChar.getOrUpdateScore( word_1_last_char_2 , m_nScoreIndex , amount , round ) ;
         nReturn += m_weights->m_mapLastWordByLastChar.getOrUpdateScore( last_char_1_last_char_2 , m_nScoreIndex , amount , round ) ;

         nReturn += m_weights->m_mapLengthByLastWord.getOrUpdateScore( std::make_pair(word_2, length_1) , m_nScoreIndex , amount , round ) ;
         nReturn += m_weights->m_mapLastLengthByWord.getOrUpdateScore( std::make_pair(word_1, length_2), m_nScoreIndex , amount , round ) ;
      }

      nReturn += m_weights->m_mapCurrentTag.getOrUpdateScore( std::make_pair(word_1, tag_1) , m_nScoreIndex , amount , round ) ; 

      if ( length_1 <= 2 ) nReturn += m_weights->m_mapLastTagByWord.getOrUpdateScore( std::make_pair(word_1, tag_2) , m_nScoreIndex , amount , round ) ;

      if (index>1) {
         if ( length_1 <= 2 ) nReturn += m_weights->m_mapTagByWordAndPrevChar.getOrUpdateScore( std::make_pair(word_1_last_char_2, tag_1) , m_nScoreIndex , amount , round ) ;
         if ( length_1 == 1 && index<item->size() ) nReturn += m_weights->m_mapTagOfOneCharWord.getOrUpdateScore( std::make_pair(three_char, tag_1) , m_nScoreIndex , amount , round ) ;
      }

      nReturn += m_weights->m_mapTagByLastChar.getOrUpdateScore( std::make_pair(last_char_1, tag_1) , m_nScoreIndex , amount , round ) ;
      nReturn += m_weights->m_mapTagByLastCharCat.getOrUpdateScore( std::make_pair(last_char_cat_1, tag_1) , m_nScoreIndex , amount , round ) ;

      for (j=0; j<length_1-1; ++j) {
         wt1.load(find_or_replace_word_cache(start_1+j, start_1+j), tag_1);
         wt2.load(last_char_1);//
         if (amount==0) { wt12.refer(&wt1, &wt2); } else { wt12.allocate(wt1, wt2); }
         nReturn += m_weights->m_mapTaggedCharByLastChar.getOrUpdateScore(wt12, m_nScoreIndex, amount, round) ;
      }
   }

   // all about the current word
   nReturn += m_weights->m_mapLastTagByTag.getOrUpdateScore( tag_0_tag_1, m_nScoreIndex , amount , round ) ;
   if (index>0) nReturn += m_weights->m_mapTag0Tag1Size1.getOrUpdateScore( std::make_pair( tag_0_tag_1, length_1 ), m_nScoreIndex , amount , round ) ;
   if (index>0) nReturn += m_weights->m_mapTag1Tag2Size1.getOrUpdateScore( std::make_pair( tag_1_tag_2, length_1 ), m_nScoreIndex , amount , round ) ;
   if (index>0) nReturn += m_weights->m_mapTag0Tag1Tag2Size1.getOrUpdateScore( std::make_pair( tag_0_tag_1_tag_2, length_1 ), m_nScoreIndex , amount , round ) ;

   if ( length_1 <= 2 ) nReturn += m_weights->m_mapTagByLastWord.getOrUpdateScore( std::make_pair(word_1, tag_0) , m_nScoreIndex , amount , round ) ;

   if ( index > 0 ) {
      nReturn += m_weights->m_mapLastTwoTagsByTag.getOrUpdateScore( tag_0_tag_1_tag_2, m_nScoreIndex , amount , round ) ;
   }

if (index<item->size()) {
   if ( index>0 ) {
      nReturn += m_weights->m_mapSeparateChars.getOrUpdateScore( two_char , m_nScoreIndex , amount , round ) ; 

      nReturn += m_weights->m_mapLastWordFirstChar.getOrUpdateScore( word_1_first_char_0 , m_nScoreIndex , amount , round ) ;

      nReturn += m_weights->m_mapFirstCharLastWordByWord.getOrUpdateScore( first_char_0_first_char_1 , m_nScoreIndex , amount , round ) ;

      if ( length_1 <= 2 ) nReturn += m_weights->m_mapTagByWordAndNextChar.getOrUpdateScore( std::make_pair(word_1_first_char_0, tag_1) , m_nScoreIndex , amount , round ) ;

//      nReturn += m_weights->m_mapSepCharAndNextChar.getOrUpdateScore( find_or_replace_word_cache(start_0, start_0==sentence->size()-1?start_0:start_0+1) , m_nScoreIndex , amount , round ) ; 
      
   }
  
   nReturn += m_weights->m_mapTagByFirstChar.getOrUpdateScore( std::make_pair(first_char_0, tag_0) , m_nScoreIndex , amount , round ) ; 
   nReturn += m_weights->m_mapTagByFirstCharCat.getOrUpdateScore( std::make_pair(first_char_cat_0, tag_0) , m_nScoreIndex , amount , round ) ; 

   nReturn += m_weights->m_mapFirstCharBy2Tags.getOrUpdateScore( std::make_pair(first_char_0, tag_0_tag_1) , m_nScoreIndex , amount , round ) ; 
   if (index>0)nReturn += m_weights->m_mapFirstCharBy3Tags.getOrUpdateScore( std::make_pair(first_char_0, tag_0_tag_1_tag_2) , m_nScoreIndex , amount , round ) ; 

   nReturn += m_weights->m_mapTagByChar.getOrUpdateScore( std::make_pair(first_char_0, tag_0), m_nScoreIndex , amount , round ) ;

   if (index>0) {
      wt1.load(last_char_1, tag_1);
      wt2.load(first_char_0, tag_0);
      if (amount==0) { wt12.refer(&wt1, &wt2); } else { wt12.allocate(wt1, wt2); }
      nReturn += m_weights->m_mapTaggedSeparateChars.getOrUpdateScore( wt12, m_nScoreIndex , amount , round ) ;

   }

   if (index>0) nReturn += m_weights->m_mapTagWordTag.getOrUpdateScore( std::make_pair( word_1, tag_0_tag_2 ), m_nScoreIndex, amount, round);
   if (index>1) nReturn += m_weights->m_mapWordTagTag.getOrUpdateScore( std::make_pair( word_2, tag_0_tag_1 ), m_nScoreIndex, amount, round);
}

   // ===================================================================================
   // character scores -- with end_1-1 middled
//   static int char_info;
//   char_info = encodeCharSegmentation(start_1==end_1, true);
//   if (index>0) {
//      for (j = std::max(0, static_cast<int>(end_1)-1); j < std::min(static_cast<unsigned long>(sentence->size()), end_1+2); ++j) {
//         nReturn += m_weights->m_mapCharUnigram.getOrUpdateScore( std::make_pair( find_or_replace_word_cache(j, j), encodeCharInfoAndPosition(char_info, j-end_1) ), m_nScoreIndex, amount, round);
//         if (hasCharTypeKnowledge()) nReturn += m_weights->m_mapCharCatUnigram.getOrUpdateScore( std::make_pair( groupCharTypes(segmentor, sentence, j, 1, amount), encodeCharInfoAndPosition(char_info, j-end_1) ), m_nScoreIndex, amount, round);
//      }
   
//      for (j = std::max(0, static_cast<int>(end_1)-1); j < std::min(static_cast<unsigned long>(sentence->size())-1, end_1+1); ++j) {
//         nReturn += m_weights->m_mapCharBigram.getOrUpdateScore( std::make_pair( find_or_replace_word_cache(j, j+1), encodeCharInfoAndPosition(char_info, j-end_1) ), m_nScoreIndex, amount, round);
//         if (hasCharTypeKnowledge()) nReturn += m_weights->m_mapCharCatBigram.getOrUpdateScore( std::make_pair( groupCharTypes(segmentor, sentence, j, 2, amount), encodeCharInfoAndPosition(char_info, j-end_1) ), m_nScoreIndex, amount, round);
//      }
   
//      for (j = std::max(0, static_cast<int>(end_1)-1); j < std::min(static_cast<unsigned long>(sentence->size())-2, end_1); ++j) {
//         nReturn += m_weights->m_mapCharTrigram.getOrUpdateScore( std::make_pair( find_or_replace_word_cache(j, j+2), encodeCharInfoAndPosition(char_info, j-end_1) ), m_nScoreIndex, amount, round);
//         if (hasCharTypeKnowledge()) nReturn += m_weights->m_mapCharCatTrigram.getOrUpdateScore( std::make_pair( groupCharTypes(segmentor, sentence, j, 3, amount), encodeCharInfoAndPosition(char_info, j-end_1) ), m_nScoreIndex, amount, round);
//      }
//   }

   return nReturn;
}
Exemplo n.º 3
0
// Sums the scores of the tag-related features that fire for the word at
// position `index` of `item`. Weights are only read (getScore is used for
// every feature).
//
// sentence : handed to the word cache to resolve [start, end] spans into words
// item     : supplies word boundaries (getWordStart/End/Length) and tags (getTag)
// index    : index of the word to score inside `item`
// returns  : the accumulated score of all features that fired
//
// The result is accumulated in function-local statics, as elsewhere in this
// file.
SCORE_TYPE CTagger::getLocalScore( const CStringVector * sentence, CStateItem * item , unsigned long index ) {

    static SCORE_TYPE nReturn ;
    static unsigned long int last_start , last_length ;
    static unsigned long int start , end , length ;

    // about the words
    start = item->getWordStart( index ) ;
    end = item->getWordEnd( index ) ;
    length = item->getWordLength( index ) ;
    // the large placeholders are only ever read behind the `start > 0` guard below
    last_start = index > 0 ? item->getWordStart( index-1 ) : 999999 ;
    last_length = index > 0 ? item->getWordLength( index-1 ) : 99999 ;
    const CWord &word = m_WordCache.find( start , end , sentence ) ;
    const CWord &last_word = index > 0 ? m_WordCache.find( last_start , start-1 , sentence ) : g_emptyWord ;

    // about the chars
    const CWord &first_char = m_WordCache.find( start , start , sentence ) ;
    const CWord &last_char = m_WordCache.find( end , end , sentence ) ;
    const CWord &lastword_firstchar = index > 0 ? m_WordCache.find( last_start , start , sentence ) : g_emptyWord ;
    const CWord &currentword_lastchar = index > 0 ? m_WordCache.find( start-1 , end , sentence) : g_emptyWord ;
    const CWord &three_char = length == 1 && start > 0 && end < sentence->size()-1 ? m_WordCache.find( start-1 , end+1 , sentence ) : g_emptyWord ;

    // NOTE(review): none of the following is read by any feature below. They are
    // kept because m_WordCache.find() may populate the cache as a side effect --
    // confirm against the cache implementation before deleting them.
    const CWord &first_char_last_word = index > 0 ? m_WordCache.find( last_start , last_start , sentence ) : g_emptyWord ;
    const CWord &last_char_last_word = index > 0 ? m_WordCache.find( start-1 , start-1 , sentence) : g_emptyWord;
    const CWord &first_char_next_word = end+1 < sentence->size() ? m_WordCache.find( end+1 , end+1 , sentence) : g_emptyWord ;
    const CWord &first_twochar = start+1 < sentence->size() ? m_WordCache.find( start , start+1 , sentence ) : g_emptyWord ;
    const CWord &last_twochar_last_word = start>1 ? m_WordCache.find( start-2 , start-1 , sentence ) : g_emptyWord ;
    const CWord &two_char = index > 0 ? m_WordCache.find( start-1 , start, sentence) : g_emptyWord;
    const CWord &currentword_lasttwochar = start > 1 ? m_WordCache.find( start-2 , end , sentence ) : g_emptyWord ;
    const CWord &lastword_firsttwochar = index > 0 && start+1 < sentence->size() ? m_WordCache.find( last_start , start+1 , sentence ) : g_emptyWord ;
    CTwoWords two_word;

    // about the tags
    const CTag tag = item->getTag(index);
    const CTag last_tag = index>0 ? item->getTag(index-1) : CTag::SENTENCE_BEGIN;
    const CTag second_last_tag = index>1 ? item->getTag(index-2) : CTag::SENTENCE_BEGIN;
    const CTagSet<CTag, 2> tag_bigram(encodeTags(tag, last_tag));
    const CTagSet<CTag, 3> tag_trigram(encodeTags(tag, last_tag, second_last_tag));
    static CTaggedWord<CTag, TAG_SEPARATOR> wt1, wt2;
    static CTwoTaggedWords wt12;

    // Character categories: the dictionary entry of the char with the bit of the
    // current tag set. The bit is built in 64-bit unsigned arithmetic: a plain
    // `1 << code` is an int shift and overflows for tag codes of 31 and above.
    // getOrUpdateLocalScore builds its keys for the same maps the same way.
    const unsigned long long tag_bit = static_cast<unsigned long long>(1) << tag.code() ;
    unsigned long long first_char_cat = m_weights->m_mapCharTagDictionary.lookup(first_char) | tag_bit ;
    unsigned long long last_char_cat = m_weights->m_mapCharTagDictionary.lookup(last_char) | tag_bit ;

    // word / tag n-gram features
    nReturn = m_weights->m_mapCurrentTag.getScore( std::make_pair(word, tag) , m_nScoreIndex ) ;
    nReturn += m_weights->m_mapLastTagByTag.getScore( tag_bigram , m_nScoreIndex ) ;
    nReturn += m_weights->m_mapLastTwoTagsByTag.getScore( tag_trigram , m_nScoreIndex ) ;

    // features that look at the previous word; only short words (<= 2) are used
    if ( start > 0 ) {
        if ( last_length <= 2 ) nReturn += m_weights->m_mapTagByLastWord.getScore( std::make_pair(last_word, tag) , m_nScoreIndex ) ;
        if ( length <= 2 ) nReturn += m_weights->m_mapLastTagByWord.getScore( std::make_pair(word, last_tag) , m_nScoreIndex ) ;
        if ( length <= 2 ) nReturn += m_weights->m_mapTagByWordAndPrevChar.getScore( std::make_pair(currentword_lastchar, tag) , m_nScoreIndex ) ;
        if ( last_length <= 2 ) nReturn += m_weights->m_mapTagByWordAndNextChar.getScore( std::make_pair(lastword_firstchar, last_tag) , m_nScoreIndex) ;
    }

    if ( length == 1 ) {
        // single-char word: scored with its left and right neighbour chars
        if ( start > 0 && end < sentence->size()-1 )
            nReturn += m_weights->m_mapTagOfOneCharWord.getScore( std::make_pair(three_char, tag) , m_nScoreIndex ) ;
    }
    else {
        nReturn += m_weights->m_mapTagByFirstChar.getScore( std::make_pair(first_char, tag) , m_nScoreIndex ) ;
        nReturn += m_weights->m_mapTagByLastChar.getScore( std::make_pair(last_char, tag) , m_nScoreIndex ) ;
        nReturn += m_weights->m_mapTagByFirstCharCat.getScore( std::make_pair(first_char_cat, tag) , m_nScoreIndex ) ;
        nReturn += m_weights->m_mapTagByLastCharCat.getScore( std::make_pair(last_char_cat, tag) , m_nScoreIndex ) ;

        // per-character features; `length` already holds item->getWordLength(index)
        for ( unsigned long j = 0 ; j < length ; ++ j ) {
            const CWord &cur_char = m_WordCache.find( start+j , start+j , sentence ) ;
            const bool not_first = j > 0 ;
            const bool not_last = j+1 < length ;

            // chars strictly inside the word
            if ( not_first && not_last )
                nReturn += m_weights->m_mapTagByChar.getScore( std::make_pair(cur_char, tag) , m_nScoreIndex );

            if ( not_first ) {
                // tagged char paired with the first char of the word
                wt1.load( cur_char, tag );
                wt2.load( first_char );
                wt12.refer(&wt1, &wt2);
                nReturn += m_weights->m_mapTaggedCharByFirstChar.getScore( wt12, m_nScoreIndex );
                // char identical to the one right before it
                if ( cur_char == m_WordCache.find(start+j-1, start+j-1, sentence))
                    nReturn += m_weights->m_mapRepeatedCharByTag.getScore( std::make_pair(cur_char, tag) , m_nScoreIndex );
            }
            if ( not_last ) {
                // tagged char paired with the last char of the word
                wt1.load( cur_char, tag );
                wt2.load( last_char );
                wt12.refer(&wt1, &wt2);
                nReturn += m_weights->m_mapTaggedCharByLastChar.getScore( wt12 , m_nScoreIndex );
            }
        }
    }

    return nReturn;
}
Exemplo n.º 4
0
// Accumulates the scores of all segmentation and tagging features that fire
// for the word at position `index` of `item`, passing `amount` and `round`
// through to getOrUpdateScore for every feature.
//
// When amount==0 the word cache is queried with find() and composite keys are
// built with refer(); otherwise replace() and allocate() are used instead.
// (Presumably amount==0 is the pure scoring mode -- confirm with the weight
// map implementation.)
//
// sentence : handed to the word cache to resolve [first, last] spans into words
// item     : supplies word boundaries and tags
// index    : index of the word being scored
// amount   : forwarded to getOrUpdateScore; also selects find vs. replace
// round    : forwarded to getOrUpdateScore
// returns  : the accumulated score of all features that fired
SCORE_TYPE CTagger::getOrUpdateLocalScore( const CStringVector *sentence, const CStateItem *item, unsigned long index, SCORE_TYPE amount, unsigned long round ) {
   static SCORE_TYPE score ;
   static unsigned long prev_first , prev_len ;
   static unsigned long first , last , len , raw_len ; // raw_len is the length before it is capped
   static int pos ;

   const bool read_only = ( amount == 0 ) ;

   // word boundaries
   first = item->getWordStart( index ) ;
   last = item->getWordEnd( index ) ;
   len = item->getWordLength( index ) ;

   if ( index > 0 ) {
      prev_first = item->getWordStart( index-1 ) ;
      prev_len = item->getWordLength( index-1 ) ;
   }
   else {
      prev_first = 0 ;
      prev_len = 0 ;
   }
   raw_len = len ; // remembered before the cap below; drives the per-character loops

   // current and previous word
   const CWord &cur_word = read_only ? m_WordCache.find( first , last , sentence )
                                     : m_WordCache.replace( first , last , sentence ) ;

   const CWord &prev_word = index > 0 ? ( read_only ? m_WordCache.find( prev_first , first-1 , sentence )
                                                    : m_WordCache.replace( prev_first , first-1 , sentence ) )
                                      : g_emptyWord ;

   // cap the lengths that are used as feature values
   if ( len > LENGTH_MAX-1 ) { len = LENGTH_MAX-1 ; }
   if ( prev_len > LENGTH_MAX-1 ) { prev_len = LENGTH_MAX-1 ; }

   // single characters and spans around the word boundary
   const CWord &head_char = read_only ? m_WordCache.find( first , first , sentence )
                                      : m_WordCache.replace( first , first , sentence ) ;
   const CWord &tail_char = read_only ? m_WordCache.find( last , last , sentence )
                                      : m_WordCache.replace( last , last , sentence ) ;
   const CWord &prev_head_char = index > 0 ? ( read_only ? m_WordCache.find( prev_first , prev_first , sentence )
                                                         : m_WordCache.replace( prev_first , prev_first , sentence ) )
                                           : g_emptyWord ;
   const CWord &prev_tail_char = index > 0 ? ( read_only ? m_WordCache.find( first-1 , first-1 , sentence)
                                                         : m_WordCache.replace( first-1 , first-1 , sentence) )
                                           : g_emptyWord ;
   const CWord &boundary_chars = index > 0 ? ( read_only ? m_WordCache.find( first-1 , first, sentence)
                                                         : m_WordCache.replace( first-1 , first, sentence) )
                                           : g_emptyWord ;
   const CWord &prev_word_plus_head = index > 0 ? ( read_only ? m_WordCache.find( prev_first , first , sentence )
                                                              : m_WordCache.replace( prev_first , first , sentence ) )
                                                : g_emptyWord ;
   const CWord &prev_tail_plus_word = index > 0 ? ( read_only ? m_WordCache.find( first-1 , last , sentence)
                                                              : m_WordCache.replace( first-1 , last , sentence) )
                                                : g_emptyWord ;
   const CWord &char_context = ( len == 1 && first > 0 && last < sentence->size()-1 )
                                  ? ( read_only ? m_WordCache.find( first-1 , last+1 , sentence )
                                                : m_WordCache.replace( first-1 , last+1 , sentence ) )
                                  : g_emptyWord ;

   // composite two-word keys
   static CTwoWords word_pair , head_tail_pair , head_chars_pair , tail_chars_pair ;
   if ( !read_only ) {
      word_pair.allocate( cur_word, prev_word ) ;
      head_tail_pair.allocate( head_char, tail_char ) ;
      head_chars_pair.allocate( prev_head_char, head_char ) ;
      tail_chars_pair.allocate( prev_tail_char, tail_char ) ;
   }
   else {
      word_pair.refer( &cur_word , &prev_word ) ;
      head_tail_pair.refer( &head_char , &tail_char ) ;
      head_chars_pair.refer( &prev_head_char , &head_char ) ;
      tail_chars_pair.refer( &prev_tail_char , &tail_char ) ;
   }

   // tags of this word and of the two words before it
   const CTag &tag = item->getTag( index ) ;
   const CTag &prev_tag = index>0 ? item->getTag( index-1 ) : CTag(CTag::SENTENCE_BEGIN) ;
   const CTag &prev2_tag = index>1 ? item->getTag(index-2) : CTag(CTag::SENTENCE_BEGIN) ;

   static CTaggedWord<CTag, TAG_SEPARATOR> tagged_char, anchor_char;
   static CTwoTaggedWords tagged_pair;

   // character categories: dictionary entry of the char with the current tag's bit set
   const unsigned long long tag_bit = static_cast<unsigned long long>(1) << tag.code() ;
   unsigned long long head_cat = m_weights->m_mapCharTagDictionary.lookup(head_char) | tag_bit ;
   unsigned long long tail_cat = m_weights->m_mapCharTagDictionary.lookup(tail_char) | tag_bit ;

   // ---- word-level features ----
   score = m_weights->m_mapSeenWords.getOrUpdateScore( cur_word , m_nScoreIndex , amount , round ) ;
   score += m_weights->m_mapLastWordByWord.getOrUpdateScore( word_pair , m_nScoreIndex , amount , round ) ;

   if ( len != 1 ) {
      score += m_weights->m_mapFirstAndLastChars.getOrUpdateScore( head_tail_pair , m_nScoreIndex , amount , round ) ;

      score += m_weights->m_mapLengthByFirstChar.getOrUpdateScore( std::make_pair(head_char, len) , m_nScoreIndex , amount , round ) ;
      score += m_weights->m_mapLengthByLastChar.getOrUpdateScore( std::make_pair(tail_char, len) , m_nScoreIndex , amount , round ) ;

      // every pair of adjacent chars inside the word
      pos = 0 ;
      while ( pos < raw_len-1 ) {
         score += m_weights->m_mapConsecutiveChars.getOrUpdateScore(
                     read_only ? m_WordCache.find(first+pos, first+pos+1, sentence)
                               : m_WordCache.replace(first+pos, first+pos+1, sentence) ,
                     m_nScoreIndex, amount, round ) ;
         ++pos ;
      }
   }
   else {
      score += m_weights->m_mapOneCharWord.getOrUpdateScore( cur_word , m_nScoreIndex , amount , round ) ;
   }

   // features across the boundary with the previous word
   if ( first > 0 ) {
      score += m_weights->m_mapSeparateChars.getOrUpdateScore( boundary_chars , m_nScoreIndex , amount , round ) ;

      score += m_weights->m_mapCurrentWordLastChar.getOrUpdateScore( prev_tail_plus_word , m_nScoreIndex , amount , round ) ;
      score += m_weights->m_mapLastWordFirstChar.getOrUpdateScore( prev_word_plus_head , m_nScoreIndex , amount , round ) ;

      score += m_weights->m_mapFirstCharLastWordByWord.getOrUpdateScore( head_chars_pair , m_nScoreIndex , amount , round ) ;
      score += m_weights->m_mapLastWordByLastChar.getOrUpdateScore( tail_chars_pair , m_nScoreIndex , amount , round ) ;

      score += m_weights->m_mapLengthByLastWord.getOrUpdateScore( std::make_pair(prev_word, len) , m_nScoreIndex , amount , round ) ;
      score += m_weights->m_mapLastLengthByWord.getOrUpdateScore( std::make_pair(cur_word, prev_len), m_nScoreIndex , amount , round ) ;
   }

   // ---- tag-level features ----
   score += m_weights->m_mapCurrentTag.getOrUpdateScore( std::make_pair(cur_word, tag) , m_nScoreIndex , amount , round ) ;
   score += m_weights->m_mapLastTagByTag.getOrUpdateScore( CTagSet<CTag, 2>(encodeTags( tag, prev_tag )), m_nScoreIndex , amount , round ) ;
   score += m_weights->m_mapLastTwoTagsByTag.getOrUpdateScore( CTagSet<CTag, 3>(encodeTags( tag, prev_tag, prev2_tag )), m_nScoreIndex , amount , round ) ;

   if ( first > 0 ) {
      // word/tag combinations are only used for short words (capped length <= 2)
      const bool short_prev = prev_len <= 2 ;
      const bool short_cur = len <= 2 ;
      if ( short_prev ) score += m_weights->m_mapTagByLastWord.getOrUpdateScore( std::make_pair(prev_word, tag) , m_nScoreIndex , amount , round ) ;
      if ( short_cur ) score += m_weights->m_mapLastTagByWord.getOrUpdateScore( std::make_pair(cur_word, prev_tag) , m_nScoreIndex , amount , round ) ;
      if ( short_cur ) score += m_weights->m_mapTagByWordAndPrevChar.getOrUpdateScore( std::make_pair(prev_tail_plus_word, tag) , m_nScoreIndex , amount , round ) ;
      if ( short_prev ) score += m_weights->m_mapTagByWordAndNextChar.getOrUpdateScore( std::make_pair(prev_word_plus_head, prev_tag) , m_nScoreIndex , amount , round ) ;
   }

   if ( len != 1 ) {
      score += m_weights->m_mapTagByFirstChar.getOrUpdateScore( std::make_pair(head_char, tag) , m_nScoreIndex , amount , round ) ;
      score += m_weights->m_mapTagByLastChar.getOrUpdateScore( std::make_pair(tail_char, tag) , m_nScoreIndex , amount , round ) ;
      score += m_weights->m_mapTagByFirstCharCat.getOrUpdateScore( std::make_pair(head_cat, tag) , m_nScoreIndex , amount , round ) ;
      score += m_weights->m_mapTagByLastCharCat.getOrUpdateScore( std::make_pair(tail_cat, tag) , m_nScoreIndex , amount , round ) ;

      // per-character tag features
      const unsigned long final_pos = raw_len-1 ;
      for ( pos = 0 ; pos < raw_len ; ++pos ) {
         const bool not_first = pos > 0 ;
         const bool not_last = pos < final_pos ;

         // chars strictly inside the word
         if ( not_first && not_last )
            score += m_weights->m_mapTagByChar.getOrUpdateScore(
                        std::make_pair( read_only ? m_WordCache.find(first+pos, first+pos, sentence)
                                                  : m_WordCache.replace(first+pos, first+pos, sentence), tag),
                        m_nScoreIndex , amount , round ) ;

         if ( not_first ) {
            // tagged char paired with the first char of the word
            tagged_char.load( read_only ? m_WordCache.find(first+pos, first+pos, sentence)
                                        : m_WordCache.replace(first+pos, first+pos, sentence) , tag );
            anchor_char.load(head_char);
            if ( read_only ) { tagged_pair.refer(&tagged_char, &anchor_char); }
            else { tagged_pair.allocate(tagged_char, anchor_char); }
            score += m_weights->m_mapTaggedCharByFirstChar.getOrUpdateScore(tagged_pair, m_nScoreIndex, amount, round) ;

            // char identical to the one right before it (the comparison always uses find)
            if ( m_WordCache.find(first+pos, first+pos, sentence) == m_WordCache.find(first+pos-1, first+pos-1, sentence))
               score += m_weights->m_mapRepeatedCharByTag.getOrUpdateScore(
                           std::make_pair( read_only ? m_WordCache.find(first+pos, first+pos, sentence)
                                                     : m_WordCache.replace(first+pos, first+pos, sentence), tag),
                           m_nScoreIndex, amount, round) ;
         }

         if ( not_last ) {
            // tagged char paired with the last char of the word
            tagged_char.load( read_only ? m_WordCache.find(first+pos, first+pos, sentence)
                                        : m_WordCache.replace(first+pos, first+pos, sentence) , tag );
            anchor_char.load(tail_char);
            if ( read_only ) { tagged_pair.refer(&tagged_char, &anchor_char); }
            else { tagged_pair.allocate(tagged_char, anchor_char); }
            score += m_weights->m_mapTaggedCharByLastChar.getOrUpdateScore(tagged_pair, m_nScoreIndex, amount, round) ;
         }
      }
   }
   else if ( first > 0 && last < sentence->size()-1 ) {
      // single-char word: scored with its left and right neighbour chars
      score += m_weights->m_mapTagOfOneCharWord.getOrUpdateScore( std::make_pair(char_context, tag) , m_nScoreIndex , amount , round ) ;
   }

   return score;
}
Exemplo n.º 5
0
inline void CDepParser::getOrUpdateStackScore( const CStateItem *item, CPackedScoreType<SCORE_TYPE, action::MAX> &retval, const unsigned &action, SCORE_TYPE amount , int round ) {

   const int &st_index = item->stackempty() ? -1 : item->stacktop(); // stack top
   const int &sth_index = st_index == -1 ? -1 : item->head(st_index); // stack top head
   const int &sthh_index = sth_index == -1 ? -1 : item->head(sth_index); // stack top head
   const int &stld_index = st_index == -1 ? -1 : item->leftdep(st_index); // leftmost dep of stack
   const int &strd_index = st_index == -1 ? -1 : item->rightdep(st_index); // rightmost dep st
   const int &stl2d_index = stld_index == -1 ? -1 : item->sibling(stld_index); // left 2ndmost dep of stack
   const int &str2d_index = strd_index == -1 ? -1 : item->sibling(strd_index); // right 2ndmost dep st
   const int &n0_index = item->size()==m_lCache.size() ? -1 : item->size(); // next
   assert(n0_index<static_cast<int>(m_lCache.size())); // the next index shouldn't exceed sentence
   const int &n0ld_index = n0_index==-1 ? -1 : item->leftdep(n0_index); // leftmost dep of next
   const int &n0l2d_index = n0ld_index==-1 ? -1 : item->sibling(n0ld_index); // leftmost dep of next
   const int &ht_index = item->headstackempty() ? -1 : item->headstacktop(); // headstack
   const int &ht2_index = item->headstacksize()<2 ? -1 : item->headstackitem(item->headstacksize()-2); // headstack 2nd
   static int n1_index;
   static int n2_index;
   static int n3_index;
   n1_index = (n0_index != -1 && n0_index+1<m_lCache.size()) ? n0_index+1 : -1 ;
   n2_index = (n0_index != -1 && n0_index+2<m_lCache.size()) ? n0_index+2 : -1 ;
   n3_index = (n0_index != -1 && n0_index+3<m_lCache.size()) ? n0_index+3 : -1 ;

   static CPackedScoreType<SCORE_TYPE, action::MAX> freq;

   const CTaggedWord<CTag, TAG_SEPARATOR> &st_word_tag = st_index==-1 ? g_emptyTaggedWord : m_lCache[st_index];
   const CTaggedWord<CTag, TAG_SEPARATOR> &sth_word_tag = sth_index==-1 ? g_emptyTaggedWord : m_lCache[sth_index];
   const CTaggedWord<CTag, TAG_SEPARATOR> &sthh_word_tag = sthh_index==-1 ? g_emptyTaggedWord : m_lCache[sthh_index];
   const CTaggedWord<CTag, TAG_SEPARATOR> &stld_word_tag = stld_index==-1 ? g_emptyTaggedWord : m_lCache[stld_index];
   const CTaggedWord<CTag, TAG_SEPARATOR> &strd_word_tag = strd_index==-1 ? g_emptyTaggedWord : m_lCache[strd_index];
   const CTaggedWord<CTag, TAG_SEPARATOR> &stl2d_word_tag = stl2d_index==-1 ? g_emptyTaggedWord : m_lCache[stl2d_index];
   const CTaggedWord<CTag, TAG_SEPARATOR> &str2d_word_tag = str2d_index==-1 ? g_emptyTaggedWord : m_lCache[str2d_index];
   const CTaggedWord<CTag, TAG_SEPARATOR> &n0_word_tag = n0_index==-1 ? g_emptyTaggedWord : m_lCache[n0_index];
   const CTaggedWord<CTag, TAG_SEPARATOR> &n0ld_word_tag = n0ld_index==-1 ? g_emptyTaggedWord : m_lCache[n0ld_index];
   const CTaggedWord<CTag, TAG_SEPARATOR> &n0l2d_word_tag = n0l2d_index==-1 ? g_emptyTaggedWord : m_lCache[n0l2d_index];
   const CTaggedWord<CTag, TAG_SEPARATOR> &n1_word_tag = n1_index==-1 ? g_emptyTaggedWord : m_lCache[n1_index];
   const CTaggedWord<CTag, TAG_SEPARATOR> &n2_word_tag = n2_index==-1 ? g_emptyTaggedWord : m_lCache[n2_index];
   const CTaggedWord<CTag, TAG_SEPARATOR> &ht_word_tag = ht_index==-1 ? g_emptyTaggedWord : m_lCache[ht_index];
   const CTaggedWord<CTag, TAG_SEPARATOR> &ht2_word_tag = ht2_index==-1 ? g_emptyTaggedWord : m_lCache[ht2_index];

   const CWord &st_word = st_word_tag.word;
   const CWord &sth_word = sth_word_tag.word;
   const CWord &sthh_word = sthh_word_tag.word;
   const CWord &stld_word = stld_word_tag.word;
   const CWord &strd_word = strd_word_tag.word;
   const CWord &stl2d_word = stl2d_word_tag.word;
   const CWord &str2d_word = str2d_word_tag.word;
   const CWord &n0_word = n0_word_tag.word;
   const CWord &n0ld_word = n0ld_word_tag.word;
   const CWord &n0l2d_word = n0l2d_word_tag.word;
   const CWord &n1_word = n1_word_tag.word;
   const CWord &n2_word = n2_word_tag.word;
   const CWord &ht_word = ht_word_tag.word;
   const CWord &ht2_word = ht2_word_tag.word;

   const CTag &st_tag = st_word_tag.tag;
   const CTag &sth_tag = sth_word_tag.tag;
   const CTag &sthh_tag = sthh_word_tag.tag;
   const CTag &stld_tag = stld_word_tag.tag;
   const CTag &strd_tag = strd_word_tag.tag;
   const CTag &stl2d_tag = stl2d_word_tag.tag;
   const CTag &str2d_tag = str2d_word_tag.tag;
   const CTag &n0_tag = n0_word_tag.tag;
   const CTag &n0ld_tag = n0ld_word_tag.tag;
   const CTag &n0l2d_tag = n0l2d_word_tag.tag;
   const CTag &n1_tag = n1_word_tag.tag;
   const CTag &n2_tag = n2_word_tag.tag;
   const CTag &ht_tag = ht_word_tag.tag;
   const CTag &ht2_tag = ht2_word_tag.tag;

   const int &st_label = st_index==-1 ? CDependencyLabel::NONE : item->label(st_index);
   const int &sth_label = sth_index==-1 ? CDependencyLabel::NONE : item->label(sth_index);
   const int &stld_label = stld_index==-1 ? CDependencyLabel::NONE : item->label(stld_index);
   const int &strd_label = strd_index==-1 ? CDependencyLabel::NONE : item->label(strd_index);
   const int &stl2d_label = stl2d_index==-1 ? CDependencyLabel::NONE : item->label(stl2d_index);
   const int &str2d_label = str2d_index==-1 ? CDependencyLabel::NONE : item->label(strd_index);
   const int &n0ld_label = n0ld_index==-1 ? CDependencyLabel::NONE : item->label(n0ld_index);
   const int &n0l2d_label = n0l2d_index==-1 ? CDependencyLabel::NONE : item->label(n0l2d_index);

   static int st_n0_dist;
   st_n0_dist = encodeLinkDistance(st_index, n0_index);

   const int st_rarity = st_index==-1?0:item->rightarity(st_index);
   const int st_larity = st_index==-1?0:item->leftarity(st_index);
   const int n0_larity = n0_index==-1?0:item->leftarity(n0_index);

   const CSetOfTags<CDependencyLabel> &st_rtagset = st_index==-1?CSetOfTags<CDependencyLabel>():item->righttagset(st_index);
   const CSetOfTags<CDependencyLabel> &st_ltagset = st_index==-1?CSetOfTags<CDependencyLabel>():item->lefttagset(st_index);
   const CSetOfTags<CDependencyLabel> &n0_ltagset = n0_index==-1?CSetOfTags<CDependencyLabel>():item->lefttagset(n0_index);

   static CTwoTaggedWords st_word_tag_n0_word_tag ;
   static CTwoWords st_word_n0_word ;
   if ( amount == 0 ) {
      st_word_tag_n0_word_tag.refer( &st_word_tag, &n0_word_tag );
      st_word_n0_word.refer( &st_word, &n0_word );
   }
   else {
      st_word_tag_n0_word_tag.allocate( st_word_tag, n0_word_tag );
      st_word_n0_word.allocate( st_word, n0_word );
   }

   static CTuple2<CWord, CTag> word_tag;
   static CTuple2<CWord, int> word_int;
   static CTuple2<CTag, int> tag_int;
   static CTuple3<CWord, CTag, CTag> word_tag_tag;
   static CTuple3<CWord, CWord, CTag> word_word_tag;
   static CTuple3<CWord, CWord, int> word_word_int;
   static CTuple3<CWord, CWord, CWord> word_word_word;
   static CTuple3<CTag, CTag, int> tag_tag_int;
   static CTuple2<CWord, CSetOfTags<CDependencyLabel> > word_tagset;
   static CTuple2<CTag, CSetOfTags<CDependencyLabel> > tag_tagset;

   static CTuple2<CTag, long> ti;

   // single
   if (st_index != -1) {
      normal_and_meta( m_mapSTw, st_word );
      normal_and_meta( m_mapSTt, st_tag );
      normal_and_meta( m_mapSTwt, st_word_tag );
   }

   if (n0_index != -1) {
      normal_and_meta( m_mapN0w, n0_word );
      normal_and_meta( m_mapN0t, n0_tag );
      normal_and_meta( m_mapN0wt, n0_word_tag );
   }

   if (n1_index != -1) {
      normal_and_meta( m_mapN1w, n1_word );
      normal_and_meta( m_mapN1t, n1_tag );
      normal_and_meta( m_mapN1wt, n1_word_tag );
   }

   if (n2_index != -1) {
      normal_and_meta( m_mapN2w, n2_word );
      normal_and_meta( m_mapN2t, n2_tag );
      normal_and_meta( m_mapN2wt, n2_word_tag );
   }

   if (sth_index != -1) {
      normal_and_meta( m_mapSTHw, sth_word );
      normal_and_meta( m_mapSTHt, sth_tag );
      normal_and_meta( m_mapSTi, st_label);
   }

   if (sthh_index != -1) {
      normal_and_meta( m_mapSTHHw, sthh_word );
      normal_and_meta( m_mapSTHHt, sthh_tag );
      normal_and_meta( m_mapSTHi, sth_label );
   }

   if (stld_index != -1) {
      normal_and_meta( m_mapSTLDw, stld_word );
      normal_and_meta( m_mapSTLDt, stld_tag );
      normal_and_meta( m_mapSTLDi, stld_label );
   }

   if (strd_index != -1) {
      normal_and_meta( m_mapSTRDw, strd_word );
      normal_and_meta( m_mapSTRDt, strd_tag );
      normal_and_meta( m_mapSTRDi, strd_label );
   }

   if (n0ld_index != -1) {
      normal_and_meta( m_mapN0LDw, n0ld_word );
      normal_and_meta( m_mapN0LDt, n0ld_tag );
      normal_and_meta( m_mapN0LDi, n0ld_label );
   }

   if (stl2d_index != -1) {
      normal_and_meta( m_mapSTL2Dw, stl2d_word );
      normal_and_meta( m_mapSTL2Dt, stl2d_tag );
      normal_and_meta( m_mapSTL2Di, stl2d_label );
   }

   if (str2d_index != -1) {
      normal_and_meta( m_mapSTR2Dw, str2d_word );
      normal_and_meta( m_mapSTR2Dt, str2d_tag );
      normal_and_meta( m_mapSTR2Di, str2d_label );
   }

   if (n0l2d_index != -1) {
      normal_and_meta( m_mapN0L2Dw, n0l2d_word );
      normal_and_meta( m_mapN0L2Dt, n0l2d_tag );
      normal_and_meta( m_mapN0L2Di, n0l2d_label );
   }

   // s0 and n0
   if (st_index != -1) {
      normal_and_meta( m_mapSTwtN0wt, st_word_tag_n0_word_tag );
      refer_or_allocate_tuple3(word_word_tag, &st_word, &n0_word, &st_tag);
      normal_and_meta( m_mapSTwtN0w, word_word_tag );
      refer_or_allocate_tuple3(word_word_tag, &st_word, &n0_word, &n0_tag);
      normal_and_meta( m_mapSTwN0wt, word_word_tag );
      refer_or_allocate_tuple3(word_tag_tag, &st_word, &st_tag, &n0_tag);
      normal_and_meta( m_mapSTwtN0t, word_tag_tag );
      refer_or_allocate_tuple3(word_tag_tag, &n0_word, &st_tag, &n0_tag);
      normal_and_meta( m_mapSTtN0wt, word_tag_tag );
      normal_and_meta( m_mapSTwN0w, st_word_n0_word );
      normal_and_meta( m_mapSTtN0t, (CTagSet<CTag,2>(encodeTags(st_tag,n0_tag))) );
   }

   if (st_index != -1 && n0_index != -1) {
      normal_and_meta( m_mapN0tN1t, (CTagSet<CTag,2>(encodeTags(n0_tag,n1_tag))) );

      normal_and_meta(m_mapN0tN1tN2t, (CTagSet<CTag,3>(encodeTags(n0_tag,n1_tag,n2_tag))) );

      normal_and_meta( m_mapSTtN0tN1t, (CTagSet<CTag,3>(encodeTags(st_tag,n0_tag,n1_tag))) );

      refer_or_allocate_tuple3(word_word_word, &st_word, &n0_word, &n1_word);
      normal_and_meta( m_mapSTwN0wN1w, word_word_word );

      refer_or_allocate_tuple3(word_word_tag, &st_word, &n0_word, &n1_tag);
      normal_and_meta( m_mapSTwN0wN1t, word_word_tag );

      normal_and_meta( m_mapSTtN0tN0LDt, (CTagSet<CTag,3>(encodeTags(st_tag,n0_tag,n0ld_tag))) );

      refer_or_allocate_tuple3(word_word_word, &st_word, &n0_word, &n0ld_word);
      normal_and_meta( m_mapSTwN0wN0LDw, word_word_word );

      refer_or_allocate_tuple3(word_word_tag, &st_word, &n0_word, &n0ld_tag);
      normal_and_meta( m_mapSTwN0wN0LDt, word_word_tag );

      normal_and_meta(m_mapN0tN0LDtN0L2Dt, (CTagSet<CTag,3>(encodeTags(n0_tag,n0ld_tag,n0l2d_tag))) );
   }

   if (st_index!=-1) {
      normal_and_meta( m_mapSTHtSTtN0t, (CTagSet<CTag,3>(encodeTags(sth_tag,st_tag,n0_tag))) );

      refer_or_allocate_tuple3(word_word_word, &sth_word, &st_word, &n0_word);
      normal_and_meta( m_mapSTHwSTwN0w, word_word_word );

      refer_or_allocate_tuple3(word_word_tag, &st_word, &n0_word, &sth_tag);
      normal_and_meta( m_mapSTHtSTwN0w, word_word_tag );

      normal_and_meta( m_mapSTHHtSTHtSTt, (CTagSet<CTag,3>(encodeTags(sthh_tag,sth_tag,st_tag))) );

      normal_and_meta( m_mapSTtSTLDtN0t, (CTagSet<CTag,3>(encodeTags(st_tag,stld_tag,n0_tag))) );

      refer_or_allocate_tuple3(word_word_word, &st_word, &stld_word, &n0_word);
      normal_and_meta( m_mapSTwSTLDwN0w, word_word_word );

      refer_or_allocate_tuple3(word_word_tag, &st_word, &n0_word, &stld_tag);
      normal_and_meta( m_mapSTwSTLDtN0w, word_word_tag );

      normal_and_meta( m_mapSTtSTLDtSTL2Dt, (CTagSet<CTag,3>(encodeTags(st_tag,stld_tag,stl2d_tag))) );

      normal_and_meta( m_mapSTtSTRDtN0t, (CTagSet<CTag,3>(encodeTags(st_tag,strd_tag,n0_tag))) );

      refer_or_allocate_tuple3(word_word_word, &st_word, &strd_word, &n0_word);
      normal_and_meta( m_mapSTwSTRDwN0w, word_word_word );

      refer_or_allocate_tuple3(word_word_tag, &st_word, &n0_word, &strd_tag);
      normal_and_meta( m_mapSTwSTRDtN0w, word_word_tag );

      normal_and_meta( m_mapSTtSTRDtSTR2Dt, (CTagSet<CTag,3>(encodeTags(st_tag,strd_tag,str2d_tag))) );
   }

   // distance
   if (st_index!=-1 && n0_index!=-1) {
      refer_or_allocate_tuple2(word_int, &st_word, &st_n0_dist);
      normal_and_meta( m_mapSTwd, word_int );
      refer_or_allocate_tuple2(tag_int, &st_tag, &st_n0_dist);
      normal_and_meta( m_mapSTtd, tag_int );
      refer_or_allocate_tuple2(word_int, &n0_word, &st_n0_dist);
      normal_and_meta( m_mapN0wd, word_int );
      refer_or_allocate_tuple2(tag_int, &n0_tag, &st_n0_dist);
      normal_and_meta( m_mapN0td, tag_int );
      refer_or_allocate_tuple3(word_word_int, &st_word, &n0_word, &st_n0_dist);
      normal_and_meta( m_mapSTwN0wd, word_word_int );
      refer_or_allocate_tuple3(tag_tag_int, &st_tag, &n0_tag, &st_n0_dist);
      normal_and_meta( m_mapSTtN0td, tag_tag_int );
   }

   // st arity
   if (st_index != -1) {
      refer_or_allocate_tuple2(word_int, &st_word, &st_rarity);
      normal_and_meta( m_mapSTwra, word_int );
      refer_or_allocate_tuple2(tag_int, &st_tag, &st_rarity);
      normal_and_meta( m_mapSTtra, tag_int );
      refer_or_allocate_tuple2(word_int, &st_word, &st_larity);
      normal_and_meta( m_mapSTwla, word_int );
      refer_or_allocate_tuple2(tag_int, &st_tag, &st_larity);
      normal_and_meta( m_mapSTtla, tag_int );
   }

   // n0 arity
   if (n0_index!=-1) {
      refer_or_allocate_tuple2(word_int, &n0_word, &n0_larity);
      normal_and_meta( m_mapN0wla, word_int );
      refer_or_allocate_tuple2(tag_int, &n0_tag, &n0_larity);
     normal_and_meta( m_mapN0tla, tag_int );
   }

   // st labelset
   if (st_index != -1){
      refer_or_allocate_tuple2(word_tagset, &st_word, &st_rtagset);
      normal_and_meta( m_mapSTwrp, word_tagset );
      refer_or_allocate_tuple2(tag_tagset, &st_tag, &st_rtagset);
      normal_and_meta( m_mapSTtrp, tag_tagset );

      refer_or_allocate_tuple2(word_tagset, &st_word, &st_ltagset);
      normal_and_meta( m_mapSTwlp, word_tagset );
      refer_or_allocate_tuple2(tag_tagset, &st_tag, &st_ltagset);
      normal_and_meta( m_mapSTtlp, tag_tagset );
   }

   // n0 labelset
   if (n0_index != -1){
      refer_or_allocate_tuple2(word_tagset, &n0_word, &n0_ltagset);
      normal_and_meta( m_mapN0wlp, word_tagset );
      refer_or_allocate_tuple2(tag_tagset, &n0_tag, &n0_ltagset);
      normal_and_meta( m_mapN0tlp, tag_tagset );
   }

   if (m_bCoNLL) {

      static unsigned i;

      if (st_index!=-1) {
         if (!m_lCacheCoNLLLemma[st_index].empty()) {
            normal_and_meta( m_mapSTl, m_lCacheCoNLLLemma[st_index] );
         }
         if (m_lCacheCoNLLCPOS[st_index] != CCoNLLCPOS()) {
            normal_and_meta( m_mapSTc, m_lCacheCoNLLCPOS[st_index] );
         }
         for (i=0; i<m_lCacheCoNLLFeats[st_index].size(); ++i) {
            normal_and_meta( m_mapSTf, m_lCacheCoNLLFeats[st_index][i] );
         }
      } // if (st_index!=-1)

      if (n0_index!=-1) {
         if (!m_lCacheCoNLLLemma[n0_index].empty()) {
            normal_and_meta( m_mapN0l, m_lCacheCoNLLLemma[n0_index] );
         }
         if (m_lCacheCoNLLCPOS[n0_index] != CCoNLLCPOS()) {
            normal_and_meta( m_mapN0c, m_lCacheCoNLLCPOS[n0_index] );
         }
         for (i=0; i<m_lCacheCoNLLFeats[n0_index].size(); ++i) {
            normal_and_meta( m_mapN0f, m_lCacheCoNLLFeats[n0_index][i] );
         }
      } // if (n0_index!=-1)

      if (n1_index!=-1) {
         if (!m_lCacheCoNLLLemma[n1_index].empty()) {
            normal_and_meta( m_mapN1l, m_lCacheCoNLLLemma[n1_index] );
         }
         if (m_lCacheCoNLLCPOS[n1_index] != CCoNLLCPOS()) {
            normal_and_meta( m_mapN1c, m_lCacheCoNLLCPOS[n1_index] );
         }
         for (i=0; i<m_lCacheCoNLLFeats[n1_index].size(); ++i) {
            normal_and_meta( m_mapN1f, m_lCacheCoNLLFeats[n1_index][i] );
         }
      } // if (n1_index!=-1)
   }
}
Exemplo n.º 6
0
// Evaluates every stack/queue feature template for the parser state `item`
// under `action`, forwarding each instantiated feature to the matching weight
// map's getOrUpdateScore() together with `retval`, `m_nScoreIndex`, `amount`
// and `round`.
//
//   item   - parser state the features are extracted from
//   retval - packed per-action score object handed to every weight map
//   action - (labelled) action code the features are conjoined with
//   amount - when 0 the composite keys only refer to their components,
//            otherwise they are allocated as copies (see the refer/allocate
//            switch below); presumably 0 means "score only" and non-zero is
//            the update delta -- confirm against the weight map implementation
//   round  - passed through unchanged to the weight maps
//
// NOTE: almost all scratch objects are function-local statics, so this routine
// is not reentrant.
inline void CDepParser::getOrUpdateStackScore( const CStateItem *item, CPackedScoreType<SCORE_TYPE, action::MAX> &retval, const unsigned &action, SCORE_TYPE amount , int round ) {

    // ---- positions of the context words; -1 marks "not available" ----
    const int &st_index = item->stackempty() ? -1 : item->stacktop(); // stack top
    const int &sth_index = st_index == -1 ? -1 : item->head(st_index); // head of stack top
    const int &sthh_index = sth_index == -1 ? -1 : item->head(sth_index); // head of the head of stack top
    const int &stld_index = st_index == -1 ? -1 : item->leftdep(st_index); // leftmost dep of stack top
    const int &strd_index = st_index == -1 ? -1 : item->rightdep(st_index); // rightmost dep of stack top
    const int &stl2d_index = stld_index == -1 ? -1 : item->sibling(stld_index); // sibling of the leftmost dep of stack top
    const int &str2d_index = strd_index == -1 ? -1 : item->sibling(strd_index); // sibling of the rightmost dep of stack top
    const int &n0_index = item->size()==m_lCache.size() ? -1 : item->size(); // next input word
    assert(n0_index<static_cast<int>(m_lCache.size())); // the next index shouldn't exceed sentence
    const int &n0ld_index = n0_index==-1 ? -1 : item->leftdep(n0_index); // leftmost dep of next
    const int &n0l2d_index = n0ld_index==-1 ? -1 : item->sibling(n0ld_index); // sibling of the leftmost dep of next
    const int &ht_index = item->headstackempty() ? -1 : item->headstacktop(); // headstack top
    const int &ht2_index = item->headstacksize()<2 ? -1 : item->headstackitem(item->headstacksize()-2); // headstack 2nd
    static int n1_index;
    static int n2_index;
    static int n3_index;
    // look-ahead words n0+1 .. n0+3, each -1 when past the end of the cache
    n1_index = (n0_index != -1 && n0_index+1<m_lCache.size()) ? n0_index+1 : -1 ;
    n2_index = (n0_index != -1 && n0_index+2<m_lCache.size()) ? n0_index+2 : -1 ;
    n3_index = (n0_index != -1 && n0_index+3<m_lCache.size()) ? n0_index+3 : -1 ;

    // ---- tagged words at those positions (g_emptyTaggedWord when missing) ----
    const CTaggedWord<CTag, TAG_SEPARATOR> &st_word_tag = st_index==-1 ? g_emptyTaggedWord : m_lCache[st_index];
    const CTaggedWord<CTag, TAG_SEPARATOR> &sth_word_tag = sth_index==-1 ? g_emptyTaggedWord : m_lCache[sth_index];
    const CTaggedWord<CTag, TAG_SEPARATOR> &sthh_word_tag = sthh_index==-1 ? g_emptyTaggedWord : m_lCache[sthh_index];
    const CTaggedWord<CTag, TAG_SEPARATOR> &stld_word_tag = stld_index==-1 ? g_emptyTaggedWord : m_lCache[stld_index];
    const CTaggedWord<CTag, TAG_SEPARATOR> &strd_word_tag = strd_index==-1 ? g_emptyTaggedWord : m_lCache[strd_index];
    const CTaggedWord<CTag, TAG_SEPARATOR> &stl2d_word_tag = stl2d_index==-1 ? g_emptyTaggedWord : m_lCache[stl2d_index];
    const CTaggedWord<CTag, TAG_SEPARATOR> &str2d_word_tag = str2d_index==-1 ? g_emptyTaggedWord : m_lCache[str2d_index];
    const CTaggedWord<CTag, TAG_SEPARATOR> &n0_word_tag = n0_index==-1 ? g_emptyTaggedWord : m_lCache[n0_index];
    const CTaggedWord<CTag, TAG_SEPARATOR> &n0ld_word_tag = n0ld_index==-1 ? g_emptyTaggedWord : m_lCache[n0ld_index];
    const CTaggedWord<CTag, TAG_SEPARATOR> &n0l2d_word_tag = n0l2d_index==-1 ? g_emptyTaggedWord : m_lCache[n0l2d_index];
    const CTaggedWord<CTag, TAG_SEPARATOR> &n1_word_tag = n1_index==-1 ? g_emptyTaggedWord : m_lCache[n1_index];
    const CTaggedWord<CTag, TAG_SEPARATOR> &n2_word_tag = n2_index==-1 ? g_emptyTaggedWord : m_lCache[n2_index];
    const CTaggedWord<CTag, TAG_SEPARATOR> &ht_word_tag = ht_index==-1 ? g_emptyTaggedWord : m_lCache[ht_index];
    const CTaggedWord<CTag, TAG_SEPARATOR> &ht2_word_tag = ht2_index==-1 ? g_emptyTaggedWord : m_lCache[ht2_index];

    // ---- word components ----
    const CWord &st_word = st_word_tag.word;
    const CWord &sth_word = sth_word_tag.word;
    const CWord &sthh_word = sthh_word_tag.word;
    const CWord &stld_word = stld_word_tag.word;
    const CWord &strd_word = strd_word_tag.word;
    const CWord &stl2d_word = stl2d_word_tag.word;
    const CWord &str2d_word = str2d_word_tag.word;
    const CWord &n0_word = n0_word_tag.word;
    const CWord &n0ld_word = n0ld_word_tag.word;
    const CWord &n0l2d_word = n0l2d_word_tag.word;
    const CWord &n1_word = n1_word_tag.word;
    const CWord &n2_word = n2_word_tag.word;
    const CWord &ht_word = ht_word_tag.word;
    const CWord &ht2_word = ht2_word_tag.word;

    // ---- tag components ----
    const CTag &st_tag = st_word_tag.tag;
    const CTag &sth_tag = sth_word_tag.tag;
    const CTag &sthh_tag = sthh_word_tag.tag;
    const CTag &stld_tag = stld_word_tag.tag;
    const CTag &strd_tag = strd_word_tag.tag;
    const CTag &stl2d_tag = stl2d_word_tag.tag;
    const CTag &str2d_tag = str2d_word_tag.tag;
    const CTag &n0_tag = n0_word_tag.tag;
    const CTag &n0ld_tag = n0ld_word_tag.tag;
    const CTag &n0l2d_tag = n0l2d_word_tag.tag;
    const CTag &n1_tag = n1_word_tag.tag;
    const CTag &n2_tag = n2_word_tag.tag;
    const CTag &ht_tag = ht_word_tag.tag;
    const CTag &ht2_tag = ht2_word_tag.tag;

    // ---- dependency labels (CDependencyLabel::NONE when the word is missing) ----
    const int &st_label = st_index==-1 ? CDependencyLabel::NONE : item->label(st_index);
    const int &sth_label = sth_index==-1 ? CDependencyLabel::NONE : item->label(sth_index);
    const int &stld_label = stld_index==-1 ? CDependencyLabel::NONE : item->label(stld_index);
    const int &strd_label = strd_index==-1 ? CDependencyLabel::NONE : item->label(strd_index);
    const int &stl2d_label = stl2d_index==-1 ? CDependencyLabel::NONE : item->label(stl2d_index);
    const int &str2d_label = str2d_index==-1 ? CDependencyLabel::NONE : item->label(str2d_index);
    const int &n0ld_label = n0ld_index==-1 ? CDependencyLabel::NONE : item->label(n0ld_index);
    const int &n0l2d_label = n0l2d_index==-1 ? CDependencyLabel::NONE : item->label(n0l2d_index);

    // encoded distance between stack top and next word
    static int st_n0_dist;
    st_n0_dist = encodeLinkDistance(st_index, n0_index);

    // arities (0 when the word is missing)
    const int st_rarity = st_index==-1?0:item->rightarity(st_index);
    const int st_larity = st_index==-1?0:item->leftarity(st_index);
    const int n0_larity = n0_index==-1?0:item->leftarity(n0_index);

    // label sets of the dependents (empty set when the word is missing)
    const CSetOfTags<CDependencyLabel> &st_rtagset = st_index==-1?CSetOfTags<CDependencyLabel>():item->righttagset(st_index);
    const CSetOfTags<CDependencyLabel> &st_ltagset = st_index==-1?CSetOfTags<CDependencyLabel>():item->lefttagset(st_index);
    const CSetOfTags<CDependencyLabel> &n0_ltagset = n0_index==-1?CSetOfTags<CDependencyLabel>():item->lefttagset(n0_index);

    // Pair keys: refer to the components when amount == 0, otherwise allocate
    // copies of them.
    static CTwoTaggedWords st_word_tag_n0_word_tag ;
    static CTwoWords st_word_n0_word ;
    if ( amount == 0 ) {
        st_word_tag_n0_word_tag.refer( &st_word_tag, &n0_word_tag );
        st_word_n0_word.refer( &st_word, &n0_word );
    }
    else {
        st_word_tag_n0_word_tag.allocate( st_word_tag, n0_word_tag );
        st_word_n0_word.allocate( st_word, n0_word );
    }

    // Scratch tuples that the refer_or_allocate_tuple2/3 macros fill before each
    // lookup (the macros are defined elsewhere; presumably they apply the same
    // amount-based refer/allocate choice as above).
    static CTuple2<CWord, CTag> word_tag;
    static CTuple2<CWord, int> word_int;
    static CTuple2<CTag, int> tag_int;
    static CTuple3<CWord, CTag, CTag> word_tag_tag;
    static CTuple3<CWord, CWord, CTag> word_word_tag;
    static CTuple3<CWord, CWord, int> word_word_int;
    static CTuple3<CTag, CTag, int> tag_tag_int;
    static CTuple2<CWord, CSetOfTags<CDependencyLabel> > word_tagset;
    static CTuple2<CTag, CSetOfTags<CDependencyLabel> > tag_tagset;
    static CTuple3<unsigned long, unsigned long, unsigned long> int_int_int;

    static unsigned hpos, mpos, label;
    static unsigned long con;
    unsigned ac = action::getUnlabeledAction(action);

    // single
    if (st_index != -1) {
        cast_weights->m_mapSTw.getOrUpdateScore( retval, st_word, action, m_nScoreIndex, amount, round) ;
        cast_weights->m_mapSTt.getOrUpdateScore( retval, st_tag, action, m_nScoreIndex, amount, round ) ;
        cast_weights->m_mapSTwt.getOrUpdateScore( retval, st_word_tag, action, m_nScoreIndex, amount, round) ;
    }

    if (n0_index != -1) {
        cast_weights->m_mapN0w.getOrUpdateScore( retval, n0_word, action, m_nScoreIndex, amount, round ) ;
        cast_weights->m_mapN0t.getOrUpdateScore( retval, n0_tag, action, m_nScoreIndex, amount, round ) ;
        cast_weights->m_mapN0wt.getOrUpdateScore( retval, n0_word_tag, action, m_nScoreIndex, amount, round) ;
    }

    if (n1_index != -1) {
        cast_weights->m_mapN1w.getOrUpdateScore( retval, n1_word, action, m_nScoreIndex, amount, round ) ;
        cast_weights->m_mapN1t.getOrUpdateScore( retval, n1_tag, action, m_nScoreIndex, amount, round ) ;
        cast_weights->m_mapN1wt.getOrUpdateScore( retval, n1_word_tag, action, m_nScoreIndex, amount, round) ;
    }

    if (n2_index != -1) {
        cast_weights->m_mapN2w.getOrUpdateScore( retval, n2_word, action, m_nScoreIndex, amount, round ) ;
        cast_weights->m_mapN2t.getOrUpdateScore( retval, n2_tag, action, m_nScoreIndex, amount, round ) ;
        cast_weights->m_mapN2wt.getOrUpdateScore( retval, n2_word_tag, action, m_nScoreIndex, amount, round) ;
    }

    if (sth_index != -1) {
        cast_weights->m_mapSTHw.getOrUpdateScore( retval, sth_word, action, m_nScoreIndex, amount, round) ;
        cast_weights->m_mapSTHt.getOrUpdateScore( retval, sth_tag, action, m_nScoreIndex, amount, round ) ;
        cast_weights->m_mapSTi.getOrUpdateScore( retval, st_label, action, m_nScoreIndex, amount, round) ;
    }

    if (sthh_index != -1) {
        cast_weights->m_mapSTHHw.getOrUpdateScore( retval, sthh_word, action, m_nScoreIndex, amount, round) ;
        cast_weights->m_mapSTHHt.getOrUpdateScore( retval, sthh_tag, action, m_nScoreIndex, amount, round ) ;
        cast_weights->m_mapSTHi.getOrUpdateScore( retval, sth_label, action, m_nScoreIndex, amount, round) ;
    }

    if (stld_index != -1) {
        cast_weights->m_mapSTLDw.getOrUpdateScore( retval, stld_word, action, m_nScoreIndex, amount, round ) ;
        cast_weights->m_mapSTLDt.getOrUpdateScore( retval, stld_tag, action, m_nScoreIndex, amount, round ) ;
        cast_weights->m_mapSTLDi.getOrUpdateScore( retval, stld_label, action, m_nScoreIndex, amount, round) ;
    }

    if (strd_index != -1) {
        cast_weights->m_mapSTRDw.getOrUpdateScore( retval, strd_word, action, m_nScoreIndex, amount, round ) ;
        cast_weights->m_mapSTRDt.getOrUpdateScore( retval, strd_tag, action, m_nScoreIndex, amount, round ) ;
        cast_weights->m_mapSTRDi.getOrUpdateScore( retval, strd_label, action, m_nScoreIndex, amount, round) ;
    }

    if (n0ld_index != -1) {
        cast_weights->m_mapN0LDw.getOrUpdateScore( retval, n0ld_word, action, m_nScoreIndex, amount, round ) ;
        cast_weights->m_mapN0LDt.getOrUpdateScore( retval, n0ld_tag, action, m_nScoreIndex, amount, round ) ;
        cast_weights->m_mapN0LDi.getOrUpdateScore( retval, n0ld_label, action, m_nScoreIndex, amount, round) ;
    }

    if (stl2d_index != -1) {
        cast_weights->m_mapSTL2Dw.getOrUpdateScore( retval, stl2d_word, action, m_nScoreIndex, amount, round ) ;
        cast_weights->m_mapSTL2Dt.getOrUpdateScore( retval, stl2d_tag, action, m_nScoreIndex, amount, round ) ;
        cast_weights->m_mapSTL2Di.getOrUpdateScore( retval, stl2d_label, action, m_nScoreIndex, amount, round) ;
    }

    if (str2d_index != -1) {
        cast_weights->m_mapSTR2Dw.getOrUpdateScore( retval, str2d_word, action, m_nScoreIndex, amount, round ) ;
        cast_weights->m_mapSTR2Dt.getOrUpdateScore( retval, str2d_tag, action, m_nScoreIndex, amount, round ) ;
        cast_weights->m_mapSTR2Di.getOrUpdateScore( retval, str2d_label, action, m_nScoreIndex, amount, round) ;
    }

    if (n0l2d_index != -1) {
        cast_weights->m_mapN0L2Dw.getOrUpdateScore( retval, n0l2d_word, action, m_nScoreIndex, amount, round ) ;
        cast_weights->m_mapN0L2Dt.getOrUpdateScore( retval, n0l2d_tag, action, m_nScoreIndex, amount, round ) ;
        cast_weights->m_mapN0L2Di.getOrUpdateScore( retval, n0l2d_label, action, m_nScoreIndex, amount, round) ;
    }

    // s0 and n0
    // Only the stack top is required here; a missing n0 contributes the empty
    // word/tag taken from g_emptyTaggedWord.
    if (st_index != -1) {
        cast_weights->m_mapSTwtN0wt.getOrUpdateScore( retval, st_word_tag_n0_word_tag, action, m_nScoreIndex, amount, round );
        refer_or_allocate_tuple3(word_word_tag, &st_word, &n0_word, &st_tag);
        cast_weights->m_mapSTwtN0w.getOrUpdateScore( retval, word_word_tag, action, m_nScoreIndex, amount, round ) ;
        refer_or_allocate_tuple3(word_word_tag, &st_word, &n0_word, &n0_tag);
        cast_weights->m_mapSTwN0wt.getOrUpdateScore( retval, word_word_tag, action, m_nScoreIndex, amount, round ) ;
        refer_or_allocate_tuple3(word_tag_tag, &st_word, &st_tag, &n0_tag);
        cast_weights->m_mapSTwtN0t.getOrUpdateScore( retval, word_tag_tag, action, m_nScoreIndex, amount, round ) ;
        refer_or_allocate_tuple3(word_tag_tag, &n0_word, &st_tag, &n0_tag);
        cast_weights->m_mapSTtN0wt.getOrUpdateScore( retval, word_tag_tag, action, m_nScoreIndex, amount, round ) ;
        cast_weights->m_mapSTwN0w.getOrUpdateScore( retval, st_word_n0_word, action, m_nScoreIndex, amount, round ) ;
        cast_weights->m_mapSTtN0t.getOrUpdateScore( retval, CTagSet<CTag, 2>(encodeTags(st_tag,n0_tag)), action, m_nScoreIndex, amount, round ) ;
    }

    // tag bigrams / trigrams around n0
    if (st_index != -1 && n0_index != -1) {
        cast_weights->m_mapN0tN1t.getOrUpdateScore( retval, CTagSet<CTag, 2>(encodeTags(n0_tag,n1_tag)), action, m_nScoreIndex, amount, round ) ;
        cast_weights->m_mapN0tN1tN2t.getOrUpdateScore( retval, CTagSet<CTag, 3>(encodeTags(n0_tag,n1_tag,n2_tag)), action, m_nScoreIndex, amount, round ) ;
        cast_weights->m_mapSTtN0tN1t.getOrUpdateScore( retval, CTagSet<CTag, 3>(encodeTags(st_tag,n0_tag,n1_tag)), action, m_nScoreIndex, amount, round ) ;
        cast_weights->m_mapSTtN0tN0LDt.getOrUpdateScore( retval, CTagSet<CTag, 3>(encodeTags(st_tag,n0_tag,n0ld_tag)), action, m_nScoreIndex, amount, round ) ;
        cast_weights->m_mapN0tN0LDtN0L2Dt.getOrUpdateScore( retval, CTagSet<CTag, 3>(encodeTags(n0_tag,n0ld_tag,n0l2d_tag)), action, m_nScoreIndex, amount, round ) ;
    }
    // tag trigrams around the stack top
    if (st_index!=-1) {
        cast_weights->m_mapSTHtSTtN0t.getOrUpdateScore( retval, CTagSet<CTag, 3>(encodeTags(sth_tag,st_tag,n0_tag)), action, m_nScoreIndex, amount, round ) ;
        cast_weights->m_mapSTHHtSTHtSTt.getOrUpdateScore( retval, CTagSet<CTag, 3>(encodeTags(sthh_tag, sth_tag,st_tag)), action, m_nScoreIndex, amount, round ) ;
        cast_weights->m_mapSTtSTLDtN0t.getOrUpdateScore( retval, CTagSet<CTag, 3>(encodeTags(st_tag,stld_tag,n0_tag)), action, m_nScoreIndex, amount, round ) ;
        cast_weights->m_mapSTtSTLDtSTL2Dt.getOrUpdateScore( retval, CTagSet<CTag, 3>(encodeTags(st_tag,stld_tag,stl2d_tag)), action, m_nScoreIndex, amount, round ) ;
        cast_weights->m_mapSTtSTRDtN0t.getOrUpdateScore( retval, CTagSet<CTag, 3>(encodeTags(st_tag,strd_tag,n0_tag)), action, m_nScoreIndex, amount, round ) ;
        cast_weights->m_mapSTtSTRDtSTR2Dt.getOrUpdateScore( retval, CTagSet<CTag, 3>(encodeTags(st_tag,strd_tag,str2d_tag)), action, m_nScoreIndex, amount, round ) ;
    }

    // distance
    if (st_index!=-1 && n0_index!=-1) {
        refer_or_allocate_tuple2(word_int, &st_word, &st_n0_dist);
        cast_weights->m_mapSTwd.getOrUpdateScore( retval, word_int, action, m_nScoreIndex, amount, round) ;
        refer_or_allocate_tuple2(tag_int, &st_tag, &st_n0_dist);
        cast_weights->m_mapSTtd.getOrUpdateScore( retval, tag_int, action, m_nScoreIndex, amount, round ) ;
        refer_or_allocate_tuple2(word_int, &n0_word, &st_n0_dist);
        cast_weights->m_mapN0wd.getOrUpdateScore( retval, word_int, action, m_nScoreIndex, amount, round ) ;
        refer_or_allocate_tuple2(tag_int, &n0_tag, &st_n0_dist);
        cast_weights->m_mapN0td.getOrUpdateScore( retval, tag_int, action, m_nScoreIndex, amount, round ) ;
        refer_or_allocate_tuple3(word_word_int, &st_word, &n0_word, &st_n0_dist);
        cast_weights->m_mapSTwN0wd.getOrUpdateScore( retval, word_word_int, action, m_nScoreIndex, amount, round ) ;
        refer_or_allocate_tuple3(tag_tag_int, &st_tag, &n0_tag, &st_n0_dist);
        cast_weights->m_mapSTtN0td.getOrUpdateScore( retval, tag_tag_int, action, m_nScoreIndex, amount, round ) ;
    }

    // st arity
    if (st_index != -1) {
        refer_or_allocate_tuple2(word_int, &st_word, &st_rarity);
        cast_weights->m_mapSTwra.getOrUpdateScore( retval, word_int, action, m_nScoreIndex, amount, round) ;
        refer_or_allocate_tuple2(tag_int, &st_tag, &st_rarity);
        cast_weights->m_mapSTtra.getOrUpdateScore( retval, tag_int, action, m_nScoreIndex, amount, round ) ;
        refer_or_allocate_tuple2(word_int, &st_word, &st_larity);
        cast_weights->m_mapSTwla.getOrUpdateScore( retval, word_int, action, m_nScoreIndex, amount, round) ;
        refer_or_allocate_tuple2(tag_int, &st_tag, &st_larity);
        cast_weights->m_mapSTtla.getOrUpdateScore( retval, tag_int, action, m_nScoreIndex, amount, round ) ;
    }

    // n0 arity
    if (n0_index!=-1) {
        refer_or_allocate_tuple2(word_int, &n0_word, &n0_larity);
        cast_weights->m_mapN0wla.getOrUpdateScore( retval, word_int, action, m_nScoreIndex, amount, round) ;
        refer_or_allocate_tuple2(tag_int, &n0_tag, &n0_larity);
        cast_weights->m_mapN0tla.getOrUpdateScore( retval, tag_int, action, m_nScoreIndex, amount, round ) ;
    }

    // st labelset
    if (st_index != -1) {
        refer_or_allocate_tuple2(word_tagset, &st_word, &st_rtagset);
        cast_weights->m_mapSTwrp.getOrUpdateScore( retval, word_tagset, action, m_nScoreIndex, amount, round) ;
        refer_or_allocate_tuple2(tag_tagset, &st_tag, &st_rtagset);
        cast_weights->m_mapSTtrp.getOrUpdateScore( retval, tag_tagset, action, m_nScoreIndex, amount, round ) ;

        refer_or_allocate_tuple2(word_tagset, &st_word, &st_ltagset);
        cast_weights->m_mapSTwlp.getOrUpdateScore( retval, word_tagset, action, m_nScoreIndex, amount, round) ;
        refer_or_allocate_tuple2(tag_tagset, &st_tag, &st_ltagset);
        cast_weights->m_mapSTtlp.getOrUpdateScore( retval, tag_tagset, action, m_nScoreIndex, amount, round ) ;
    }

    // n0 labelset
    if (n0_index != -1) {
        refer_or_allocate_tuple2(word_tagset, &n0_word, &n0_ltagset);
        cast_weights->m_mapN0wlp.getOrUpdateScore( retval, word_tagset, action, m_nScoreIndex, amount, round) ;
        refer_or_allocate_tuple2(tag_tagset, &n0_tag, &n0_ltagset);
        cast_weights->m_mapN0tlp.getOrUpdateScore( retval, tag_tagset, action, m_nScoreIndex, amount, round ) ;
    }

    // Constituent (CFG) features for an arc between the stack top and n0.
    // Both words are indexed directly below, so the whole section is skipped
    // when either is missing; previously a NO_ACTION call on a state with an
    // empty stack or exhausted input indexed m_lCache / constituent() with -1.
    if (st_index != -1 && n0_index != -1) {
        // n0 as head, stack top as modifier
        if (ac == action::ARC_LEFT || ac == action::NO_ACTION) {
            hpos = m_lCache[n0_index].tag.code();
            mpos = m_lCache[st_index].tag.code();
            label = action::getLabel(action);
            transfer(hpos, mpos, label,item->constituent(n0_index), false, con);
            refer_or_allocate_tuple3(int_int_int, &(item->constituent(st_index)), &(item->constituent(n0_index)), &con);
            cast_weights->m_mapCFG.getOrUpdateScore( retval, int_int_int, action::NO_ACTION, m_nScoreIndex, amount, round ) ;
        }

        // stack top as head, n0 as modifier
        if (ac == action::ARC_RIGHT || ac == action::NO_ACTION) {
            hpos = m_lCache[st_index].tag.code();
            mpos = m_lCache[n0_index].tag.code();
            label = action::getLabel(action);
            transfer(hpos, mpos, label,item->constituent(st_index), true, con);
            refer_or_allocate_tuple3(int_int_int, &(item->constituent(st_index)), &(item->constituent(n0_index)), &con);
            cast_weights->m_mapCFG.getOrUpdateScore( retval, int_int_int, action::NO_ACTION, m_nScoreIndex, amount, round ) ;
        }
    }

    // CoNLL extras: lemma, coarse POS and morphological feats of st, n0 and n1
    if (m_bCoNLL) {

        static unsigned i;

        if (st_index!=-1) {
            if (!m_lCacheCoNLLLemma[st_index].empty()) cast_weights->m_mapSTl.getOrUpdateScore( retval, m_lCacheCoNLLLemma[st_index], action, m_nScoreIndex, amount, round) ;
            if (m_lCacheCoNLLCPOS[st_index] != CCoNLLCPOS()) cast_weights->m_mapSTc.getOrUpdateScore( retval, m_lCacheCoNLLCPOS[st_index], action, m_nScoreIndex, amount, round) ;
            for (i=0; i<m_lCacheCoNLLFeats[st_index].size(); ++i)
                cast_weights->m_mapSTf.getOrUpdateScore( retval, m_lCacheCoNLLFeats[st_index][i], action, m_nScoreIndex, amount, round) ;
        } // if (st_index!=-1)

        if (n0_index!=-1) {
            if (!m_lCacheCoNLLLemma[n0_index].empty()) cast_weights->m_mapN0l.getOrUpdateScore( retval, m_lCacheCoNLLLemma[n0_index], action, m_nScoreIndex, amount, round) ;
            if (m_lCacheCoNLLCPOS[n0_index] != CCoNLLCPOS()) cast_weights->m_mapN0c.getOrUpdateScore( retval, m_lCacheCoNLLCPOS[n0_index], action, m_nScoreIndex, amount, round) ;
            for (i=0; i<m_lCacheCoNLLFeats[n0_index].size(); ++i)
                cast_weights->m_mapN0f.getOrUpdateScore( retval, m_lCacheCoNLLFeats[n0_index][i], action, m_nScoreIndex, amount, round) ;
        } // if (n0_index!=-1)

        if (n1_index!=-1) {
            if (!m_lCacheCoNLLLemma[n1_index].empty()) cast_weights->m_mapN1l.getOrUpdateScore( retval, m_lCacheCoNLLLemma[n1_index], action, m_nScoreIndex, amount, round) ;
            if (m_lCacheCoNLLCPOS[n1_index] != CCoNLLCPOS()) cast_weights->m_mapN1c.getOrUpdateScore( retval, m_lCacheCoNLLCPOS[n1_index], action, m_nScoreIndex, amount, round) ;
            for (i=0; i<m_lCacheCoNLLFeats[n1_index].size(); ++i)
                cast_weights->m_mapN1f.getOrUpdateScore( retval, m_lCacheCoNLLFeats[n1_index][i], action, m_nScoreIndex, amount, round) ;
        } // if (n1_index!=-1)
    }
}
Exemplo n.º 7
0
// Computes (or updates the weights behind) the score of the word boundary in
// front of position `index` of the partially segmented/tagged state `item`.
// The features describe the word ending at that boundary (index-1), the word
// before it (index-2) and, when index < item->size(), the first character and
// tag of the word starting at `index`.
//
//   sentence - not referenced in this body; the characters are reached through
//              find_or_replace_word_cache()
//   item     - state supplying word starts/ends/lengths and tags
//   index    - word position; may equal item->size() (no word at `index`)
//   amount   - forwarded to every getOrUpdateScore(); when it is 0 (and
//              index > 0) the pair keys only refer to their components,
//              otherwise they are allocated as copies
//   round    - forwarded unchanged to the weight maps
//
// Returns the sum of the values returned by all getOrUpdateScore() calls made
// during this invocation.
SCORE_TYPE CTagger::getOrUpdateSeparateScore( const CStringVector *sentence, const CSubStateItem *item, unsigned long index, SCORE_TYPE amount, unsigned long round ) {
   static SCORE_TYPE nReturn ;
   static unsigned long start_0;
   static unsigned long start_1, end_1, length_1;
   static unsigned long start_2, end_2, length_2;

   // nReturn is static, so it still holds the previous call's result. It must
   // be cleared here: for index==0 the "last word" section below (the only
   // place that assigns rather than accumulates) is skipped.
   nReturn = 0 ;

   // about the words
   assert(amount!=0||index==item->size()-1||index==item->size());
   start_0 = index==item->size() ? 0 : item->getWordStart( index ) ;

   // previous word (index-1)
   start_1 = index > 0 ? item->getWordStart( index-1 ) : 0 ;
   end_1 = index > 0 ? item->getWordEnd( index-1 ) : 0 ;
   assert(index==item->size()||index==0 || end_1 == start_0-1);
   length_1 = index > 0 ? item->getWordLength( index-1 ) : 0;

   // word before the previous one (index-2)
   start_2 = index > 1 ? item->getWordStart( index-2 ) : 0 ;
   end_2 = index > 1 ? item->getWordEnd( index-2 ) : 0 ;
   assert(index<2 || end_2 == start_1-1);
   length_2 = index > 1 ? item->getWordLength( index-2 ) : 0;

   const CWord &word_1 = index>0 ? find_or_replace_word_cache( start_1, end_1 ) : g_emptyWord;
   const CWord &word_2 = index>1 ? find_or_replace_word_cache( start_2, end_2 ) : g_emptyWord;

   // about the length
   // lengths are capped at LENGTH_MAX-1 before being used as feature values
   if( length_1 > LENGTH_MAX-1 ) length_1 = LENGTH_MAX-1 ;
   if( length_2 > LENGTH_MAX-1 ) length_2 = LENGTH_MAX-1 ;

   // about the chars
   const CWord &first_char_0 = index<item->size() ? find_or_replace_word_cache( start_0, start_0 ) : g_emptyWord ;
   const CWord &first_char_1 = index>0 ? find_or_replace_word_cache( start_1, start_1 ) : g_emptyWord;

   const CWord &last_char_1 = index>0 ? find_or_replace_word_cache( end_1, end_1 ) : g_emptyWord;
   const CWord &last_char_2 = index>1 ? find_or_replace_word_cache( end_2, end_2 ) : g_emptyWord;
   const CWord &two_char = index>0&&index<item->size() ? find_or_replace_word_cache( end_1, start_0 ) : g_emptyWord ;
   const CWord &word_1_first_char_0 = index>0&&index<item->size() ? find_or_replace_word_cache( start_1, start_0 ) : g_emptyWord;
   const CWord &word_1_last_char_2 = index>1 ? find_or_replace_word_cache( end_2, end_1 ) : g_emptyWord;
   const CWord &three_char = ( length_1==1 && index>1 && index<item->size() ) ? find_or_replace_word_cache( end_2, start_0 ) : g_emptyWord;

   // pair keys: refer to the components when only scoring an existing word,
   // otherwise allocate copies
   static CTwoWords word_2_word_1, first_char_1_last_char_1, first_char_0_first_char_1, last_char_1_last_char_2 ;
   if (amount==0&&index>0) {
      word_2_word_1.refer( &word_1 , &word_2 ) ;
      first_char_1_last_char_1.refer( &first_char_1 , &last_char_1 ) ;
      first_char_0_first_char_1.refer( &first_char_0 , &first_char_1 ) ;
      last_char_1_last_char_2.refer( &last_char_1 , &last_char_2 ) ;
   }
   else {
      word_2_word_1.allocate( word_1, word_2 ) ;
      first_char_1_last_char_1.allocate( first_char_1, last_char_1 ) ;
      first_char_0_first_char_1.allocate( first_char_0, first_char_1 ) ;
      last_char_1_last_char_2.allocate( last_char_1, last_char_2 ) ;
   }

   // about the tags
   // positions that do not exist fall back to g_beginTag
   const CTag &tag_0 = index<item->size() ? item->getTag( index ) : g_beginTag;
   const CTag &tag_1 = index>0 ? item->getTag(index-1) : g_beginTag;
   const CTag &tag_2 = index>1 ? item->getTag(index-2) : g_beginTag;

   static CTaggedWord<CTag, TAG_SEPARATOR> wt1, wt2;
   static CTwoTaggedWords wt12;

   // dictionary lookup result for the character, OR-ed with the bit of the
   // current tag's code
   unsigned long long first_char_cat_0 = m_weights->m_mapCharTagDictionary.lookup(first_char_0) | (static_cast<unsigned long long>(1)<<tag_0.code()) ;
   unsigned long long last_char_cat_1 = m_weights->m_mapCharTagDictionary.lookup(last_char_1) | (static_cast<unsigned long long>(1)<<tag_1.code()) ;

   static CTagSet<CTag, 2> tag_0_tag_1, tag_0_tag_2;
   static CTagSet<CTag, 3> tag_0_tag_1_tag_2;
   tag_0_tag_1.load( encodeTags(tag_0, tag_1) );
   tag_0_tag_2.load( encodeTags(tag_0, tag_2) );
   tag_0_tag_1_tag_2.load( encodeTags(tag_0, tag_1, tag_2) );

   // unsigned to match length_1, which it is compared against below
   static unsigned long j ;

   // adding scores with features for last word
   if (index>0) {
      nReturn = m_weights->m_mapSeenWords.getOrUpdateScore( word_1 , m_nScoreIndex , amount , round ) ;
      if (index>1) nReturn += m_weights->m_mapLastWordByWord.getOrUpdateScore( word_2_word_1 , m_nScoreIndex , amount , round ) ;

      if ( length_1 == 1 ) {
         nReturn += m_weights->m_mapOneCharWord.getOrUpdateScore( word_1 , m_nScoreIndex , amount , round ) ;
      }
      else {
         nReturn += m_weights->m_mapFirstAndLastChars.getOrUpdateScore( first_char_1_last_char_1 , m_nScoreIndex , amount , round ) ;

         nReturn += m_weights->m_mapLengthByFirstChar.getOrUpdateScore( std::make_pair(first_char_1, length_1) , m_nScoreIndex , amount , round ) ;
         nReturn += m_weights->m_mapLengthByLastChar.getOrUpdateScore( std::make_pair(last_char_1, length_1) , m_nScoreIndex , amount , round ) ;
      }

      if (index>1) {
         nReturn += m_weights->m_mapCurrentWordLastChar.getOrUpdateScore( word_1_last_char_2 , m_nScoreIndex , amount , round ) ;
         nReturn += m_weights->m_mapLastWordByLastChar.getOrUpdateScore( last_char_1_last_char_2 , m_nScoreIndex , amount , round ) ;

         nReturn += m_weights->m_mapLengthByLastWord.getOrUpdateScore( std::make_pair(word_2, length_1) , m_nScoreIndex , amount , round ) ;
         nReturn += m_weights->m_mapLastLengthByWord.getOrUpdateScore( std::make_pair(word_1, length_2), m_nScoreIndex , amount , round ) ;
      }

      nReturn += m_weights->m_mapCurrentTag.getOrUpdateScore( std::make_pair(word_1, tag_1) , m_nScoreIndex , amount , round ) ;

      if ( length_1 <= 2 ) nReturn += m_weights->m_mapLastTagByWord.getOrUpdateScore( std::make_pair(word_1, tag_2) , m_nScoreIndex , amount , round ) ;

      if (index>1) {
         if ( length_1 <= 2 ) nReturn += m_weights->m_mapTagByWordAndPrevChar.getOrUpdateScore( std::make_pair(word_1_last_char_2, tag_1) , m_nScoreIndex , amount , round ) ;
         if ( length_1 == 1 && index<item->size() ) nReturn += m_weights->m_mapTagOfOneCharWord.getOrUpdateScore( std::make_pair(three_char, tag_1) , m_nScoreIndex , amount , round ) ;
      }

      nReturn += m_weights->m_mapTagByLastChar.getOrUpdateScore( std::make_pair(last_char_1, tag_1) , m_nScoreIndex , amount , round ) ;
      nReturn += m_weights->m_mapTagByLastCharCat.getOrUpdateScore( std::make_pair(last_char_cat_1, tag_1) , m_nScoreIndex , amount , round ) ;

      // every character of the previous word except its last, paired with
      // that last character
      for (j=0; j<length_1-1; ++j) {
         wt1.load(find_or_replace_word_cache(start_1+j, start_1+j), tag_1);
         wt2.load(last_char_1);//
         if (amount==0) { wt12.refer(&wt1, &wt2); } else { wt12.allocate(wt1, wt2); }
         nReturn += m_weights->m_mapTaggedCharByLastChar.getOrUpdateScore(wt12, m_nScoreIndex, amount, round) ;
      }
   }

   // all about the current word
   nReturn += m_weights->m_mapLastTagByTag.getOrUpdateScore( tag_0_tag_1, m_nScoreIndex , amount , round ) ;

   if ( length_1 <= 2 ) nReturn += m_weights->m_mapTagByLastWord.getOrUpdateScore( std::make_pair(word_1, tag_0) , m_nScoreIndex , amount , round ) ;

   if ( index > 0 ) {
      nReturn += m_weights->m_mapLastTwoTagsByTag.getOrUpdateScore( tag_0_tag_1_tag_2, m_nScoreIndex , amount , round ) ;
   }

   // features that need the word starting at `index` to exist
   if (index<item->size()) {
      if ( index>0 ) {
         nReturn += m_weights->m_mapSeparateChars.getOrUpdateScore( two_char , m_nScoreIndex , amount , round ) ;

         nReturn += m_weights->m_mapLastWordFirstChar.getOrUpdateScore( word_1_first_char_0 , m_nScoreIndex , amount , round ) ;

         nReturn += m_weights->m_mapFirstCharLastWordByWord.getOrUpdateScore( first_char_0_first_char_1 , m_nScoreIndex , amount , round ) ;

         if ( length_1 <= 2 ) nReturn += m_weights->m_mapTagByWordAndNextChar.getOrUpdateScore( std::make_pair(word_1_first_char_0, tag_1) , m_nScoreIndex , amount , round ) ;
      }

      nReturn += m_weights->m_mapTagByFirstChar.getOrUpdateScore( std::make_pair(first_char_0, tag_0) , m_nScoreIndex , amount , round ) ;
      nReturn += m_weights->m_mapTagByFirstCharCat.getOrUpdateScore( std::make_pair(first_char_cat_0, tag_0) , m_nScoreIndex , amount , round ) ;

      nReturn += m_weights->m_mapTagByChar.getOrUpdateScore( std::make_pair(first_char_0, tag_0), m_nScoreIndex , amount , round ) ;

      if (index>0) {
         wt1.load(last_char_1, tag_1);
         wt2.load(first_char_0, tag_0);
         if (amount==0) { wt12.refer(&wt1, &wt2); } else { wt12.allocate(wt1, wt2); }
         nReturn += m_weights->m_mapTaggedSeparateChars.getOrUpdateScore( wt12, m_nScoreIndex , amount , round ) ;
      }
   }

//   if (index>0) nReturn += m_weights->m_mapTagWordTag.getOrUpdateScore( std::make_pair(word_1, tag_0_tag_2) , m_nScoreIndex , amount , round ) ;
//   if (index>1) nReturn += m_weights->m_mapWordTagTag.getOrUpdateScore( std::make_pair(word_2, tag_0_tag_1) , m_nScoreIndex , amount , round ) ;

   return nReturn;
}