示例#1
0
float
M2MFstAligner::maximization(bool lastiter)
{
    //Maximization. Simple count normalization.  Probably get an improvement
    // by using a more sophisticated regularization approach.
    map<LogArc::Label,LogWeight>::iterator it;
    float change = abs(total.Value() - prevTotal.Value());
    //cout << "Total: " << total << " Change: " << abs(total.Value()-prevTotal.Value()) << endl;
    prevTotal = total;

    //Normalize and iterate to the next model.  We apply it dynamically
    // during the expectation step.
    for (it = prev_alignment_model.begin();
            it != prev_alignment_model.end(); it++) {
        alignment_model[(*it).first] = Divide((*it).second, total);
        (*it).second = LogWeight::Zero();
    }

    for (int i = 0; i < fsas.size(); i++) {
        for (StateIterator<VectorFst<LogArc> > siter(fsas[i]);
                !siter.Done(); siter.Next()) {
            LogArc::StateId q = siter.Value();
            for (MutableArcIterator<VectorFst<LogArc> > aiter(&fsas[i], q); !aiter.Done(); aiter.Next()) {
                LogArc arc = aiter.Value();
                arc.weight = alignment_model[arc.ilabel];
                aiter.SetValue(arc);
            }
        }
    }

    total = LogWeight::Zero();
    return change;
}
示例#2
0
void M2MFstAligner::expectation( ){
  /*
    E-step: compute the arc posteriors for every training lattice in fsas
    and add them to the running tallies.

    For each lattice the forward (alpha) and backward (beta) distances are
    computed with ShortestDistance, then for every arc q -> r
        gamma = alpha[q] (x) w(arc) (x) beta[r]  (/)  beta[0]
    is added to prev_alignment_model[arc.ilabel] and to total.

    State 0 is used as the start state when normalizing (beta[0]).
    alpha and beta are cleared after each lattice.
  */
  for( unsigned int i=0; i<fsas.size(); i++ ){
    //Compute Forward and Backward probabilities
    ShortestDistance( fsas.at(i), &alpha );
    ShortestDistance( fsas.at(i), &beta, true );

    //ShortestDistance may hand back a vector that is shorter than the
    // number of states (states it never reached are simply not stored),
    // so every index below is range-checked instead of trusted.
    // A missing entry means a Zero distance, i.e. a Zero posterior, so
    // skipping the arc leaves the tallies unchanged.
    if( !beta.empty() ){
      const LogWeight norm = beta[0];

      //Compute the normalized Gamma probabilities and
      // update our running tally
      for( StateIterator<VectorFst<LogArc> > siter(fsas.at(i)); !siter.Done(); siter.Next() ){
        LogArc::StateId q = siter.Value();
        if( static_cast<size_t>(q) >= alpha.size() )
          continue;

        for( ArcIterator<VectorFst<LogArc> > aiter(fsas.at(i),q); !aiter.Done(); aiter.Next() ){
          const LogArc& arc = aiter.Value();
          if( static_cast<size_t>(arc.nextstate) >= beta.size() )
            continue;

          const LogWeight gamma = Divide(Times(Times(alpha[q], arc.weight), beta[arc.nextstate]), norm);
          //Check for any BadValue (NaN) results, otherwise add to the tally.
          //We call this 'prev_alignment_model' which may seem misleading, but
          // this convention leads to 'alignment_model' being the final version.
          if( gamma.Value()==gamma.Value() ){
            prev_alignment_model[arc.ilabel] = Plus(prev_alignment_model[arc.ilabel], gamma);
            total = Plus(total, gamma);
          }
        }
      }
    }
    alpha.clear();
    beta.clear();
  }
}
示例#3
0
// Called with a newly accepted connection handle.  Looks up the session
// whose id creator value equals the connection's address; if one exists the
// handle becomes that session's reliable comms handle and a request for its
// global entities is posted.  If no session matches, only an error is
// printed and the handle is left untouched.
void TcpAcceptCallback(ComAPIHandle ch)
{
	VuSessionsIterator	sessions(vuGlobalGroup);
	ulong				peerAddr = ComAPIQuery(ch, COMAPI_CONNECTION_ADDRESS);

	// Walk the global group until the creator matches the peer address.
	VuSessionEntity*	match = sessions.GetFirst();
	while (match && match->Id().creator_.value_ != peerAddr)
	{
		match = sessions.GetNext();
	}

	if (!match)
	{
		MonoPrint("TcpAcceptCallback error -- couldn't find session 0x%x\n", peerAddr);
		// should we disconnect here?
		return;
	}

	MonoPrint("TcpAcceptCallback connection -- made to 0x%x ch = %p\n", peerAddr, ch);
	match->SetReliableCommsHandle(ch, F4CommsMaxTCPMessageSize, F4CommsIdealTCPPacketSize);
	match->SetReliableCommsStatus(VU_CONN_ACTIVE);

	// Ask the newly connected session for its global entities.
	VuMessage *request = new VuGetRequest(VU_GET_GLOBAL_ENTS, match);
	request->RequestReliableTransmit();
	VuMessageQueue::PostVuMessage(request);
}
示例#4
0
/**
 * Construct an aligner from a previously trained model file.
 *
 * Every arc of the model contributes an (ilabel -> weight) entry to
 * alignment_model (map::insert, so the first weight seen for a label wins).
 * The first five entries of the model's input symbol table carry the
 * bookkeeping data:
 *   0: eps symbol, 1: skip symbol, 2: "<seq1_sep>_<seq2_sep>",
 *   3: s1s2_sep,   4: "<seq1_del>_<seq2_del>_<seq1_max>_<seq2_max>".
 *
 * The model read here is not deleted; isyms keeps pointing at its input
 * symbol table.
 */
M2MFstAligner::M2MFstAligner(string _model_file)
{
    VectorFst<LogArc> *loaded = VectorFst<LogArc>::Read(_model_file);

    StateIterator<VectorFst<LogArc> > states(*loaded);
    while (!states.Done()) {
        ArcIterator<VectorFst<LogArc> > arcs(*loaded, states.Value());
        while (!arcs.Done()) {
            const LogArc &entry = arcs.Value();
            alignment_model.insert(pair<LogArc::Label,LogWeight> (entry.ilabel, entry.weight));
            arcs.Next();
        }
        states.Next();
    }

    isyms = const_cast<SymbolTable *>(loaded->InputSymbols());

    // A literal 0 cannot be passed to Find() directly (see original note),
    // so the key goes through a plain int variable.
    int eps_key = 0;
    eps = isyms->Find(eps_key);
    skip = isyms->Find(1);

    vector<string> separators = split(isyms->Find(2), "_");
    seq1_sep = separators[0];
    seq2_sep = separators[1];
    s1s2_sep = isyms->Find(3);

    vector<string> settings = split(isyms->Find(4), "_");
    seq1_del = (settings[0].compare("true") == 0);
    seq2_del = (settings[1].compare("true") == 0);
    seq1_max = atoi(settings[2].c_str());
    seq2_max = atoi(settings[3].c_str());
}
/*
 *
 * Calculates the expected counts
 * based on the observed counts class
 * member. It must be called after
 * calculate observed counts for 
 * meaningful results.
 */
void WFST_Trainer_Local::calculate_expected_counts() {
  int arc_id = 0;

  for (StateIterator<VectorFst<LogArc> > siter(*fst); !siter.Done(); siter.Next()) {
    int state_id = siter.Value();
    int tmp_arc_id = arc_id;

    vector<double> total_traversals(symbol_bound,-DBL_MAX);

    for (ArcIterator<VectorFst<LogArc> > aiter(*fst,state_id); !aiter.Done(); aiter.Next()) {

      const LogArc &arc = aiter.Value();
      total_traversals[arc.ilabel] 
	= logadd(total_traversals[arc.ilabel],this->observed_counts[tmp_arc_id]);
      tmp_arc_id += 1;
    }
    for (ArcIterator<VectorFst<LogArc> > aiter(*fst,state_id); !aiter.Done(); aiter.Next()) {
      const LogArc &arc = aiter.Value();
      double arc_prob = arc.weight.Value();
     
      this->expected_counts[arc_id] = logadd(this->expected_counts[arc_id],
					     total_traversals[arc.ilabel] - arc_prob);
      arc_id += 1;
    }
  }
}
示例#6
0
void ARPA2WFST::phi_ify( ){
  for( StateIterator<VectorFst<StdArc> > siter(arpafst); !siter.Done(); 
       siter.Next() ){
    StdArc::StateId st = siter.Value();
    if( arpafst.Final(st)==StdArc::Weight::Zero() ){
      _get_final_bow( st );
    }
  }
}
示例#7
0
/**
 * Write the whole training set as one union lattice.
 *
 * The result can be fed straight into a counter to obtain expected
 * alignment counts for the EM-trained corpus; per the original author this
 * yields much better joint n-gram models, especially for small corpora.
 *
 * Call this BEFORE write_all_alignments: that function overrides some of
 * the weights.
 *
 * The union is hand-rolled because chaining the standard Union operation
 * (even with a rational FST) performed very poorly in the log semiring.
 * Every lattice's state 0 is merged into state 0 of the union; every other
 * state is appended, so state q of lattice n lands at q + (number of
 * non-zero states appended so far).  Each lattice is TopSort-ed in place
 * first.
 *
 * @param lattice  Path the union FST is written to.  The symbol table is
 *                 always written to "lattice.syms", whatever this path is.
 */
void
M2MFstAligner::write_lattice(string lattice)
{
    VectorFst<LogArc> merged;
    merged.SetStart(merged.AddState());

    int offset = 0;  // shift applied to the state ids of the current lattice
    for (size_t n = 0; n < fsas.size(); ++n) {
        TopSort(&fsas[n]);

        for (StateIterator<VectorFst<LogArc> > states(fsas[n]);
                !states.Done(); states.Next()) {
            LogArc::StateId src = states.Value();
            // State 0 is shared by all lattices; everything else is new.
            LogArc::StateId dst = (src == 0) ? 0 : merged.AddState();

            for (ArcIterator<VectorFst<LogArc> > arcs(fsas[n], src);
                    !arcs.Done(); arcs.Next()) {
                const LogArc &a = arcs.Value();
                merged.AddArc(dst, LogArc(a.ilabel, a.ilabel, a.weight,
                                          a.nextstate + offset));
            }

            if (fsas[n].Final(src) != LogWeight::Zero())
                merged.SetFinal(dst, LogWeight::One());
        }
        offset += fsas[n].NumStates() - 1;
    }

    // Normalize once at the end rather than at every union step.
    Push(&merged, REWEIGHT_TO_INITIAL);
    merged.Write(lattice);
    isyms->WriteText("lattice.syms");
}
示例#8
0
float M2MFstAligner::maximization( bool lastiter ){
  //Maximization. Standard approach is simple count normalization.  
  //The 'penalize' option penalizes links by total length.  Results seem to be inconclusive.
  //  Probably get an improvement by distinguishing between gaps and insertions, etc.
  bool cond = false;
  float change = abs(total.Value()-prevTotal.Value());
  if( cond==false ){
    map<LogArc::Label,LogWeight>::iterator it;
    //cout << "Total: " << total << " Change: " << abs(total.Value()-prevTotal.Value()) << endl;
    prevTotal = total;

    //Normalize and iterate to the next model.  We apply it dynamically 
    // during the expectation step.
    for( it=prev_alignment_model.begin(); it != prev_alignment_model.end(); it++ ){
      alignment_model[(*it).first] = Divide((*it).second,total);
      (*it).second = LogWeight::Zero();
    }
  }else{
    _conditional_max( true );
  }

 
  for( unsigned int i=0; i<fsas.size(); i++ ){
    for( StateIterator<VectorFst<LogArc> > siter(fsas[i]); !siter.Done(); siter.Next() ){
      LogArc::StateId q = siter.Value();
      for( MutableArcIterator<VectorFst<LogArc> > aiter(&fsas[i], q); !aiter.Done(); aiter.Next() ){
	LogArc arc = aiter.Value();
	if( penalize_em==true ){
	  LabelDatum* ld = &penalties[arc.ilabel];
	  if( ld->lhs>1 && ld->rhs>1 ){
	    arc.weight = 99; 
	  }else if( ld->lhsE==false && ld->rhsE==false ){
	    arc.weight = arc.weight.Value() * ld->tot;
	  }
	  /*
	    else{
	    arc.weight = arc.weight.Value() * (ld->tot+10);
	  }
	  */
	  if( arc.weight == LogWeight::Zero() || arc.weight != arc.weight )
	    arc.weight = 99;
	}else{
	  arc.weight = alignment_model[arc.ilabel];
	}
	aiter.SetValue(arc);
      }
    }
  }

  total = LogWeight::Zero();
  return change;
}
示例#9
0
// Invoked as a result of calling ComDPLAYOpen once a modem connection
// attempt completes.  ret == 0 means success.
//
// The first session in the global group that is not the local session is
// treated as the peer: on success it gets the handle as its reliable comms
// handle and is asked for its global entities; on failure its reliable
// handle is cleared.  If no such session exists, the handle is stored in
// the dangling-session list instead.
void ModemConnectCallback(ComAPIHandle ch, int ret)
{
	ulong				peerAddr = ComAPIQuery(ch, COMAPI_CONNECTION_ADDRESS);

	ShiAssert ( ch == FalconGlobalUDPHandle );		// We should only have one connection!

	// There should be only two sessions (us and them).
	VuEnterCriticalSection();
	VuSessionsIterator	sessions(vuGlobalGroup);
	VuSessionEntity*	remote = sessions.GetFirst();
	while (remote && remote == FalconLocalSession)
	{
		remote = sessions.GetNext();
	}

	if (!remote)
	{
		// Nobody to attach the handle to yet.
		if (gConnectionStatus == F4COMMS_PENDING)
			F4CommsConnectionCallback(F4COMMS_CONNECTED);
		// Add this connection to our "dangling connection" list
		AddDanglingSession(NULL, ch,  peerAddr, peerAddr);
		MonoPrint("ModemConnectCallback invoked: saving handle..\n");
		VuExitCriticalSection();
		return;
	}

	if (ret != 0)
	{
		MonoPrint("ModemConnectCallback invoked: bad connection.\n");
		remote->SetReliableCommsHandle(NULL);
		remote->SetReliableCommsStatus(VU_CONN_INACTIVE);
	}
	else
	{
		MonoPrint("ModemConnectCallback invoked: connected!\n");
		remote->SetReliableCommsHandle(ch, F4CommsMaxTCPMessageSize, F4CommsIdealTCPPacketSize);
		remote->SetReliableCommsStatus(VU_CONN_ACTIVE);
		// Request global entities from this guy
		VuMessage *request = new VuGetRequest(VU_GET_GLOBAL_ENTS, remote);
		request->RequestReliableTransmit();
		VuMessageQueue::PostVuMessage(request);
		if (gConnectionStatus == F4COMMS_PENDING)
			F4CommsConnectionCallback(F4COMMS_CONNECTED);
	}
	VuExitCriticalSection();
}
示例#10
0
M2MFstAligner::M2MFstAligner( string _model_file, bool _penalize, bool _penalize_em, bool _restrict  ){
  /*
    Initialize the aligner from a previously trained model.

    Every arc of the model contributes an (ilabel -> weight) entry to
    alignment_model (map::insert, so the first weight seen for a label wins).
    The first five symbols of the model's input symbol table hold the
    bookkeeping info:
      0: eps, 1: skip, 2: "<seq1_sep>_<seq2_sep>", 3: s1s2_sep,
      4: "<seq1_del>_<seq2_del>_<seq1_max>_<seq2_max>"
    where "_" is the tie character used to pack the parameters.

    The model read here is not deleted; isyms keeps pointing at its input
    symbol table.
  */
  penalize    = _penalize;
  penalize_em = _penalize_em;
  restrict    = _restrict;
  penalties.set_empty_key(0);

  VectorFst<LogArc>* loaded = VectorFst<LogArc>::Read( _model_file );
  StateIterator<VectorFst<LogArc> > states(*loaded);
  while( !states.Done() ){
    ArcIterator<VectorFst<LogArc> > arcs(*loaded, states.Value());
    while( !arcs.Done() ){
      const LogArc& entry = arcs.Value();
      alignment_model.insert( pair<LogArc::Label,LogWeight>(entry.ilabel,entry.weight) );
      arcs.Next();
    }
    states.Next();
  }
  isyms = const_cast<SymbolTable*>( loaded->InputSymbols() );

  //A literal 0 cannot be passed to Find() directly (see original note),
  // so the key goes through a plain int variable.
  int eps_key = 0;
  eps  = isyms->Find(eps_key);
  skip = isyms->Find(1);

  string tie = "_"; //tie to pack parameters

  string packed_seps = isyms->Find(2);
  vector<string> separators = tokenize_utf8_string( &packed_seps, &tie );
  seq1_sep = separators[0];
  seq2_sep = separators[1];
  s1s2_sep = isyms->Find(3);

  string packed_params = isyms->Find(4);
  vector<string> settings = tokenize_utf8_string( &packed_params, &tie );
  seq1_del = ( settings[0].compare("true") == 0 );
  seq2_del = ( settings[1].compare("true") == 0 );
  seq1_max = atoi(settings[2].c_str());
  seq2_max = atoi(settings[3].c_str());
}
/*
 * Calculates the observed counts (without parallelization) and log-adds
 * them into this->observed_counts.
 *
 * The two FSTs are walked in lockstep, state by state: the arcs of `fst`
 * are used only to recover each arc's original id, and the arcs of
 * `fst_log` supply the weights.  The code relies on both FSTs listing the
 * arcs of a state in the same order.
 *
 * @param exemplar_num - the training exemplar number (not read here)
 * @param fst - the FST with FeatureArcs
 * @param fst_log - the FST with the actual weights in log space
 * @param alpha - the alpha values, indexed by state id
 * @param beta - the beta values, indexed by state id
 *
 */
void WFST_Trainer_Local::calculate_observed_counts(int exemplar_num, 
					    VectorFst<FeatureArc> *fst, 
					    VectorFst<LogArc> *fst_log, 
					    vector<LogWeight> *alpha, 
					    vector<LogWeight> *beta) 
{
  //normalizing constant
  // NOTE(review): computed as alpha[0] (x) beta[0]; confirm that state 0 is
  // the start state and that including alpha[0] is intended.
  LogWeight Z = Times((*alpha)[0], (*beta)[0]);
  
  // Running arc index over the whole FST, in state-iteration order.
  int arc_id = 0;
  for (StateIterator<VectorFst<FeatureArc> > siter(*fst); 
       !siter.Done(); siter.Next()) {
    int state_id = siter.Value();
    // Maps the running arc index to the arc's original (zero-based) id.
    map<int,int> arc_mapper;
    int tmp = arc_id;
    
    // Pass 1 (FeatureArc FST): decode the original arc id of each arc.
    for (ArcIterator<VectorFst<FeatureArc> > aiter1(*fst,state_id); 
	 !aiter1.Done(); aiter1.Next()) {
      const FeatureArc &arc = aiter1.Value();
      // The second weight component holds the id as a negative log; undo
      // that with exp(-x), round, and shift to zero-based.
      int old_arc_id = round(exp(-arc.weight.Value2().Value()));
      arc_mapper[tmp] = old_arc_id - 1;
      ++tmp;
    }

    // Pass 2 (log FST): accumulate the normalized arc score.
    for (ArcIterator<VectorFst<LogArc> > aiter2(*fst_log,state_id); 
	 !aiter2.Done(); aiter2.Next()) {
      const LogArc &arc = aiter2.Value();
      // operator[] inserts 0 if arc_id was not recorded in pass 1.
      int old_arc_id2 = arc_mapper[arc_id];
      // alpha[source] (x) arc weight (x) beta[destination]
      LogWeight val = ((Times(Times((*alpha)[state_id],arc.weight.Value()), 
			      (*beta)[arc.nextstate])));

      // Divide by Z, then negate the resulting weight value before
      // log-adding it into the count for the original arc id.
      double log_val = Divide(val,Z).Value();
      this->observed_counts[old_arc_id2] = logadd(this->observed_counts[old_arc_id2],-log_val);
      ++arc_id;
    }
  }
}
示例#12
0
// Shuts comms down: leaves the current game, detaches every session in the
// global group from its game, de-initializes comms if a connection is
// active, and finally calls CleanupComms().  Always returns TRUE.
int EndCommsStuff (void)
{
	if (gMainThread){
		gMainThread->LeaveGame();
	}

	// KCK HACK: To avoid vu's problem with shutting down comms when remote sessions are active
	VuSessionsIterator	siter(vuGlobalGroup);
	FalconSessionEntity	*cs;
	for (cs = (FalconSessionEntity*) siter.GetFirst(); cs != NULL;){
		// Fetch the next session before calling JoinGame(NULL) on the
		// current one (presumably so that the call cannot disturb the
		// iterator's position).
		FalconSessionEntity	*oldCs = cs;
		cs = (FalconSessionEntity*) siter.GetNext();
		oldCs->JoinGame(NULL);
	}
	// END HACK

	// gMainThread is treated as optional at the top of this function, so it
	// must not be dereferenced unguarded here either.
	if (gMainThread && gConnectionStatus == F4COMMS_CONNECTED){
		gMainThread->DeinitComms();
	}

	CleanupComms();

	return (TRUE);
}
示例#13
0
void M2MFstAligner::Sequences2FST( VectorFst<LogArc>* fst, vector<string>* seq1, vector<string>* seq2 ){
  /*
    Build an FST that represents all possible alignments between seq1 and seq2, given the 
     parameter values input by the user.  Here we encode the input and output labels, in fact
     creating a WFSA.  This simplifies the training process, but means that we can only 
     easily compute a joint maximization.  In practice joint maximization seems to give the 
     best results anyway, so it probably doesn't matter.

    Note: this also performs the initialization routine.  It performs a UNIFORM initialization
     meaning that every non-null alignment sequence is eventually initialized to 1/Num(unique_alignments).
     It might be more appropriate to consider subsequence length here, but for now we stick 
     to the m2m-aligner approach.

    Layout: one state is added per grid cell (i,j), 0<=i<=len(seq1), 0<=j<=len(seq2), in
     row-major order, and cell (i,j) is addressed as state i*(len(seq2)+1)+j.  This assumes
     fst is empty on entry so that AddState() hands out matching ids -- TODO confirm.
     State 0 is the start state and the last cell is the only final state.

    Side effects: new labels are added to isyms (ids therefore depend on the order of the
     AddSymbol calls below); prev_alignment_model and total are updated from the arcs of the
     finished FST; _compute_penalties is called once for each label not seen before.

    TODO: Add an FST version and support for conditional maximization.  May be useful for languages
     like Japanese where there is a distinct imbalance in the seq1->seq2 length correspondences.
  */
  int istate=0; int ostate=0;
  for( unsigned int i=0; i<=seq1->size(); i++ ){
    for( unsigned int j=0; j<=seq2->size(); j++ ){
      fst->AddState();
      istate = i*(seq2->size()+1)+j;

      //Epsilon arcs for seq1
      // Only when seq1_del is set: arcs labelled skip+s1s2_sep+<seq2 chunk> that advance
      // j by l (1..seq2_max) and leave i unchanged.  Fixed weight 99.
      if( seq1_del==true )
	for( unsigned int l=1; l<=seq2_max; l++ ){
	  if( j+l<=seq2->size() ){
	    vector<string> subseq2( seq2->begin()+j, seq2->begin()+j+l );
	    int is = isyms->AddSymbol(skip+s1s2_sep+vec2str(subseq2, seq2_sep));
	    ostate = i*(seq2->size()+1) + (j+l);
	    LogArc arc( is, is, 99, ostate );
	    fst->AddArc( istate, arc );
	    // (Model tallying is not done here; it happens in the final pass below.)
	  }
	}

      //Epsilon arcs for seq2
      // Only when seq2_del is set: arcs labelled <seq1 chunk>+s1s2_sep+skip that advance
      // i by k (1..seq1_max) and leave j unchanged.  Fixed weight 99.
      if( seq2_del==true )
	for( unsigned int k=1; k<=seq1_max; k++ ){
	  if( i+k<=seq1->size() ){
	    vector<string> subseq1( seq1->begin()+i, seq1->begin()+i+k );
	    int is = isyms->AddSymbol(vec2str(subseq1, seq1_sep)+s1s2_sep+skip);
	    ostate = (i+k)*(seq2->size()+1) + j;
	    LogArc arc( is, is, 99, ostate );
	    fst->AddArc( istate, arc );
	    // (Model tallying is not done here; it happens in the final pass below.)
	  }
	}

      //All the other arcs
      // k-to-l links: consume k tokens of seq1 and l tokens of seq2 at once.
      for( unsigned int k=1; k<=seq1_max; k++ ){
	for( unsigned int l=1; l<=seq2_max; l++ ){
	  if( i+k<=seq1->size() && j+l<=seq2->size() ){
	    vector<string> subseq1( seq1->begin()+i, seq1->begin()+i+k );
	    string s1 = vec2str(subseq1, seq1_sep);
	    vector<string> subseq2( seq2->begin()+j, seq2->begin()+j+l );
	    string s2 = vec2str(subseq2, seq2_sep);
	    //This says only 1-M and N-1 allowed, no M-N links!
	    if( restrict==true && l>1 && k>1)
	      continue;
	    int is = isyms->AddSymbol(s1+s1s2_sep+s2);
	    ostate = (i+k)*(seq2->size()+1) + (j+l);
	    // NOTE(review): if LogWeight::One().Value() is 0, the (k+l) factor has no
	    // effect and every such arc starts at One -- confirm this is intended.
	    LogArc arc( is, is, LogWeight::One().Value()*(k+l), ostate );
	    fst->AddArc( istate, arc );
	    //During the initialization phase, just count non-eps transitions
	    //We currently initialize to uniform probability so there is also 
            // no need to tally anything here.
	    // (Model tallying happens in the final pass below.)
	  }
	}
      }

    }
  }

  fst->SetStart(0);
  fst->SetFinal( ((seq1->size()+1)*(seq2->size()+1))-1, LogWeight::One() );
  //Unless seq1_del==true && seq2_del==true we will have unconnected states
  // thus we need to run connect to clean out these states
  if( seq1_del==false || seq2_del==false )
    Connect(fst);

  //Only add arcs that are in the FINAL fst to the model
  // Every surviving arc's weight is added to total; a label seen for the first time is
  // inserted into prev_alignment_model, otherwise its weight is Plus-ed into the entry.
  for( StateIterator<VectorFst<LogArc> > siter(*fst); !siter.Done(); siter.Next() ){
    LogArc::StateId q = siter.Value();
    for( ArcIterator<VectorFst<LogArc> > aiter(*fst, q); !aiter.Done(); aiter.Next() ){
      const LogArc& arc = aiter.Value();
      if( prev_alignment_model.find(arc.ilabel)==prev_alignment_model.end() ){
	prev_alignment_model.insert( pair<LogArc::Label,LogWeight>(arc.ilabel, arc.weight) );
	// Recover rough link lengths from the label text.
	// NOTE(review): the characters "}", "_" and "|" are hard-coded here rather than
	// taken from s1s2_sep / skip / seq1_sep / seq2_sep -- confirm they match the
	// configured separators.
	string sym = isyms->Find(arc.ilabel);
	size_t del = sym.find("}");
	size_t ski = sym.find("_");
	size_t chu = sym.find("|");
	// k and l start at 1 and at most one of them is bumped to 2, depending on which
	// side of "}" the first "|" falls; longer chunks are not distinguished.
	int k=1; int l=1;
	// xd / yd record on which side a "_" was found, but they are not read afterwards:
	// _compute_penalties below is always passed false, false.
	bool xd = false; bool yd = false;
	if( chu!=string::npos ){
	  if( chu<del )
	    k += 1;
	  else
	    l += 1;
	}
	if( ski!=string::npos ){
	  if( ski<del )
	    xd = true;
	  else
	    yd = true;
	}
	_compute_penalties( arc.ilabel, k, l, false, false );
      }else{
	prev_alignment_model[arc.ilabel] = Plus(prev_alignment_model[arc.ilabel],arc.weight);
      }
      total = Plus( total, arc.weight );
    }
  }

  return;
}
示例#14
0
/**
 * Generic alignment generator: decode the nbest alignments from one
 * lattice.
 *
 * The lattice is mapped to the standard (tropical) arc type and each arc
 * is re-weighted before running ShortestPath:
 *   - get_max_length(label) == -1  -> weight 999;
 *   - otherwise                    -> alignment_model[label] * max length
 *     (the "maxl" heuristic, which further favours short links);
 *   - any weight that ends up Zero (infinite) or NaN is reset to 999 so
 *     that ShortestPath always has something finite to work with.
 *
 * Per the original author: without the maxl heuristic the 1-best
 * alignments are better but the resulting corpus is less flexible for
 * small training sets; training the joint n-gram model on the alignment
 * lattices avoids that trade-off.
 *
 * NOTE: labels that never occurred in the trained model are looked up in
 * alignment_model with operator[], which inserts a default entry for them.
 *
 * @param ifst   Alignment lattice for one sequence pair.
 * @param nbest  Number of shortest paths to extract.
 * @return The paths found by FstPathFinder, or an empty vector when the
 *         shortest-path result has no states.  That should only happen
 *         when a skip would be required for a valid path but the
 *         seq1_del / seq2_del settings do not allow it.
 */
vector<PathData> M2MFstAligner::write_alignment(const VectorFst<LogArc> &ifst,
        int nbest)
{
    VectorFst<StdArc> decode_fst;
    Map(ifst, &decode_fst, LogToStdMapper());

    for (StateIterator<VectorFst<StdArc> > states(decode_fst); !states.Done();
            states.Next()) {
        for (MutableArcIterator<VectorFst<StdArc> > arcs(&decode_fst, states.Value());
                !arcs.Done(); arcs.Next()) {
            StdArc edge = arcs.Value();
            int span = get_max_length(isyms->Find(edge.ilabel));

            if (span != -1)
                edge.weight = alignment_model[edge.ilabel].Value() * span;
            else
                edge.weight = 999;

            // Replace 'Infinity' and NaN (x != x) by a large finite cost.
            if (edge.weight == LogWeight::Zero() || edge.weight != edge.weight)
                edge.weight = 999;

            arcs.SetValue(edge);
        }
    }

    VectorFst<StdArc> best;
    ShortestPath(decode_fst, &best, nbest);
    RmEpsilon(&best);

    // Nothing decodable: hand back an empty result.
    if (best.NumStates() == 0)
        return vector<PathData>();

    FstPathFinder finder(skipSeqs);
    finder.isyms = isyms;
    finder.findAllStrings(best);
    return finder.paths;
}
示例#15
0
// Invoked as a result of calling ComTCPOpenConnect once a TCP connection
// attempt completes.  ret == 0 means success.
//
// On failure only a message is printed.  When connecting to a server the
// handle becomes the global TCP handle.  Otherwise the session whose id
// creator value equals the connection's address is marked active and asked
// for its global entities; if no session matches, the handle is stored in
// the dangling-session list.
void TcpConnectCallback(ComAPIHandle ch, int ret)
{
	ulong				peerAddr = ComAPIQuery(ch, COMAPI_CONNECTION_ADDRESS);

	MonoPrint("Calling TcpConnectCallback.\n");

	if (ret != 0)
	{
		MonoPrint ("RequestConnection failed %d, will retry\n", ret);
		return;
	}

	// Server connections are handled first and never touch the session list.
	if (FalconConnectionType == FCT_SERVER_AVAILABLE)
	{
		FalconGlobalTCPHandle = ch;
		if (!ch)
		{
			FalconServerTCPStatus = VU_CONN_INACTIVE;	// This would be bad..
			F4CommsConnectionCallback(F4COMMS_ERROR_COULDNT_CONNECT_TO_SERVER);
			return;
		}
		FalconServerTCPStatus = VU_CONN_ACTIVE;
		if (gConnectionStatus == F4COMMS_PENDING)
			F4CommsConnectionCallback(F4COMMS_CONNECTED);
		return;
	}

	// Peer-to-peer: look for the session that belongs to this address.
	VuEnterCriticalSection();
	VuSessionsIterator	sessions(vuGlobalGroup);
	VuSessionEntity*	match = sessions.GetFirst();
	while (match && match->Id().creator_.value_ != peerAddr)
	{
		match = sessions.GetNext();
	}

	if (!match)
	{
		// We're connected (although we're not yet ready to send)
		if (gConnectionStatus == F4COMMS_PENDING){
			F4CommsConnectionCallback(F4COMMS_CONNECTED);
		}

		// Add this connection to our "dangling connection" list
		AddDanglingSession(NULL, ch, peerAddr, peerAddr);
		VuExitCriticalSection();
		return;
	}

	RemoveDanglingSession(match);
	// Activate whichever of the session's channels this handle belongs to.
	if (ch && match->GetCommsHandle() == ch)
		match->SetCommsStatus(VU_CONN_ACTIVE);
	if (ch && match->GetReliableCommsHandle() == ch)
		match->SetReliableCommsStatus(VU_CONN_ACTIVE);
	VuExitCriticalSection();

	if (gConnectionStatus == F4COMMS_PENDING)
		F4CommsConnectionCallback(F4COMMS_CONNECTED);

	// Request global entities from this guy
	VuMessage *request = new VuGetRequest(VU_GET_GLOBAL_ENTS, match);
	request->RequestReliableTransmit();
	VuMessageQueue::PostVuMessage(request);
}
//
//  First characters in scripts.
//  Create a UVector whose contents are pointers to UnicodeStrings for the First Characters in each script.
//  The vector is sorted according to this index's collation.
//
//  For every script code, the candidate kept is the string that collates
//  lowest among those that do not collate before "a".  Candidates come from
//  TO_TRY and, when the collator is a RuleBasedCollator, from its
//  contractions and expansions.
//
//  Returns NULL if status is already a failure on entry or if the result
//  vector cannot be allocated (status is then U_MEMORY_ALLOCATION_ERROR).
//  The returned vector owns its elements (deleter: uprv_deleteUObject).
//
//  This code is too slow to use, so for now hard code the data.
//    The hard coded implementation follows.
//
UVector *AlphabeticIndex::firstStringsInScript(Collator *ruleBasedCollator, UErrorCode &status) {

    if (U_FAILURE(status)) {
        return NULL;
    }

    UnicodeString results[USCRIPT_CODE_LIMIT];
    UnicodeString LOWER_A = UNICODE_STRING_SIMPLE("a");

    UnicodeSetIterator siter(*TO_TRY);
    while (siter.next()) {
        const UnicodeString &current = siter.getString();
        Collator::EComparisonResult r = ruleBasedCollator->compare(current, LOWER_A);
        if (r < 0) {  // TODO fix; we only want "real" script characters, not
                      // symbols.
            continue;
        }

        int script = uscript_getScript(current.char32At(0), &status);
        // uscript_getScript reports failure with a negative code; never use
        // that (or anything else out of range) as an index into results[].
        if (script < 0 || script >= USCRIPT_CODE_LIMIT) {
            continue;
        }
        if (results[script].length() == 0 ||
            ruleBasedCollator->compare(current, results[script]) < 0) {
            results[script] = current;
        }
    }

    // Contractions and expansions are only available from a rule based
    // collator.  For any other collator the cast yields NULL and this step
    // is skipped instead of dereferencing a null pointer.
    RuleBasedCollator *rbc = dynamic_cast<RuleBasedCollator *>(ruleBasedCollator);
    if (rbc != NULL) {
        UnicodeSet extras;
        UnicodeSet expansions;
        const UCollator *uRuleBasedCollator = rbc->getUCollator();
        ucol_getContractionsAndExpansions(uRuleBasedCollator, extras.toUSet(), expansions.toUSet(), true, &status);
        extras.addAll(expansions).removeAll(*TO_TRY);
        if (extras.size() != 0) {
            const Normalizer2 *normalizer = Normalizer2::getNFKCInstance(status);
            // Do not touch the normalizer if it could not be obtained.
            if (U_SUCCESS(status) && normalizer != NULL) {
                UnicodeSetIterator extrasIter(extras);
                while (extrasIter.next()) {
                    // Read the element the iterator is positioned on.
                    // (Calling next() again here would skip every other
                    // element and yield a boolean, not the string.)
                    const UnicodeString &current = extrasIter.getString();
                    if (!TO_TRY->containsAll(current))
                        continue;
                    if (!normalizer->isNormalized(current, status) ||
                        ruleBasedCollator->compare(current, LOWER_A) < 0) {
                        continue;
                    }
                    int script = uscript_getScript(current.char32At(0), &status);
                    if (script < 0 || script >= USCRIPT_CODE_LIMIT) {
                        continue;
                    }
                    if (results[script].length() == 0 ||
                        ruleBasedCollator->compare(current, results[script]) < 0) {
                        results[script] = current;
                    }
                }
            }
        }
    }

    UVector *dest = new UVector(status);
    if (dest == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    dest->setDeleter(uprv_deleteUObject);
    for (uint32_t i = 0; i < sizeof(results) / sizeof(results[0]); ++i) {
        if (results[i].length() > 0) {
            dest->addElement(results[i].clone(), status);
        }
    }
    dest->sortWithUComparator(sortCollateComparator, ruleBasedCollator, status);
    return dest;
}