示例#1
0
/// Looks up the score of the n-gram held in `tokens`, backing off to shorter
/// n-grams when the full one is not stored.
///
/// `lm_` is indexed by (n-gram length - 1). When the full n-gram is missing,
/// the history (all tokens but the last) is looked up for a back-off weight,
/// and the method recurses on the n-gram with its first token dropped. Scores
/// are combined by addition (presumably log-domain values -- confirm).
///
/// @param tokens the n-gram to score; taken by value because it is shortened
///               locally while backing off.
/// @return the stored score, or the backed-off score; `unk_node_.prob` for an
///         unknown single token.
/// @throws language_model_exception if `tokens` is empty.
float language_model::prob_calc(token_list tokens) const
{
    if (tokens.size() == 0)
        throw language_model_exception{"prob_calc: tokens is empty!"};

    // Base case: a single token either has a unigram entry or is unknown.
    if (tokens.size() == 1)
    {
        auto unigram = lm_[0].find(token_list{tokens[0]});
        if (!unigram)
            return unk_node_.prob;
        return unigram->prob;
    }

    // The complete n-gram is stored: no back-off needed.
    auto exact = lm_[tokens.size() - 1].find(tokens);
    if (exact)
        return exact->prob;

    // Split into the history (drop last token) and the shorter n-gram
    // (drop first token) used for the recursive call.
    auto history = tokens;
    history.pop_back();
    tokens.pop_front();

    // Backing off from a bigram: a history word without a unigram entry is
    // replaced by the "<unk>" vocabulary entry before the history lookup.
    if (tokens.size() == 1)
    {
        if (!lm_[0].find(history[0]))
            history[0] = vocabulary_.at("<unk>");
    }

    auto context = lm_[history.size() - 1].find(history);
    if (context)
        return context->backoff + prob_calc(tokens);

    // No back-off weight stored for this history: use the shorter n-gram as is.
    return prob_calc(tokens);
}
示例#2
0
// Compares two token lists position by position.
//
// Returns true when both lists have the same length and every pair of tokens
// at the same index has the same type; a pair of VAR tokens must additionally
// carry the same variable name. Returns false otherwise.
bool is_equal(token_list& op1, token_list& op2)
{
    int length = op1.size();
    if (length != op2.size())
        return false;

    for (int pos = 0; pos < length; ++pos)
    {
        auto&& lhs = op1[pos];
        auto&& rhs = op2[pos];

        // Tokens of different kinds can never match.
        if (lhs.get_type() != rhs.get_type())
            return false;

        // Variables are only equal when they name the same variable.
        const bool both_vars = lhs.get_type() == token_type::VAR
                            && rhs.get_type() == token_type::VAR;
        if (both_vars && lhs.get_var_name() != rhs.get_var_name())
            return false;
    }

    return true;
}
示例#3
0
		// Drops every token whose type is type::EMPTY from tokens_.
		// The surviving tokens are collected, in their original order, into a
		// fresh list which then replaces the contents of tokens_.
		void clean_token_list(token_list &tokens_) {
			token_list kept;
			for (const auto &entry : tokens_) {
				if (entry.type != type::EMPTY)
					kept.push_back(entry);
			}
			tokens_ = kept;
		}
示例#4
0
float language_model::log_prob(const token_list& tokens) const
{
    float prob = 0.0f;

    lm::lm_state state;
    lm::lm_state state_next;
    for (const auto& token : tokens.tokens())
    {
        prob += score(state, token, state_next);
        state = state_next;
    }

    return prob;
}
示例#5
0
/// Sums `prob_calc` over a sliding window of at most `N_` tokens.
///
/// For the token at index i the window ends just past i. While fewer than
/// `N_` tokens are available (i < N_ - 1) the window starts at the first
/// token; afterwards it starts at i - N_ + 1, so it spans exactly `N_` tokens.
///
/// @param tokens the sequence to score.
/// @return the sum of the window scores (0 for an empty sequence).
float language_model::log_prob(const token_list& tokens) const
{
    using diff_type = decltype(tokens.tokens().begin())::difference_type;

    const auto head = tokens.tokens().begin();

    float total = 0.0f;
    for (uint64_t i = 0; i < tokens.size(); ++i)
    {
        // Short prefix: not enough history yet, so start at the first token.
        // Otherwise: full window of N_ tokens ending at index i.
        const auto window_begin = (i < N_ - 1)
            ? head
            : head + static_cast<diff_type>(i - N_ + 1);
        const auto window_end = head + static_cast<diff_type>(i) + 1;

        total += prob_calc(window_begin, window_end);
    }

    return total;
}
示例#6
0
// Validates the placement of the script command at start_entry_.
//
// Every type registered for the command name is checked in turn:
//   * NONE       - accepted immediately, without looking at the neighbours.
//   * LEFT_RIGHT - needs an operand on both sides of the command.
//   * RIGHT      - needs an operand to the right of the command.
// A neighbour counts as an operand when it is not one of the delimiter or
// operator tokens listed in the helpers below. The command is accepted as soon
// as one type matches; otherwise a syntax_error is thrown whose text is the
// command name followed directly by the first problem recorded (or
// "Unknown error" when none was recorded).
//
// Throws compiler::exception::syntax_error when the name has no registered
// type or when no type accepts the surrounding tokens.
void carma::rules::script_command_rule::apply(carma::compiler::context& a_scope, token_list &tokens_, token_entry& start_entry_, token_entry& end_entry_) {
    auto types = script_command_parser::AllTypesForName(start_entry_->val);
    if (types.size() == 0) {
        throw compiler::exception::syntax_error("Not a valid script command ("+ start_entry_->val +")");
    }

    // True when the token can begin a right-hand operand.
    auto can_follow = [](token_entry entry) {
        return entry->val != "}" && entry->val != ")" && entry->val != "]" && entry->val != ";"
            && entry->val != "=" && entry->val != "<" && entry->val != ">";
    };
    // True when the token can end a left-hand operand.
    auto can_precede = [](token_entry entry) {
        return entry->val != "{" && entry->val != "(" && entry->val != "[" && entry->val != ";"
            && entry->val != "=" && entry->val != "<" && entry->val != ">";
    };

    bool accepted = false;
    std::vector<std::string> problems;
    auto following = std::next(start_entry_);

    for (auto type : types) {
        if (type == script_command::Type::NONE) {
            // A nular command is valid wherever it appears.
            return;
        }

        if (type == script_command::Type::LEFT_RIGHT) {
            // Right-hand operand (checked first so its message is recorded first).
            bool has_right = false;
            if (following == end_entry_) {
                problems.push_back("Missing argument for binary script command " + start_entry_->val + " End of statement?");
            }
            else if (can_follow(following)) {
                has_right = true;
            }
            else {
                problems.push_back("Invalid Syntax for binary script command " + start_entry_->val + " With next: " + following->val);
            }

            // Left-hand operand.
            bool has_left = false;
            if (start_entry_ == tokens_.begin()) {
                // Nothing precedes the command.
                problems.push_back("Missing argument for binary script command " + start_entry_->val);
            }
            else {
                auto preceding = std::prev(start_entry_);
                if (can_precede(preceding)) {
                    has_left = true;
                }
                else {
                    problems.push_back("Invalid Syntax for binary script command " + start_entry_->val + " With prev: " + preceding->val);
                }
            }

            if (has_left && has_right)
                accepted = true;
        }
        else if (type == script_command::Type::RIGHT) {
            // Unary command: only the right-hand side matters.
            if (following == end_entry_) {
                problems.push_back("Missing argument for unary script command " + start_entry_->val + " End of statement?");
            }
            else if (can_follow(following)) {
                accepted = true;
            }
            else {
                problems.push_back("Invalid Syntax for unary script command " + start_entry_->val + " With next: " + following->val);
            }
        }
    }

    if (accepted)
        return;

    if (problems.empty())
        problems.push_back("Unknown error");
    throw compiler::exception::syntax_error("Syntax Error at script command (" + start_entry_->val + ")" + problems.at(0));
}
示例#7
0
// Parses a flat token list into filter definitions and stores them in
// 'filters', discarding whatever 'filters' held before.
//
// The parser is a two-state machine:
//
//   FilterTypeState       expects a filter type keyword ("KernelFilter" or
//                         "SeparableFilter") followed by '{'.
//   FilterParameterState  consumes parameters ("name", "coefficients",
//                         "row-vector", "col-vector"/"column-vector") until
//                         the closing '}', at which point the accumulated
//                         filter is validated and appended to 'filters'.
//
// Every problem is reported through PARSE_ERROR. The macro is defined outside
// this function; the code below reads as if it does not return (presumably it
// throws) -- TODO confirm.
void FilterParser::Parse( const token_list& tokens )
{
   filters.Clear();

   // Accumulators for the filter definition currently being parsed. They are
   // reset each time a definition is closed with '}'.
   int filterType = UnknownFilterType;
   String filterName;
   int filterSize = 0; // 0 means "not yet determined by any coefficient list"
   KernelFilter::coefficient_matrix    filterMatrix;
   SeparableFilter::coefficient_vector rowVector, colVector;

   int state = FilterTypeState;

   // Note: the iterator is advanced inside each state handler, not here.
   for ( token_list::const_iterator i = tokens.Begin(); i != tokens.End(); )
   {
      switch ( state )
      {
      case FilterTypeState:
         {
            // Identify the filter type keyword; anything else is an error,
            // with a message that depends on what kind of token was found.
            if ( i->token == "KernelFilter" )
               filterType = KernelFilterType;
            else if ( i->token == "SeparableFilter" )
               filterType = SeparableFilterType;
            else if ( i->token == '{' || i->token == '}' )
               PARSE_ERROR( "Misplaced bracket", i );
            else if ( !i->token.IsValidIdentifier() )
               PARSE_ERROR( "Invalid filter type \'" + i->token + '\'', i );
            else
               PARSE_ERROR( "Unknown filter type \'" + i->token + '\'', i );

            // The type keyword must be immediately followed by '{'.
            if ( ++i == tokens.End() || i->token != '{' )
               PARSE_ERROR( "Expected left bracket", i );
            state = FilterParameterState;

            // Step past the '{'.
            ++i;
         }
         break;

      case FilterParameterState:
         {
            if ( i->token == '}' )
            {
               // End of the definition: check that it is complete, build the
               // filter object and append it to the result.
               if ( filterName.IsEmpty() )
                  PARSE_ERROR( "Missing filter name", i );
               if ( filterSize == 0 )
                  PARSE_ERROR( "Empty filter definition", i );

               if ( filterType == KernelFilterType )
               {
                  if ( filterMatrix.IsEmpty() )
                     PARSE_ERROR( "Missing kernel filter coefficients", i );

                  filters.Add( Filter( KernelFilter( filterMatrix, filterName ) ) );
               }
               else if ( filterType == SeparableFilterType )
               {
                  // A separable filter needs both its row and column vectors.
                  if ( rowVector.IsEmpty() || colVector.IsEmpty() )
                     PARSE_ERROR( "Missing separable filter coefficients", i );

                  filters.Add( Filter( SeparableFilter( rowVector, colVector, filterName ) ) );
               }
               else
                  PARSE_ERROR( "Internal parser error", i );

               // Reset the accumulators for the next definition.
               filterName.Clear();
               filterSize = 0;
               filterMatrix = KernelFilter::coefficient_matrix();
               rowVector = colVector = SeparableFilter::coefficient_vector();

               state = FilterTypeState;
               ++i;
            }
            else
            {
               // i points at a parameter name. CaptureParameterValueTokens is
               // defined elsewhere; from its use here it presumably adjusts
               // j and k so that [j,k) delimits the value tokens of this
               // parameter -- TODO confirm. n is the number of value tokens.
               token_list::const_iterator j = i; ++j;
               token_list::const_iterator k = tokens.End();
               CaptureParameterValueTokens( j, k );
               int n = Distance( j, k );

               if ( i->token == "name" )
               {
                  if ( n == 0 )
                     PARSE_ERROR( "Expected a filter name", i );
                  if ( !filterName.IsEmpty() )
                     PARSE_ERROR( "Duplicate filter name", i );
                  // The name is the value tokens joined by single spaces.
                  filterName = j->token;
                  for ( ; ++j < k; )
                     filterName += ' ' + j->token;
               }
               else if ( i->token == "coefficients" )
               {
                  // Only valid for kernel filters, and only once per filter.
                  if ( filterType != KernelFilterType )
                     PARSE_ERROR( "Invalid kernel filter parameter", i );
                  if ( !filterMatrix.IsEmpty() )
                     PARSE_ERROR( "Duplicate kernel filter coefficients", i );

                  // Convert the n value tokens to coefficients; p walks the
                  // tokens in step with the output pointer c.
                  token_list::const_iterator p = j;
                  GenericVector<KernelFilter::coefficient> C( n );
                  for ( KernelFilter::coefficient* c = C.Begin(); c < C.End(); ++c, ++p )
                     *c = p->token.ToFloat();

                  int numberOfCoefficients = filterSize*filterSize;
                  if ( numberOfCoefficients != 0 )
                  {
                     // A size is already known: the count must agree with it.
                     if ( numberOfCoefficients != n )
                        PARSE_ERROR( "Incongruent kernel filter size; expected " + String( numberOfCoefficients ) + " coefficients", j );
                  }
                  else
                  {
                     // Derive the side length from the count: it must be a
                     // perfect square with an odd side of at least 3.
                     filterSize = TruncI( Sqrt( double( n ) ) );
                     if ( filterSize*filterSize != n )
                        PARSE_ERROR( "Non-square kernel filter defined", j );
                     if ( filterSize < 3 )
                        PARSE_ERROR( "The kernel filter is too small - 3x3 is the minimum required", j );
                     if ( (filterSize & 0x01) == 0 )
                        PARSE_ERROR( "Invalid even kernel filter dimension (" + String( filterSize ) + ')', j );
                  }

                  filterMatrix = KernelFilter::coefficient_matrix( C.Begin(), filterSize, filterSize );
                  // j now points just past the last coefficient token read.
                  j = p;
               }
               else if ( i->token == "row-vector" )
               {
                  // Only valid for separable filters, and only once per filter.
                  if ( filterType != SeparableFilterType )
                     PARSE_ERROR( "Invalid separable filter parameter", i );
                  if ( !rowVector.IsEmpty() )
                     PARSE_ERROR( "Duplicate row vector specification", i );

                  token_list::const_iterator p = j;
                  rowVector = SeparableFilter::coefficient_vector( n );
                  for ( SeparableFilter::coefficient* c = rowVector.Begin(); c < rowVector.End(); ++c, ++p )
                     *c = p->token.ToFloat();

                  if ( filterSize != 0 )
                  {
                     // The other vector already fixed the length.
                     if ( filterSize != n )
                        PARSE_ERROR( "Incongruent separable row filter length; expected " + String( filterSize ) + " coefficients", j );
                  }
                  else
                  {
                     // First vector seen: it defines the filter size, which
                     // must be odd and at least 3.
                     if ( n < 3 )
                        PARSE_ERROR( "Too few row filter coefficients specified - three or more coefficients are required", j );
                     if ( (n & 0x01) == 0 )
                        PARSE_ERROR( "Invalid even row filter length (" + String( n ) + ')', j );
                     filterSize = n;
                  }

                  j = p;
               }
               else if ( i->token == "col-vector" || i->token == "column-vector" )
               {
                  // Same rules as "row-vector", applied to the column vector.
                  if ( filterType != SeparableFilterType )
                     PARSE_ERROR( "Invalid separable filter parameter", i );
                  if ( !colVector.IsEmpty() )
                     PARSE_ERROR( "Duplicate column vector specification", i );

                  token_list::const_iterator p = j;
                  colVector = SeparableFilter::coefficient_vector( n );
                  for ( SeparableFilter::coefficient* c = colVector.Begin(); c != colVector.End(); ++c, ++p )
                     *c = p->token.ToFloat();

                  if ( filterSize != 0 )
                  {
                     if ( filterSize != n )
                        PARSE_ERROR( "Incongruent separable column filter length; expected " + String( filterSize ) + " coefficients", j );
                  }
                  else
                  {
                     if ( n < 3 )
                        PARSE_ERROR( "Too few column filter coefficients specified - three or more coefficients are required", j );
                     if ( (n & 0x01) == 0 )
                        PARSE_ERROR( "Invalid even column filter length (" + String( n ) + ')', j );
                     filterSize = n;
                  }

                  j = p;
               }
               else
                  PARSE_ERROR( "Unknown filter parameter '" + i->token + '\'', i );

               // Resume after the parameter value; the extra increment is
               // meant to skip a closing bracket around the value.
               // NOTE(review): every branch above leaves j advanced past the
               // value tokens (the name loop, or j = p), so d is measured to
               // the end of the value rather than to its start. With at least
               // one value token d is then always > 1 -- confirm this matches
               // the intent stated in the comment below.
               int d = Distance( i, j );
               i = k;
               if ( d > 1 ) // j-i > 1 if and only if value is enclosed by brackets
                  ++i;
            }
         }
         break;

      default:
         PARSE_ERROR( "Internal parser error", i );
      }
   }

   // Running out of tokens inside a definition means its '}' never appeared.
   if ( state != FilterTypeState )
      PARSE_ERROR( "Missing right bracket", tokens.At(tokens.UpperBound()));
}