/**
 * Scores an n-gram by exact lookup, backing off to shorter n-grams when the
 * full sequence is not stored.
 *
 * - A single token returns its stored `prob`, or `unk_node_.prob` when the
 *   token is not in the order-1 table.
 * - A longer sequence returns its stored `prob` when present. Otherwise the
 *   function recurses on the sequence without its first token and, when the
 *   history (the sequence without its last token) is stored, adds that
 *   history's `backoff` value.
 *
 * @param tokens the n-gram to score (taken by value; modified locally)
 * @return the score of the n-gram
 * @throws language_model_exception if `tokens` is empty
 */
float language_model::prob_calc(token_list tokens) const
{
    if (tokens.size() == 0)
        throw language_model_exception{"prob_calc: tokens is empty!"};

    // Single token: direct lookup with the unknown-token node as fallback.
    if (tokens.size() == 1)
    {
        auto unigram = lm_[0].find(token_list{tokens[0]});
        if (unigram)
            return unigram->prob;
        return unk_node_.prob;
    }

    // Exact match in the table that holds n-grams of this length.
    auto exact = lm_[tokens.size() - 1].find(tokens);
    if (exact)
        return exact->prob;

    // Back off: the history drops the last token, the remaining sequence
    // drops the first one.
    auto history = tokens;
    history.pop_back();
    tokens.pop_front();

    if (tokens.size() == 1)
    {
        // The history is a single token here; if it is not in the order-1
        // table, substitute the "<unk>" vocabulary entry before the lookup.
        auto known_history = lm_[0].find(history[0]);
        if (!known_history)
            history[0] = vocabulary_.at("<unk>");
    }

    auto history_node = lm_[history.size() - 1].find(history);
    if (history_node)
        return history_node->backoff + prob_calc(tokens);
    return prob_calc(tokens);
}
/**
 * Compares two token lists position by position.
 *
 * The lists are considered equal when they have the same length and, at every
 * index, both tokens have the same type. For tokens of type token_type::VAR
 * the variable names must match as well. No other token attribute is compared.
 *
 * @param op1 first token list
 * @param op2 second token list
 * @return true when the lists are equal in the sense above, false otherwise
 */
bool is_equal(token_list& op1, token_list& op2)
{
    // Keep the container's own size type: storing size() in an int would
    // narrow the value and mix signed and unsigned operands in the
    // comparisons below.
    const auto count = op1.size();
    if (count != op2.size())
        return false;

    for (decltype(op1.size()) i = 0; i < count; ++i)
    {
        const auto lhs_type = op1[i].get_type();
        if (lhs_type != op2[i].get_type())
            return false;

        // Both types are equal at this point, so checking one side for VAR
        // is sufficient before comparing the variable names.
        if (lhs_type == token_type::VAR
            && op1[i].get_var_name() != op2[i].get_var_name())
            return false;
    }
    return true;
}
/**
 * Drops every token whose `type` is type::EMPTY from the list.
 *
 * The surviving tokens are copied, in their original order, into a temporary
 * list which is then assigned back to `tokens_`.
 *
 * @param tokens_ list to filter; replaced by the filtered copy
 */
void clean_token_list(token_list &tokens_)
{
    token_list kept;
    for (const auto& entry : tokens_)
    {
        if (entry.type != type::EMPTY)
            kept.push_back(entry);
    }
    tokens_ = kept;
}
float language_model::log_prob(const token_list& tokens) const { float prob = 0.0f; lm::lm_state state; lm::lm_state state_next; for (const auto& token : tokens.tokens()) { prob += score(state, token, state_next); state = state_next; } return prob; }
float language_model::log_prob(const token_list& tokens) const { using diff_type = decltype(tokens.tokens().begin())::difference_type; float prob = 0.0f; // tokens < N for (uint64_t i = 0; i < N_ - 1 && i < tokens.size(); ++i) { prob += prob_calc(tokens.tokens().begin(), tokens.tokens().begin() + static_cast<diff_type>(i) + 1); } // tokens >= N for (uint64_t i = N_ - 1; i < tokens.size(); ++i) { prob += prob_calc( tokens.tokens().begin() + static_cast<diff_type>(i - N_ + 1), tokens.tokens().begin() + static_cast<diff_type>(i) + 1); } return prob; }
void carma::rules::script_command_rule::apply(carma::compiler::context& a_scope, token_list &tokens_, token_entry& start_entry_, token_entry& end_entry_) { auto types = script_command_parser::AllTypesForName(start_entry_->val); if (types.size() == 0) { throw compiler::exception::syntax_error("Not a valid script command ("+ start_entry_->val +")"); } bool valid = false; std::vector<std::string> errors_found; auto next = std::next(start_entry_); for (auto type : types) { if (type == script_command::Type::NONE) { // always valid ? valid = true; return; } else if (type == script_command::Type::LEFT_RIGHT) { bool rightSide = false; bool leftSide = false; // check right side if (next != end_entry_) { if (next->val != "}" && next->val != ")" && next->val != "]" && next->val != ";" && next->val != "=" && next->val != "<" && next->val != ">") { rightSide = true; } else { errors_found.push_back("Invalid Syntax for binary script command " + start_entry_->val + " With next: " + next->val); } } else { errors_found.push_back("Missing argument for binary script command " + start_entry_->val + " End of statement?"); } // Check left side if (start_entry_ == tokens_.begin()) { // nothing in front so... 
error errors_found.push_back("Missing argument for binary script command " + start_entry_->val); } else { auto prev = std::prev(start_entry_); if (prev->val != "{" && prev->val != "(" && prev->val != "[" && prev->val != ";" && prev->val != "=" && prev->val != "<" && prev->val != ">") { leftSide = true; } else { errors_found.push_back("Invalid Syntax for binary script command " + start_entry_->val + " With prev: " + prev->val); } } if (leftSide && rightSide) valid = true; } else if (type == script_command::Type::RIGHT) { // validate that we have some statement to our right if (next != end_entry_) { if (next->val != "}" && next->val != ")" && next->val != "]" && next->val != ";" && next->val != "=" && next->val != "<" && next->val != ">") { valid = true; } else { errors_found.push_back("Invalid Syntax for unary script command " + start_entry_->val + " With next: " + next->val); } } else { errors_found.push_back("Missing argument for unary script command " + start_entry_->val + " End of statement?"); } } }; if (!valid) { if (errors_found.size() == 0) errors_found.push_back("Unknown error"); throw compiler::exception::syntax_error("Syntax Error at script command (" + start_entry_->val + ")" + errors_found.at(0)); } }
void FilterParser::Parse( const token_list& tokens ) { filters.Clear(); int filterType = UnknownFilterType; String filterName; int filterSize = 0; KernelFilter::coefficient_matrix filterMatrix; SeparableFilter::coefficient_vector rowVector, colVector; int state = FilterTypeState; for ( token_list::const_iterator i = tokens.Begin(); i != tokens.End(); ) { switch ( state ) { case FilterTypeState: { if ( i->token == "KernelFilter" ) filterType = KernelFilterType; else if ( i->token == "SeparableFilter" ) filterType = SeparableFilterType; else if ( i->token == '{' || i->token == '}' ) PARSE_ERROR( "Misplaced bracket", i ); else if ( !i->token.IsValidIdentifier() ) PARSE_ERROR( "Invalid filter type \'" + i->token + '\'', i ); else PARSE_ERROR( "Unknown filter type \'" + i->token + '\'', i ); if ( ++i == tokens.End() || i->token != '{' ) PARSE_ERROR( "Expected left bracket", i ); state = FilterParameterState; ++i; } break; case FilterParameterState: { if ( i->token == '}' ) { if ( filterName.IsEmpty() ) PARSE_ERROR( "Missing filter name", i ); if ( filterSize == 0 ) PARSE_ERROR( "Empty filter definition", i ); if ( filterType == KernelFilterType ) { if ( filterMatrix.IsEmpty() ) PARSE_ERROR( "Missing kernel filter coefficients", i ); filters.Add( Filter( KernelFilter( filterMatrix, filterName ) ) ); } else if ( filterType == SeparableFilterType ) { if ( rowVector.IsEmpty() || colVector.IsEmpty() ) PARSE_ERROR( "Missing separable filter coefficients", i ); filters.Add( Filter( SeparableFilter( rowVector, colVector, filterName ) ) ); } else PARSE_ERROR( "Internal parser error", i ); filterName.Clear(); filterSize = 0; filterMatrix = KernelFilter::coefficient_matrix(); rowVector = colVector = SeparableFilter::coefficient_vector(); state = FilterTypeState; ++i; } else { token_list::const_iterator j = i; ++j; token_list::const_iterator k = tokens.End(); CaptureParameterValueTokens( j, k ); int n = Distance( j, k ); if ( i->token == "name" ) { if ( n == 0 ) PARSE_ERROR( 
"Expected a filter name", i ); if ( !filterName.IsEmpty() ) PARSE_ERROR( "Duplicate filter name", i ); filterName = j->token; for ( ; ++j < k; ) filterName += ' ' + j->token; } else if ( i->token == "coefficients" ) { if ( filterType != KernelFilterType ) PARSE_ERROR( "Invalid kernel filter parameter", i ); if ( !filterMatrix.IsEmpty() ) PARSE_ERROR( "Duplicate kernel filter coefficients", i ); token_list::const_iterator p = j; GenericVector<KernelFilter::coefficient> C( n ); for ( KernelFilter::coefficient* c = C.Begin(); c < C.End(); ++c, ++p ) *c = p->token.ToFloat(); int numberOfCoefficients = filterSize*filterSize; if ( numberOfCoefficients != 0 ) { if ( numberOfCoefficients != n ) PARSE_ERROR( "Incongruent kernel filter size; expected " + String( numberOfCoefficients ) + " coefficients", j ); } else { filterSize = TruncI( Sqrt( double( n ) ) ); if ( filterSize*filterSize != n ) PARSE_ERROR( "Non-square kernel filter defined", j ); if ( filterSize < 3 ) PARSE_ERROR( "The kernel filter is too small - 3x3 is the minimum required", j ); if ( (filterSize & 0x01) == 0 ) PARSE_ERROR( "Invalid even kernel filter dimension (" + String( filterSize ) + ')', j ); } filterMatrix = KernelFilter::coefficient_matrix( C.Begin(), filterSize, filterSize ); j = p; } else if ( i->token == "row-vector" ) { if ( filterType != SeparableFilterType ) PARSE_ERROR( "Invalid separable filter parameter", i ); if ( !rowVector.IsEmpty() ) PARSE_ERROR( "Duplicate row vector specification", i ); token_list::const_iterator p = j; rowVector = SeparableFilter::coefficient_vector( n ); for ( SeparableFilter::coefficient* c = rowVector.Begin(); c < rowVector.End(); ++c, ++p ) *c = p->token.ToFloat(); if ( filterSize != 0 ) { if ( filterSize != n ) PARSE_ERROR( "Incongruent separable row filter length; expected " + String( filterSize ) + " coefficients", j ); } else { if ( n < 3 ) PARSE_ERROR( "Too few row filter coefficients specified - three or more coefficients are required", j ); if ( (n & 
0x01) == 0 ) PARSE_ERROR( "Invalid even row filter length (" + String( n ) + ')', j ); filterSize = n; } j = p; } else if ( i->token == "col-vector" || i->token == "column-vector" ) { if ( filterType != SeparableFilterType ) PARSE_ERROR( "Invalid separable filter parameter", i ); if ( !colVector.IsEmpty() ) PARSE_ERROR( "Duplicate column vector specification", i ); token_list::const_iterator p = j; colVector = SeparableFilter::coefficient_vector( n ); for ( SeparableFilter::coefficient* c = colVector.Begin(); c != colVector.End(); ++c, ++p ) *c = p->token.ToFloat(); if ( filterSize != 0 ) { if ( filterSize != n ) PARSE_ERROR( "Incongruent separable column filter length; expected " + String( filterSize ) + " coefficients", j ); } else { if ( n < 3 ) PARSE_ERROR( "Too few column filter coefficients specified - three or more coefficients are required", j ); if ( (n & 0x01) == 0 ) PARSE_ERROR( "Invalid even column filter length (" + String( n ) + ')', j ); filterSize = n; } j = p; } else PARSE_ERROR( "Unknown filter parameter '" + i->token + '\'', i ); int d = Distance( i, j ); i = k; if ( d > 1 ) // j-i > 1 if and only if value is enclosed by brackets ++i; } } break; default: PARSE_ERROR( "Internal parser error", i ); } } if ( state != FilterTypeState ) PARSE_ERROR( "Missing right bracket", tokens.At(tokens.UpperBound())); }