// returns true if a template is valid bool rl_valid_template( production *prod ) { bool numeric_pref = false; bool var_pref = false; int num_actions = 0; for ( action *a = prod->action_list; a; a = a->next ) { num_actions++; if ( a->type == MAKE_ACTION ) { if ( a->preference_type == NUMERIC_INDIFFERENT_PREFERENCE_TYPE ) { numeric_pref = true; } else if ( a->preference_type == BINARY_INDIFFERENT_PREFERENCE_TYPE ) { if ( rhs_value_is_symbol( a->referent ) && ( rhs_value_to_symbol( a->referent )->id.common_symbol_info.symbol_type == VARIABLE_SYMBOL_TYPE ) ) var_pref = true; } } } return ( ( num_actions == 1 ) && ( numeric_pref || var_pref ) ); }
/* Walks an rhs value and calls mark_if_unmarked(thisAgent, tc, var_list) on
   every variable symbol found in it.  A value that is not a symbol is treated
   as a function call, and each element after the head of its funcall list is
   visited recursively. */
void add_bound_variables_in_rhs_value(agent* thisAgent, rhs_value rv, tc_number tc, cons** var_list)
{
    if (!rhs_value_is_symbol(rv))
    {
        /*- function calls: skip the head cell and recurse into each argument -*/
        cons* call = rhs_value_to_funcall_list(rv);
        cons* arg = call->rest;
        while (arg != NIL)
        {
            add_bound_variables_in_rhs_value(thisAgent, static_cast<char*>(arg->first), tc, var_list);
            arg = arg->rest;
        }
        return;
    }

    /*- ordinary values (i.e., symbols): only variables get marked -*/
    Symbol* candidate = rhs_value_to_symbol(rv);
    if (candidate->symbol_type == VARIABLE_SYMBOL_TYPE)
    {
        candidate->mark_if_unmarked(thisAgent, tc, var_list);
    }
}
/* Collects the variables used by a single action.
   - Make actions: the id symbol is marked if it is a variable, then the attr
     and value fields are scanned, plus the referent when the preference type
     is binary.
   - Any other action type (function call actions): only the value field is
     scanned. */
void add_all_variables_in_action(agent* thisAgent, action* a, tc_number tc, cons** var_list)
{
    if (a->type != MAKE_ACTION)
    {
        /*- function call actions -*/
        add_all_variables_in_rhs_value(thisAgent, a->value, tc, var_list);
        return;
    }

    /*- ordinary make actions -*/
    Symbol* target_id = rhs_value_to_symbol(a->id);
    if (target_id->is_variable())
    {
        target_id->mark_if_unmarked(thisAgent, tc, var_list);
    }

    add_all_variables_in_rhs_value(thisAgent, a->attr, tc, var_list);
    add_all_variables_in_rhs_value(thisAgent, a->value, tc, var_list);

    /* the referent is only consulted for binary preferences */
    if (preference_is_binary(a->preference_type))
    {
        add_all_variables_in_rhs_value(thisAgent, a->referent, tc, var_list);
    }
}
/* Returns the first letter associated with an rhs value: for symbols this is
   whatever first_letter_from_symbol() yields; everything else (function calls,
   reteloc's, unbound variables) maps to '*'. */
char first_letter_from_rhs_value(rhs_value rv)
{
    if (!rhs_value_is_symbol(rv))
    {
        return '*';
    }

    return first_letter_from_symbol(rhs_value_to_symbol(rv));
}
/* Returns TRUE when every variable in the rhs value carries the given tc
   number.  Non-variable symbols count as bound.  For function calls, every
   element after the head of the funcall list must itself satisfy the test;
   the scan stops at the first one that does not. */
Bool all_variables_in_rhs_value_bound (rhs_value rv, tc_number tc) {
  if (! rhs_value_is_funcall(rv)) {
    /* --- ordinary (symbol) rhs values --- */
    Symbol *candidate = rhs_value_to_symbol (rv);
    if (candidate->common.symbol_type!=VARIABLE_SYMBOL_TYPE)
      return TRUE;
    return (candidate->var.tc_num == tc);
  }

  /* --- function calls: check each argument in turn --- */
  list *call = rhs_value_to_funcall_list (rv);
  cons *arg = call->rest;
  while (arg!=NIL) {
    if (! all_variables_in_rhs_value_bound (static_cast<char *>(arg->first), tc))
      return FALSE;
    arg = arg->rest;
  }
  return TRUE;
}
// performs the rl update at a state void rl_perform_update( agent *my_agent, double op_value, bool op_rl, Symbol *goal, bool update_efr ) { bool using_gaps = ( my_agent->rl_params->temporal_extension->get_value() == soar_module::on ); if ( !using_gaps || op_rl ) { rl_data *data = goal->id.rl_info; if ( !data->prev_op_rl_rules->empty() ) { rl_et_map::iterator iter; double alpha = my_agent->rl_params->learning_rate->get_value(); double lambda = my_agent->rl_params->et_decay_rate->get_value(); double gamma = my_agent->rl_params->discount_rate->get_value(); double tolerance = my_agent->rl_params->et_tolerance->get_value(); double theta = my_agent->rl_params->meta_learning_rate->get_value(); // if temporal_discount is off, don't discount for gaps unsigned int effective_age = data->hrl_age + 1; if (my_agent->rl_params->temporal_discount->get_value() == soar_module::on) { effective_age += data->gap_age; } double discount = pow( gamma, static_cast< double >( effective_age ) ); // notify of gap closure if ( data->gap_age && using_gaps && my_agent->sysparams[ TRACE_RL_SYSPARAM ] ) { char buf[256]; SNPRINTF( buf, 254, "gap ended (%c%llu)", goal->id.name_letter, static_cast<long long unsigned>(goal->id.name_number) ); print( my_agent, buf ); xml_generate_warning( my_agent, buf ); } // Iterate through eligibility_traces, decay traces. If less than TOLERANCE, remove from map. 
if ( lambda == 0 ) { if ( !data->eligibility_traces->empty() ) { data->eligibility_traces->clear(); } } else { for ( iter = data->eligibility_traces->begin(); iter != data->eligibility_traces->end(); ) { iter->second *= lambda; iter->second *= discount; if ( iter->second < tolerance ) { data->eligibility_traces->erase( iter++ ); } else { ++iter; } } } // Update trace for just fired prods double sum_old_ecr = 0.0; double sum_old_efr = 0.0; if ( !data->prev_op_rl_rules->empty() ) { double trace_increment = ( 1.0 / static_cast<double>( data->prev_op_rl_rules->size() ) ); rl_rule_list::iterator p; for ( p=data->prev_op_rl_rules->begin(); p!=data->prev_op_rl_rules->end(); p++ ) { sum_old_ecr += (*p)->rl_ecr; sum_old_efr += (*p)->rl_efr; iter = data->eligibility_traces->find( (*p) ); if ( iter != data->eligibility_traces->end() ) { iter->second += trace_increment; } else { (*data->eligibility_traces)[ (*p) ] = trace_increment; } } } // For each prod with a trace, perform update { double old_ecr, old_efr; double delta_ecr, delta_efr; double new_combined, new_ecr, new_efr; double delta_t = (data->reward + discount * op_value) - (sum_old_ecr + sum_old_efr); for ( iter = data->eligibility_traces->begin(); iter != data->eligibility_traces->end(); iter++ ) { production *prod = iter->first; // get old vals old_ecr = prod->rl_ecr; old_efr = prod->rl_efr; // Adjust alpha based on decay policy // Miller 11/14/2011 double adjusted_alpha; switch (my_agent->rl_params->decay_mode->get_value()) { case rl_param_container::exponential_decay: adjusted_alpha = 1.0 / (prod->rl_update_count + 1.0); break; case rl_param_container::logarithmic_decay: adjusted_alpha = 1.0 / (log(prod->rl_update_count + 1.0) + 1.0); break; case rl_param_container::delta_bar_delta_decay: { // Note that in this case, x_i = 1.0 for all productions that are being updated. // Those values have been included here for consistency with the algorithm as described in the delta bar delta paper. 
prod->rl_delta_bar_delta_beta = prod->rl_delta_bar_delta_beta + theta * delta_t * 1.0 * prod->rl_delta_bar_delta_h; adjusted_alpha = exp(prod->rl_delta_bar_delta_beta); double decay_term = 1.0 - adjusted_alpha * 1.0 * 1.0; if (decay_term < 0.0) decay_term = 0.0; prod->rl_delta_bar_delta_h = prod->rl_delta_bar_delta_h * decay_term + adjusted_alpha * delta_t * 1.0; break; } case rl_param_container::normal_decay: default: adjusted_alpha = alpha; break; } // calculate updates delta_ecr = ( adjusted_alpha * iter->second * ( data->reward - sum_old_ecr ) ); if ( update_efr ) { delta_efr = ( adjusted_alpha * iter->second * ( ( discount * op_value ) - sum_old_efr ) ); } else { delta_efr = 0.0; } // calculate new vals new_ecr = ( old_ecr + delta_ecr ); new_efr = ( old_efr + delta_efr ); new_combined = ( new_ecr + new_efr ); // print as necessary if ( my_agent->sysparams[ TRACE_RL_SYSPARAM ] ) { std::ostringstream ss; ss << "RL update " << prod->name->sc.name << " " << old_ecr << " " << old_efr << " " << old_ecr + old_efr << " -> " << new_ecr << " " << new_efr << " " << new_combined ; std::string temp_str( ss.str() ); print( my_agent, "%s\n", temp_str.c_str() ); xml_generate_message( my_agent, temp_str.c_str() ); // Log update to file if the log file has been set std::string log_path = my_agent->rl_params->update_log_path->get_value(); if (!log_path.empty()) { std::ofstream file(log_path.c_str(), std::ios_base::app); file << ss.str() << std::endl; file.close(); } } // Change value of rule symbol_remove_ref( my_agent, rhs_value_to_symbol( prod->action_list->referent ) ); prod->action_list->referent = symbol_to_rhs_value( make_float_constant( my_agent, new_combined ) ); prod->rl_update_count += 1; prod->rl_ecr = new_ecr; prod->rl_efr = new_efr; // change documentation if ( my_agent->rl_params->meta->get_value() == soar_module::on ) { if ( prod->documentation ) { free_memory_block_for_string( my_agent, prod->documentation ); } std::stringstream doc_ss; const 
std::vector<std::pair<std::string, param_accessor<double> *> > &documentation_params = my_agent->rl_params->get_documentation_params(); for (std::vector<std::pair<std::string, param_accessor<double> *> >::const_iterator doc_params_it = documentation_params.begin(); doc_params_it != documentation_params.end(); ++doc_params_it) { doc_ss << doc_params_it->first << "=" << doc_params_it->second->get_param(prod) << ";"; } prod->documentation = make_memory_block_for_string(my_agent, doc_ss.str().c_str()); /* std::string rlupdates( "rlupdates=" ); std::string val; to_string( static_cast< uint64_t >( prod->rl_update_count ), val ); rlupdates.append( val ); prod->documentation = make_memory_block_for_string( my_agent, rlupdates.c_str() ); */ } // Change value of preferences generated by current instantiations of this rule if ( prod->instantiations ) { for ( instantiation *inst = prod->instantiations; inst; inst = inst->next ) { for ( preference *pref = inst->preferences_generated; pref; pref = pref->inst_next ) { symbol_remove_ref( my_agent, pref->referent ); pref->referent = make_float_constant( my_agent, new_combined ); } } } } } } data->gap_age = 0; data->hrl_age = 0; data->reward = 0.0; } }
/* Instantiates an rhs value and returns the resulting symbol, or NIL when a
   function call could not be evaluated because one of its arguments
   instantiated to NIL.

   rv             - the rhs value to instantiate
   new_id_level   - goal stack level passed to make_new_identifier()
   new_id_letter  - letter passed to make_new_identifier(); replaced by the
                    second character of the variable's name when the binding
                    slot currently holds a variable symbol
   tok, w         - token and wme handed to get_symbol_from_rete_loc() for
                    reteloc values

   Cases, tested in this order:
     symbol      - a reference is added and the symbol returned
     unboundvar  - the binding slot is looked up; an empty slot or one holding
                   a variable gets a freshly made identifier stored in it,
                   otherwise a reference is added to the symbol already there
     reteloc     - the symbol is fetched from the rete location and a
                   reference is added
     otherwise   - function call: arguments are instantiated recursively, the
                   function is invoked if none came back NIL, then the
                   argument symbols are dereferenced and the list freed */
Symbol *instantiate_rhs_value (rhs_value rv, goal_stack_level new_id_level,
                               char new_id_letter,
                               struct token_struct *tok, wme *w) {
  list *fl;
  list *arglist;
  cons *c, *prev_c, *arg_cons;
  rhs_function *rf;
  Symbol *result;
  bool nil_arg_found;

  if (rhs_value_is_symbol(rv)) {
    result = rhs_value_to_symbol(rv);
    symbol_add_ref (result);
    return result;
  }

  if (rhs_value_is_unboundvar(rv)) {
    long index;
    Symbol *sym;

    index = rhs_value_to_unboundvar(rv);
    /* keep track of the highest unbound-variable index seen so far */
    if (firer_highest_rhs_unboundvar_index < index)
      firer_highest_rhs_unboundvar_index = index;
    sym = *(current_agent(rhs_variable_bindings)+index);

    /* --- slot already holds a non-variable symbol: just share it --- */
    if (sym && (sym->common.symbol_type!=VARIABLE_SYMBOL_TYPE)) {
      symbol_add_ref (sym);
      return sym;
    }

    /* --- empty slot, or a variable: create an identifier and remember it.
           A variable supplies the letter from the second character of its
           name. --- */
    if (sym)
      new_id_letter = *(sym->var.name + 1);
    sym = make_new_identifier (new_id_letter, new_id_level);
    *(current_agent(rhs_variable_bindings)+index) = sym;
    return sym;
  }

  if (rhs_value_is_reteloc(rv)) {
    result = get_symbol_from_rete_loc ((unsigned short) rhs_value_to_reteloc_levels_up(rv),
                                       (byte)rhs_value_to_reteloc_field_num(rv),
                                       tok, w);
    symbol_add_ref (result);
    return result;
  }

  /* --- function call: the list head is the function, the rest its args.
         The cons fields are untyped, so convert them explicitly (an implicit
         conversion from a void pointer is not accepted by C++ compilers). --- */
  fl = rhs_value_to_funcall_list(rv);
  rf = (rhs_function *) fl->first;

  /* --- build up a list of the argument values --- */
  prev_c = NIL;
  nil_arg_found = FALSE;
  arglist = NIL; /* unnecessary, but gcc -Wall warns without it */
  for (arg_cons=fl->rest; arg_cons!=NIL; arg_cons=arg_cons->rest) {
    allocate_cons (&c);
    c->first = instantiate_rhs_value ((rhs_value) arg_cons->first, new_id_level,
                                      new_id_letter, tok, w);
    if (! c->first) nil_arg_found = TRUE;
    if (prev_c) prev_c->rest = c; else arglist = c;
    prev_c = c;
  }
  if (prev_c) prev_c->rest = NIL; else arglist = NIL;

  /* --- if all args were ok, call the function --- */
  if (!nil_arg_found)
    result = (*(rf->f))(arglist);
  else
    result = NIL;

  /* --- scan through arglist, dereference symbols and deallocate conses --- */
  for (c=arglist; c!=NIL; c=c->rest) {
    if (c->first) symbol_remove_ref ((Symbol *)(c->first));
  }
  free_list (arglist);

  return result;
}