// returns true if a template is valid
bool rl_valid_template( production *prod )
{
	bool numeric_pref = false;
	bool var_pref = false;
	int num_actions = 0;

	for ( action *a = prod->action_list; a; a = a->next ) 
	{
		num_actions++;
		if ( a->type == MAKE_ACTION )
		{
			if ( a->preference_type == NUMERIC_INDIFFERENT_PREFERENCE_TYPE )
			{
				numeric_pref = true;
			}
			else if ( a->preference_type == BINARY_INDIFFERENT_PREFERENCE_TYPE )
			{	
				if ( rhs_value_is_symbol( a->referent ) && ( rhs_value_to_symbol( a->referent )->id.common_symbol_info.symbol_type == VARIABLE_SYMBOL_TYPE ) )
					var_pref = true;
			}
		}
	}

	return ( ( num_actions == 1 ) && ( numeric_pref || var_pref ) );
}
Example #2
0
void add_bound_variables_in_rhs_value(agent* thisAgent,
                                      rhs_value rv, tc_number tc,
                                      cons** var_list)
{
    cons* fl;
    cons* c;
    Symbol* sym;

    if (rhs_value_is_symbol(rv))
    {
        /*- ordinary values (i.e., symbols) -*/
        sym = rhs_value_to_symbol(rv);
        if (sym->symbol_type == VARIABLE_SYMBOL_TYPE)
        {
            sym->mark_if_unmarked(thisAgent, tc, var_list);
        }
    }
    else
    {
        /*- function calls -*/
        fl = rhs_value_to_funcall_list(rv);
        for (c = fl->rest; c != NIL; c = c->rest)
        {
            add_bound_variables_in_rhs_value(thisAgent, static_cast<char*>(c->first), tc, var_list);
        }
    }
}
Example #3
0
void add_all_variables_in_action(agent* thisAgent, action* a,
                                 tc_number tc, cons** var_list)
{
    Symbol* id;

    if (a->type == MAKE_ACTION)
    {
        /*- ordinary make actions -*/
        id = rhs_value_to_symbol(a->id);
        if (id->is_variable())
        {
            id->mark_if_unmarked(thisAgent, tc, var_list);
        }
        add_all_variables_in_rhs_value(thisAgent, a->attr, tc, var_list);
        add_all_variables_in_rhs_value(thisAgent, a->value, tc, var_list);
        if (preference_is_binary(a->preference_type))
        {
            add_all_variables_in_rhs_value(thisAgent, a->referent, tc, var_list);
        }
    }
    else
    {
        /*- function call actions -*/
        add_all_variables_in_rhs_value(thisAgent, a->value, tc, var_list);
    }
}
Example #4
0
char first_letter_from_rhs_value(rhs_value rv)
{
    if (rhs_value_is_symbol(rv))
    {
        return first_letter_from_symbol(rhs_value_to_symbol(rv));
    }
    return '*'; /* function calls, reteloc's, unbound variables */
}
Example #5
0
Bool all_variables_in_rhs_value_bound (rhs_value rv, tc_number tc) {
  cons *c;
  list *fl;
  Symbol *sym;
  
  if (rhs_value_is_funcall(rv)) {
    /* --- function calls --- */
    fl = rhs_value_to_funcall_list (rv);
    for (c=fl->rest; c!=NIL; c=c->rest)
      if (! all_variables_in_rhs_value_bound (static_cast<char *>(c->first), tc))
        return FALSE;
    return TRUE;
  } else {
    /* --- ordinary (symbol) rhs values --- */
    sym = rhs_value_to_symbol (rv);
    if (sym->common.symbol_type==VARIABLE_SYMBOL_TYPE)
      return (sym->var.tc_num == tc);
    return TRUE;
  }
}
// performs the rl update at a state
void rl_perform_update( agent *my_agent, double op_value, bool op_rl, Symbol *goal, bool update_efr )
{
	bool using_gaps = ( my_agent->rl_params->temporal_extension->get_value() == soar_module::on );

	if ( !using_gaps || op_rl )
	{		
		rl_data *data = goal->id.rl_info;
		
		if ( !data->prev_op_rl_rules->empty() )
		{			
			rl_et_map::iterator iter;			
			double alpha = my_agent->rl_params->learning_rate->get_value();
			double lambda = my_agent->rl_params->et_decay_rate->get_value();
			double gamma = my_agent->rl_params->discount_rate->get_value();
			double tolerance = my_agent->rl_params->et_tolerance->get_value();
            double theta = my_agent->rl_params->meta_learning_rate->get_value();

			// if temporal_discount is off, don't discount for gaps
			unsigned int effective_age = data->hrl_age + 1;
			if (my_agent->rl_params->temporal_discount->get_value() == soar_module::on) {
				effective_age += data->gap_age;
			}
 
			double discount = pow( gamma, static_cast< double >( effective_age ) );

			// notify of gap closure
			if ( data->gap_age && using_gaps && my_agent->sysparams[ TRACE_RL_SYSPARAM ] )
			{
				char buf[256];
				SNPRINTF( buf, 254, "gap ended (%c%llu)", goal->id.name_letter, static_cast<long long unsigned>(goal->id.name_number) );

				print( my_agent, buf );
				xml_generate_warning( my_agent, buf );
			}			

			// Iterate through eligibility_traces, decay traces. If less than TOLERANCE, remove from map.
			if ( lambda == 0 )
			{
				if ( !data->eligibility_traces->empty() )
				{
					data->eligibility_traces->clear();
				}
			}
			else
			{
				for ( iter = data->eligibility_traces->begin(); iter != data->eligibility_traces->end(); )
				{
					iter->second *= lambda;
					iter->second *= discount;
					if ( iter->second < tolerance ) 
					{
						data->eligibility_traces->erase( iter++ );
					}
					else 
					{
						++iter;
					}
				}
			}
			
			// Update trace for just fired prods
			double sum_old_ecr = 0.0;
			double sum_old_efr = 0.0;
			if ( !data->prev_op_rl_rules->empty() )
			{
				double trace_increment = ( 1.0 / static_cast<double>( data->prev_op_rl_rules->size() ) );
				rl_rule_list::iterator p;
				
				for ( p=data->prev_op_rl_rules->begin(); p!=data->prev_op_rl_rules->end(); p++ )
				{
					sum_old_ecr += (*p)->rl_ecr;
					sum_old_efr += (*p)->rl_efr;
					
					iter = data->eligibility_traces->find( (*p) );
					
					if ( iter != data->eligibility_traces->end() ) 
					{
						iter->second += trace_increment;
					}
					else 
					{
						(*data->eligibility_traces)[ (*p) ] = trace_increment;
					}
				}
			}
			
			// For each prod with a trace, perform update
			{
				double old_ecr, old_efr;
				double delta_ecr, delta_efr;
				double new_combined, new_ecr, new_efr;
                double delta_t = (data->reward + discount * op_value) - (sum_old_ecr + sum_old_efr);
				
				for ( iter = data->eligibility_traces->begin(); iter != data->eligibility_traces->end(); iter++ )
				{	
					production *prod = iter->first;

					// get old vals
					old_ecr = prod->rl_ecr;
					old_efr = prod->rl_efr;

                    // Adjust alpha based on decay policy
                    // Miller 11/14/2011
                    double adjusted_alpha;
                    switch (my_agent->rl_params->decay_mode->get_value())
                    {
                        case rl_param_container::exponential_decay:
                            adjusted_alpha = 1.0 / (prod->rl_update_count + 1.0);
                            break;
                        case rl_param_container::logarithmic_decay:
                            adjusted_alpha = 1.0 / (log(prod->rl_update_count + 1.0) + 1.0);
                            break;
                        case rl_param_container::delta_bar_delta_decay:
                            {
                                // Note that in this case, x_i = 1.0 for all productions that are being updated.
                                // Those values have been included here for consistency with the algorithm as described in the delta bar delta paper.
                                prod->rl_delta_bar_delta_beta = prod->rl_delta_bar_delta_beta + theta * delta_t * 1.0 * prod->rl_delta_bar_delta_h;
                                adjusted_alpha = exp(prod->rl_delta_bar_delta_beta);
                                double decay_term = 1.0 - adjusted_alpha * 1.0 * 1.0;
                                if (decay_term < 0.0) decay_term = 0.0;
                                prod->rl_delta_bar_delta_h = prod->rl_delta_bar_delta_h * decay_term + adjusted_alpha * delta_t * 1.0;
                                break;
                            }
                        case rl_param_container::normal_decay:
                        default:
                            adjusted_alpha = alpha;
                            break;
                    }

                    // calculate updates
                    delta_ecr = ( adjusted_alpha * iter->second * ( data->reward - sum_old_ecr ) );

                    if ( update_efr )
                    {
                        delta_efr = ( adjusted_alpha * iter->second * ( ( discount * op_value ) - sum_old_efr ) );
                    }
                    else
					{
						delta_efr = 0.0;
					}					

					// calculate new vals
					new_ecr = ( old_ecr + delta_ecr );
					new_efr = ( old_efr + delta_efr );
					new_combined = ( new_ecr + new_efr );
					
					// print as necessary
					if ( my_agent->sysparams[ TRACE_RL_SYSPARAM ] ) 
					{
						std::ostringstream ss;						
						ss << "RL update " << prod->name->sc.name << " "
						   << old_ecr << " " << old_efr << " " << old_ecr + old_efr << " -> "
						   << new_ecr << " " << new_efr << " " << new_combined ;

						std::string temp_str( ss.str() );						
						print( my_agent, "%s\n", temp_str.c_str() );
						xml_generate_message( my_agent, temp_str.c_str() );

                        // Log update to file if the log file has been set
                        std::string log_path = my_agent->rl_params->update_log_path->get_value();
                        if (!log_path.empty()) {
                            std::ofstream file(log_path.c_str(), std::ios_base::app);
                            file << ss.str() << std::endl;
                            file.close();
                        }
                    }

                    // Change value of rule
                    symbol_remove_ref( my_agent, rhs_value_to_symbol( prod->action_list->referent ) );
                    prod->action_list->referent = symbol_to_rhs_value( make_float_constant( my_agent, new_combined ) );
                    prod->rl_update_count += 1;
                    prod->rl_ecr = new_ecr;
                    prod->rl_efr = new_efr;

                    // change documentation
                    if ( my_agent->rl_params->meta->get_value() == soar_module::on )
                    {
                        if ( prod->documentation )
                        {
                            free_memory_block_for_string( my_agent, prod->documentation );
                        }
                        std::stringstream doc_ss;
                        const std::vector<std::pair<std::string, param_accessor<double> *> > &documentation_params = my_agent->rl_params->get_documentation_params();
                        for (std::vector<std::pair<std::string, param_accessor<double> *> >::const_iterator doc_params_it = documentation_params.begin();
                                doc_params_it != documentation_params.end(); ++doc_params_it) {
                            doc_ss << doc_params_it->first << "=" << doc_params_it->second->get_param(prod) << ";";
                        }
                        prod->documentation = make_memory_block_for_string(my_agent, doc_ss.str().c_str());

                        /*
						std::string rlupdates( "rlupdates=" );
						std::string val;
						to_string( static_cast< uint64_t >( prod->rl_update_count ), val );
						rlupdates.append( val );

						prod->documentation = make_memory_block_for_string( my_agent, rlupdates.c_str() );
                        */
					}

					// Change value of preferences generated by current instantiations of this rule
					if ( prod->instantiations )
					{
						for ( instantiation *inst = prod->instantiations; inst; inst = inst->next )
						{
							for ( preference *pref = inst->preferences_generated; pref; pref = pref->inst_next )
							{
								symbol_remove_ref( my_agent, pref->referent );
								pref->referent = make_float_constant( my_agent, new_combined );
							}
						}
					}	
				}
			}
		}

		data->gap_age = 0;
		data->hrl_age = 0;
		data->reward = 0.0;
	}
}
Example #7
0
Symbol *instantiate_rhs_value (rhs_value rv, goal_stack_level new_id_level,
                               char new_id_letter,
                               struct token_struct *tok, wme *w) {
  list *fl;
  list *arglist;
  cons *c, *prev_c, *arg_cons;
  rhs_function *rf;
  Symbol *result;
  bool nil_arg_found;
  
  if (rhs_value_is_symbol(rv)) {
    result = rhs_value_to_symbol(rv);
    symbol_add_ref (result);
    return result;
  }

  if (rhs_value_is_unboundvar(rv)) {
    long index;
    Symbol *sym;

    index = rhs_value_to_unboundvar(rv);
    if (firer_highest_rhs_unboundvar_index < index)
      firer_highest_rhs_unboundvar_index = index;
    sym = *(current_agent(rhs_variable_bindings)+index);

    if (!sym) {
      sym = make_new_identifier (new_id_letter, new_id_level);
      *(current_agent(rhs_variable_bindings)+index) = sym;
      return sym;
    } else if (sym->common.symbol_type==VARIABLE_SYMBOL_TYPE) {
      new_id_letter = *(sym->var.name + 1);
      sym = make_new_identifier (new_id_letter, new_id_level);
      *(current_agent(rhs_variable_bindings)+index) = sym;
      return sym;
    } else {
      symbol_add_ref (sym);
      return sym;
    }
  }

  if (rhs_value_is_reteloc(rv)) {
    result = get_symbol_from_rete_loc ((unsigned short) rhs_value_to_reteloc_levels_up(rv),
                                       (byte)rhs_value_to_reteloc_field_num(rv),
                                       tok, w);
    symbol_add_ref (result);
    return result;
  }

  fl = rhs_value_to_funcall_list(rv);
  rf = fl->first;

  /* --- build up a list of the argument values --- */
  prev_c = NIL;
  nil_arg_found = FALSE;
  arglist = NIL; /* unnecessary, but gcc -Wall warns without it */
  for (arg_cons=fl->rest; arg_cons!=NIL; arg_cons=arg_cons->rest) {
    allocate_cons (&c);
    c->first = instantiate_rhs_value (arg_cons->first, new_id_level,
                                      new_id_letter, tok, w);
    if (! c->first) nil_arg_found = TRUE;
    if (prev_c) prev_c->rest = c; else arglist = c;
    prev_c = c;
  }
  if (prev_c) prev_c->rest = NIL; else arglist = NIL;

  /* --- if all args were ok, call the function --- */
  if (!nil_arg_found)
    result = (*(rf->f))(arglist);
  else
    result = NIL;

  /* --- scan through arglist, dereference symbols and deallocate conses --- */
  for (c=arglist; c!=NIL; c=c->rest)
    if (c->first) symbol_remove_ref ((Symbol *)(c->first));
  free_list (arglist);

  return result;
}