int main(int argc, char **argv) { int i, len; double eval, clk; long long ncycles_ref, counter; double eptime; double add_delay; struct cfg cf; char buf[256]; struct recfilter loop_error; struct PFD phase_detector; useconds_t usleep_time; struct sched_param sparam; #if RTPP_DEBUG double sleep_time, filter_lastval; #endif memset(&cf, 0, sizeof(cf)); cf.stable = malloc(sizeof(struct rtpp_cfg_stable)); if (cf.stable == NULL) { err(1, "can't allocate memory for the struct rtpp_cfg_stable"); /* NOTREACHED */ } memset(cf.stable, '\0', sizeof(struct rtpp_cfg_stable)); cf.stable->ctrl_socks = malloc(sizeof(struct rtpp_list)); if (cf.stable->ctrl_socks == NULL) { err(1, "can't allocate memory for the struct rtpp_cfg_stable"); /* NOTREACHED */ } memset(cf.stable->ctrl_socks, '\0', sizeof(struct rtpp_list)); RTPP_LIST_RESET(cf.stable->ctrl_socks); init_config(&cf, argc, argv); seedrandom(); cf.stable->sessions_ht = rtpp_hash_table_ctor(); if (cf.stable->sessions_ht == NULL) { err(1, "can't allocate memory for the hash table"); /* NOTREACHED */ } cf.stable->rtpp_stats = rtpp_stats_ctor(); if (cf.stable->rtpp_stats == NULL) { err(1, "can't allocate memory for the stats data"); /* NOTREACHED */ } init_port_table(&cf); if (rtpp_controlfd_init(&cf) != 0) { err(1, "can't inilialize control socket%s", cf.stable->ctrl_socks->len > 1 ? "s" : ""); } if (cf.stable->nodaemon == 0) { if (rtpp_daemon(0, 0) == -1) err(1, "can't switch into daemon mode"); /* NOTREACHED */ } if (rtpp_notify_init() != 0) errx(1, "can't start notification thread"); cf.stable->glog = rtpp_log_open(cf.stable, "rtpproxy", NULL, LF_REOPEN); rtpp_log_setlevel(cf.stable->glog, cf.stable->log_level); _sig_cf = &cf; atexit(ehandler); rtpp_log_write(RTPP_LOG_INFO, cf.stable->glog, "rtpproxy started, pid %d", getpid()); i = open(cf.stable->pid_file, O_WRONLY | O_CREAT | O_TRUNC, DEFFILEMODE); if (i >= 0) { len = sprintf(buf, "%u\n", (unsigned int)getpid()); write(i, buf, len); close(i); } else { rtpp_log_ewrite(RTPP_LOG_ERR, cf.stable->glog, "can't open pidfile for writing"); } signal(SIGHUP, sighup); signal(SIGINT, fatsignal); signal(SIGKILL, fatsignal); signal(SIGPIPE, SIG_IGN); signal(SIGTERM, fatsignal); signal(SIGXCPU, fatsignal); signal(SIGXFSZ, fatsignal); signal(SIGVTALRM, fatsignal); signal(SIGPROF, fatsignal); signal(SIGUSR1, fatsignal); signal(SIGUSR2, fatsignal); if (cf.stable->sched_policy != SCHED_OTHER) { sparam.sched_priority = sched_get_priority_max(cf.stable->sched_policy); if (sched_setscheduler(0, cf.stable->sched_policy, &sparam) == -1) { rtpp_log_ewrite(RTPP_LOG_ERR, cf.stable->glog, "sched_setscheduler(SCHED_%s, %d)", (cf.stable->sched_policy == SCHED_FIFO) ? "FIFO" : "RR", sparam.sched_priority); } } if (cf.stable->run_uname != NULL || cf.stable->run_gname != NULL) { if (drop_privileges(&cf) != 0) { rtpp_log_ewrite(RTPP_LOG_ERR, cf.stable->glog, "can't switch to requested user/group"); exit(1); } } set_rlimits(&cf); cf.sessinfo.sessions[0] = NULL; cf.sessinfo.nsessions = 0; cf.rtp_nsessions = 0; rtpp_command_async_init(&cf); rtpp_proc_async_init(&cf); counter = 0; recfilter_init(&loop_error, 0.96, 0.0, 0); PFD_init(&phase_detector, 2.0); #ifdef HAVE_SYSTEMD_SD_DAEMON_H sd_notify(0, "READY=1"); #endif #ifdef RTPP_CHECK_LEAKS rtpp_memdeb_setbaseln(); #endif for (;;) { eptime = getdtime(); clk = (eptime + cf.stable->sched_offset) * cf.stable->target_pfreq; ncycles_ref = llrint(clk); eval = PFD_get_error(&phase_detector, clk); #if RTPP_DEBUG filter_lastval = loop_error.lastval; #endif if (eval != 0.0) { recfilter_apply(&loop_error, sigmoid(eval)); } #if RTPP_DEBUG if (counter % (unsigned int)cf.stable->target_pfreq == 0 || counter < 1000) { rtpp_log_write(RTPP_LOG_DBUG, cf.stable->glog, "run %lld ncycles %f raw error1 %f, filter lastval %f, filter nextval %f", counter, clk, eval, filter_lastval, loop_error.lastval); } #endif add_delay = freqoff_to_period(cf.stable->target_pfreq, 1.0, loop_error.lastval); usleep_time = add_delay * 1000000.0; #if RTPP_DEBUG if (counter % (unsigned int)cf.stable->target_pfreq == 0 || counter < 1000) { rtpp_log_write(RTPP_LOG_DBUG, cf.stable->glog, "run %lld filter lastval %f, filter nextval %f, error %f", counter, filter_lastval, loop_error.lastval, sigmoid(eval)); rtpp_log_write(RTPP_LOG_DBUG, cf.stable->glog, "run %lld extra sleeping time %llu", counter, usleep_time); } sleep_time = getdtime(); #endif rtpp_proc_async_wakeup(cf.stable->rtpp_proc_cf, counter, ncycles_ref); usleep(usleep_time); #if RTPP_DEBUG sleep_time = getdtime() - sleep_time; if (counter % (unsigned int)cf.stable->target_pfreq == 0 || counter < 1000 || sleep_time > add_delay * 2.0) { rtpp_log_write(RTPP_LOG_DBUG, cf.stable->glog, "run %lld sleeping time required %llu sleeping time actual %f, CSV: %f,%f,%f", \ counter, usleep_time, sleep_time, (double)counter / cf.stable->target_pfreq, ((double)usleep_time) / 1000.0, sleep_time * 1000.0); } #endif counter += 1; if (cf.stable->slowshutdown != 0) { pthread_mutex_lock(&cf.sessinfo.lock); if (cf.sessinfo.nsessions == 0) { /* The below unlock is not necessary, but does not hurt either */ pthread_mutex_unlock(&cf.sessinfo.lock); rtpp_log_write(RTPP_LOG_INFO, cf.stable->glog, "deorbiting-burn sequence completed, exiting"); break; } pthread_mutex_unlock(&cf.sessinfo.lock); } } #ifdef HAVE_SYSTEMD_SD_DAEMON_H sd_notify(0, "STATUS=Exited"); #endif #ifdef RTPP_CHECK_LEAKS exit(rtpp_memdeb_dumpstats(&cf) == 0 ? 0 : 1); #else exit(0); #endif }
bool LogisticRegression::train(LabelledRegressionData trainingData){ const unsigned int M = trainingData.getNumSamples(); const unsigned int N = trainingData.getNumInputDimensions(); const unsigned int K = trainingData.getNumTargetDimensions(); trained = false; trainingResults.clear(); if( M == 0 ){ errorLog << "train(LabelledRegressionData trainingData) - Training data has zero samples!" << endl; return false; } if( K == 0 ){ errorLog << "train(LabelledRegressionData trainingData) - The number of target dimensions is not 1!" << endl; return false; } numFeatures = N; numOutputDimensions = 1; //Logistic Regression will have 1 output inputVectorRanges.clear(); targetVectorRanges.clear(); //Scale the training and validation data, if needed if( useScaling ){ //Find the ranges for the input data inputVectorRanges = trainingData.getInputRanges(); //Find the ranges for the target data targetVectorRanges = trainingData.getTargetRanges(); //Scale the training data trainingData.scale(inputVectorRanges,targetVectorRanges,0.0,1.0); } //Reset the weights Random rand; w0 = rand.getRandomNumberUniform(-0.1,0.1); w.resize(N); for(UINT j=0; j<N; j++){ w[j] = rand.getRandomNumberUniform(-0.1,0.1); } double error = 0; double lastSquaredError = 0; double delta = 0; UINT iter = 0; bool keepTraining = true; Random random; vector< UINT > randomTrainingOrder(M); TrainingResult result; trainingResults.reserve(M); //In most cases, the training data is grouped into classes (100 samples for class 1, followed by 100 samples for class 2, etc.) //This can cause a problem for stochastic gradient descent algorithm. To avoid this issue, we randomly shuffle the order of the //training samples. This random order is then used at each epoch. for(UINT i=0; i<M; i++){ randomTrainingOrder[i] = i; } std::random_shuffle(randomTrainingOrder.begin(), randomTrainingOrder.end()); //Run the main stochastic gradient descent training algorithm while( keepTraining ){ //Run one epoch of training using stochastic gradient descent totalSquaredTrainingError = 0; for(UINT m=0; m<M; m++){ //Select the random sample UINT i = randomTrainingOrder[m]; //Compute the error, given the current weights VectorDouble x = trainingData[i].getInputVector(); VectorDouble y = trainingData[i].getTargetVector(); double h = w0; for(UINT j=0; j<N; j++){ h += x[j] * w[j]; } error = y[0] - sigmoid( h ); totalSquaredTrainingError += SQR(error); //Update the weights for(UINT j=0; j<N; j++){ w[j] += learningRate * error * x[j]; } w0 += learningRate * error; } //Compute the error delta = fabs( totalSquaredTrainingError-lastSquaredError ); lastSquaredError = totalSquaredTrainingError; //Check to see if we should stop if( delta <= minChange ){ keepTraining = false; } if( ++iter >= maxNumIterations ){ keepTraining = false; } if( isinf( totalSquaredTrainingError ) || isnan( totalSquaredTrainingError ) ){ errorLog << "train(LabelledRegressionData &trainingData) - Training failed! Total squared error is NAN. If scaling is not enabled then you should try to scale your data and see if this solves the issue." << endl; return false; } //Store the training results rootMeanSquaredTrainingError = sqrt( totalSquaredTrainingError / double(M) ); result.setRegressionResult(iter,totalSquaredTrainingError,rootMeanSquaredTrainingError); trainingResults.push_back( result ); //Notify any observers of the new training data trainingResultsObserverManager.notifyObservers( result ); trainingLog << "Epoch: " << iter << " SSE: " << totalSquaredTrainingError << " Delta: " << delta << endl; } //Flag that the algorithm has been trained regressionData.resize(1,0); trained = true; return trained; }
/** * @function addFootstep * @brief Need more explanation? */ void ZMPWalkGenerator::addFootstep(const Footprint& fp) { // initialize our timer GaitTimer timer; timer.single_support_time = min_single_support_time; timer.double_support_time = min_double_support_time; timer.startup_time = walk_startup_time; timer.startup_time = walk_shutdown_time; // grab the initial position of the zmp const ZMPReferenceContext start_context = getLastRef(); // figure out the stances for this movement stance_t double_stance = fp.is_left ? DOUBLE_RIGHT : DOUBLE_LEFT; stance_t single_stance = fp.is_left ? SINGLE_RIGHT : SINGLE_LEFT; if (step_height == 0) { single_stance = double_stance; } // figure out swing foot and stance foot for accessing int swing_foot = fp.is_left ? 0 : 1; int stance_foot = 1-swing_foot; // figure out where our body is going to end up if we put our foot there quat body_rot_end = quat::slerp(start_context.feet[swing_foot].rotation(), fp.transform.rotation(), 0.5); // figure out the start and end positions of the zmp vec3 zmp_end = start_context.feet[stance_foot] * vec3(zmpoff_x, zmpoff_y, 0); vec3 zmp_start = vec3(start_context.pX, start_context.pY, 0); // figure out how far the swing foot will be moving double dist = (fp.transform.translation() - start_context.feet[swing_foot].translation()).norm(); // turns out that extracting plane angles from 3d rotations is a bit annoying. oh well. vec3 rotation_intermediate = fp.transform.rotFwd() * vec3(1.0, 0.0, 0.0) + start_context.feet[swing_foot].rotFwd() * vec3(1.0, 0.0, 0.0); double dist_theta = atan2(rotation_intermediate.y(), rotation_intermediate.x()); // hooray for bad code! // figure out how long we spend in each stage // TODO: // dist for double support should be distance ZMP moves // no theta or step height needed for double support // // dist for single support should be distance SWING FOOT moves // that also includes change in theta, and step height size_t double_ticks = timer.compute_double(dist); size_t single_ticks = timer.compute_single(dist, dist_theta, step_height); //size_t double_ticks = TRAJ_FREQ_HZ * min_double_support_time; //size_t single_ticks = TRAJ_FREQ_HZ * min_single_support_time; for (size_t i = 0; i < double_ticks; i++) { // sigmoidally interopolate things like desired ZMP and body // rotation. we run the sigmoid across both double and isngle // support so we don't try to whip the body across for the // split-second we're in double support. double u = double(i) / double(double_ticks - 1); double c = sigmoid(u); double uu = double(i) / double(double_ticks + single_ticks - 1); double cc = sigmoid(uu); ZMPReferenceContext cur_context = start_context; cur_context.stance = double_stance; vec3 cur_zmp = zmp_start + (zmp_end - zmp_start) * c; cur_context.pX = cur_zmp.x(); cur_context.pY = cur_zmp.y(); // ANA'S COMMENT //cur_context.state.body_rot = quat::slerp(start_context.state.body_rot, // body_rot_end, cc); // END ANA'S COMMENT ref.push_back(cur_context); } double swing_foot_traj[single_ticks][3]; double swing_foot_angle[single_ticks]; // note: i'm not using the swing_foot_angle stuff cause it seemed to not work swing2Cycloids(start_context.feet[swing_foot].translation().x(), start_context.feet[swing_foot].translation().y(), start_context.feet[swing_foot].translation().z(), fp.transform.translation().x(), fp.transform.translation().y(), fp.transform.translation().z(), single_ticks, fp.is_left, step_height, swing_foot_traj, swing_foot_angle); quat foot_start_rot = start_context.feet[swing_foot].rotation(); quat foot_end_rot = fp.transform.rotation(); // we want to have a small deadband at the front and end when we rotate the foot around // so it has no rotational velocity during takeoff and landing size_t rot_dead_ticks = 0.1 * TRAJ_FREQ_HZ; if (single_ticks < 4*rot_dead_ticks) { rot_dead_ticks = single_ticks / 4; } assert(single_ticks > rot_dead_ticks * 2); size_t central_ticks = single_ticks - 2*rot_dead_ticks; for (size_t i = 0; i < single_ticks; i++) { double uu = double(i + double_ticks) / double(double_ticks + single_ticks - 1); double cc = sigmoid(uu); double ru; if (i < rot_dead_ticks) { ru = 0; } else if (i < rot_dead_ticks + central_ticks) { ru = double(i - rot_dead_ticks) / double(central_ticks - 1); } else { ru = 1; } ref.push_back(getLastRef()); ZMPReferenceContext& cur_context = ref.back(); cur_context.stance = single_stance; // ANA'S C OMMENT // cur_context.state.body_rot = quat::slerp(start_context.state.body_rot, // body_rot_end, cc); // END ANA'S COMMENT cur_context.feet[swing_foot].setRotation(quat::slerp(foot_start_rot, foot_end_rot, ru)); cur_context.feet[swing_foot].setTranslation(vec3(swing_foot_traj[i][0], swing_foot_traj[i][1], swing_foot_traj[i][2])); } // finally, update the first step variable if necessary if (first_step_index == size_t(-1)) { // we haven't taken a first step yet first_step_index = ref.size() - 1; } }
double Perceptron::diffSigmoid(double lambda, double y) { return sigmoid(lambda,y)*(1-sigmoid(lambda,y)); }
// =============================================== Back propagation. void threaded_run_bp(ThreadInfo *info, float black_moku, Stone next_player, int end_ply, BOOL board_on_child, BlockOffset child_offset, TreeBlock *b) { TreeHandle *s = info->s; TreePool *p = &s->p; // Dynkomi is adjusted if the win rate is too high. float komi = s->common_params->komi + s->common_variants->dynkomi; float black_count_playout = sigmoid(black_moku - komi); float black_count; // If there is network that predicts moku, then we should combine the results. if (b->has_score && s->params.use_cnn_final_score && end_ply >= s->params.min_ply_to_use_cnn_final_score) { // Final score = final_mixture_ratio * win_rate_prediction + (1.0 - final_mixture_ratio) * playout_result. float cnn_final_playout = sigmoid(b->score - komi); black_count = s->params.final_mixture_ratio * cnn_final_playout + (1 - s->params.final_mixture_ratio) * black_count_playout; } else { black_count = black_count_playout; } // Rave moves encoded in the board. int rave_moves[BOUND_COORD]; if (s->params.use_rave) memset(rave_moves, 0, sizeof(rave_moves)); TreeBlock *curr; BlockOffset curr_offset; if (board_on_child) { curr = b; curr_offset = child_offset; } else { curr = b->parent; curr_offset = b->parent_offset; } // Backprop from b. while (curr != NULL) { Stat* stat = &curr->data.stats[curr_offset]; // Add total first, otherwise the winning rate might go over 1. __sync_add_and_fetch(&stat->total, 1); inc_atomic_float(&stat->black_win, black_count); // stat->total += 1; // stat->black_win += black_count; // __sync_add_and_fetch(&stat->black_win, black_count); // Then update rave, if rave mode is open if (s->params.use_rave) { // Update rave moves. rave_moves[curr->data.moves[curr_offset]] = 1; // Loop over existing moves. for (int i = 0; i < curr->n; ++i) { Coord m = curr->data.moves[i]; if (rave_moves[m]) { Stat *rave_stat = &curr->data.rave_stats[i]; __sync_add_and_fetch(&rave_stat->total, 1); inc_atomic_float(&rave_stat->black_win, black_count); // __sync_add_and_fetch(&rave_stat->black_win, black_count); } } } curr_offset = curr->parent_offset; curr = curr->parent; } if (s->params.use_online_model) { // Update the online model. Stone player = (board_on_child ? OPPONENT(next_player) : next_player); update_online_model(info, player, b); } }
static void update_online_model(ThreadInfo *info, Stone player, TreeBlock *b) { TreeHandle *s = info->s; TreePool *p = &s->p; if (b == NULL) error("update_online_model: input b cannot be NULL!"); pthread_mutex_lock(&s->mutex_online_model); // Backprop from b. // Note that for any new tree block, its first opp_preds will be update here. (So a policy will never // avoid a nonleaf child just because its opp_preds is "empty"). for (;b != p->root; b = b->parent, player = OPPONENT(player)) { TreeBlock * parent = b->parent; BlockOffset parent_offset = b->parent_offset; const Stat* stat = &b->parent->data.stats[parent_offset]; if (b->extra == NULL) continue; // Compute online prediction. PRINT_DEBUG("In update_online_model, compute online prediction..\n"); float pred = s->model_bias; for (int i = 0; i < BOARD_SIZE * BOARD_SIZE; i ++) { pred += s->model_weights[i] * b->extra[i]; } pred = sigmoid(pred); // For the parent, it is the win rate from opponent point of view. b->parent->data.opp_preds[parent_offset] = pred; PRINT_DEBUG("In update_online_model, finish computing online prediction..\n"); BOOL update_model = FALSE; float weight = 0.0; float target = -1.0; if (s->params.life_and_death_mode) { // if the current b/w number has zero, then it is an end state and we should update the model, otherwise pass. const ProveNumber *pn = &b->parent->cnn_data.ps[parent_offset]; if (pn == NULL) error("update_online_model: pn cannot be NULL!"); PRINT_DEBUG("In update_online_model with life_and_death_mode is on. Before we set target..\n"); if ((pn->b == 0 && player == S_BLACK) || (pn->w == 0 && player == S_WHITE)) target = 1.0; if ((pn->b == 0 && player == S_WHITE) || (pn->w == 0 && player == S_BLACK)) target = 0.0; if (target > 0) update_model = TRUE; weight = 10.0; } else { // Normal mode. // Get actual win rate. float win_rate = ((float)stat->black_win) / stat->total; if (player == S_WHITE) win_rate = 1 - win_rate; update_model = stat->total > 30; target = win_rate; weight = min(stat->total, 1000); } // One must make sure that if update_model is TRUE, target must be meaningful. PRINT_DEBUG("In update_online_model, accumulate the error ...\n"); float err = target - pred; if (target >= 0) { // Mean average. s->model_acc_err += fabs(err); s->model_count_err ++; } // Gradient update. if (target >= 0 && update_model) { PRINT_DEBUG("In update_online_model, update the model ..."); // Update the weights as well. // y = f(w . x + b) // objective: min ||y - f(w.x+b)||^2, // Note that: err = y - f(w.x+b), f' = f * (1 - f) // grad w_i = - err * f(1-f) * x_i // grad b = - err * f(1-f) float alpha = err * pred * (1 - pred) * weight * s->params.online_model_alpha; for (int i = 0; i < BOARD_SIZE * BOARD_SIZE; ++i) { // Add negative gradient. s->model_weights[i] += alpha * b->extra[i]; } s->model_bias += alpha; } } pthread_mutex_unlock(&s->mutex_online_model); }
void LogisticRegressionByNonlinearConjugateGradient::train() { double fval = 0; /* Minimize the cross-entropy error function defined by * E (W) = -ln p (T|w1,w2,...,wK) / nSample * Gradient: G = X * (V - Y) / nSample */ // W = repmat(zeros(nFea, 1), new int[]{1, K}); DenseMatrix& W = *new DenseMatrix(nFeature + 1, nClass, 0); Matrix& A = *new DenseMatrix(nExample, nClass, 0); // A = X.transpose().multiply(W); computeActivation(A, *X, W); Matrix& V = sigmoid(A); Matrix& G = W.copy(); // G = X.multiply(V.subtract(Y)).scalarMultiply(1.0 / nSample); Matrix& VMinusY = *new DenseMatrix(nExample, nClass, 0); minus(VMinusY, V, *Y); // mtimes(G, XT, VMinusY); computeGradient(G, *X, VMinusY); timesAssign(G, 1.0 / nExample); // fval = -sum(sum(times(Y, log(plus(V, eps))))).getEntry(0, 0) / nSample; Matrix& YLogV = *new DenseMatrix(nExample, nClass); Matrix& VPlusEps = *new DenseMatrix(nExample, nClass); Matrix& LogV = *new DenseMatrix(nExample, nClass); plus(VPlusEps, V, eps); log(LogV, VPlusEps); times(YLogV, *Y, LogV); fval = -sum(sum(YLogV)) / nExample; bool* flags = new bool[2]; while (true) { NonlinearConjugateGradient::run(G, fval, epsilon, W, flags); /*disp("W:"); disp(W);*/ if (flags[0]) break; // A = X.transpose().multiply(W); computeActivation(A, *X, W); /*disp("A:"); disp(A);*/ // V = sigmoid(A); sigmoid(V, A); // fval = -sum(sum(times(Y, log(plus(V, eps))))).getEntry(0, 0) / nSample; plus(VPlusEps, V, eps); /*disp("V:"); disp(V);*/ log(LogV, VPlusEps); times(YLogV, *Y, LogV); /*disp("YLogV:"); disp(YLogV);*/ fval = -sum(sum(YLogV)) / nExample; // fprintf("fval: %.4f\n", fval); if (flags[1]) { // G = rdivide(X.multiply(V.subtract(Y)), nSample); minus(VMinusY, V, *Y); // mtimes(G, XT, VMinusY); computeGradient(G, *X, VMinusY); timesAssign(G, 1.0 / nExample); } } double** WData = W.getData(); double** thisWData = new double*[nFeature]; for (int feaIdx = 0; feaIdx < nFeature; feaIdx++) { thisWData[feaIdx] = WData[feaIdx]; } this->W = new DenseMatrix(thisWData, nFeature, nClass); b = WData[nFeature]; // delete &W; delete &A; delete &V; delete &G; delete &VMinusY; delete &YLogV; delete &VPlusEps; delete &LogV; delete[] flags; }