コード例 #1
0
ファイル: main.c プロジェクト: yzy5768/rtpproxy
int
main(int argc, char **argv)
{
    int i, len;
    double eval, clk;
    long long ncycles_ref, counter;
    double eptime;
    double add_delay;
    struct cfg cf;
    char buf[256];
    struct recfilter loop_error;
    struct PFD phase_detector;
    useconds_t usleep_time;
    struct sched_param sparam;
#if RTPP_DEBUG
    double sleep_time, filter_lastval;
#endif
    memset(&cf, 0, sizeof(cf));

    cf.stable = malloc(sizeof(struct rtpp_cfg_stable));
    if (cf.stable == NULL) {
         err(1, "can't allocate memory for the struct rtpp_cfg_stable");
         /* NOTREACHED */
    }
    memset(cf.stable, '\0', sizeof(struct rtpp_cfg_stable));
    cf.stable->ctrl_socks = malloc(sizeof(struct rtpp_list));
    if (cf.stable->ctrl_socks == NULL) {
         err(1, "can't allocate memory for the struct rtpp_cfg_stable");
         /* NOTREACHED */
    }
    memset(cf.stable->ctrl_socks, '\0', sizeof(struct rtpp_list));
    RTPP_LIST_RESET(cf.stable->ctrl_socks);    

    init_config(&cf, argc, argv);

    seedrandom();

    cf.stable->sessions_ht = rtpp_hash_table_ctor();
    if (cf.stable->sessions_ht == NULL) {
        err(1, "can't allocate memory for the hash table");
         /* NOTREACHED */
    }
    cf.stable->rtpp_stats = rtpp_stats_ctor();
    if (cf.stable->rtpp_stats == NULL) {
        err(1, "can't allocate memory for the stats data");
         /* NOTREACHED */
    }
    init_port_table(&cf);

    if (rtpp_controlfd_init(&cf) != 0) {
        err(1, "can't inilialize control socket%s",
          cf.stable->ctrl_socks->len > 1 ? "s" : "");
    }

    if (cf.stable->nodaemon == 0) {
	if (rtpp_daemon(0, 0) == -1)
	    err(1, "can't switch into daemon mode");
	    /* NOTREACHED */
    }

    if (rtpp_notify_init() != 0)
        errx(1, "can't start notification thread");

    cf.stable->glog = rtpp_log_open(cf.stable, "rtpproxy", NULL, LF_REOPEN);
    rtpp_log_setlevel(cf.stable->glog, cf.stable->log_level);
    _sig_cf = &cf;
    atexit(ehandler);
    rtpp_log_write(RTPP_LOG_INFO, cf.stable->glog, "rtpproxy started, pid %d", getpid());

    i = open(cf.stable->pid_file, O_WRONLY | O_CREAT | O_TRUNC, DEFFILEMODE);
    if (i >= 0) {
	len = sprintf(buf, "%u\n", (unsigned int)getpid());
	write(i, buf, len);
	close(i);
    } else {
	rtpp_log_ewrite(RTPP_LOG_ERR, cf.stable->glog, "can't open pidfile for writing");
    }

    signal(SIGHUP, sighup);
    signal(SIGINT, fatsignal);
    signal(SIGKILL, fatsignal);
    signal(SIGPIPE, SIG_IGN);
    signal(SIGTERM, fatsignal);
    signal(SIGXCPU, fatsignal);
    signal(SIGXFSZ, fatsignal);
    signal(SIGVTALRM, fatsignal);
    signal(SIGPROF, fatsignal);
    signal(SIGUSR1, fatsignal);
    signal(SIGUSR2, fatsignal);

    if (cf.stable->sched_policy != SCHED_OTHER) {
        sparam.sched_priority = sched_get_priority_max(cf.stable->sched_policy);
        if (sched_setscheduler(0, cf.stable->sched_policy, &sparam) == -1) {
            rtpp_log_ewrite(RTPP_LOG_ERR, cf.stable->glog, "sched_setscheduler(SCHED_%s, %d)",
              (cf.stable->sched_policy == SCHED_FIFO) ? "FIFO" : "RR", sparam.sched_priority);
        }
    }

    if (cf.stable->run_uname != NULL || cf.stable->run_gname != NULL) {
	if (drop_privileges(&cf) != 0) {
	    rtpp_log_ewrite(RTPP_LOG_ERR, cf.stable->glog,
	      "can't switch to requested user/group");
	    exit(1);
	}
    }
    set_rlimits(&cf);

    cf.sessinfo.sessions[0] = NULL;
    cf.sessinfo.nsessions = 0;
    cf.rtp_nsessions = 0;

    rtpp_command_async_init(&cf);
    rtpp_proc_async_init(&cf);

    counter = 0;
    recfilter_init(&loop_error, 0.96, 0.0, 0);
    PFD_init(&phase_detector, 2.0);
#ifdef HAVE_SYSTEMD_SD_DAEMON_H
    sd_notify(0, "READY=1");
#endif
#ifdef RTPP_CHECK_LEAKS
    rtpp_memdeb_setbaseln();
#endif
    for (;;) {
	eptime = getdtime();

        clk = (eptime + cf.stable->sched_offset) * cf.stable->target_pfreq;

        ncycles_ref = llrint(clk);

        eval = PFD_get_error(&phase_detector, clk);

#if RTPP_DEBUG
        filter_lastval = loop_error.lastval;
#endif

        if (eval != 0.0) {
            recfilter_apply(&loop_error, sigmoid(eval));
        }

#if RTPP_DEBUG
        if (counter % (unsigned int)cf.stable->target_pfreq == 0 || counter < 1000) {
          rtpp_log_write(RTPP_LOG_DBUG, cf.stable->glog, "run %lld ncycles %f raw error1 %f, filter lastval %f, filter nextval %f",
            counter, clk, eval, filter_lastval, loop_error.lastval);
        }
#endif
        add_delay = freqoff_to_period(cf.stable->target_pfreq, 1.0, loop_error.lastval);
        usleep_time = add_delay * 1000000.0;
#if RTPP_DEBUG
        if (counter % (unsigned int)cf.stable->target_pfreq == 0 || counter < 1000) {
            rtpp_log_write(RTPP_LOG_DBUG, cf.stable->glog, "run %lld filter lastval %f, filter nextval %f, error %f",
              counter, filter_lastval, loop_error.lastval, sigmoid(eval));
            rtpp_log_write(RTPP_LOG_DBUG, cf.stable->glog, "run %lld extra sleeping time %llu", counter, usleep_time);
        }
        sleep_time = getdtime();
#endif
        rtpp_proc_async_wakeup(cf.stable->rtpp_proc_cf, counter, ncycles_ref);
        usleep(usleep_time);
#if RTPP_DEBUG
        sleep_time = getdtime() - sleep_time;
        if (counter % (unsigned int)cf.stable->target_pfreq == 0 || counter < 1000 || sleep_time > add_delay * 2.0) {
            rtpp_log_write(RTPP_LOG_DBUG, cf.stable->glog, "run %lld sleeping time required %llu sleeping time actual %f, CSV: %f,%f,%f", \
              counter, usleep_time, sleep_time, (double)counter / cf.stable->target_pfreq, ((double)usleep_time) / 1000.0, sleep_time * 1000.0);
        }
#endif
        counter += 1;
        if (cf.stable->slowshutdown != 0) {
            pthread_mutex_lock(&cf.sessinfo.lock);
            if (cf.sessinfo.nsessions == 0) {
                /* The below unlock is not necessary, but does not hurt either */
                pthread_mutex_unlock(&cf.sessinfo.lock);
                rtpp_log_write(RTPP_LOG_INFO, cf.stable->glog,
                  "deorbiting-burn sequence completed, exiting");
                break;
            }
            pthread_mutex_unlock(&cf.sessinfo.lock);
        }
    }

#ifdef HAVE_SYSTEMD_SD_DAEMON_H
    sd_notify(0, "STATUS=Exited");
#endif

#ifdef RTPP_CHECK_LEAKS
    exit(rtpp_memdeb_dumpstats(&cf) == 0 ? 0 : 1);
#else
    exit(0);
#endif
}
コード例 #2
0
bool LogisticRegression::train(LabelledRegressionData trainingData){
    
    const unsigned int M = trainingData.getNumSamples();
    const unsigned int N = trainingData.getNumInputDimensions();
    const unsigned int K = trainingData.getNumTargetDimensions();
    trained = false;
    trainingResults.clear();
    
    if( M == 0 ){
        errorLog << "train(LabelledRegressionData trainingData) - Training data has zero samples!" << endl;
        return false;
    }
    
    if( K == 0 ){
        errorLog << "train(LabelledRegressionData trainingData) - The number of target dimensions is not 1!" << endl;
        return false;
    }
    
    numFeatures = N;
    numOutputDimensions = 1; //Logistic Regression will have 1 output
    inputVectorRanges.clear();
    targetVectorRanges.clear();
    
    //Scale the training and validation data, if needed
	if( useScaling ){
		//Find the ranges for the input data
        inputVectorRanges = trainingData.getInputRanges();
        
        //Find the ranges for the target data
		targetVectorRanges = trainingData.getTargetRanges();
        
		//Scale the training data
		trainingData.scale(inputVectorRanges,targetVectorRanges,0.0,1.0);
	}
    
    //Reset the weights
    Random rand;
    w0 = rand.getRandomNumberUniform(-0.1,0.1);
    w.resize(N);
    for(UINT j=0; j<N; j++){
        w[j] = rand.getRandomNumberUniform(-0.1,0.1);
    }

    double error = 0;
    double lastSquaredError = 0;
    double delta = 0;
    UINT iter = 0;
    bool keepTraining = true;
    Random random;
    vector< UINT > randomTrainingOrder(M);
    TrainingResult result;
    trainingResults.reserve(M);
    
    //In most cases, the training data is grouped into classes (100 samples for class 1, followed by 100 samples for class 2, etc.)
    //This can cause a problem for stochastic gradient descent algorithm. To avoid this issue, we randomly shuffle the order of the
    //training samples. This random order is then used at each epoch.
    for(UINT i=0; i<M; i++){
        randomTrainingOrder[i] = i;
    }
    std::random_shuffle(randomTrainingOrder.begin(), randomTrainingOrder.end());
    
    //Run the main stochastic gradient descent training algorithm
    while( keepTraining ){
        
        //Run one epoch of training using stochastic gradient descent
        totalSquaredTrainingError = 0;
        for(UINT m=0; m<M; m++){
            
            //Select the random sample
            UINT i = randomTrainingOrder[m];
            
            //Compute the error, given the current weights
            VectorDouble x = trainingData[i].getInputVector();
            VectorDouble y = trainingData[i].getTargetVector();
            double h = w0;
            for(UINT j=0; j<N; j++){
                h += x[j] * w[j];
            }
            error = y[0] - sigmoid( h );
            totalSquaredTrainingError += SQR(error);
            
            //Update the weights
            for(UINT j=0; j<N; j++){
                w[j] += learningRate * error * x[j];
            }
            w0 += learningRate * error;
        }
        
        //Compute the error
        delta = fabs( totalSquaredTrainingError-lastSquaredError );
        lastSquaredError = totalSquaredTrainingError;
        
        //Check to see if we should stop
        if( delta <= minChange ){
            keepTraining = false;
        }
        
        if( ++iter >= maxNumIterations ){
            keepTraining = false;
        }
        
        if( isinf( totalSquaredTrainingError ) || isnan( totalSquaredTrainingError ) ){
            errorLog << "train(LabelledRegressionData &trainingData) - Training failed! Total squared error is NAN. If scaling is not enabled then you should try to scale your data and see if this solves the issue." << endl;
            return false;
        }
        
        //Store the training results
        rootMeanSquaredTrainingError = sqrt( totalSquaredTrainingError / double(M) );
        result.setRegressionResult(iter,totalSquaredTrainingError,rootMeanSquaredTrainingError);
        trainingResults.push_back( result );
        
        //Notify any observers of the new training data
        trainingResultsObserverManager.notifyObservers( result );
        
        trainingLog << "Epoch: " << iter << " SSE: " << totalSquaredTrainingError << " Delta: " << delta << endl;
    }
    
    //Flag that the algorithm has been trained
    regressionData.resize(1,0);
    trained = true;
    return trained;
}
コード例 #3
0
/**
 * @function addFootstep
 * @brief Need more explanation?
 */
void ZMPWalkGenerator::addFootstep(const Footprint& fp) {

  // initialize our timer
  GaitTimer timer;
  timer.single_support_time = min_single_support_time;
  timer.double_support_time = min_double_support_time;
  timer.startup_time = walk_startup_time;
  timer.startup_time = walk_shutdown_time;
  
  // grab the initial position of the zmp
  const ZMPReferenceContext start_context = getLastRef();
  
  // figure out the stances for this movement
  stance_t double_stance = fp.is_left ? DOUBLE_RIGHT : DOUBLE_LEFT;
  stance_t single_stance = fp.is_left ? SINGLE_RIGHT : SINGLE_LEFT;
  if (step_height == 0) { single_stance = double_stance; }
  
  // figure out swing foot and stance foot for accessing
  int swing_foot = fp.is_left ? 0 : 1;
  int stance_foot = 1-swing_foot;
  
  // figure out where our body is going to end up if we put our foot there
  quat body_rot_end = quat::slerp(start_context.feet[swing_foot].rotation(),
				  fp.transform.rotation(),
				  0.5);
  
  // figure out the start and end positions of the zmp
  vec3 zmp_end = start_context.feet[stance_foot] * vec3(zmpoff_x, zmpoff_y, 0);
  vec3 zmp_start = vec3(start_context.pX, start_context.pY, 0);
  
  
  // figure out how far the swing foot will be moving
  double dist = (fp.transform.translation() - start_context.feet[swing_foot].translation()).norm();

  // turns out that extracting plane angles from 3d rotations is a bit annoying. oh well.
  vec3 rotation_intermediate =
    fp.transform.rotFwd() * vec3(1.0, 0.0, 0.0) +
    start_context.feet[swing_foot].rotFwd() * vec3(1.0, 0.0, 0.0);
  double dist_theta = atan2(rotation_intermediate.y(), rotation_intermediate.x()); // hooray for bad code!
  
  // figure out how long we spend in each stage
  // TODO:
  //  dist for double support should be distance ZMP moves
  //  no theta or step height needed for double support
  //
  //  dist for single support should be distance SWING FOOT moves
  //  that also includes change in theta, and step height
  size_t double_ticks = timer.compute_double(dist);
  size_t single_ticks = timer.compute_single(dist, dist_theta, step_height);
  
  //size_t double_ticks = TRAJ_FREQ_HZ * min_double_support_time;
  //size_t single_ticks = TRAJ_FREQ_HZ * min_single_support_time;
  
  for (size_t i = 0; i < double_ticks; i++) {
    // sigmoidally interopolate things like desired ZMP and body
    // rotation. we run the sigmoid across both double and isngle
    // support so we don't try to whip the body across for the
    // split-second we're in double support.
    double u = double(i) / double(double_ticks - 1);
    double c = sigmoid(u);
    
    double uu = double(i) / double(double_ticks + single_ticks - 1);
    double cc = sigmoid(uu);
    
    ZMPReferenceContext cur_context = start_context;
    cur_context.stance = double_stance;
    
    
    vec3 cur_zmp = zmp_start + (zmp_end - zmp_start) * c;
    cur_context.pX = cur_zmp.x();
    cur_context.pY = cur_zmp.y();
    // ANA'S COMMENT	
    //cur_context.state.body_rot = quat::slerp(start_context.state.body_rot, 
	//				     body_rot_end, cc);
    // END ANA'S COMMENT
    
    ref.push_back(cur_context);
  }
  
  double swing_foot_traj[single_ticks][3];
  double swing_foot_angle[single_ticks];
  
  // note: i'm not using the swing_foot_angle stuff cause it seemed to not work
  swing2Cycloids(start_context.feet[swing_foot].translation().x(),
		 start_context.feet[swing_foot].translation().y(),
		 start_context.feet[swing_foot].translation().z(),
		 fp.transform.translation().x(),
		 fp.transform.translation().y(),
		 fp.transform.translation().z(),
		 single_ticks,
		 fp.is_left,
		 step_height,
		 swing_foot_traj,
		 swing_foot_angle);
  
  
  quat foot_start_rot = start_context.feet[swing_foot].rotation();
  quat foot_end_rot = fp.transform.rotation();
  
  // we want to have a small deadband at the front and end when we rotate the foot around
  // so it has no rotational velocity during takeoff and landing
  size_t rot_dead_ticks = 0.1 * TRAJ_FREQ_HZ; 
  
  if (single_ticks < 4*rot_dead_ticks) {
    rot_dead_ticks = single_ticks / 4;
  }
  
  assert(single_ticks > rot_dead_ticks * 2);
  
  size_t central_ticks = single_ticks - 2*rot_dead_ticks;
  
  for (size_t i = 0; i < single_ticks; i++) {
    
    double uu = double(i + double_ticks) / double(double_ticks + single_ticks - 1);
    double cc = sigmoid(uu);
    
    double ru;
    
    if (i < rot_dead_ticks) {
      ru = 0;
    } else if (i < rot_dead_ticks + central_ticks) {
      ru = double(i - rot_dead_ticks) / double(central_ticks - 1);
    } else {
      ru = 1;
    }
    
    
    ref.push_back(getLastRef());
    ZMPReferenceContext& cur_context = ref.back();
    cur_context.stance = single_stance;
    // ANA'S C OMMENT 
//    cur_context.state.body_rot = quat::slerp(start_context.state.body_rot, 
//					     body_rot_end, cc);
    // END ANA'S COMMENT
    
    cur_context.feet[swing_foot].setRotation(quat::slerp(foot_start_rot, 
							 foot_end_rot,
							 ru));
    
    cur_context.feet[swing_foot].setTranslation(vec3(swing_foot_traj[i][0],
						     swing_foot_traj[i][1],
						     swing_foot_traj[i][2]));
  }
  
  // finally, update the first step variable if necessary
  if (first_step_index == size_t(-1)) { // we haven't taken a first step yet
    first_step_index = ref.size() - 1;
  }
}
コード例 #4
0
ファイル: perceptron.cpp プロジェクト: JuanCrg90/Perceptron
double Perceptron::diffSigmoid(double lambda, double y)
{
    return sigmoid(lambda,y)*(1-sigmoid(lambda,y));
}
コード例 #5
0
// =============================================== Back propagation.
void threaded_run_bp(ThreadInfo *info, float black_moku, Stone next_player, int end_ply, BOOL board_on_child, BlockOffset child_offset, TreeBlock *b) {
  TreeHandle *s = info->s;
  TreePool *p = &s->p;

  // Dynkomi is adjusted if the win rate is too high.
  float komi = s->common_params->komi + s->common_variants->dynkomi;
  float black_count_playout = sigmoid(black_moku - komi);
  float black_count;

  // If there is network that predicts moku, then we should combine the results.
  if (b->has_score && s->params.use_cnn_final_score && end_ply >= s->params.min_ply_to_use_cnn_final_score) {
    // Final score = final_mixture_ratio * win_rate_prediction + (1.0 - final_mixture_ratio) * playout_result.
    float cnn_final_playout = sigmoid(b->score - komi);
    black_count = s->params.final_mixture_ratio * cnn_final_playout + (1 - s->params.final_mixture_ratio) * black_count_playout;
  } else {
    black_count = black_count_playout;
  }

  // Rave moves encoded in the board.
  int rave_moves[BOUND_COORD];
  if (s->params.use_rave) memset(rave_moves, 0, sizeof(rave_moves));

  TreeBlock *curr;
  BlockOffset curr_offset;

  if (board_on_child) {
    curr = b;
    curr_offset = child_offset;
  } else {
    curr = b->parent;
    curr_offset = b->parent_offset;
  }

  // Backprop from b.
  while (curr != NULL) {
    Stat* stat = &curr->data.stats[curr_offset];

    // Add total first, otherwise the winning rate might go over 1.
    __sync_add_and_fetch(&stat->total, 1);
    inc_atomic_float(&stat->black_win, black_count);
    // stat->total += 1;
    // stat->black_win += black_count;
    // __sync_add_and_fetch(&stat->black_win, black_count);

    // Then update rave, if rave mode is open
    if (s->params.use_rave) {
      // Update rave moves.
      rave_moves[curr->data.moves[curr_offset]] = 1;

      // Loop over existing moves.
      for (int i = 0; i < curr->n; ++i) {
        Coord m = curr->data.moves[i];
        if (rave_moves[m]) {
          Stat *rave_stat = &curr->data.rave_stats[i];
          __sync_add_and_fetch(&rave_stat->total, 1);
          inc_atomic_float(&rave_stat->black_win, black_count);
          // __sync_add_and_fetch(&rave_stat->black_win, black_count);
        }
      }
    }

    curr_offset = curr->parent_offset;
    curr = curr->parent;
  }

  if (s->params.use_online_model) {
    // Update the online model.
    Stone player = (board_on_child ? OPPONENT(next_player) : next_player);
    update_online_model(info, player, b);
  }
}
コード例 #6
0
static void update_online_model(ThreadInfo *info, Stone player, TreeBlock *b) {
  TreeHandle *s = info->s;
  TreePool *p = &s->p;
  if (b == NULL) error("update_online_model: input b cannot be NULL!");

  pthread_mutex_lock(&s->mutex_online_model);

  // Backprop from b.
  // Note that for any new tree block, its first opp_preds will be update here. (So a policy will never
  // avoid a nonleaf child just because its opp_preds is "empty").
  for (;b != p->root; b = b->parent, player = OPPONENT(player)) {
    TreeBlock * parent = b->parent;
    BlockOffset parent_offset = b->parent_offset;
    const Stat* stat = &b->parent->data.stats[parent_offset];

    if (b->extra == NULL) continue;

    // Compute online prediction.
    PRINT_DEBUG("In update_online_model, compute online prediction..\n");
    float pred = s->model_bias;
    for (int i = 0; i < BOARD_SIZE * BOARD_SIZE; i ++) {
      pred += s->model_weights[i] * b->extra[i];
    }
    pred = sigmoid(pred);
    // For the parent, it is the win rate from opponent point of view.
    b->parent->data.opp_preds[parent_offset] = pred;

    PRINT_DEBUG("In update_online_model, finish computing online prediction..\n");

    BOOL update_model = FALSE;
    float weight = 0.0;
    float target = -1.0;
    if (s->params.life_and_death_mode) {
      // if the current b/w number has zero, then it is an end state and we should update the model, otherwise pass.
      const ProveNumber *pn = &b->parent->cnn_data.ps[parent_offset];
      if (pn == NULL) error("update_online_model: pn cannot be NULL!");
      PRINT_DEBUG("In update_online_model with life_and_death_mode is on. Before we set target..\n");
      if ((pn->b == 0 && player == S_BLACK) || (pn->w == 0 && player == S_WHITE)) target = 1.0;
      if ((pn->b == 0 && player == S_WHITE) || (pn->w == 0 && player == S_BLACK)) target = 0.0;
      if (target > 0) update_model = TRUE;
      weight = 10.0;
    } else {
      // Normal mode.
      // Get actual win rate.
      float win_rate = ((float)stat->black_win) / stat->total;
      if (player == S_WHITE) win_rate = 1 - win_rate;

      update_model = stat->total > 30;
      target = win_rate;
      weight = min(stat->total, 1000);
    }

    // One must make sure that if update_model is TRUE, target must be meaningful.
    PRINT_DEBUG("In update_online_model, accumulate the error ...\n");
    float err = target - pred;
    if (target >= 0) {
      // Mean average.
      s->model_acc_err += fabs(err);
      s->model_count_err ++;
    }

    // Gradient update.
    if (target >= 0 && update_model) {
      PRINT_DEBUG("In update_online_model, update the model ...");
      // Update the weights as well.
      // y = f(w . x + b)
      // objective:  min ||y - f(w.x+b)||^2,
      //   Note that: err = y - f(w.x+b), f' = f * (1 - f)
      // grad w_i = - err * f(1-f) * x_i
      // grad b = - err * f(1-f)
      float alpha = err * pred * (1 - pred) * weight * s->params.online_model_alpha;

      for (int i = 0; i < BOARD_SIZE * BOARD_SIZE;  ++i) {
        // Add negative gradient.
        s->model_weights[i] += alpha * b->extra[i];
      }
      s->model_bias += alpha;
    }
  }

  pthread_mutex_unlock(&s->mutex_online_model);
}
void LogisticRegressionByNonlinearConjugateGradient::train() {

	double fval = 0;

	/* Minimize the cross-entropy error function defined by
	 * E (W) = -ln p (T|w1,w2,...,wK) / nSample
	 * Gradient: G = X * (V - Y) / nSample
	 */
	// W = repmat(zeros(nFea, 1), new int[]{1, K});
	DenseMatrix& W = *new DenseMatrix(nFeature + 1, nClass, 0);
	Matrix& A = *new DenseMatrix(nExample, nClass, 0);
	// A = X.transpose().multiply(W);
	computeActivation(A, *X, W);
	Matrix& V = sigmoid(A);
	Matrix& G = W.copy();

	// G = X.multiply(V.subtract(Y)).scalarMultiply(1.0 / nSample);
	Matrix&  VMinusY = *new DenseMatrix(nExample, nClass, 0);
	minus(VMinusY, V, *Y);
	// mtimes(G, XT, VMinusY);
	computeGradient(G, *X, VMinusY);
	timesAssign(G, 1.0 / nExample);


	// fval = -sum(sum(times(Y, log(plus(V, eps))))).getEntry(0, 0) / nSample;
	Matrix& YLogV = *new DenseMatrix(nExample, nClass);
	Matrix& VPlusEps = *new DenseMatrix(nExample, nClass);
	Matrix& LogV = *new DenseMatrix(nExample, nClass);
	plus(VPlusEps, V, eps);
	log(LogV, VPlusEps);
	times(YLogV, *Y, LogV);
	fval = -sum(sum(YLogV)) / nExample;

	bool* flags = new bool[2];
	while (true) {
		NonlinearConjugateGradient::run(G, fval, epsilon, W, flags);
		/*disp("W:");
		disp(W);*/
		if (flags[0])
			break;
		// A = X.transpose().multiply(W);
		computeActivation(A, *X, W);
		/*disp("A:");
		disp(A);*/
		// V = sigmoid(A);
		sigmoid(V, A);
		// fval = -sum(sum(times(Y, log(plus(V, eps))))).getEntry(0, 0) / nSample;
		plus(VPlusEps, V, eps);
		/*disp("V:");
		disp(V);*/
		log(LogV, VPlusEps);
		times(YLogV, *Y, LogV);
		/*disp("YLogV:");
		disp(YLogV);*/
		fval = -sum(sum(YLogV)) / nExample;
		// fprintf("fval: %.4f\n", fval);
		if (flags[1]) {
			// G = rdivide(X.multiply(V.subtract(Y)), nSample);
			minus(VMinusY, V, *Y);
			// mtimes(G, XT, VMinusY);
			computeGradient(G, *X, VMinusY);
			timesAssign(G, 1.0 / nExample);
		}
	}
	double** WData = W.getData();
	double** thisWData = new double*[nFeature];
	for (int feaIdx = 0; feaIdx < nFeature; feaIdx++) {
		thisWData[feaIdx] = WData[feaIdx];
	}
	this->W = new DenseMatrix(thisWData, nFeature, nClass);
	b = WData[nFeature];

	// delete &W;
	delete &A;
	delete &V;
	delete &G;
	delete &VMinusY;
	delete &YLogV;
	delete &VPlusEps;
	delete &LogV;
	delete[] flags;

}