// Min-max normalize the data to [0, 1]: shift so the minimum becomes 0,
// then scale so the maximum becomes 1.
// Returns 0 unconditionally.
int Transform::AutoScaleData() {
    double min = stat.minValue();
    double max = stat.maxValue();
    addScalar(-1 * min);
    // After shifting by -min the data's maximum is (max - min), so the
    // correct scale factor is 1 / (max - min). The original divided by the
    // pre-shift max, which only normalizes to [0, 1] when min == 0.
    double range = max - min;
    if (range > 0.0) {
        ScaleData(1 / range);
    }
    return 0;
}
void LogisticRegression::Test() { cout << "Test ......." << endl; ifstream fin(this->str_test_file_.c_str()); if(!fin) { cerr << "Can not read test data file !" << endl; exit(0); } string line; while(getline(fin, line)) { vector<double> vd_data = split2double(line, "\t"); int label = (int)vd_data.back(); vd_data.pop_back(); this->vec_test_label_.push_back(label); this->vec_test_data_.push_back(vd_data); } fin.close(); ScaleData(this->vec_test_data_); //test int i_error_count = 0; for(int i = 0; i < this->vec_test_data_.size(); i++) { double dou_prob = Sigmoid(this->vec_test_data_[i]); int i_ret_label = dou_prob > 0.5 ? 1 : 0; if(i_ret_label != this->vec_test_label_[i]) { i_error_count ++; } } cout << "the error rate of this test is " << 1.0 * i_error_count / this->vec_test_data_.size() << endl; }
void LogisticRegression::TrainModel() { cout << "Train ...." << endl; //load data string line; ifstream fin(this->str_train_data_file_.c_str()); if(!fin) { cerr << "Can not read train data file !" << endl; exit(0); } while(getline(fin, line)) { vector<double> vd_data = split2double(line, "\t"); int label = (int)vd_data.back(); vd_data.pop_back(); this->vec_train_label_.push_back(label); this->vec_train_data_.push_back(vd_data); } fin.close(); //data scale InitScale(this->vec_train_data_); ScaleData(this->vec_train_data_); //train size_t i_round = 0; while(i_round < this->i_max_round) { cout << "Rount [" << i_round << "]....." << endl; vector<double> douVec_gradient(this->vec_train_data_[0].size(), 0); double dou_bias_gradient = 0; double dou_cost = 0; for(int i = 0; i < this->vec_train_data_.size(); i++) { double dou_h = Sigmoid(this->vec_train_data_[i]); //calculate cost for each data dou_cost += (this->vec_train_label_[i]*log(dou_h) + (1-this->vec_train_label_[i])*log(1-dou_h)); //calculate gradient for each data for(int j = 0; j < douVec_gradient.size(); j++) { douVec_gradient[j] += (this->vec_train_label_[i] - dou_h)*this->vec_train_data_[i][j]; } dou_bias_gradient += (this->vec_train_label_[i] - dou_h); } //L2 regularization double dou_regularization = 0; for(int i = 0; i < this->douVec_weights_.size(); i++) { dou_regularization += pow(this->douVec_weights_[i], 2); } //final cost dou_cost = -1.0 * dou_cost / this->douVec_weights_.size() + this->dou_lambda_*dou_regularization/(2*this->douVec_weights_.size()); cout << "Cost J = " << dou_cost << endl; //update string str_new_weights("new weights is [ "); string str_gradient("gradient is ["); for(int k = 0; k < douVec_gradient.size(); k++) { this->douVec_weights_[k] += this->dou_step_*(douVec_gradient[k]+this->dou_lambda_*this->douVec_weights_[k])/this->vec_train_data_.size(); str_new_weights = str_new_weights + double2string(this->douVec_weights_[k]) + " "; str_gradient = str_gradient + 
double2string(douVec_gradient[k]) + " "; } this->dou_bias_ += this->dou_step_*dou_bias_gradient/this->vec_train_data_.size(); str_new_weights = str_new_weights + double2string(this->dou_bias_) + "]"; str_gradient = str_gradient + double2string(dou_bias_gradient) + "]"; cout << str_new_weights << endl; cout << str_gradient << endl; i_round += 1; } //save model cout << "save model ..." << endl; ofstream fout("./logreg_model.txt"); cout << this->douVec_weights_.size() << endl; for(int i = 0; i < this->douVec_weights_.size(); i++) { fout << this->douVec_weights_[i] << " "; } fout << this->dou_bias_ << endl; fout.close(); }