/**
 * Runs one training pass over every relation triple stored in `list`.
 *
 * The call is skipped (returning 0) when `know_lock` is already set or when the
 * learning rate used for this pass (`Opt::init_learning_rate`) is below `eps`.
 * Otherwise the triples are trained `Opt::know_iter` times in a freshly shuffled
 * order, `NN::learning_rate` is restored to its previous value and
 * `update_know_cnt` is incremented.
 *
 * On roughly one call in four (decided by `rand()`), the loss after training is
 * measured and a before/after report is printed.
 *
 * @return `Opt::lambda` times the summed post-training loss. The post-training
 *         loss is only measured on the calls that also print the report, so the
 *         result is 0 on every other call.
 */
real CRelation::SeqTrainRelation()
{
    // NOTE(review): know_lock is tested and then set in two separate steps; if it is a
    // plain bool this is not an atomic test-and-set across threads -- confirm its type.
    if (know_lock == true)
        return 0;
    know_lock = true;

    const real curr_lr = NN::learning_rate;
    // A progress-based decay used to be applied here and is currently disabled:
    //   Opt::init_learning_rate * (1 - update_know_cnt / (real)total_update_cnt)
    NN::learning_rate = Opt::init_learning_rate;
    if (NN::learning_rate < eps)
    {
        NN::learning_rate = curr_lr;
        know_lock = false;
        return 0;
    }

    // Adds, for every triple, the loss of the stored triple and the loss of the same
    // triple with its first word replaced by a sampled word index.
    auto accumulate_losses = [&](real& true_loss, real& corrupted_loss)
    {
        for (std::size_t i = 0; i < list.size(); ++i)
        {
            true_loss += ComputeLoss(list[i].w1, list[i].w2, list[i].r);
            corrupted_loss += ComputeLoss(SampleWordIdx(), list[i].w2, list[i].r);
        }
    };

    real b_result = 0, b_false_result = 0;
    accumulate_losses(b_result, b_false_result);

    // Seeds rand(), which is used below for the report sampling.
    // NOTE(review): std::random_shuffle is deprecated since C++14 and removed in C++17;
    // whether it draws from rand() is implementation-defined.
    srand(time(nullptr));
    std::vector<int> seqs;
    seqs.reserve(list.size());
    for (std::size_t i = 0; i < list.size(); ++i)
        seqs.push_back(static_cast<int>(i));

    for (int j = 0; j < Opt::know_iter; ++j)
    {
        std::random_shuffle(seqs.begin(), seqs.end());
        // Bounded by seqs.size() so the permutation is never read past its end.
        for (std::size_t i = 0; i < seqs.size(); ++i)
            TrainRelatTriple(list[seqs[i]].w1, true, list[seqs[i]].r, list[seqs[i]].w2);
    }

    NN::learning_rate = curr_lr;
    update_know_cnt++;

    real a_result = 0, a_false_result = 0;
    if (rand() / (RAND_MAX + 1.0) < 0.25)
    {
        accumulate_losses(a_result, a_false_result);

        std::cout << "\nid: " << std::this_thread::get_id();
        // NOTE(review): update_know_cnt was already incremented above, so the printed
        // value is one more than the number of completed updates -- confirm intent.
        printf(" %dth update, %.5f", update_know_cnt + 1, NN::learning_rate);
        printf("\nBefore loss: %.5f\tAfter loss: %.5f\t", b_result / list.size(), a_result / list.size());
        printf("Before cha: %.5f\tAfter cha: %.5f\n", (b_false_result - b_result) / list.size(), (-a_result + a_false_result) / list.size());
    }

    know_lock = false;
    return Opt::lambda * a_result;
}
/**
 * Scans randomly drawn thresholds and keeps the one with the lowest combined loss.
 *
 * `m_appcontext->num_node_thresholds` thresholds are drawn between the first and
 * the last response value and sorted. All samples start in the right child; the
 * responses are then walked in order, moving each sample whose response is
 * <= the current threshold to the left child. Whenever a response exceeds the
 * current threshold, the split at that threshold is scored as
 * sum_c w_c * ComputeLoss(p, c, global_loss_classification) over both children,
 * and the search advances to the next threshold.
 *
 * @param dataset              samples, indexed by the `second` member of each response
 * @param responses            (response value, sample index) pairs; the scan relies on
 *                             them being sorted by ascending response value
 * @param score_and_threshold  out: `first` = best combined loss (1e16 if none found),
 *                             `second` = corresponding threshold (0.0 if none found)
 * @return true if at least one threshold produced a split with positive total
 *         weight on both sides
 */
bool SplitEvaluatorMLClass<Sample, TAppContext>::CalculateSpecificLossAndThreshold(DataSet<Sample, LabelMLClass>& dataset, std::vector<std::pair<double, int> > responses, std::pair<double, double>& score_and_threshold)
{
    double BestLoss = 1e16;
    double BestThreshold = 0.0;
    bool found = false;

    // Nothing to split: report "not found" instead of reading responses[0].
    if (responses.empty())
    {
        score_and_threshold.first = BestLoss;
        score_and_threshold.second = BestThreshold;
        return found;
    }

    // 1) Draw random thresholds inside the response range and sort them.
    const double min_response = responses[0].first;
    const double max_response = responses[responses.size() - 1].first;
    const double d = (max_response - min_response);
    std::vector<double> random_thresholds(m_appcontext->num_node_thresholds, 0.0);
    if (random_thresholds.empty())
    {
        // No candidate thresholds were requested, so no split can be found.
        score_and_threshold.first = BestLoss;
        score_and_threshold.second = BestThreshold;
        return found;
    }
    for (std::size_t i = 0; i < random_thresholds.size(); i++)
        random_thresholds[i] = (randDouble() * d) + min_response;
    std::sort(random_thresholds.begin(), random_thresholds.end());

    // 2) Start with every sample in the right child.
    std::vector<double> RClassWeights(m_appcontext->num_classes, 0.0);
    std::vector<double> LClassWeights(m_appcontext->num_classes, 0.0);
    double RTotalWeight = 0.0;
    double LTotalWeight = 0.0;
    for (std::size_t r = 0; r < responses.size(); r++)
    {
        const int labelIdx = dataset[responses[r].second]->m_label.class_label;
        const double sample_w = dataset[responses[r].second]->m_label.class_weight;
        RClassWeights[labelIdx] += sample_w;
        RTotalWeight += sample_w;
    }

    // Weighted loss of one child node given its per-class weights and total weight.
    auto child_loss = [&](const std::vector<double>& class_weights, double total_weight)
    {
        const int num_classes = static_cast<int>(class_weights.size());
        std::vector<double> p(class_weights.size());
        for (int ci = 0; ci < num_classes; ci++)
            p[ci] = class_weights[ci] / total_weight;
        double loss = 0.0;
        for (int ci = 0; ci < num_classes; ci++)
            loss += class_weights[ci] * ComputeLoss(p, ci, m_appcontext->global_loss_classification);
        return loss;
    };

    // 3) Walk responses and thresholds together.
    std::size_t th_idx = 0;
    std::size_t r = 0;
    while (r < responses.size())
    {
        const double response = responses[r].first;
        const double threshold = random_thresholds[th_idx];

        if (response <= threshold)
        {
            // The sample falls at or below the current threshold: move it to the left child.
            const int labelIdx = dataset[responses[r].second]->m_label.class_label;
            const double cur_sample_weight = dataset[responses[r].second]->m_label.class_weight;
            RClassWeights[labelIdx] -= cur_sample_weight;
            if (RClassWeights[labelIdx] < 0.0)
                RClassWeights[labelIdx] = 0.0; // guard against round-off going negative
            LClassWeights[labelIdx] += cur_sample_weight;
            RTotalWeight -= cur_sample_weight;
            if (RTotalWeight < 0.0)
                RTotalWeight = 0.0;
            LTotalWeight += cur_sample_weight;
            ++r;
            continue;
        }

        // First sample beyond the current threshold: score the split at this threshold.
        // A split with an empty side can never be selected, so its loss (which would be
        // built from 0/0 probabilities) is not computed at all.
        if (LTotalWeight > 0.0 && RTotalWeight > 0.0)
        {
            const double RLoss = child_loss(RClassWeights, RTotalWeight);
            const double LLoss = child_loss(LClassWeights, LTotalWeight);
            const double CombinedLoss = LLoss + RLoss;
            if (CombinedLoss < BestLoss)
            {
                BestLoss = CombinedLoss;
                BestThreshold = threshold;
                found = true;
            }
        }

        if (!(response > threshold))
        {
            // Only reachable when the comparison is unordered (a NaN value): the sample
            // stays in the right child and the scan moves on to the next response.
            ++r;
            continue;
        }

        // Several thresholds may lie in the gap before this response, so advance the
        // threshold and examine the same response again.
        if (th_idx + 1 >= random_thresholds.size())
            break; // all thresholds tested
        ++th_idx;
    }

    score_and_threshold.first = BestLoss;
    score_and_threshold.second = BestThreshold;
    return found;
}