void LR::calc_loss(double *loss, float *acc)
{
    double log_likeli = 0.0;
    int err_num = 0;
    // Accumulate the log-likelihood and the error count over all samples
    for (size_t i = 0; i < samp_class_vec.size(); i++) {
        int samp_class = samp_class_vec[i];
        sparse_feat samp_feat = samp_feat_vec[i];
        vector<float> score_vec = calc_score(samp_feat);   // one score per class (W'*X)
        int pred_class = score_to_class(score_vec);        // predicted class = argmax of the scores
        if (pred_class != samp_class)
            err_num += 1;
        vector<float> softmax_vec = softmax(score_vec);
        // Only the true class contributes to the log-likelihood
        double pj = softmax_vec[samp_class];
        double temp = pj < LOG_LIM ? LOG_LIM : pj;         // clamp to avoid log(0)
        log_likeli += log(temp);
    }
    *loss = -log_likeli;  // cross-entropy loss equals the negative log-likelihood in LR
    *acc = 1 - (float)err_num / samp_class_vec.size();
}
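/*
 * The softmax() helper used by LR::calc_loss above (and LR::update_online
 * further down) is not shown in these snippets. Below is a minimal sketch,
 * assuming it maps a raw score vector to class probabilities; the
 * max-subtraction is the standard overflow guard and an assumption here,
 * not necessarily what the original implementation does.
 */
#include <algorithm>
#include <cmath>
#include <vector>
using std::vector;

// Numerically stable softmax: subtracting the maximum score before
// exponentiating prevents overflow without changing the result.
vector<float> softmax(const vector<float>& score_vec)
{
    vector<float> prob_vec(score_vec.size());
    float max_score = *std::max_element(score_vec.begin(), score_vec.end());
    float sum = 0.0f;
    for (size_t k = 0; k < score_vec.size(); k++) {
        prob_vec[k] = std::exp(score_vec[k] - max_score);
        sum += prob_vec[k];
    }
    for (size_t k = 0; k < score_vec.size(); k++)
        prob_vec[k] /= sum;
    return prob_vec;
}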
void SoftmaxLayer::runForwardImplementation(Bundle& bundle)
{
    auto& inputActivationsVector  = bundle["inputActivations" ].get<MatrixVector>();
    auto& outputActivationsVector = bundle["outputActivations"].get<MatrixVector>();

    assert(inputActivationsVector.size() == 1);

    auto inputActivations = foldTime(inputActivationsVector.back());

    util::log("SoftmaxLayer") << " Running forward propagation of matrix "
        << inputActivations.shapeString() << "\n";

    if(util::isLogEnabled("SoftmaxLayer::Detail"))
    {
        util::log("SoftmaxLayer::Detail") << " input: " << inputActivations.debugString();
    }

    auto outputActivations = softmax(inputActivations);

    if(util::isLogEnabled("SoftmaxLayer::Detail"))
    {
        util::log("SoftmaxLayer::Detail") << " outputs: " << outputActivations.debugString();
    }

    saveMatrix("outputActivations", outputActivations);

    outputActivationsVector.push_back(unfoldTime(outputActivations,
        inputActivationsVector.front().size()));
}
virtual LogPRowCol log_p_row_column(shared_ptr<arma::mat> z1, shared_ptr<arma::mat> z2,
                                    const ExampleIds& example_ids)
{
    arma::mat w = (*z1) * (*z2);
    arma::mat y = def_data->get_mat()->cols(arma::uvec(example_ids));
    arma::mat lp(y.n_rows, y.n_cols, arma::fill::zeros);

    // Element-wise Bernoulli log-likelihood of y given the logits w;
    // the usage is consistent with the scalar softmax() here being
    // softplus, so that -softmax(-w) = log(sigmoid(w)).
    for(arma::uword j = 0; j < y.n_cols; ++j) {
        for(arma::uword i = 0; i < y.n_rows; ++i) {
            lp(i,j) = y(i,j) * (-softmax(-w(i,j))) + (1 - y(i,j)) * (-softmax(w(i,j)));
        }
    }

    LogPRowCol res;
    res.log_p_row_train = shared_ptr<arma::rowvec>(new arma::rowvec(arma::sum(lp, 0)));
    res.log_p_col_train = shared_ptr<arma::vec>(new arma::vec(arma::sum(lp, 1)));
    return res;
}
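/*
 * Here softmax() is applied to a scalar logit, and the pattern
 * y*(-softmax(-w)) + (1-y)*(-softmax(w)) matches the Bernoulli
 * log-likelihood y*log(sigmoid(w)) + (1-y)*log(1-sigmoid(w)) exactly when
 * softmax(x) = log(1 + exp(x)), i.e. softplus. A sketch under that
 * assumption; the overflow guard is an addition, not a quote of the
 * original helper.
 */
#include <cmath>

// Scalar softplus, log(1 + exp(x)), split by sign to avoid exp() overflow.
// With it, -softmax(-w) == log(sigmoid(w)) and -softmax(w) == log(1 - sigmoid(w)).
inline double softmax(double x)
{
    return x > 0 ? x + std::log1p(std::exp(-x))
                 : std::log1p(std::exp(x));
}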
void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups,
                 int group_offset, int stride, float temp, float *output)
{
    int g, b;
    for(b = 0; b < batch; ++b){
        for(g = 0; g < groups; ++g){
            softmax(input + b*batch_offset + g*group_offset, n, temp, stride,
                    output + b*batch_offset + g*group_offset);
        }
    }
}
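/*
 * softmax_cpu() dispatches to a single-group softmax(input, n, temp, stride,
 * output). A sketch consistent with that call signature, in the darknet
 * style: temperature-scaled, max-subtracted for stability, reading every
 * stride-th element. The body is a reconstruction, not a quote of the
 * project's helper.
 */
#include <float.h>
#include <math.h>

/* Softmax over n values spaced `stride` apart, with temperature `temp`.
 * Subtracting the largest input keeps expf() from overflowing. */
void softmax(float *input, int n, float temp, int stride, float *output)
{
    int i;
    float sum = 0;
    float largest = -FLT_MAX;
    for(i = 0; i < n; ++i){
        if(input[i*stride] > largest) largest = input[i*stride];
    }
    for(i = 0; i < n; ++i){
        float e = expf((input[i*stride] - largest)/temp);
        sum += e;
        output[i*stride] = e;
    }
    for(i = 0; i < n; ++i){
        output[i*stride] /= sum;
    }
}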
bool small_test() {
    const int alphabet_size = 5;
    const int T = 2;

    std::vector<float> activations = {0.1, 0.6, 0.1, 0.1, 0.1,
                                      0.1, 0.1, 0.6, 0.1, 0.1};

    // Calculate the score analytically
    float expected_score;
    {
        std::vector<float> probs(activations.size());
        softmax(activations.data(), alphabet_size, T, probs.data());

        // Score calculation is specific to the given activations above
        expected_score = probs[1] * probs[7];
    }

    std::vector<int> labels = {1, 2};
    std::vector<int> label_lengths = {2};

    std::vector<int> lengths;
    lengths.push_back(T);

    float score;

    ctcComputeInfo info;
    info.loc = CTC_CPU;
    info.num_threads = 1;

    size_t cpu_alloc_bytes;
    throw_on_error(get_workspace_size(label_lengths.data(), lengths.data(),
                                      alphabet_size, lengths.size(), info,
                                      &cpu_alloc_bytes),
                   "Error: get_workspace_size in small_test");

    void* ctc_cpu_workspace = malloc(cpu_alloc_bytes);

    throw_on_error(compute_ctc_loss(activations.data(), NULL,
                                    labels.data(), label_lengths.data(),
                                    lengths.data(), alphabet_size,
                                    lengths.size(), &score,
                                    ctc_cpu_workspace, info),
                   "Error: compute_ctc_loss in small_test");

    free(ctc_cpu_workspace);

    score = std::exp(-score);
    const float eps = 1e-6;

    const float lb = expected_score - eps;
    const float ub = expected_score + eps;

    return (score > lb && score < ub);
}
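/*
 * small_test() assumes a frame-wise softmax(acts, alphabet_size, T, probs):
 * one distribution per time step over the alphabet. A sketch under that
 * assumption (a reconstruction of the test helper, not the original). With
 * T = 2 and labels {1, 2}, the only alignment CTC accepts is label 1 at
 * t = 0 and label 2 at t = 1, which is why the analytic score above is
 * probs[1] * probs[7].
 */
#include <algorithm>
#include <cmath>

// One softmax per time step: probs[t*alphabet_size + a] is the probability
// of symbol a at frame t.
void softmax(const float* acts, int alphabet_size, int T, float* probs)
{
    for (int t = 0; t < T; ++t) {
        const float* in = acts + t * alphabet_size;
        float* out = probs + t * alphabet_size;
        float max_a = *std::max_element(in, in + alphabet_size);
        float sum = 0.f;
        for (int a = 0; a < alphabet_size; ++a) {
            out[a] = std::exp(in[a] - max_a);
            sum += out[a];
        }
        for (int a = 0; a < alphabet_size; ++a)
            out[a] /= sum;
    }
}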
/*
 * Apply softmax to out, then use it as a probability distribution
 * to select a move in 0..8.
 */
void random_softmax(fann_type *out, int *x, int *y)
{
    fann_type random_number;
    fann_type s;
    int j;

    softmax(out);
    random_number = ((fann_type) std::rand()) / ((fann_type) RAND_MAX);
    s = 0;
    /* Walk the CDF. Always take at least one step (std::rand() may return 0),
     * and stop at the last cell so floating-point rounding cannot push j
     * past the end of the array. */
    j = 0;
    do {
        s += out[j];
        ++j;
    } while (j < 9 && s < random_number);
    *x = (j - 1) % 3;
    *y = (j - 1) / 3;
}
void vec2output(float* output, Result& r)
{
    // gender
    r.gender = softmax(output + 0, 2);

    // age: accumulated over outputs 2..101, then shifted by one
    r.age = 0;
    for (int i = 2; i < 102; i++)
        r.age += output[i];
    r.age -= 1.0;

    // emotion
    r.emotion = softmax(output + 102, 10);
    r.glasses = softmax(output + 112, 3);
    r.mask    = softmax(output + 115, 2);

    // beauty
    r.beauty = 0;
    for (int i = 117; i < 127; i++) {
        r.beauty += output[i];
    }
    r.beauty = beauty_map_linear_standards(r.beauty, r.gender == 1, r.age <= 10);
    // r.beauty = beauty_map_linear(r.beauty);
}
void LR::update_online(int samp_class, sparse_feat &samp_feat, float learn_rate, float lambda)
{
    vector<float> score_vec = calc_score(samp_feat);          // W'*X
    vector<float> softmax_vec = softmax(score_vec);

    for (int i = 0; i < class_set_size; i++)                  // for each class
    {
        float error_term = ((int)(i == samp_class) - softmax_vec[i]);
        for (size_t j = 0; j < samp_feat.id_vec.size(); j++)  // for each active feature, update its weight
        {
            int feat_id = samp_feat.id_vec[j];
            float feat_value = samp_feat.value_vec[j];
            float delt = error_term * feat_value;             // gradient = x_j * (indicator - y_i)
            //float regul = lambda * omega[feat_id][i];
            omega[feat_id][i] += learn_rate * delt;           // update: learning rate * gradient
        }
    }
}
static TVector<TVector<double>> CalcSoftmax(const TVector<TVector<double>>& approx,
                                            NPar::TLocalExecutor* localExecutor)
{
    TVector<TVector<double>> probabilities = approx;
    const int threadCount = localExecutor->GetThreadCount() + 1;
    const int blockSize = (approx[0].ysize() + threadCount - 1) / threadCount;
    auto calcSoftmaxInBlock = [&](const int blockId) {
        int lastLineId = Min((blockId + 1) * blockSize, approx[0].ysize());
        TVector<double> line(approx.size());
        TVector<double> softmax(approx.size());
        for (int lineInd = blockId * blockSize; lineInd < lastLineId; ++lineInd) {
            for (int dim = 0; dim < approx.ysize(); ++dim) {
                line[dim] = approx[dim][lineInd];
            }
            CalcSoftmax(line, &softmax);
            for (int dim = 0; dim < approx.ysize(); ++dim) {
                probabilities[dim][lineInd] = softmax[dim];
            }
        }
    };
    localExecutor->ExecRange(calcSoftmaxInBlock, 0, threadCount,
                             NPar::TLocalExecutor::WAIT_COMPLETE);
    return probabilities;
}
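/*
 * The block-parallel wrapper above leans on a single-vector overload
 * CalcSoftmax(line, &softmax) that writes into a preallocated buffer.
 * A sketch of that overload under the usual definition; an assumption,
 * not necessarily the library's exact code.
 */
#include <algorithm>
#include <cmath>

// Softmax of one approx line into a preallocated output vector.
static void CalcSoftmax(const TVector<double>& line, TVector<double>* softmax)
{
    double maxValue = *std::max_element(line.begin(), line.end());
    double sum = 0.0;
    for (int dim = 0; dim < line.ysize(); ++dim) {
        (*softmax)[dim] = std::exp(line[dim] - maxValue);
        sum += (*softmax)[dim];
    }
    for (int dim = 0; dim < line.ysize(); ++dim) {
        (*softmax)[dim] /= sum;
    }
}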
SEXP vdp_softmax(SEXP matrix_M)
{
    int dim1, dim2;
    double *in, *out;
    SEXP output, dims;

    /******************** input variables ********************/
    in   = NUMERIC_POINTER(matrix_M);
    dim1 = INTEGER_POINTER(GET_DIM(matrix_M))[0];
    dim2 = INTEGER_POINTER(GET_DIM(matrix_M))[1];

    PROTECT(dims = allocVector(INTSXP, 2));
    INTEGER(dims)[0] = dim1;
    INTEGER(dims)[1] = dim2;

    /******************** output variables ********************/
    PROTECT(output = NEW_NUMERIC(dim1*dim2));
    SET_DIM(output, dims);
    out = NUMERIC_POINTER(output);

    softmax(dim1, dim2, in, out);

    UNPROTECT(2);
    return output;
}
long detection_fprop(
    float* conf,                 // score for each class for each box, num_box * num_class * bs
    float* loc,                  // location for each box, box * 4 * bs
    float* res_detection,        // final output buffer for boxes, bs * top_k
    float* prior_boxes,          // num_boxes * 4
    long*  res_batch_len,        // count of results for each batch element, bs
    const long num_boxes,        // num_boxes, each is a potential object
    const long num_class,        // number of classes
    const long bs,               // batch size
    const long nms_topk,         // top k boxes kept per class before NMS
    const long image_topk,       // top k boxes kept per image
    const float score_threshold, // minimum score to accept a box as an object
    const float nms_threshold)   // overlap threshold: boxes above it count as one object
{
    // sorted result indices
    long* index_batch = malloc(bs*num_boxes*num_class*sizeof(long));
    // scores to be sorted
    float* scores_batch = malloc(bs*num_boxes*num_class*sizeof(float));
    // temporary detections for each batch element, grows while iterating over classes
    float* temp_res_detection_batch = malloc(bs*num_class*nms_topk*6*sizeof(float));
    // internal memory storing sorted boxes for each class
    float* internal_detection_batch = malloc(bs*nms_topk*5*sizeof(float));
    // internal memory storing transformed locations
    float* proposal_batch = malloc(bs*num_boxes*4*sizeof(float));

    // transpose KLN to NKL
    float* conf_t = malloc(num_boxes * num_class * bs * sizeof(float));
    float* loc_t = malloc(num_boxes * 4 * bs * sizeof(float));
    mkl_somatcopy('r', 't', num_boxes*num_class, bs, 1.0, conf, bs, conf_t, num_boxes*num_class);
    mkl_somatcopy('r', 't', num_boxes*4, bs, 1.0, loc, bs, loc_t, num_boxes*4);

    #pragma omp parallel for
    for(long b = 0; b < bs; ++b) // loop over the batch
    {
        float* scores = scores_batch + b * num_boxes*num_class;
        float* temp_res_detection = temp_res_detection_batch + b * num_class*nms_topk*6;
        long* index = index_batch + b * num_boxes*num_class;
        float* internal_detection = internal_detection_batch + b * nms_topk*5;
        float* proposal = proposal_batch + b * num_boxes*4;

        // calculate class scores for this batch element using softmax
        float* conf_batch = conf_t + b * num_boxes * num_class;
        softmax(conf_batch, num_boxes, num_class);

        // store scores in class-major order
        mkl_somatcopy('r', 't', num_boxes, num_class, 1.0, conf_batch, num_class, scores, num_boxes);

        // transform locations into proposals
        bbox_transform_inv(prior_boxes, loc_t + b * 4 * num_boxes, proposal, num_boxes);

        long res_len = 0; // count of feasible boxes for this image
        for(long c = 1; c < num_class; ++c) // loop over classes
        {
            // for each class, select the top nms_topk boxes; indices go to index
            long sort_nums_res = get_top_N_index(scores + c*num_boxes, nms_topk, num_boxes,
                                                 score_threshold, index);
            if(sort_nums_res > 0)
            {
                // store location and score in internal_detection for the overlap check
                for(long i = 0; i < sort_nums_res; ++i)
                {
                    for(long j = 0; j < 4; ++j)
                        internal_detection[i*5+j] = proposal[index[i]*4+j];
                    internal_detection[i*5+4] = scores[c*num_boxes+i];
                }
                // remove overlapping boxes
                sort_nums_res = nms(internal_detection, index, nms_threshold, 1, sort_nums_res);
                // store the result in temp memory and append the class id, hence width 6
                for(long i = 0; i < sort_nums_res; ++i)
                {
                    float* temp = temp_res_detection + (res_len+i)*6;
                    for(long j = 0; j < 5; ++j)
                    {
                        temp[j] = internal_detection[index[i]*5+j];
                    }
                    temp[5] = c;
                }
                res_len += sort_nums_res;
            }
        }

        // select the top image_topk boxes for this image
        for(long i = 0; i < res_len; ++i)
        {
            scores[i] = temp_res_detection[i*6+4];
            index[i] = i;
        }
        long sort_nums_res = res_len;
        if(sort_nums_res > image_topk)
        {
            sort_nums_res = get_top_N_index(scores, image_topk, res_len, 0.0, index);
        }

        // store the sorted result in the final output
        float* temp = res_detection + b * image_topk * 6;
        for(long i = 0; i < sort_nums_res; ++i)
        {
            for(long j = 0; j < 6; ++j)
            {
                temp[i*6+j] = temp_res_detection[index[i]*6+j];
            }
        }
        res_batch_len[b] = sort_nums_res;
    }

    free(conf_t);
    free(loc_t);
    free(index_batch);
    free(scores_batch);
    free(temp_res_detection_batch);
    free(proposal_batch);
    free(internal_detection_batch);
    return 0; // declared long but originally fell off the end; report success
}
int main(int argc, char *argv[]){

  Params params;
  std::map<std::string, std::string> args;
  readArgs(argc, argv, args);
  if(args.find("algo") != args.end()){
    params.algo = args["algo"];
  }else{
    params.algo = "qdMCNat";
  }
  if(args.find("inst_file") != args.end())
    setParamsFromFile(args["inst_file"], args, params);
  else
    setParams(params.algo, args, params);

  createLogDir(params.dir_path);
  gen.seed(params.seed);

  // Load the dataset
  MyMatrix X_train, X_valid;
  VectorXd Y_train, Y_valid;
  loadMnist(params.ratio_train, X_train, X_valid, Y_train, Y_valid);
  //loadCIFAR10(params.ratio_train, X_train, X_valid, Y_train, Y_valid);
  //loadLightCIFAR10(params.ratio_train, X_train, X_valid, Y_train, Y_valid);

  // ConvNet parameters
  std::vector<ConvLayerParams> conv_params;
  ConvLayerParams conv_params1;
  conv_params1.Hf = 5;
  conv_params1.stride = 1;
  conv_params1.n_filter = 20;
  conv_params1.padding = 0;
  conv_params.push_back(conv_params1);

  ConvLayerParams conv_params2;
  conv_params2.Hf = 5;
  conv_params2.stride = 1;
  conv_params2.n_filter = 50;
  conv_params2.padding = 0;
  conv_params.push_back(conv_params2);

  std::vector<PoolLayerParams> pool_params;
  PoolLayerParams pool_params1;
  pool_params1.Hf = 2;
  pool_params1.stride = 2;
  pool_params.push_back(pool_params1);

  PoolLayerParams pool_params2;
  pool_params2.Hf = 2;
  pool_params2.stride = 2;
  pool_params.push_back(pool_params2);

  const unsigned n_conv_layer = conv_params.size();

  for(unsigned l = 0; l < conv_params.size(); l++){
    if(l == 0){
      conv_params[l].filter_size = conv_params[l].Hf * conv_params[l].Hf * params.img_depth;
      conv_params[l].N = (params.img_width - conv_params[l].Hf + 2*conv_params[l].padding)/conv_params[l].stride + 1;
    }
    else{
      conv_params[l].filter_size = conv_params[l].Hf * conv_params[l].Hf * conv_params[l-1].n_filter;
      conv_params[l].N = (pool_params[l-1].N - conv_params[l].Hf + 2*conv_params[l].padding)/conv_params[l].stride + 1;
    }
    pool_params[l].N = (conv_params[l].N - pool_params[l].Hf)/pool_params[l].stride + 1;
  }

  // Neural network parameters
  const unsigned n_training = X_train.rows();
  const unsigned n_valid = X_valid.rows();
  const unsigned n_feature = X_train.cols();
  const unsigned n_label = Y_train.maxCoeff() + 1;

  params.nn_arch.insert(params.nn_arch.begin(),
                        conv_params[n_conv_layer-1].n_filter * pool_params[n_conv_layer-1].N * pool_params[n_conv_layer-1].N);
  params.nn_arch.push_back(n_label);
  const unsigned n_layers = params.nn_arch.size();

  // Optimization parameters
  const int n_train_batch = ceil(n_training/(float)params.train_minibatch_size);
  const int n_valid_batch = ceil(n_valid/(float)params.valid_minibatch_size);
  double prev_loss = std::numeric_limits<double>::max();
  double eta = params.eta;

  // Create the convolutional layers
  std::vector<MyMatrix> conv_W(n_conv_layer);
  std::vector<MyMatrix> conv_W_T(n_conv_layer);
  std::vector<MyVector> conv_B(n_conv_layer);

  // Create the neural network
  MyMatrix W_out(params.nn_arch[n_layers-2], n_label);
  std::vector<MySpMatrix> W(n_layers-2);
  std::vector<MySpMatrix> Wt(n_layers-2);
  std::vector<MyVector> B(n_layers-1);

  double init_sigma = 0.;
  ActivationFunction act_func;
  ActivationFunction eval_act_func;
  if(params.act_func_name == "sigmoid"){
    init_sigma = 4.0;
    act_func = std::bind(logistic,true,_1,_2,_3);
    eval_act_func = std::bind(logistic,false,_1,_2,_3);
  }else if(params.act_func_name == "tanh"){
    init_sigma = 1.0;
    act_func = std::bind(my_tanh,true,_1,_2,_3);
    eval_act_func = std::bind(my_tanh,false,_1,_2,_3);
  }else if(params.act_func_name == "relu"){
    init_sigma = 1.0; // TODO: Find a good value
    act_func = std::bind(relu,true,_1,_2,_3);
    eval_act_func = std::bind(relu,false,_1,_2,_3);
  }else{
    std::cout << "Not implemented yet!" << std::endl;
    assert(false);
  }

  std::cout << "Initializing the network... ";
  params.n_params = initNetwork(params.nn_arch, params.act_func_name, params.sparsity,
                                conv_params, pool_params, W_out, W, Wt, B,
                                conv_W, conv_W_T, conv_B);
  // TODO: Init the conv bias

  // Deep copy of parameters for the adaptive rule
  std::vector<MyMatrix> mu_dW(n_layers-1);
  std::vector<MyVector> mu_dB(n_layers-1);

  MyMatrix pW_out = W_out;
  std::vector<MySpMatrix> pW = W;
  std::vector<MySpMatrix> pWt = Wt;
  std::vector<MyVector> pB = B;

  MyMatrix ppMii_out, ppM0i_out;
  MyVector ppM00_out;
  std::vector<MySpMatrix> ppMii, ppM0i;
  std::vector<MyVector> ppM00;

  MyMatrix pMii_out, pM0i_out;
  MyVector pM00_out;
  std::vector<MySpMatrix> pMii, pM0i;
  std::vector<MyVector> pM00;

  std::vector<MyMatrix> conv_ppMii, conv_ppM0i;
  std::vector<MyVector> conv_ppM00;
  std::vector<MyMatrix> conv_pMii, conv_pM0i;
  std::vector<MyVector> conv_pM00;

  // Convert the labels to one-hot vectors
  MyMatrix one_hot = MyMatrix::Zero(n_training, n_label);
  labels2oneHot(Y_train, one_hot);

  // Configure the logger
  std::ostream* logger;
  if(args.find("verbose") != args.end()){
    getOutput("", logger);
  }else{
    getOutput(params.file_path, logger);
  }

  double cumul_time = 0.;

  printDesc(params, logger);
  printConvDesc(params, conv_params, pool_params, logger);
  std::cout << "Starting the learning phase... " << std::endl;
  *logger << "Epoch Time(s) train_loss train_accuracy valid_loss valid_accuracy eta" << std::endl;

  for(unsigned i = 0; i < params.n_epoch; i++){
    for(unsigned j = 0; j < n_train_batch; j++){

      // Mini-batch creation
      unsigned curr_batch_size = 0;
      MyMatrix X_batch, one_hot_batch;
      getMiniBatch(j, params.train_minibatch_size, X_train, one_hot, params, conv_params[0],
                   curr_batch_size, X_batch, one_hot_batch);

      double prev_time = gettime();

      // Forward propagation for the conv layers
      std::vector<std::vector<unsigned>> poolIdxX1(n_conv_layer);
      std::vector<std::vector<unsigned>> poolIdxY1(n_conv_layer);
      MyMatrix z0;
      std::vector<MyMatrix> conv_A(conv_W.size());
      std::vector<MyMatrix> conv_Ap(conv_W.size());
      convFprop(curr_batch_size, conv_params, pool_params, act_func, conv_W, conv_B,
                X_batch, conv_A, conv_Ap, z0, poolIdxX1, poolIdxY1);

      // Forward propagation
      std::vector<MyMatrix> Z(n_layers-1);
      std::vector<MyMatrix> A(n_layers-2);
      std::vector<MyMatrix> Ap(n_layers-2);
      fprop(params.dropout_flag, act_func, W, W_out, B, z0, Z, A, Ap);

      // Compute the output and the error
      MyMatrix out;
      softmax(Z[n_layers-2], out);

      std::vector<MyMatrix> gradB(n_layers-1);
      gradB[n_layers-2] = out - one_hot_batch;

      // Backpropagation
      bprop(Wt, W_out, Ap, gradB);

      // Backpropagation for the conv layers
      std::vector<MyMatrix> conv_gradB(conv_W.size());
      MyMatrix layer_gradB = (gradB[0] * W[0].transpose());
      MyMatrix pool_gradB;
      layer2pool(curr_batch_size, pool_params[conv_W.size()-1].N,
                 conv_params[conv_W.size()-1].n_filter, layer_gradB, pool_gradB);
      convBprop(curr_batch_size, conv_params, pool_params, conv_W_T, conv_Ap,
                pool_gradB, conv_gradB, poolIdxX1, poolIdxY1);

      if(params.algo == "bprop"){
        update(eta, gradB, A, z0, params.regularizer, params.lambda, W_out, W, Wt, B);
        convUpdate(curr_batch_size, eta, conv_params, conv_gradB, conv_A, X_batch,
                   "", 0., conv_W, conv_W_T, conv_B);
      }else{

        // Compute the metric
        std::vector<MyMatrix> metric_gradB(n_layers-1);
        std::vector<MyMatrix> metric_conv_gradB(conv_params.size());

        if(params.algo == "qdMCNat"){

          // Monte-Carlo approximation of the metric
          std::vector<MyMatrix> mc_gradB(n_layers-1);
          computeMcError(out, mc_gradB[n_layers-2]);

          // Backpropagation
          bprop(Wt, W_out, Ap, mc_gradB);
          for(unsigned k = 0; k < gradB.size(); k++){
            metric_gradB[k] = mc_gradB[k].array().square();
          }

          // Backpropagation for the conv layers
          std::vector<MyMatrix> mc_conv_gradB(conv_W.size());
          MyMatrix mc_layer_gradB = (mc_gradB[0] * W[0].transpose());
          MyMatrix mc_pool_gradB;
          layer2pool(curr_batch_size, pool_params[conv_W.size()-1].N,
                     conv_params[conv_W.size()-1].n_filter, mc_layer_gradB, mc_pool_gradB);
          convBprop(curr_batch_size, conv_params, pool_params, conv_W_T, conv_Ap,
                    mc_pool_gradB, mc_conv_gradB, poolIdxX1, poolIdxY1);
          for(unsigned k = 0; k < conv_params.size(); k++){
            metric_conv_gradB[k] = mc_conv_gradB[k].array().square();
          }
        }
        else if(params.algo == "qdop"){
          for(unsigned k = 0; k < conv_params.size(); k++){
            metric_conv_gradB[k] = conv_gradB[k].array().square();
          }
          for(unsigned k = 0; k < gradB.size(); k++){
            metric_gradB[k] = gradB[k].array().square();
          }
        }
        else if(params.algo == "qdNat"){
          for(unsigned k = 0; k < conv_params.size(); k++){
            metric_conv_gradB[k] = conv_gradB[k].array().square();
          }
          for(unsigned k = 0; k < metric_gradB.size(); k++){
            metric_gradB[k] = MyMatrix::Zero(gradB[k].rows(), gradB[k].cols());
          }
          for(unsigned l = 0; l < n_label; l++){
            MyMatrix fisher_ohbatch = MyMatrix::Zero(curr_batch_size, n_label);
            fisher_ohbatch.col(l).setOnes();

            std::vector<MyMatrix> fgradB(n_layers-1);
            fgradB[n_layers-2] = out - fisher_ohbatch;
            bprop(Wt, W_out, Ap, fgradB);

            // Backpropagation for the conv layers
            std::vector<MyMatrix> fisher_conv_gradB(conv_W.size());
            MyMatrix fisher_layer_gradB = (fgradB[0] * W[0].transpose());
            MyMatrix fisher_pool_gradB;
            layer2pool(curr_batch_size, pool_params[conv_W.size()-1].N,
                       conv_params[conv_W.size()-1].n_filter, fisher_layer_gradB, fisher_pool_gradB);
            convBprop(curr_batch_size, conv_params, pool_params, conv_W_T, conv_Ap,
                      fisher_pool_gradB, fisher_conv_gradB, poolIdxX1, poolIdxY1);

            for(unsigned k = 0; k < conv_params.size(); k++){
              MyMatrix fisher_conv_gradB_sq = fisher_conv_gradB[k].array().square();
              for(unsigned m = 0; m < out.rows(); m++){
                for(unsigned f = 0; f < conv_params[k].n_filter; f++){
                  for(unsigned n = 0; n < conv_params[k].N * conv_params[k].N; n++){
                    fisher_conv_gradB_sq(f, m*conv_params[k].N*conv_params[k].N+n) *= out(m,l);
                  }
                }
              }
              metric_conv_gradB[k] += fisher_conv_gradB_sq;
            }
            for(unsigned k = 0; k < W.size(); k++){
              const unsigned rev_k = n_layers - k - 2;
              metric_gradB[rev_k] += (fgradB[rev_k].array().square().array().colwise() * out.array().col(l)).matrix();
            }
          }
        }

        bool init_flag = false;
        if(i == 0 && j == 0 && !params.init_metric_id){
          init_flag = true;
        }

        std::vector<MyMatrix> conv_Mii(conv_params.size());
        std::vector<MyMatrix> conv_M0i(conv_params.size());
        std::vector<MyVector> conv_M00(conv_params.size());
        buildConvQDMetric(curr_batch_size, metric_conv_gradB, conv_A, X_batch, conv_W,
                          params.matrix_reg, conv_Mii, conv_M0i, conv_M00);
        updateConvMetric(init_flag, params.metric_gamma, conv_pMii, conv_pM0i, conv_pM00,
                         conv_Mii, conv_M0i, conv_M00);

        MyMatrix Mii_out, M0i_out;
        MyVector M00_out;
        std::vector<MySpMatrix> Mii(W.size());
        std::vector<MySpMatrix> M0i(W.size());
        std::vector<MyVector> M00(W.size());
        buildQDMetric(metric_gradB, A, z0, W_out, W, params.matrix_reg,
                      Mii_out, M0i_out, M00_out, Mii, M0i, M00);
        updateMetric(init_flag, params.metric_gamma, Mii_out, M0i_out, M00_out,
                     Mii, M0i, M00, pMii_out, pM0i_out, pM00_out, pMii, pM0i, pM00);
        update(eta, gradB, A, z0, params.regularizer, params.lambda, W_out, W, Wt, B,
               Mii_out, M0i_out, M00_out, Mii, M0i, M00);
      }

      double curr_time = gettime();
      cumul_time += curr_time - prev_time;

      if(params.minilog_flag){
        double train_loss = 0.;
        double train_accuracy = 0.;
        double valid_loss = 0.;
        double valid_accuracy = 0.;
        evalModel(eval_act_func, params, n_train_batch, n_training, X_train, Y_train,
                  conv_params, pool_params, conv_W, conv_B, W_out, W, B,
                  train_loss, train_accuracy);
        evalModel(eval_act_func, params, n_valid_batch, n_valid, X_valid, Y_valid,
                  conv_params, pool_params, conv_W, conv_B, W_out, W, B,
                  valid_loss, valid_accuracy);

        // Logging
        *logger << i + float(j)/n_train_batch << " " << cumul_time << " "
                << train_loss << " " << train_accuracy << " "
                << valid_loss << " " << valid_accuracy << " " << eta << std::endl;
      }
    }
    if(!params.minilog_flag || params.adaptive_flag){
      double train_loss = 0.;
      double train_accuracy = 0.;
      double valid_loss = 0.;
      double valid_accuracy = 0.;
      evalModel(eval_act_func, params, n_train_batch, n_training, X_train, Y_train,
                conv_params, pool_params, conv_W, conv_B, W_out, W, B,
                train_loss, train_accuracy);
      evalModel(eval_act_func, params, n_valid_batch, n_valid, X_valid, Y_valid,
                conv_params, pool_params, conv_W, conv_B, W_out, W, B,
                valid_loss, valid_accuracy);

      // if(params.adaptive_flag)
      //   adaptiveRule(train_loss, prev_loss, eta, W, B, pMii, pM0i, pM00, pW, pB, ppMii, ppM0i, ppM00);

      // Logging
      if(!params.minilog_flag){
        *logger << i << " " << cumul_time << " "
                << train_loss << " " << train_accuracy << " "
                << valid_loss << " " << valid_accuracy << " " << eta << std::endl;
      }
    }
  }
}
void forward_detection_layer(const detection_layer l, network net)
{
    int locations = l.side*l.side;
    int i,j;
    memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float));
    //if(l.reorg) reorg(l.output, l.w*l.h, size*l.n, l.batch, 1);
    int b;
    if (l.softmax){
        for(b = 0; b < l.batch; ++b){
            int index = b*l.inputs;
            for (i = 0; i < locations; ++i) {
                int offset = i*l.classes;
                softmax(l.output + index + offset, l.classes, 1, 1,
                        l.output + index + offset);
            }
        }
    }
    if(net.train){
        float avg_iou = 0;
        float avg_cat = 0;
        float avg_allcat = 0;
        float avg_obj = 0;
        float avg_anyobj = 0;
        int count = 0;
        *(l.cost) = 0;
        int size = l.inputs * l.batch;
        memset(l.delta, 0, size * sizeof(float));
        for (b = 0; b < l.batch; ++b){
            int index = b*l.inputs;
            for (i = 0; i < locations; ++i) {
                int truth_index = (b*locations + i)*(1+l.coords+l.classes);
                int is_obj = net.truth[truth_index];
                for (j = 0; j < l.n; ++j) {
                    int p_index = index + locations*l.classes + i*l.n + j;
                    l.delta[p_index] = l.noobject_scale*(0 - l.output[p_index]);
                    *(l.cost) += l.noobject_scale*pow(l.output[p_index], 2);
                    avg_anyobj += l.output[p_index];
                }

                int best_index = -1;
                float best_iou = 0;
                float best_rmse = 20;

                if (!is_obj){
                    continue;
                }

                int class_index = index + i*l.classes;
                for(j = 0; j < l.classes; ++j) {
                    l.delta[class_index+j] = l.class_scale * (net.truth[truth_index+1+j] - l.output[class_index+j]);
                    *(l.cost) += l.class_scale * pow(net.truth[truth_index+1+j] - l.output[class_index+j], 2);
                    if(net.truth[truth_index + 1 + j]) avg_cat += l.output[class_index+j];
                    avg_allcat += l.output[class_index+j];
                }

                box truth = float_to_box(net.truth + truth_index + 1 + l.classes, 1);
                truth.x /= l.side;
                truth.y /= l.side;

                for(j = 0; j < l.n; ++j){
                    int box_index = index + locations*(l.classes + l.n) + (i*l.n + j) * l.coords;
                    box out = float_to_box(l.output + box_index, 1);
                    out.x /= l.side;
                    out.y /= l.side;

                    if (l.sqrt){
                        out.w = out.w*out.w;
                        out.h = out.h*out.h;
                    }

                    float iou  = box_iou(out, truth);
                    //iou = 0;
                    float rmse = box_rmse(out, truth);
                    if(best_iou > 0 || iou > 0){
                        if(iou > best_iou){
                            best_iou = iou;
                            best_index = j;
                        }
                    }else{
                        if(rmse < best_rmse){
                            best_rmse = rmse;
                            best_index = j;
                        }
                    }
                }

                if(l.forced){
                    if(truth.w*truth.h < .1){
                        best_index = 1;
                    }else{
                        best_index = 0;
                    }
                }
                if(l.random && *(net.seen) < 64000){
                    best_index = rand()%l.n;
                }

                int box_index = index + locations*(l.classes + l.n) + (i*l.n + best_index) * l.coords;
                int tbox_index = truth_index + 1 + l.classes;

                box out = float_to_box(l.output + box_index, 1);
                out.x /= l.side;
                out.y /= l.side;
                if (l.sqrt) {
                    out.w = out.w*out.w;
                    out.h = out.h*out.h;
                }
                float iou = box_iou(out, truth);

                int p_index = index + locations*l.classes + i*l.n + best_index;
                *(l.cost) -= l.noobject_scale * pow(l.output[p_index], 2);
                *(l.cost) += l.object_scale * pow(1-l.output[p_index], 2);
                avg_obj += l.output[p_index];
                l.delta[p_index] = l.object_scale * (1.-l.output[p_index]);

                if(l.rescore){
                    l.delta[p_index] = l.object_scale * (iou - l.output[p_index]);
                }

                l.delta[box_index+0] = l.coord_scale*(net.truth[tbox_index + 0] - l.output[box_index + 0]);
                l.delta[box_index+1] = l.coord_scale*(net.truth[tbox_index + 1] - l.output[box_index + 1]);
                l.delta[box_index+2] = l.coord_scale*(net.truth[tbox_index + 2] - l.output[box_index + 2]);
                l.delta[box_index+3] = l.coord_scale*(net.truth[tbox_index + 3] - l.output[box_index + 3]);
                if(l.sqrt){
                    l.delta[box_index+2] = l.coord_scale*(sqrt(net.truth[tbox_index + 2]) - l.output[box_index + 2]);
                    l.delta[box_index+3] = l.coord_scale*(sqrt(net.truth[tbox_index + 3]) - l.output[box_index + 3]);
                }

                *(l.cost) += pow(1-iou, 2);
                avg_iou += iou;
                ++count;
            }
        }

        if(0){
            float *costs = calloc(l.batch*locations*l.n, sizeof(float));
            for (b = 0; b < l.batch; ++b) {
                int index = b*l.inputs;
                for (i = 0; i < locations; ++i) {
                    for (j = 0; j < l.n; ++j) {
                        int p_index = index + locations*l.classes + i*l.n + j;
                        costs[b*locations*l.n + i*l.n + j] = l.delta[p_index]*l.delta[p_index];
                    }
                }
            }
            int indexes[100];
            top_k(costs, l.batch*locations*l.n, 100, indexes);
            float cutoff = costs[indexes[99]];
            for (b = 0; b < l.batch; ++b) {
                int index = b*l.inputs;
                for (i = 0; i < locations; ++i) {
                    for (j = 0; j < l.n; ++j) {
                        int p_index = index + locations*l.classes + i*l.n + j;
                        if (l.delta[p_index]*l.delta[p_index] < cutoff) l.delta[p_index] = 0;
                    }
                }
            }
            free(costs);
        }

        *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2);

        printf("Detection Avg IOU: %f, Pos Cat: %f, All Cat: %f, Pos Obj: %f, Any Obj: %f, count: %d\n",
               avg_iou/count, avg_cat/count, avg_allcat/(count*l.classes),
               avg_obj/count, avg_anyobj/(l.batch*locations*l.n), count);
        //if(l.reorg) reorg(l.delta, l.w*l.h, size*l.n, l.batch, 0);
    }
}
//NNFF performs a feedforward pass
double FBNN::nnff(const FMatrix& x, const FMatrix& y)
{
    double L = 0;
    if(m_oAs.empty())
    {
        for(int i = 0; i < m_iN; ++i)
            m_oAs.push_back(std::make_shared<FMatrix>(FMatrix()));
    }
    *m_oAs[0] = addPreColumn(x, 1);
    if(m_fDropoutFraction > 0 && !m_fTesting)
    {
        if(m_odOMs.empty()) // clear the dropout mask
        {
            for(int i = 0; i < m_iN - 1; ++i)
                m_odOMs.push_back(std::make_shared<FMatrix>(FMatrix()));
        }
    }

    // feedforward pass
    for(int i = 1; i < m_iN - 1; ++i)
    {
        // activation function
        if(m_strActivationFunction == "sigm")
        {
            // Calculate the unit's outputs (including the bias term)
            *m_oAs[i] = sigm((*m_oAs[i-1]) * blaze::trans(*m_oWs[i-1]));
        }
        else if(m_strActivationFunction == "tanh_opt")
        {
            *m_oAs[i] = tanh_opt((*m_oAs[i-1]) * blaze::trans(*m_oWs[i-1]));
        }

        // dropout
        if(m_fDropoutFraction > 0)
        {
            if(m_fTesting)
                *m_oAs[i] = (*m_oAs[i]) * (1 - m_fDropoutFraction);
            else
            {
                *m_odOMs[i] = rand(m_oAs[i]->rows(), m_oAs[i]->columns()) > m_fDropoutFraction;
                *m_oAs[i] = bitWiseMul(*m_oAs[i], *m_odOMs[i]);
            }
        }

        // calculate running exponential activations for use with sparsity
        if(m_fNonSparsityPenalty > 0)
            *m_oPs[i] = (*m_oPs[i]) * 0.99 + columnMean(*m_oAs[i]);

        // Add the bias term
        *m_oAs[i] = addPreColumn(*m_oAs[i], 1);
    }

    // output layer
    if(m_strOutput == "sigm")
    {
        *m_oAs[m_iN-1] = sigm((*m_oAs[m_iN-2]) * blaze::trans(*m_oWs[m_iN-2]));
    }
    else if(m_strOutput == "linear")
    {
        *m_oAs[m_iN-1] = (*m_oAs[m_iN-2]) * blaze::trans(*m_oWs[m_iN-2]);
    }
    else if(m_strOutput == "softmax")
    {
        *m_oAs[m_iN-1] = softmax((*m_oAs[m_iN-2]) * blaze::trans(*m_oWs[m_iN-2]));
    }

    // error and loss
    m_oEp = std::make_shared<FMatrix>(y - (*m_oAs[m_iN-1]));
    if(m_strOutput == "sigm" || m_strOutput == "linear")
    {
        L = 0.5 * matrixSum(bitWiseSquare(*m_oEp)) / x.rows();
    }
    else if(m_strOutput == "softmax") // cross-entropy loss
    {
        L = -matrixSum(bitWiseMul(y, bitWiseLog(*m_oAs[m_iN-1]))) / x.rows();
    }
    return L;
}
void RNNSoftmaxLayer::feedForward(int inputSeqLen) {
    // Each time step writes only its own output row, so the loop
    // parallelizes safely across the sequence.
    #pragma omp parallel for
    for (int seqIdx = 1; seqIdx <= inputSeqLen; ++seqIdx) {
        softmax(m_outputActs[seqIdx], m_inputActs[seqIdx], m_numNeuron);
    }
}
void RNNSoftmaxLayer::forwardStep(int seqIdx) {
    softmax(m_outputActs[seqIdx], m_inputActs[seqIdx], m_numNeuron);
}
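/*
 * Both RNNSoftmaxLayer methods call a three-argument softmax(dst, src, n)
 * with the destination first, as at the call sites. A sketch under that
 * assumed signature; the max-subtraction stability trick is an addition,
 * not necessarily the original helper.
 */
#include <algorithm>
#include <cmath>

// Writes softmax(src[0..n)) into dst[0..n).
static void softmax(float* dst, const float* src, int n)
{
    float maxv = *std::max_element(src, src + n);
    float sum = 0.f;
    for (int k = 0; k < n; ++k) {
        dst[k] = std::exp(src[k] - maxv);
        sum += dst[k];
    }
    for (int k = 0; k < n; ++k)
        dst[k] /= sum;
}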
int stepwise_regression(int N, int D, int M, double w[M][D], float w_resamp[M][D],
                        double X[D][N], double Xw[M][N], double E[M][N], double S[N],
                        const double XY[M][D], const double B[D], const double delta[D],
                        double maxiter, double tol, double decay_rate, double decay_min,
                        int seed, int verbose)
{
    srand(seed);

    if (verbose)
    {
        // Output parameters in verbose mode
        printf("SMLR: random seed=%d\n", seed);
        printf("SMLR: decay: r=%g, min=%g\n", decay_rate, decay_min);
        printf("SMLR: tol=%g, maxiter=%g\n", tol, maxiter);
    }

    // initialize the iterative optimization
    int iter;
    double incr = DBL_MAX;

    // Begin iterative optimization
    for (iter = 0; iter < maxiter; iter++)
    {
        // Reset performance indicators for this iteration
        int wasted = 0;
        int saved = 0;
        int nonzero = 0;

        // zero out the sums for assessing convergence
        double sum2_w_diff = 0;
        double sum2_w_old = 0;

        // update each weight
        for (int d = 0; d < D; d++)
        {
            for (int m = 0; m < M; m++)
            {
                // get the starting weight
                double w_old = w[m][d];

                // Sample randomly to determine update probability
                double r = ((double)rand())/((double)RAND_MAX);

                // Update a given weight if it is non-zero or within the sampling dist
                if (w_old != 0 || r < w_resamp[m][d])
                {
                    // Update predictions; the reduction clause is required so
                    // the threads accumulate XdotP without a data race.
                    double XdotP = 0.0;
                    #pragma omp parallel for reduction(+:XdotP)
                    for (int i = 0; i < N; i++)
                        XdotP += X[d][i] * E[m][i]/S[i];

                    // get the gradient
                    double grad = XY[m][d] - XdotP;

                    // Calculate the new weight
                    double w_new = softmax(w_old + grad/B[d], delta[d]);

                    // Update our efficiency measures + resampling probabilities
                    if (w_new == 0 && w_old == 0)
                    {
                        wasted++;
                        w_resamp[m][d] = (w_resamp[m][d]-decay_min)*decay_rate + decay_min;
                    }
                    if (w_new != 0 && w_old == 0)
                    {
                        saved++;
                        w_resamp[m][d] = 1;
                    }

                    double w_diff = w_new - w_old;

                    // If the weight changed, update our running calculations
                    if (w_diff != 0)
                    {
                        #pragma omp parallel for
                        for (int i = 0; i < N; i++)
                        {
                            Xw[m][i] += X[d][i]*w_diff;
                            double E_new_m = exp(Xw[m][i]);
                            S[i] += E_new_m - E[m][i];
                            E[m][i] = E_new_m;
                        }

                        // update the weight
                        w[m][d] = w_new;

                        // keep track of the sum of squared weight changes
                        sum2_w_diff += w_diff*w_diff;
                    }

                    if (w_new != 0)
                        nonzero++;

                    // might not have changed, but could be a non-zero weight, so add to the sum
                    sum2_w_old += w_old*w_old;
                }
            }
        }

        // finished an iteration; assess convergence
        incr = sqrt(sum2_w_diff) / (sqrt(sum2_w_old)+DBL_EPSILON);
        if (verbose)
            printf("SMLR [%d]: incr=%g (saved %d, wasted %d, nonzero %d)\n",
                   iter, incr, saved, wasted, nonzero);

        // Check for convergence
        if (incr < tol)
            break;
    }
    return iter;
}
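/*
 * Despite its name, softmax(w_old + grad/B[d], delta[d]) in the SMLR update
 * above behaves as a scalar soft-threshold: it is what produces exact zeros,
 * and hence sparsity, in the weight matrix. A sketch under that reading, an
 * assumption from the call site rather than a quote of the original helper.
 */
#include <math.h>

/* Soft-threshold: shrink x toward zero by delta, returning exactly zero
 * inside [-delta, delta]. Weights that fall in that band are pruned. */
static double softmax(double x, double delta)
{
    if (fabs(x) <= delta)
        return 0.0;
    return x > 0 ? x - delta : x + delta;
}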
void forward_region_layer(const region_layer l, network_state state)
{
    int i,j,b,t,n;
    int size = l.coords + l.classes + 1;
    memcpy(l.output, state.input, l.outputs*l.batch*sizeof(float));
#ifndef GPU
    flatten(l.output, l.w*l.h, size*l.n, l.batch, 1);
#endif
    for (b = 0; b < l.batch; ++b){
        for(i = 0; i < l.h*l.w*l.n; ++i){
            int index = size*i + b*l.outputs;
            l.output[index + 4] = logistic_activate(l.output[index + 4]);
        }
    }

#ifndef GPU
    if (l.softmax_tree){
        for (b = 0; b < l.batch; ++b){
            for(i = 0; i < l.h*l.w*l.n; ++i){
                int index = size*i + b*l.outputs;
                softmax_tree(l.output + index + 5, 1, 0, 1, l.softmax_tree, l.output + index + 5);
            }
        }
    } else if (l.softmax){
        for (b = 0; b < l.batch; ++b){
            for(i = 0; i < l.h*l.w*l.n; ++i){
                int index = size*i + b*l.outputs;
                softmax(l.output + index + 5, l.classes, 1, l.output + index + 5, 1);
            }
        }
    }
#endif

    if(!state.train) return;
    memset(l.delta, 0, l.outputs * l.batch * sizeof(float));
    float avg_iou = 0;
    float recall = 0;
    float avg_cat = 0;
    float avg_obj = 0;
    float avg_anyobj = 0;
    int count = 0;
    int class_count = 0;
    *(l.cost) = 0;
    for (b = 0; b < l.batch; ++b) {
        if(l.softmax_tree){
            int onlyclass_id = 0;
            for(t = 0; t < l.max_boxes; ++t){
                box truth = float_to_box(state.truth + t*5 + b*l.truths);
                if(!truth.x) break; // continue;
                int class_id = state.truth[t*5 + b*l.truths + 4];
                float maxp = 0;
                int maxi = 0;
                if(truth.x > 100000 && truth.y > 100000){
                    for(n = 0; n < l.n*l.w*l.h; ++n){
                        int index = size*n + b*l.outputs + 5;
                        float scale = l.output[index-1];
                        float p = scale*get_hierarchy_probability(l.output + index, l.softmax_tree, class_id);
                        if(p > maxp){
                            maxp = p;
                            maxi = n;
                        }
                    }
                    int index = size*maxi + b*l.outputs + 5;
                    delta_region_class(l.output, l.delta, index, class_id, l.classes,
                                       l.softmax_tree, l.class_scale, &avg_cat, l.focal_loss);
                    ++class_count;
                    onlyclass_id = 1;
                    break;
                }
            }
            if(onlyclass_id) continue;
        }
        for (j = 0; j < l.h; ++j) {
            for (i = 0; i < l.w; ++i) {
                for (n = 0; n < l.n; ++n) {
                    int index = size*(j*l.w*l.n + i*l.n + n) + b*l.outputs;
                    box pred = get_region_box(l.output, l.biases, n, index, i, j, l.w, l.h);
                    float best_iou = 0;
                    int best_class_id = -1;
                    for(t = 0; t < l.max_boxes; ++t){
                        box truth = float_to_box(state.truth + t*5 + b*l.truths);
                        int class_id = state.truth[t*5 + b*l.truths + 4];
                        if (class_id >= l.classes) continue; // skip labels whose class_id exceeds the classes in the cfg file
                        if(!truth.x) break; // continue;
                        float iou = box_iou(pred, truth);
                        if (iou > best_iou) {
                            best_class_id = state.truth[t*5 + b*l.truths + 4];
                            best_iou = iou;
                        }
                    }
                    avg_anyobj += l.output[index + 4];
                    l.delta[index + 4] = l.noobject_scale * ((0 - l.output[index + 4]) * logistic_gradient(l.output[index + 4]));
                    if(l.classfix == -1)
                        l.delta[index + 4] = l.noobject_scale * ((best_iou - l.output[index + 4]) * logistic_gradient(l.output[index + 4]));
                    else{
                        if (best_iou > l.thresh) {
                            l.delta[index + 4] = 0;
                            if(l.classfix > 0){
                                delta_region_class(l.output, l.delta, index + 5, best_class_id, l.classes,
                                                   l.softmax_tree,
                                                   l.class_scale*(l.classfix == 2 ? l.output[index + 4] : 1),
                                                   &avg_cat, l.focal_loss);
                                ++class_count;
                            }
                        }
                    }
                    if(*(state.net.seen) < 12800){
                        box truth = {0};
                        truth.x = (i + .5)/l.w;
                        truth.y = (j + .5)/l.h;
                        truth.w = l.biases[2*n];
                        truth.h = l.biases[2*n+1];
                        if(DOABS){
                            truth.w = l.biases[2*n]/l.w;
                            truth.h = l.biases[2*n+1]/l.h;
                        }
                        delta_region_box(truth, l.output, l.biases, n, index, i, j, l.w, l.h, l.delta, .01);
                    }
                }
            }
        }
        for(t = 0; t < l.max_boxes; ++t){
            box truth = float_to_box(state.truth + t*5 + b*l.truths);
            int class_id = state.truth[t*5 + b*l.truths + 4];
            if (class_id >= l.classes) {
                printf(" Warning: in txt-labels class_id=%d >= classes=%d in cfg-file. In txt-labels class_id should be [from 0 to %d] \n",
                       class_id, l.classes, l.classes-1);
                getchar();
                continue; // skip labels whose class_id exceeds the classes in the cfg file
            }
            if(!truth.x) break; // continue;
            float best_iou = 0;
            int best_index = 0;
            int best_n = 0;
            i = (truth.x * l.w);
            j = (truth.y * l.h);
            box truth_shift = truth;
            truth_shift.x = 0;
            truth_shift.y = 0;
            for(n = 0; n < l.n; ++n){
                int index = size*(j*l.w*l.n + i*l.n + n) + b*l.outputs;
                box pred = get_region_box(l.output, l.biases, n, index, i, j, l.w, l.h);
                if(l.bias_match){
                    pred.w = l.biases[2*n];
                    pred.h = l.biases[2*n+1];
                    if(DOABS){
                        pred.w = l.biases[2*n]/l.w;
                        pred.h = l.biases[2*n+1]/l.h;
                    }
                }
                pred.x = 0;
                pred.y = 0;
                float iou = box_iou(pred, truth_shift);
                if (iou > best_iou){
                    best_index = index;
                    best_iou = iou;
                    best_n = n;
                }
            }
            float iou = delta_region_box(truth, l.output, l.biases, best_n, best_index,
                                         i, j, l.w, l.h, l.delta, l.coord_scale);
            if(iou > .5) recall += 1;
            avg_iou += iou;
            avg_obj += l.output[best_index + 4];
            l.delta[best_index + 4] = l.object_scale * (1 - l.output[best_index + 4]) * logistic_gradient(l.output[best_index + 4]);
            if (l.rescore) {
                l.delta[best_index + 4] = l.object_scale * (iou - l.output[best_index + 4]) * logistic_gradient(l.output[best_index + 4]);
            }
            if (l.map) class_id = l.map[class_id];
            delta_region_class(l.output, l.delta, best_index + 5, class_id, l.classes,
                               l.softmax_tree, l.class_scale, &avg_cat, l.focal_loss);
            ++count;
            ++class_count;
        }
    }
#ifndef GPU
    flatten(l.delta, l.w*l.h, size*l.n, l.batch, 0);
#endif
    *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2);
    printf("Region Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, Avg Recall: %f, count: %d\n",
           avg_iou/count, avg_cat/class_count, avg_obj/count,
           avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, count);
}
int main(int argc, char *argv[]){
    opt_register_table(options, NULL);
    if (!opt_parse(&argc, argv, opt_log_stderr)){
        exit(1);
    }
    if (argc == 1){
        DEBUG("No text files to evaluate!");
        opt_usage_and_exit(argv[0]);
    }
    DEBUG("given %d arguments", argc - 1);
    RecurNN *net = rnn_load_net(opt_filename);
    RnnCharAlphabet *alphabet = rnn_char_new_alphabet_from_net(net);
    init_rand64_maybe_randomly(&net->rng, -1);

    int len = 0;
    int count = 0;
    if (opt_min_length <= opt_ignore_start){
        DEBUG("hey! --min-length=%d <= --ignore-start=%d! Fixing... now it's %d.",
              opt_min_length, opt_ignore_start, opt_ignore_start + 1);
        opt_min_length = opt_ignore_start + 1;
    }
    float sum[net->output_size];
    float sumsq[net->output_size];
    float mean[net->output_size];
    float stddev[net->output_size];

    for (int i = 1; i < argc; i++){
        const char *filename = argv[i];
        u8* text = rnn_char_load_new_encoded_text(filename, alphabet, &len, 3);
        if (len >= opt_min_length){
            memset(sum, 0, net->output_size * sizeof(float));
            memset(sumsq, 0, net->output_size * sizeof(float));
            int j, k;
            /* warm the net up on the ignored prefix */
            for (j = 0; j < opt_ignore_start; j++){
                one_hot_opinion(net, text[j], 0);
            }
            for (j = opt_ignore_start; j < len; j++){
                float *raw = one_hot_opinion(net, text[j], 0);
                float *answer = mean;
                softmax(answer, raw, net->output_size);
                for (k = 0; k < net->output_size; k++){
                    float a = answer[k];
                    sum[k] += a;
                    sumsq[k] += a * a;
                }
            }
            for (k = 0; k < net->output_size; k++){
                float m = sum[k] / (len - opt_ignore_start);
                stddev[k] = sqrtf(sumsq[k] / (len - opt_ignore_start) - m * m);
                mean[k] = m;
            }
            printf("%s mean: ", filename);
            for (k = 0; k < net->output_size; k++){
                printf("%.3e ", mean[k]);
            }
            printf(" stddev: ");
            for (k = 0; k < net->output_size; k++){
                printf("%.3e ", stddev[k]);
            }
            puts("\n");
            count++; /* was never incremented; the summary below always reported 0 */
        }
        free(text);
    }
    DEBUG("processed %d texts", count);
    return 0;
}