tnn_error tnn_module_bprop_linear(tnn_module *m){
  tnn_error ret;
  gsl_matrix w;
  gsl_matrix dw;

  //Routine check
  if(m->t != TNN_MODULE_TYPE_LINEAR){
    return TNN_ERROR_MODULE_MISTYPE;
  }
  if(m->input->valid != true || m->output->valid != true || m->w.valid != true){
    return TNN_ERROR_STATE_INVALID;
  }

  //Transform the matrix
  TNN_MACRO_ERRORTEST(tnn_numeric_v2m(&m->w.x, &w, m->output->size, m->input->size), ret);
  TNN_MACRO_ERRORTEST(tnn_numeric_v2m(&m->w.dx, &dw, m->output->size, m->input->size), ret);

  //bprop to input
  TNN_MACRO_GSLTEST(gsl_blas_dgemv(CblasTrans, 1.0, &w, &m->output->dx, 0.0, &m->input->dx));

  //bprop to dw
  gsl_matrix_set_zero(&dw);
  TNN_MACRO_GSLTEST(gsl_blas_dger(1.0, &m->output->dx, &m->input->x, &dw));

  return TNN_ERROR_SUCCESS;
}
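//For reference, the two BLAS calls above implement the textbook linear-module
//gradients: with y = W*x, bprop computes dx = W^T*dy (gsl_blas_dgemv with
//CblasTrans) and dW = dy*x^T (the rank-1 update gsl_blas_dger). The following
//is a minimal standalone sketch of those same two calls on toy 2x3 shapes;
//everything named tnn_example_* is illustrative and not part of the library.
#include <stdio.h>
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_blas.h>

static void tnn_example_linear_grads(void){
  gsl_matrix *w  = gsl_matrix_calloc(2, 3); //W: output size 2, input size 3
  gsl_matrix *dw = gsl_matrix_calloc(2, 3); //dW
  gsl_vector *x  = gsl_vector_alloc(3);     //input
  gsl_vector *dy = gsl_vector_alloc(2);     //gradient w.r.t. output
  gsl_vector *dx = gsl_vector_alloc(3);     //gradient w.r.t. input

  gsl_matrix_set(w, 0, 0, 1.0);
  gsl_matrix_set(w, 1, 2, 2.0);
  gsl_vector_set(x, 0, 1.0); gsl_vector_set(x, 1, 2.0); gsl_vector_set(x, 2, 3.0);
  gsl_vector_set(dy, 0, 0.5); gsl_vector_set(dy, 1, -1.0);

  //dx = W^T * dy -- the same call bprop_linear makes
  gsl_blas_dgemv(CblasTrans, 1.0, w, dy, 0.0, dx);

  //dW = dy * x^T -- zero, then rank-1 update, as in bprop_linear
  gsl_matrix_set_zero(dw);
  gsl_blas_dger(1.0, dy, x, dw);

  printf("dx = (%g, %g, %g)\n",
         gsl_vector_get(dx, 0), gsl_vector_get(dx, 1), gsl_vector_get(dx, 2));
  printf("dW[1][2] = %g\n", gsl_matrix_get(dw, 1, 2)); //expect dy[1]*x[2] = -3

  gsl_matrix_free(w); gsl_matrix_free(dw);
  gsl_vector_free(x); gsl_vector_free(dy); gsl_vector_free(dx);
}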
tnn_error tnn_module_fprop_bias(tnn_module *m){
  //Routine check
  if(m->t != TNN_MODULE_TYPE_BIAS){
    return TNN_ERROR_MODULE_MISTYPE;
  }
  if(m->input->valid != true || m->output->valid != true || m->w.valid != true){
    return TNN_ERROR_STATE_INVALID;
  }

  //fprop to output
  TNN_MACRO_GSLTEST(gsl_blas_dcopy(&m->input->x, &m->output->x));
  TNN_MACRO_GSLTEST(gsl_blas_daxpy(1.0, &m->w.x, &m->output->x));

  return TNN_ERROR_SUCCESS;
}
tnn_error tnn_loss_bprop_euclidean(tnn_loss *l){
  //Routine check
  if(l->t != TNN_LOSS_TYPE_EUCLIDEAN){
    return TNN_ERROR_LOSS_MISTYPE;
  }
  if(l->input1->valid != true || l->input2->valid != true || l->output->valid != true){
    return TNN_ERROR_STATE_INVALID;
  }

  //bprop to input1 and input2: dx = 2*dl*(x - y); dy = 2*dl*(y - x)
  TNN_MACRO_GSLTEST(gsl_blas_dcopy(&l->input1->x, &l->input1->dx));
  TNN_MACRO_GSLTEST(gsl_blas_daxpy(-1.0, &l->input2->x, &l->input1->dx));
  gsl_blas_dscal(2.0*gsl_vector_get(&l->output->dx, 0), &l->input1->dx);
  TNN_MACRO_GSLTEST(gsl_blas_dcopy(&l->input1->dx, &l->input2->dx));
  gsl_blas_dscal(-1.0, &l->input2->dx);

  return TNN_ERROR_SUCCESS;
}
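//The bprop above relies on d/dx ||x - y||^2 = 2*(x - y) (and its negation
//for y), chained with the incoming gradient in output->dx. Below is a
//minimal standalone finite-difference check of that formula using only GSL;
//the tnn_example_* names, sizes, and step size are illustrative assumptions.
#include <stdio.h>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_blas.h>

static double tnn_example_sqdist(const gsl_vector *x, const gsl_vector *y){
  //loss = ||x - y||^2, the same quantity fprop_euclidean computes
  double n;
  gsl_vector *d = gsl_vector_alloc(x->size);
  gsl_blas_dcopy(x, d);
  gsl_blas_daxpy(-1.0, y, d);
  n = gsl_blas_dnrm2(d);
  gsl_vector_free(d);
  return n*n;
}

static void tnn_example_check_euclidean_grad(void){
  size_t i;
  double h = 1e-6;
  gsl_vector *x = gsl_vector_alloc(3);
  gsl_vector *y = gsl_vector_alloc(3);
  for(i = 0; i < 3; i = i + 1){
    gsl_vector_set(x, i, 0.5*(double)i);
    gsl_vector_set(y, i, 1.0 - (double)i);
  }
  for(i = 0; i < 3; i = i + 1){
    double xi = gsl_vector_get(x, i);
    double analytic = 2.0*(xi - gsl_vector_get(y, i));
    gsl_vector_set(x, i, xi + h);
    double lp = tnn_example_sqdist(x, y);
    gsl_vector_set(x, i, xi - h);
    double lm = tnn_example_sqdist(x, y);
    gsl_vector_set(x, i, xi);
    printf("dL/dx[%zu]: analytic %g, numeric %g\n", i, analytic, (lp - lm)/(2.0*h));
  }
  gsl_vector_free(x);
  gsl_vector_free(y);
}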
//Learn one sample using naive stochastic gradient descent
tnn_error tnn_trainer_class_learn_nsgd(tnn_trainer_class *t, gsl_vector *input, size_t label){
  tnn_error ret;
  tnn_state *sin;
  tnn_param *p;
  gsl_vector_view lb;

  //Routine check
  if(t->t != TNN_TRAINER_CLASS_TYPE_NSGD){
    return TNN_ERROR_TRAINER_CLASS_MISTYPE;
  }

  //Check the input and label
  TNN_MACRO_ERRORTEST(tnn_machine_get_sin(&t->m, &sin), ret);
  if(label >= t->lset->size1 || input->size != sin->size){
    return TNN_ERROR_STATE_INCOMP;
  }
  lb = gsl_matrix_row(t->lset, label);

  //Set the loss output dx to be 1
  gsl_vector_set(&t->l.output->dx, 0, 1.0);

  //Copy the data into the input/label and do forward and backward propagation
  TNN_MACRO_GSLTEST(gsl_blas_dcopy(input, &sin->x));
  TNN_MACRO_GSLTEST(gsl_blas_dcopy(&lb.vector, &t->label->x));
  TNN_MACRO_ERRORTEST(tnn_machine_fprop(&t->m), ret);
  TNN_MACRO_ERRORTEST(tnn_loss_fprop(&t->l), ret);
  TNN_MACRO_ERRORTEST(tnn_loss_bprop(&t->l), ret);
  TNN_MACRO_ERRORTEST(tnn_machine_bprop(&t->m), ret);

  //Add the regularization gradient, scaled by lambda, to the accumulated parameter gradient
  TNN_MACRO_ERRORTEST(tnn_machine_get_param(&t->m, &p), ret);
  TNN_MACRO_ERRORTEST(tnn_reg_addd(&t->r, p->x, p->dx, t->lambda), ret);

  //Compute the parameter update
  TNN_MACRO_GSLTEST(gsl_blas_daxpy(-((tnn_trainer_class_nsgd*)t->c)->eta, p->dx, p->x));

  //Set the titer parameter
  ((tnn_trainer_class_nsgd*)t->c)->titer = 1;

  return TNN_ERROR_SUCCESS;
}
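//Taken together, tnn_reg_addd (which, judging from the call pattern, adds the
//lambda-scaled regularizer gradient into p->dx) and the daxpy above amount to
//the plain NSGD update w <- w - eta*(dL/dw + lambda*dR/dw). A minimal sketch
//of that arithmetic on bare GSL vectors; the function name is illustrative.
static void tnn_example_nsgd_step(gsl_vector *w, const gsl_vector *dloss,
                                  const gsl_vector *dreg,
                                  double eta, double lambda){
  //g = dloss + lambda*dreg, then w = w - eta*g
  gsl_vector *g = gsl_vector_alloc(w->size);
  gsl_blas_dcopy(dloss, g);
  gsl_blas_daxpy(lambda, dreg, g);
  gsl_blas_daxpy(-eta, g, w);
  gsl_vector_free(g);
}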
tnn_error tnn_module_fprop_sum(tnn_module *m){
  tnn_state **t;

  //Routine check
  if(m->t != TNN_MODULE_TYPE_SUM){
    return TNN_ERROR_MODULE_MISTYPE;
  }
  if(m->input->valid != true || m->output->valid != true){
    return TNN_ERROR_STATE_INVALID;
  }

  //fprop to output: zero the output, then accumulate every summand state
  //(gsl_blas_dscal returns void, so it cannot be wrapped in TNN_MACRO_GSLTEST)
  gsl_vector_set_zero(&m->output->x);
  for(t = (tnn_state **)utarray_front(((tnn_module_sum*)m->c)->sarray);
      t != NULL;
      t = (tnn_state **)utarray_next(((tnn_module_sum*)m->c)->sarray, t)){
    TNN_MACRO_GSLTEST(gsl_blas_daxpy(1.0, &(*t)->x, &m->output->x));
  }

  return TNN_ERROR_SUCCESS;
}
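//The loop above walks a utarray (uthash's utarray.h) of tnn_state pointers:
//utarray_front/utarray_next yield pointers to the stored elements, hence the
//tnn_state** casts. A minimal standalone sketch of the same pattern summing
//plain gsl_vectors; the icd and tnn_example_* names are illustrative.
#include <gsl/gsl_blas.h>
#include "utarray.h"

static void tnn_example_sum_vectors(void){
  gsl_vector *a = gsl_vector_alloc(2), *b = gsl_vector_alloc(2);
  gsl_vector *sum = gsl_vector_calloc(2);
  gsl_vector **p;
  UT_array *terms;
  UT_icd ptr_icd = {sizeof(gsl_vector *), NULL, NULL, NULL}; //raw pointers, no dtor

  gsl_vector_set_all(a, 1.0);
  gsl_vector_set_all(b, 2.0);
  utarray_new(terms, &ptr_icd);
  utarray_push_back(terms, &a); //push the address of the pointer
  utarray_push_back(terms, &b);

  //Same front/next walk as fprop_sum; elements come back as gsl_vector**
  for(p = (gsl_vector **)utarray_front(terms);
      p != NULL;
      p = (gsl_vector **)utarray_next(terms, p)){
    gsl_blas_daxpy(1.0, *p, sum); //sum += *p
  }

  utarray_free(terms);
  gsl_vector_free(a); gsl_vector_free(b); gsl_vector_free(sum);
}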
tnn_error tnn_loss_fprop_euclidean(tnn_loss *l){
  gsl_vector *diff;
  double loss;

  //Routine check
  if(l->t != TNN_LOSS_TYPE_EUCLIDEAN){
    return TNN_ERROR_LOSS_MISTYPE;
  }
  if(l->input1->valid != true || l->input2->valid != true || l->output->valid != true){
    return TNN_ERROR_STATE_INVALID;
  }

  //Do the forward propagation: loss = ||x - y||^2
  if((diff = gsl_vector_alloc(l->input1->size)) == NULL){
    return TNN_ERROR_GSL;
  }
  TNN_MACRO_GSLTEST(gsl_blas_dcopy(&l->input1->x, diff));
  TNN_MACRO_GSLTEST(gsl_blas_daxpy(-1.0, &l->input2->x, diff));
  loss = gsl_blas_dnrm2(diff);
  gsl_vector_set(&l->output->x, 0, loss*loss);
  gsl_vector_free(diff);

  return TNN_ERROR_SUCCESS;
}
tnn_error tnn_module_fprop_linear(tnn_module *m){
  tnn_error ret;
  gsl_matrix w;

  //Routine check
  if(m->t != TNN_MODULE_TYPE_LINEAR){
    return TNN_ERROR_MODULE_MISTYPE;
  }
  if(m->input->valid != true || m->output->valid != true || m->w.valid != true){
    return TNN_ERROR_STATE_INVALID;
  }

  //Transform the matrix
  TNN_MACRO_ERRORTEST(tnn_numeric_v2m(&m->w.x, &w, m->output->size, m->input->size), ret);

  //Compute the result using BLAS
  TNN_MACRO_GSLTEST(gsl_blas_dgemv(CblasNoTrans, 1.0, &w, &m->input->x, 0.0, &m->output->x));

  return TNN_ERROR_SUCCESS;
}
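//tnn_numeric_v2m evidently reinterprets the flat parameter vector as an
//output_size x input_size matrix header without copying; GSL's own view API
//can illustrate the idea. A minimal sketch under that assumption, with toy
//shapes; the tnn_example_* name is illustrative.
static void tnn_example_linear_fprop(void){
  gsl_vector *theta = gsl_vector_calloc(6); //flat parameters; W viewed as 2x3
  gsl_vector *x = gsl_vector_alloc(3);
  gsl_vector *y = gsl_vector_alloc(2);
  gsl_matrix_view w = gsl_matrix_view_vector(theta, 2, 3);

  gsl_vector_set_all(x, 1.0);
  gsl_vector_set(theta, 0, 2.0); //W[0][0] = 2, so y[0] should come out as 2

  //y = W * x -- the same dgemv call as fprop_linear
  gsl_blas_dgemv(CblasNoTrans, 1.0, &w.matrix, x, 0.0, y);

  gsl_vector_free(theta); gsl_vector_free(x); gsl_vector_free(y);
}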
//Train all the samples using naive stochastic gradient descent
tnn_error tnn_trainer_class_train_nsgd(tnn_trainer_class *t, gsl_matrix *inputs, size_t *labels){
  tnn_error ret;
  tnn_state *sin;
  tnn_param *p;
  gsl_vector *rd;
  gsl_vector *pw;
  gsl_vector_view in;
  gsl_vector_view lb;
  double eps;
  size_t i, j;

  //Routine check
  if(t->t != TNN_TRAINER_CLASS_TYPE_NSGD){
    return TNN_ERROR_TRAINER_CLASS_MISTYPE;
  }

  //Check the input
  TNN_MACRO_ERRORTEST(tnn_machine_get_sin(&t->m, &sin), ret);
  if(inputs->size2 != sin->size){
    return TNN_ERROR_STATE_INCOMP;
  }

  //Set the loss output dx to be 1
  gsl_vector_set(&t->l.output->dx, 0, 1.0);

  //Get the parameter and allocate rd and pw
  TNN_MACRO_ERRORTEST(tnn_machine_get_param(&t->m, &p), ret);
  rd = gsl_vector_alloc(p->size);
  pw = gsl_vector_alloc(p->size);
  if(rd == NULL || pw == NULL){
    return TNN_ERROR_GSL;
  }

  //Into the main loop: run eiter samples per epoch until eps converges or niter is exhausted
  for(eps = DBL_MAX, ((tnn_trainer_class_nsgd*)t->c)->titer = 0;
      eps > ((tnn_trainer_class_nsgd*)t->c)->epsilon && ((tnn_trainer_class_nsgd*)t->c)->titer < ((tnn_trainer_class_nsgd*)t->c)->niter;
      ((tnn_trainer_class_nsgd*)t->c)->titer = ((tnn_trainer_class_nsgd*)t->c)->titer + ((tnn_trainer_class_nsgd*)t->c)->eiter){

    //Copy the previous parameters into pw
    TNN_MACRO_GSLTEST(gsl_blas_dcopy(p->x, pw));

    for(i = 0; i < ((tnn_trainer_class_nsgd*)t->c)->eiter; i = i + 1){
      j = (((tnn_trainer_class_nsgd*)t->c)->titer + i)%inputs->size1;

      //Check the label
      if(labels[j] >= t->lset->size1){
        return TNN_ERROR_STATE_INCOMP;
      }

      //Get the input and label vectors
      lb = gsl_matrix_row(t->lset, labels[j]);
      in = gsl_matrix_row(inputs, j);

      //Copy the data into the input/label and do forward and backward propagation
      TNN_MACRO_GSLTEST(gsl_blas_dcopy(&in.vector, &sin->x));
      TNN_MACRO_GSLTEST(gsl_blas_dcopy(&lb.vector, &t->label->x));
      TNN_MACRO_ERRORTEST(tnn_machine_fprop(&t->m), ret);
      TNN_MACRO_ERRORTEST(tnn_loss_fprop(&t->l), ret);
      TNN_MACRO_ERRORTEST(tnn_loss_bprop(&t->l), ret);
      TNN_MACRO_ERRORTEST(tnn_machine_bprop(&t->m), ret);

      //Add the regularization gradient, scaled by lambda, to the parameter gradient
      TNN_MACRO_ERRORTEST(tnn_reg_d(&t->r, p->x, rd), ret);
      TNN_MACRO_GSLTEST(gsl_blas_daxpy(t->lambda, rd, p->dx));

      //Compute the parameter update
      TNN_MACRO_GSLTEST(gsl_blas_daxpy(-((tnn_trainer_class_nsgd*)t->c)->eta, p->dx, p->x));
    }

    //Use the 2-norm of the parameter change over this epoch as eps
    TNN_MACRO_GSLTEST(gsl_blas_daxpy(-1.0, p->x, pw));
    eps = gsl_blas_dnrm2(pw);
  }

  //Free the work vectors (the original returned without releasing them)
  gsl_vector_free(rd);
  gsl_vector_free(pw);

  return TNN_ERROR_SUCCESS;
}
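//The epoch loop stops when eps = ||w_before - w_after||_2, the 2-norm of the
//parameter displacement accumulated over one epoch, falls below epsilon (or
//niter runs out). The trainer computes it in place by reusing pw; spelled out
//on bare vectors it is just the following (tnn_example_* is an illustrative name):
static double tnn_example_epoch_eps(const gsl_vector *w_before, const gsl_vector *w_after){
  double eps;
  gsl_vector *d = gsl_vector_alloc(w_before->size);
  gsl_blas_dcopy(w_before, d);      //d = w_before
  gsl_blas_daxpy(-1.0, w_after, d); //d = w_before - w_after
  eps = gsl_blas_dnrm2(d);
  gsl_vector_free(d);
  return eps;
}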