Example #1
void HLayeredBlWStructure::AtVkV( gsl_vector *u, long k, const gsl_matrix *A,
         const gsl_vector *v, gsl_vector *tmpVkV, double beta ) const {
  gsl_blas_dcopy(v, tmpVkV);
  gsl_blas_dtrmv(CblasLower, (k > 0 ? CblasNoTrans : CblasTrans),
                 CblasNonUnit, getWk(abs(k)), tmpVkV);
  gsl_blas_dgemv(CblasTrans, 1.0, A, tmpVkV, beta, u);       
}
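Reading the three BLAS calls above: dcopy sets tmpVkV = v, dtrmv overwrites it with the lower-triangular factor W_{|k|} applied to it (transposed when k <= 0), and dgemv finishes with u = A^T * tmpVkV + beta * u. So the routine effectively computes u = A^T * W_{|k|} * v + beta * u, with W transposed for non-positive k.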
double lda_m_step(lda* model, lda_suff_stats* ss) {
    int k, w;
    double lhood = 0;
    for (k = 0; k < model->ntopics; k++)
    {
        gsl_vector ss_k = gsl_matrix_column(ss->topics_ss, k).vector;
        gsl_vector log_p = gsl_matrix_column(model->topics, k).vector;
        if (LDA_USE_VAR_BAYES == 0)
        {
            gsl_blas_dcopy(&ss_k, &log_p);
            normalize(&log_p);
            vct_log(&log_p);
        }
        else
        {
            double digsum = sum(&ss_k)+model->nterms*LDA_TOPIC_DIR_PARAM;
            digsum = gsl_sf_psi(digsum);
            double param_sum = 0;
            for (w = 0; w < model->nterms; w++)
            {
                double param = vget(&ss_k, w) + LDA_TOPIC_DIR_PARAM;
                param_sum += param;
                double elogprob = gsl_sf_psi(param) - digsum;
                vset(&log_p, w, elogprob);
                lhood += (LDA_TOPIC_DIR_PARAM - param) * elogprob + gsl_sf_lngamma(param);
            }
            lhood -= gsl_sf_lngamma(param_sum);
        }
    }
    return(lhood);
}
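A reading note for the else branch above: gsl_sf_psi is the digamma function Psi, so with lambda_{k,w} = ss_{k,w} + LDA_TOPIC_DIR_PARAM the inner loop computes the standard Dirichlet expectation E[log beta_{k,w}] = Psi(lambda_{k,w}) - Psi(sum_w lambda_{k,w}), storing it in log_p while accumulating the Dirichlet terms of the variational bound in lhood.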
Example #3
double normal_null_maximum(gsl_vector *means,gsl_vector *s) {
  assert(means->size == s->size);
  
  // Quadratic-formula (Bhaskara) coefficients.
  double a,b,c;

  gsl_vector *s2=gsl_vector_alloc(means->size);
  gsl_blas_dcopy(s,s2);
  gsl_vector_mul(s2,s2);

  gsl_vector *B=gsl_vector_alloc(means->size);
  gsl_blas_dcopy(means,B);
  gsl_blas_dscal(-2.0,B);
  //printf("B=%lg %lg\n",ELTd(B,0),ELTd(B,1));

  gsl_vector *C=gsl_vector_alloc(means->size);
  gsl_blas_dcopy(means,C);
  gsl_vector_mul(C,C);

  gsl_vector *prods=gsl_vector_alloc(means->size);
  mutual_prod(s2,prods);

  a=gsl_blas_dasum(prods);
  gsl_blas_ddot(B,prods,&b);
  gsl_blas_ddot(C,prods,&c);

  // The temporaries are no longer needed; free them to avoid leaking on every call.
  gsl_vector_free(s2);
  gsl_vector_free(B);
  gsl_vector_free(C);
  gsl_vector_free(prods);

  printf("null max: a=%lf b=%lf c=%lf\n",a,b,c);

  double delta=b*b-4*a*c;
  double x0=-b/(2.0*a);
  printf("null max: delta=%lg\n",delta);
  
  if (fabs(delta) < 1e-5) {
    return x0; 
  } else {
    if (delta > 0) {
      double x1=(-b-sqrt(delta))/(2.0*a);
      double x2=(-b+sqrt(delta))/(2.0*a);
      
      printf("null max: x1=%lg x2=%lg\n",x1,x2);
      return x1;
    } else {
      printf("WARNING: Null max not found!\n");
      return x0;
    }
  }
}
tnn_error tnn_module_bprop_bias(tnn_module *m){
  //Routine check
  if(m->t != TNN_MODULE_TYPE_BIAS){
    return TNN_ERROR_MODULE_MISTYPE;
  }
  if(m->input->valid != true || m->output->valid != true || m->w.valid != true){
    return TNN_ERROR_STATE_INVALID;
  }

  //bprop to input
  TNN_MACRO_GSLTEST(gsl_blas_dcopy(&m->output->dx,  &m->input->dx));

  //bprop to dw
  TNN_MACRO_GSLTEST(gsl_blas_dcopy(&m->output->dx,  &m->w.dx));

  return TNN_ERROR_SUCCESS;
}
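Note that copying output->dx unchanged into both input->dx and w.dx is exactly what one expects if the bias module computes output = input + w: the Jacobian with respect to both the input and the bias is the identity.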
tnn_error tnn_loss_bprop_euclidean(tnn_loss *l){
  //Routine check
  if(l->t != TNN_LOSS_TYPE_EUCLIDEAN){
    return TNN_ERROR_LOSS_MISTYPE;
  }
  if(l->input1->valid != true || l->input2->valid != true || l->output->valid != true){
    return TNN_ERROR_STATE_INVALID;
  }

  //bprop to input1 and input2: dx = 2*dl*(x-y); dy = 2*dl*(y-x), with dl = output->dx
  TNN_MACRO_GSLTEST(gsl_blas_dcopy(&l->input1->x, &l->input1->dx));
  TNN_MACRO_GSLTEST(gsl_blas_daxpy(-1.0, &l->input2->x, &l->input1->dx));
  gsl_blas_dscal(2.0*gsl_vector_get(&l->output->dx, 0), &l->input1->dx);
  TNN_MACRO_GSLTEST(gsl_blas_dcopy(&l->input1->dx, &l->input2->dx));
  gsl_blas_dscal(-1.0, &l->input2->dx);

  return TNN_ERROR_SUCCESS;
}
//Learn one sample using naive stochastic gradient descent
tnn_error tnn_trainer_class_learn_nsgd(tnn_trainer_class *t, gsl_vector *input, size_t label){
  tnn_error ret;
  tnn_state *sin;
  tnn_param *p;
  gsl_vector_view lb;

  //Routine check
  if(t->t != TNN_TRAINER_CLASS_TYPE_NSGD){
    return TNN_ERROR_TRAINER_CLASS_MISTYPE;
  }

  //Check the input and label
  TNN_MACRO_ERRORTEST(tnn_machine_get_sin(&t->m, &sin),ret);
  if(label >= t->lset->size1 || input->size != sin->size){
    return TNN_ERROR_STATE_INCOMP;
  }
  lb = gsl_matrix_row(t->lset, label);

  //Set the loss output dx to be 1
  gsl_vector_set(&t->l.output->dx, 0, 1.0);

  //Copy the data into the input/label and do forward and backward propagation
  TNN_MACRO_GSLTEST(gsl_blas_dcopy(input, &sin->x));
  TNN_MACRO_GSLTEST(gsl_blas_dcopy(&lb.vector, &t->label->x));
  TNN_MACRO_ERRORTEST(tnn_machine_fprop(&t->m), ret);
  TNN_MACRO_ERRORTEST(tnn_loss_fprop(&t->l), ret);
  TNN_MACRO_ERRORTEST(tnn_loss_bprop(&t->l), ret);
  TNN_MACRO_ERRORTEST(tnn_machine_bprop(&t->m), ret);

  //Compute the accumulated regularization parameter
  TNN_MACRO_ERRORTEST(tnn_machine_get_param(&t->m, &p), ret);
  TNN_MACRO_ERRORTEST(tnn_reg_addd(&t->r, p->x, p->dx, t->lambda), ret);

  //Compute the parameter update
  TNN_MACRO_GSLTEST(gsl_blas_daxpy(-((tnn_trainer_class_nsgd*)t->c)->eta, p->dx, p->x));

  //Set the titer parameter
  ((tnn_trainer_class_nsgd*)t->c)->titer = 1;

  return TNN_ERROR_SUCCESS;
}
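Assuming tnn_reg_addd folds lambda times the regularizer gradient into p->dx (as its argument list suggests), the daxpy that follows is the plain SGD update p->x <- p->x - eta * p->dx, i.e. one step of x <- x - eta * (dL/dx + lambda * dR/dx).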
tnn_error tnn_module_bprop_sum(tnn_module *m){
  tnn_state **t;

  //Routine check
  if(m->t != TNN_MODULE_TYPE_SUM){
    return TNN_ERROR_MODULE_MISTYPE;
  }
  if(m->input->valid != true || m->output->valid != true){
    return TNN_ERROR_STATE_INVALID;
  }

  //bprop to each input
  for(t = (tnn_state **)utarray_front(((tnn_module_sum*)m->c)->sarray);
      t != NULL;
      t = (tnn_state **)utarray_next(((tnn_module_sum*)m->c)->sarray, t)){
    TNN_MACRO_GSLTEST(gsl_blas_dcopy(&m->output->dx, &(*t)->dx));
  }

  return TNN_ERROR_SUCCESS;
}
tnn_error tnn_loss_fprop_euclidean(tnn_loss *l){
  gsl_vector *diff;
  double loss;

  //Routine check
  if(l->t != TNN_LOSS_TYPE_EUCLIDEAN){
    return TNN_ERROR_LOSS_MISTYPE;
  }
  if(l->input1->valid != true || l->input2->valid != true || l->output->valid != true){
    return TNN_ERROR_STATE_INVALID;
  }

  //Do the forward propagation
  if((diff = gsl_vector_alloc(l->input1->size)) == NULL){
    return TNN_ERROR_GSL;
  }
  TNN_MACRO_GSLTEST(gsl_blas_dcopy(&l->input1->x, diff));
  TNN_MACRO_GSLTEST(gsl_blas_daxpy(-1.0, &l->input2->x, diff));
  loss = gsl_blas_dnrm2(diff);
  gsl_vector_set(&l->output->x, 0, loss*loss);
  gsl_vector_free(diff);

  return TNN_ERROR_SUCCESS;
}
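Since gsl_blas_dnrm2 returns the Euclidean norm ||input1 - input2||_2 rather than its square, the loss*loss stored in the output is ||x - y||^2, which matches the gradient 2*dl*(x - y) computed in the bprop routine earlier.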
Example #9
/**
 * C++ version of gsl_blas_dcopy().
 * @param X A vector
 * @param Y A vector
 * @return Error code on failure
 */
int dcopy( vector const& X, vector& Y ){ return gsl_blas_dcopy( X.get(), Y.get() ); }
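For reference, a minimal standalone sketch of the underlying C call this wrapper delegates to; the vector size and contents are illustrative only, and the only requirement gsl_blas_dcopy imposes is that both vectors have the same length.

#include <stdio.h>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_blas.h>

int main(void) {
  gsl_vector *x = gsl_vector_alloc(3);
  gsl_vector *y = gsl_vector_alloc(3);      /* must match x in size */

  for (size_t i = 0; i < x->size; i++)
    gsl_vector_set(x, i, (double)i + 1.0);  /* x = (1, 2, 3) */

  int status = gsl_blas_dcopy(x, y);        /* y <- x; returns GSL_SUCCESS (0) on success */
  printf("status=%d  y=(%g, %g, %g)\n", status,
         gsl_vector_get(y, 0), gsl_vector_get(y, 1), gsl_vector_get(y, 2));

  gsl_vector_free(x);
  gsl_vector_free(y);
  return 0;
}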
Example #10
static VALUE rb_gsl_blas_dcopy(int argc, VALUE *argv, VALUE obj)
{
  gsl_vector *x = NULL, *y = NULL;
  get_vector2(argc, argv, obj, &x, &y);
  return INT2FIX(gsl_blas_dcopy(x, y));
}
Example #11
static int
bundle_method_iterate (void *vstate, gsl_multimin_function_fsdf * fsdf, gsl_vector * x, double * f, 
                       gsl_vector * subgradient, gsl_vector * dx, double * eps)
{
	bundle_method_state_t *state = (bundle_method_state_t *) vstate;
	
	bundle_element *item;
	
	size_t i, debug=0;
	
	int status;
	double tmp_d, t_old, t_int_l; /* local variables */
	
	gsl_vector *y;		/* a trial point (the next iteration point after a serious step) */
	gsl_vector *sgr_y;	/* subgradient at y */
	double f_y;		/* the function value at y */
	
	gsl_vector *p;			/* the aggregate subgradient */
	double p_norm, lin_error_p;	/* norm of p, the aggregate linearization error */
	gsl_vector *tmp_v;
	
	/* data for the convex quadratic problem (for the dual problem) */
	gsl_vector *q;		/* elements of the array are the linearization errors */
	gsl_matrix *Q;		/* Q = G^T*G (G is the matrix whose columns are the subgradients) */
	gsl_vector *lambda;	/*  the convex combination coefficients of the subgradients (solution of the dual problem) */
	
	
	lambda = gsl_vector_alloc(state->bundle_size);
	if(lambda == 0)
	{
		GSL_ERROR_VAL ("failed to allocate workspace", GSL_ENOMEM, 0);
	}
	
	q = gsl_vector_alloc(lambda->size);
	if(q == 0)
	{
		gsl_vector_free(lambda);
		GSL_ERROR_VAL ("failed to allocate workspace", GSL_ENOMEM, 0);
	}
	
	y = gsl_vector_calloc(x->size);
	if(y == 0)
	{
		gsl_vector_free(q);
		gsl_vector_free(lambda);
		GSL_ERROR_VAL ("failed to allocate workspace", GSL_ENOMEM, 0);
	}
	
	sgr_y = gsl_vector_calloc(x->size);
	if(sgr_y == 0)
	{
		gsl_vector_free(y);
		gsl_vector_free(q);
		gsl_vector_free(lambda);
		GSL_ERROR_VAL ("failed to allocate workspace", GSL_ENOMEM, 0);
	}
	
	Q = gsl_matrix_alloc(state->bundle_size, state->bundle_size);
	if(Q == 0)
	{
		gsl_vector_free(sgr_y);
		gsl_vector_free(y);
		gsl_vector_free(q);
		gsl_vector_free(lambda);
		GSL_ERROR_VAL ("failed to allocate workspace", GSL_ENOMEM, 0);
	}
	
	p = gsl_vector_calloc(x->size);
	if(p == 0)
	{
		gsl_matrix_free(Q);
		gsl_vector_free(sgr_y);
		gsl_vector_free(y);
		gsl_vector_free(q);
		gsl_vector_free(lambda);
		GSL_ERROR_VAL ("failed to allocate workspace", GSL_ENOMEM, 0);
	}
	
	tmp_v = gsl_vector_calloc(x->size);
	if(tmp_v == 0)
	{
		gsl_vector_free(p);
		gsl_matrix_free(Q);
		gsl_vector_free(sgr_y);
		gsl_vector_free(y);
		gsl_vector_free(q);
		gsl_vector_free(lambda);
		GSL_ERROR_VAL ("failed to allocate workspace", GSL_ENOMEM, 0);
	}
	
	/* solve the dual problem */
	status = build_cqp_data(state, Q, q);
	
	status = solve_qp_pdip(Q, q, lambda);	
	
	gsl_matrix_free(Q);
	gsl_vector_free(q);
	
	
	/* compute the aggregate subgradient (called p in the documentation) */
	/* and the corresponding linearization error */
	
	lin_error_p = 0.0;
	item = state->head;
	for(i=0; i<lambda->size; i++)
	{
		status = gsl_blas_daxpy(gsl_vector_get(lambda,i), item->sgr, p);
		lin_error_p += gsl_vector_get(lambda,i)*(item->lin_error);
		
		item = item->next;
	}
	
	
	if(debug)
	{
		printf("the dual problem solution:\n");
		for(i=0;i<lambda->size;i++)
			printf("%7.6e ",gsl_vector_get(lambda,i));
		printf("\n\n");
		
		printf("the aggregate subgradient: \n");
		for(i=0;i<p->size;i++)
			printf("%.6e ",gsl_vector_get(p,i));
		printf("\n");
		
		printf("lin. error for aggr subgradient = %e\n",lin_error_p);
	}
	
	/* the norm of the aggr subgradient */
	p_norm = gsl_blas_dnrm2(p);
		
	/* search direction dx=-t*p (t is the length of step) */
	status = gsl_vector_memcpy(dx,p);
	status = gsl_vector_scale(dx,-1.0*state->t);
	
	
	/* v =-t*norm(p)^2-alpha_p */
	state->v = -gsl_pow_2(p_norm)*(state->t)-lin_error_p;
	
	/* the subgradient is the aggregate subgradient */
	status = gsl_blas_dcopy(p,subgradient);
		
	/* iteration step */	
	/* y=x+dx */
	status = gsl_blas_dcopy(dx,y);
	status = gsl_blas_daxpy(1.0,x,y);
	
	/* function value at y */
	f_y = GSL_MULTIMIN_FN_EVAL_F(fsdf, y);
	
	state->f_eval++;
	
	/* for t-update */
	if(!state->fixed_step_length)
	{
		t_old = state->t;
		if(fabs(state->v-(f_y-*f)) < state->rg || state->v-(f_y-*f) > state->rg)
			t_int_l = state->t_max;
		else
			t_int_l = 0.5*t_old*(state->v)/(state->v-(f_y-*f));
	}
	else
	{
		t_old = state->t;
		t_int_l = state->t;
	}
	
	
	if( f_y-*f <= state->m_ss*state->v ) /* Serious-Step */
	{
		
		if(debug)
			printf("\nSerious-Step\n");
		
		/* the relaxation step */
		if(state->relaxation)
		{
			if(f_y-*f <= state->v*state->m_rel)
			{
				double f_z;
			
				gsl_vector * z = gsl_vector_alloc(y->size);
			
				/* z = y+dx = x+2*dx */
				status = gsl_blas_dcopy(x,z);
				status = gsl_blas_daxpy(2.0,dx,z);
			
				f_z = GSL_MULTIMIN_FN_EVAL_F(fsdf, z);
				state->f_eval++;
				
				if(0.5*f_z-f_y+0.5*(*f) > state->rg)
					state->rel_parameter = GSL_MIN_DBL(-0.5*(-0.5*f_z+2.0*f_y-1.5*(*f))/(0.5*f_z-f_y+0.5*(*f)),1.999);
				else if (fabs(0.5*f_z-f_y+0.5*(*f)) > state->rg)
					state->rel_parameter = 1.999;
				else
					/* something is wrong */
					state->rel_parameter = 1.0;
								
				
				/* save the old iteration point */
				status = gsl_blas_dcopy(y,z);
				
				/* y = (1-rel_parameter)*x+rel_parameter*y */
				gsl_blas_dscal(state->rel_parameter,y);
				status = gsl_blas_daxpy(1.0-state->rel_parameter,x,y);
				
				/* f(y) und sgr_f(y) */
				tmp_d = GSL_MULTIMIN_FN_EVAL_F(fsdf, y);
				state->f_eval++;
				if(tmp_d > f_y)
				{
					/* keep y as the current point */
					status = gsl_blas_dcopy(z,y);
					
					state->rel_counter++;	
					
				}				
				else
				{
					f_y = tmp_d;
					/* dx = y-x */
					status = gsl_blas_dcopy(y,dx);
					status = gsl_blas_daxpy(-1.0,x,dx);
					
					/* if the iteration points before and after the relaxation step
					are close, the relaxation counter is increased */
					/* |1-rel_parameter| < 0.1 */
					if( fabs(1.0-state->rel_parameter) < 0.1)
						state->rel_counter++;	
				}
				
				
				GSL_MULTIMIN_FN_EVAL_SDF(fsdf, y, sgr_y);
				state->sgr_eval++;
				
				if(state->rel_counter > state->rel_counter_max)
					state->relaxation = 0;
				
				/* */
				status = gsl_blas_daxpy(-1.0,y,z);
				status = gsl_blas_ddot(p, z, &tmp_d);
				*eps = f_y-*f-(state->v)+tmp_d;
				
				gsl_vector_free(z);
			}
			else
			{
				*eps = f_y-(state->v)-*f;
				GSL_MULTIMIN_FN_EVAL_SDF(fsdf, y, sgr_y);
				state->sgr_eval++;
			}
		}
		else
		{
			*eps = f_y-(state->v)-*f;
			
			GSL_MULTIMIN_FN_EVAL_SDF(fsdf, y, sgr_y);
			state->sgr_eval++;
		}
		
		/* calculate linearization errors at new iteration point  */
		item = state->head;
		for(i=0; i<state->bundle_size; i++)
		{
			status = gsl_blas_ddot(item->sgr, dx, &tmp_d);
			item->lin_error += f_y-*f-tmp_d;
			
			item = item->next;
		}
		
		/* linearization error of the aggregate subgradient at the new iteration point */
		status = gsl_blas_ddot(p, dx, &tmp_d);
		lin_error_p += f_y-*f-tmp_d;
		
		/* update the bundle  */
		status = update_bundle(state, sgr_y, 0.0, lambda, p, lin_error_p, 1);
		
		/* adapt the step length */
		if(!state->fixed_step_length)
		{
			if(f_y-*f <= state->v*state->m_t && state->step_counter > 0)
				state->t = t_int_l;
			else if(state->step_counter>3)
				state->t=2.0*t_old;
		
			state->t = GSL_MIN_DBL(GSL_MIN_DBL(state->t,10.0*t_old),state->t_max);
			/*state->eps_v = GSL_MAX_DBL(state->eps_v,-2.0*state->v);*/
		
			state->step_counter = GSL_MAX_INT(state->step_counter+1,1);
				
			if(fabs(state->t-t_old) > state->rg) 
				state->step_counter=1;
		}
		
		
		/* x=y, f=f(y) */
		status = gsl_blas_dcopy(y,x);
		*f = f_y;
	 
		
	}
	else /* Null-Step */
	{	
		
		if(debug)
		  printf("\nNull-Step\n");
		
		GSL_MULTIMIN_FN_EVAL_SDF(fsdf, y, sgr_y);
		state->sgr_eval++;
		
		/* eps for the eps_subdifferential */
		*eps = lin_error_p;
		
		/* calculate the linearization error at y */
		status = gsl_blas_ddot(sgr_y,dx,&tmp_d);
		tmp_d += *f-f_y;
		
		/* Bundle update */
		status = update_bundle(state, sgr_y, tmp_d, lambda, p, lin_error_p, 0);
		
		/* adapt the step length */
		if(!state->fixed_step_length)
		{
			/*state->eps_v = GSL_MIN_DBL(state->eps_v,lin_error_p);*/
		
			if(tmp_d > GSL_MAX_DBL(p_norm,lin_error_p) && state->step_counter < -1)
				state->t = t_int_l;
			else if(state->step_counter < -3)
				state->t = 0.5*t_old;
		
			state->t = GSL_MAX_DBL(GSL_MAX_DBL(0.1*t_old,state->t),state->t_min);
		
			state->step_counter = GSL_MIN_INT(state->step_counter-1,-1);
				
			if(fabs(state->t-t_old) > state->rg) 
				state->step_counter = -1;
		}

		
	}
	
	
	state->lambda_min = p_norm * state->lm_accuracy;

	if(debug)
	{  
	  
	  printf("\nthe new bundle:\n");
	  bundle_out_liste(state);
  
	  printf("\n\n");
	
	  printf("the curent itarationspoint (1 x %d)\n",x->size);
	  for(i=0;i<x->size;i++)
		  printf("%12.6f ",gsl_vector_get(x,i)); 
	  printf("\n\n");	
	
	  printf("functions value at current point: f=%.8f\n",*f);
	
	  printf("\nstep length t=%.5e\n",state->t);
	  
	  printf("\nstep_counter sc=%d\n",state->step_counter);
	
	  printf("\naccuracy: v=%.5e\n",state->v);
	
	  printf("\nlambda_min=%e\n",state->lambda_min);
  
	  printf("\n");
	}
	
	gsl_vector_free(lambda);
	gsl_vector_free(y);
	gsl_vector_free(sgr_y);
	gsl_vector_free(p);
	gsl_vector_free(tmp_v);	/* allocated above but never used; free it to avoid a leak */
	
	return GSL_SUCCESS;
}
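Reading aid for the QP step in the iterate above: with G the matrix whose columns are the stored bundle subgradients g_i and q the vector of their linearization errors alpha_i (per the declarations at the top), solve_qp_pdip solves a convex QP over the simplex of combination coefficients lambda. The loop after the solve then forms the aggregate subgradient p = sum_i lambda_i * g_i with aggregate error alpha_p = sum_i lambda_i * alpha_i; the predicted descent is v = -t*||p||^2 - alpha_p, and the trial point y = x - t*p is accepted as a serious step iff f(y) - f(x) <= m_ss * v, while a null step merely enriches the bundle.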
Example #12
static int
update_bundle(bundle_method_state_t *state, const gsl_vector *new_sgr, const double lin_error_sgr, const gsl_vector *lambda,
	      const gsl_vector *aggr_sgr, const double lin_error_aggr_sgr, const size_t serious_step)
{
	bundle_element *current;
	bundle_element *item;
	bundle_element *item_aggr_sgr;
	bundle_element *item_largest_lin_error;
	
	size_t i;
	
	int status;

	
	/* first: drop all inactive bundle elements, i.e. elements with lambda(j)=0, because they do not contribute to the trial point y */
	/* second: drop the bundle element (if necessary) with the largest linearization error */
	current = state->head;
	item_largest_lin_error = NULL;
	item_aggr_sgr = NULL;
	
	for(i=0; i<lambda->size; i++)
	{
		if(!serious_step && current->state == 2)
		{
			item_aggr_sgr = current;
			current = current->next;
			
		}
		else if((fabs(gsl_vector_get(lambda,i))<state->lambda_min && current->state != 0) || (serious_step && current->state == 2))
		{
			item = current;
			current = current->next;
			
			status = remove_element(item, &(state->head), &(state->tail));
			
			(state->bundle_size)--;
			
		}
		else 
		{
			if(item_largest_lin_error == NULL || fabs(current->lin_error) > fabs(item_largest_lin_error->lin_error))
				item_largest_lin_error = current;
			
			if (current->state ==0 && serious_step)
				current->state = 1;
			
			current = current->next;
			
			
		}
	}
	
	if(state->bundle_size >= state->bundle_size_max)
	{
		status = remove_element(item_largest_lin_error, &(state->head), &(state->tail));
		(state->bundle_size)--;
		
		if(state->bundle_size >= state->bundle_size_max-1 && !serious_step && item_aggr_sgr == NULL)
		{
			if(state->head->state != 0)
				item_largest_lin_error = state->head;
			else
				item_largest_lin_error = state->head->next;
			
			for(item = item_largest_lin_error->next; item != NULL; item=item->next)
			{
				if(fabs(item->lin_error) > fabs(item_largest_lin_error->lin_error) && item->state != 0)
					item_largest_lin_error = item;
			}
			
			status = remove_element(item_largest_lin_error, &(state->head), &(state->tail));
			(state->bundle_size)--;
		}
	}
	
	
	/* add the new element to the bundle */
	item = malloc(sizeof(bundle_element));
	if (item == 0)
	{
		GSL_ERROR ("failed to allocate space for a new bundle element", GSL_ENOMEM);
	}
	
	item->sgr = gsl_vector_alloc(new_sgr->size);
	if (item->sgr == 0)
	{
		free(item);
		GSL_ERROR ("failed to allocate space for a subgradient in the new bundle element", GSL_ENOMEM);
	}
	
	status = gsl_blas_dcopy(new_sgr,item->sgr);
	item->lin_error = lin_error_sgr;
	
	if(serious_step)
		item->state = 0;
	else
		item->state = 1;
	
	status = insert_element(item, &(state->head), &(state->tail));
	
	(state->bundle_size)++;
	
	
	if(!serious_step)
	{
		if(item_aggr_sgr == NULL)
		{
			item = malloc(sizeof(bundle_element));
			if (item == 0)
			{
				GSL_ERROR ("failed to allocate space for a new bundle element", GSL_ENOMEM);
			}
	
			item->sgr = gsl_vector_alloc(new_sgr->size);
			if (item->sgr == 0)
			{
				free(item);
				GSL_ERROR ("failed to allocate space for a subgradient in the new bundle element", GSL_ENOMEM);
			}
	
			status = gsl_blas_dcopy(aggr_sgr,item->sgr);
			item->lin_error = lin_error_aggr_sgr;
			item->state = 2;
	
			status = insert_element(item, &(state->head), &(state->tail));
	
			(state->bundle_size)++;
		}
		else
		{
			status = gsl_blas_dcopy(aggr_sgr,item_aggr_sgr->sgr);
			item_aggr_sgr->lin_error = lin_error_aggr_sgr;
		}
	}
	
	return GSL_SUCCESS;

} 
Example #13
static int
bundle_method_set(void *vstate, gsl_multimin_function_fsdf * fsdf,
                  const gsl_vector * x, double *f, gsl_vector * subgradient,
                  double *eps, size_t bundle_size_max)
{
	bundle_method_state_t *state = (bundle_method_state_t *) vstate;
	
	int status, debug=0;
	
	state->rg = 1.0e-20;
	
	/* initialize the first bundle element with a subgradient at the start point;
	the linearization error at this point is of course zero,
	and the state of the bundle element is set to 0 (current iteration point) */
	GSL_MULTIMIN_FN_EVAL_F_SDF (fsdf, x, f, subgradient);
	
	status = gsl_blas_dcopy(subgradient, state->head->sgr);
	state->head->lin_error = 0.0;
	state->head->state = 0;
	
	*eps = 0.0;
	
	state->f_eval = 1;
	state->sgr_eval = 1;
	
	state->serious_step = 0;
	
	if(debug)
	{
		size_t i;
		printf("\n the start point:\n");
		for(i=0; i<x->size; i++)
			printf("%12.8f ", gsl_vector_get(x,i));
		printf("\n");
		printf("the function value at the start point:   f=%f\n",*f);
		printf("\n the subgradient at the start point:\n");
		for(i=0; i<subgradient->size; i++)
			printf("%12.8f ", gsl_vector_get(subgradient,i));
		printf("\n");
	}
	
	
	/* the initial step length, and lower and upper bounds for t */
	state->fixed_step_length = 0;
	
	if(state->fixed_step_length)
	{
		state->t_max = 1;
		state->t = 1;
		state->t_min = 1;
	}
	else
	{
		state->t_max = 1.0e+10;
	
		if(gsl_blas_dnrm2(subgradient) > state->rg)
			state->t = 1.0/gsl_blas_dnrm2(subgradient);
		else
			state->t = state->t_max;
	
		state->t_min = (state->t)*(1.0e-10);
	}
	
	
	state->step_counter = 0;
	
	state->m_ss  = 1.0e-1;
	state->m_t   = 5.0e-1;
	state->m_rel = 3.0e-1;
	
	
	if(bundle_size_max < 3)
	{
		GSL_ERROR ("the maximal number of bundle elements must be greater or equal to 3", GSL_ENOMEM);
	}
	
	state->bundle_size_max = bundle_size_max; 
	state->bundle_size = 1;
	
	state->lm_accuracy = 1.0e-12;
	state->lambda_min = (1.0/(state->t))*state->lm_accuracy;
	
	state->relaxation = 1;
	state->fixed_relaxation = 0;
	state->rel_parameter = 1.2;
	state->rel_counter = 0;
	state->rel_counter_max = 3;
	
	
	return GSL_SUCCESS;
}
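Note on the initialization above: unless a fixed step length is requested, the initial proximal step length is t = 1/||g_0||_2 for the starting subgradient g_0 (falling back to t_max when its norm is below the tolerance rg), and t is thereafter confined to [t_min, t_max].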
//Train all the samples using naive stochastic gradient descent
tnn_error tnn_trainer_class_train_nsgd(tnn_trainer_class *t, gsl_matrix *inputs, size_t *labels){
  tnn_error ret;
  tnn_state *sin;
  tnn_param *p;
  gsl_vector *rd;
  gsl_vector *pw;
  gsl_vector_view in;
  gsl_vector_view lb;
  double eps;
  size_t i,j;

  //Routine check
  if(t->t != TNN_TRAINER_CLASS_TYPE_NSGD){
    return TNN_ERROR_TRAINER_CLASS_MISTYPE;
  }

  //Check the input
  TNN_MACRO_ERRORTEST(tnn_machine_get_sin(&t->m, &sin),ret);
  if(inputs->size2 != sin->size){
    return TNN_ERROR_STATE_INCOMP;
  }

  //Set the loss output dx to be 1
  gsl_vector_set(&t->l.output->dx, 0, 1.0);

  //Get the parameter and allocate rd and pw
  TNN_MACRO_ERRORTEST(tnn_machine_get_param(&t->m, &p), ret);
  rd = gsl_vector_alloc(p->size);
  pw = gsl_vector_alloc(p->size);
  if(rd == NULL || pw == NULL){
    return TNN_ERROR_GSL;
  }

  //Into the main loop
  for(eps = DBL_MAX, ((tnn_trainer_class_nsgd*)t->c)->titer = 0;
      eps > ((tnn_trainer_class_nsgd*)t->c)->epsilon && ((tnn_trainer_class_nsgd*)t->c)->titer < ((tnn_trainer_class_nsgd*)t->c)->niter;
      ((tnn_trainer_class_nsgd*)t->c)->titer = ((tnn_trainer_class_nsgd*)t->c)->titer + ((tnn_trainer_class_nsgd*)t->c)->eiter){

    //Copy the previous pw
    TNN_MACRO_GSLTEST(gsl_blas_dcopy(p->x, pw));

    for(i = 0; i < ((tnn_trainer_class_nsgd*)t->c)->eiter; i = i + 1){

      j = (((tnn_trainer_class_nsgd*)t->c)->titer + i)%inputs->size1;

      //Check the label
      if(labels[j] >= t->lset->size1){
	gsl_vector_free(rd);
	gsl_vector_free(pw);
	return TNN_ERROR_STATE_INCOMP;
      }

      //Get the inputs and label vector
      lb = gsl_matrix_row(t->lset, labels[j]);
      in = gsl_matrix_row(inputs, j);

      //Copy the data into the input/label and do forward and backward propagation
      TNN_MACRO_GSLTEST(gsl_blas_dcopy(&in.vector, &sin->x));
      TNN_MACRO_GSLTEST(gsl_blas_dcopy(&lb.vector, &t->label->x));
      TNN_MACRO_ERRORTEST(tnn_machine_fprop(&t->m), ret);
      TNN_MACRO_ERRORTEST(tnn_loss_fprop(&t->l), ret);
      TNN_MACRO_ERRORTEST(tnn_loss_bprop(&t->l), ret);
      TNN_MACRO_ERRORTEST(tnn_machine_bprop(&t->m), ret);

      //Compute the accumulated regularization parameter
      TNN_MACRO_ERRORTEST(tnn_reg_d(&t->r, p->x, rd), ret);
      TNN_MACRO_GSLTEST(gsl_blas_daxpy(t->lambda, rd, p->dx));

      //Compute the parameter update
      TNN_MACRO_GSLTEST(gsl_blas_daxpy(-((tnn_trainer_class_nsgd*)t->c)->eta, p->dx, p->x));
    }

    //Compute the 2-norm of the parameter difference over this epoch as eps
    TNN_MACRO_GSLTEST(gsl_blas_daxpy(-1.0, p->x, pw));
    eps = gsl_blas_dnrm2(pw);
  }

  gsl_vector_free(rd);
  gsl_vector_free(pw);

  return TNN_ERROR_SUCCESS;
}
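The outer loop above runs in epochs of eiter samples and stops once eps = ||theta_before - theta_after||_2, the 2-norm of the parameter change over an epoch, is no longer above epsilon, or after niter total iterations.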