Exemplo n.º 1
0
// Run one stochastic-gradient sweep over every (sample, earlier sample) pair and return the
// objective evaluated at the updated weights.
//
//   w           - weight vector (num_features entries), updated in place
//   features    - num_samples rows, each with num_features entries
//   grades      - num_samples labels; only their relative order is compared
//   num_updates - in/out counter, incremented once for every pair that triggers an update
//
// Every pair rescales w through vec_assign by (1 - ETA/num_samples^2); pairs whose grades are
// ordered but whose cached scores are not separated by a margin of 1 additionally move w by a
// fixed step of C*ETA/num_samples^2 along the difference of the two feature rows.
// (Assumes vec_add(dst, src, scale, n) accumulates scale*src into dst -- TODO confirm.)
double stochastic_gradient_descent(double *w, double **features, int *grades, int *num_updates, int num_samples, int num_features) {
  double *margins = (double *)malloc(num_samples * sizeof(double));
  const double pair_count = pow(num_samples, 2);
  const double scaled_eta = C*ETA;
  const double delta = scaled_eta / pair_count;
  const double shrink = 1-ETA/pair_count;

  for (int i = 0; i < num_samples; ++i) {
    // The score of sample i is computed once, with the weights as they are when i is reached.
    margins[i] = dot_product(w, features[i], num_features);

    for (int j = 0; j < i; ++j) {
      // -1: sample i should score lower than j, +1: it should score higher, 0: no violation.
      int direction = 0;
      if (grades[i] < grades[j] && margins[i]+1 > margins[j]) {
        direction = -1;
      } else if (grades[i] > grades[j] && margins[i] < 1+margins[j]) {
        direction = 1;
      }

      // The shrink is applied for every pair, whether or not the pair is violated.
      vec_assign(w, w, shrink, num_features);

      if (direction != 0) {
        const double toward = (direction < 0) ? -delta : delta;
        vec_add(w, features[i], toward, num_features);
        vec_add(w, features[j], -toward, num_features);
        ++(*num_updates);
      }
    }
  }

  free(margins);

  return compute_objective(w, features, grades, num_samples, num_features);
}
Exemplo n.º 2
0
// Do an iteration of gradient descent and return the post-update objective function.
// Possibly update the optimal step size in place.
//
//   w         - weight vector (num_features entries); handed to take_gradient_step, which
//               applies the step to it
//   step_size - in/out step size, forwarded to take_gradient_step
//   features  - num_samples rows, each with num_features entries
//   grades    - num_samples labels; only their relative order is compared
//
// The gradient buffer is seeded from w via vec_assign(grad, w, 1, ...) (presumably grad := w --
// TODO confirm against vec_assign). Then, for every pair (i, j<i) whose grades are ordered but
// whose scores are not separated by a margin of 1, the difference of the two feature rows is
// accumulated with weight C/num_samples^2.
//
// Returns whatever take_gradient_step returns. If the scratch buffers cannot be allocated,
// nothing is modified and HUGE_VAL is returned.
double gradient_descent(double *w, double *step_size, double **features, int *grades, int num_samples, int num_features) {
  double *scores = malloc(num_samples * sizeof *scores);
  double *grad = malloc(num_features * sizeof *grad);
  int sample_ind, other_sample_ind;
  double slack_coeff = C / pow(num_samples, 2);
  double objective;

  // malloc(0) may legitimately return NULL, so only treat NULL as failure for non-empty requests.
  if ((scores == NULL && num_samples > 0) || (grad == NULL && num_features > 0)) {
    free(scores);
    free(grad);
    return HUGE_VAL;
  }

  vec_assign(grad, w, 1, num_features);

  for (sample_ind=0; sample_ind<num_samples; ++sample_ind) {
    scores[sample_ind] = dot_product(w, features[sample_ind], num_features);
    for (other_sample_ind=0; other_sample_ind<sample_ind; ++other_sample_ind) {
      if (grades[sample_ind] < grades[other_sample_ind] && scores[sample_ind]+1 > scores[other_sample_ind]) {
        // sample_ind should score lower: the gradient points towards it and away from the other
        vec_add(grad, features[sample_ind], slack_coeff, num_features);
        vec_add(grad, features[other_sample_ind], -slack_coeff, num_features);
      } else if (grades[sample_ind] > grades[other_sample_ind] && scores[sample_ind] < 1+scores[other_sample_ind]) {
        // sample_ind should score higher: mirror image of the case above
        vec_add(grad, features[sample_ind], -slack_coeff, num_features);
        vec_add(grad, features[other_sample_ind], slack_coeff, num_features);
      }
    }
  }

  // grad must stay alive until the step has been taken; release the buffers only afterwards.
  objective = take_gradient_step(w, grad, step_size, features, grades, num_samples, num_features);

  free(scores);
  free(grad);

  return objective;
}
Exemplo n.º 3
0
/*
 * ompcore - compute sparse coefficients for each of the L signals in x by greedy atom
 * selection, maintaining an incrementally updated Cholesky factor of the selected atoms.
 *
 * Parameters, as they are used by the code below:
 *   D          - dictionary, read as m atoms of n doubles each (atom pos starts at D+pos*n).
 *                Read when DtX is not supplied and on the standard (G==0) path.
 *   x          - signals, read as L columns of n doubles each (signal s starts at x+n*s).
 *   DtX        - optional. When non-zero it is read as L columns of m doubles; when zero an
 *                m-vector is allocated here and refilled for every signal with matT_vec.
 *   XtX        - optional. When non-zero, XtX[s] is the initial residual norm of signal s in
 *                error mode; when zero it is computed with dotprod.
 *   G          - optional. Zero selects the "standard" path (Dsub plus an explicit residual r),
 *                non-zero selects the "batch" path (Gsub, no explicit residual).
 *   n, m, L    - signal length, number of atoms, number of signals.
 *   T          - maximum number of atoms per signal. In error mode a T that is negative or
 *                larger than n is replaced by n.
 *   eps        - error-mode threshold; the selection loop runs while resnorm > eps*eps.
 *   gamma_mode - FULL_GAMMA produces a dense m x L matrix, anything else a sparse one.
 *   profile    - when non-zero, printprofinfo is called before returning.
 *   msg_delta  - when > 0, minimum number of seconds between progress messages.
 *   erroromp   - non-zero selects the error-constrained mode described above.
 *
 * Returns a Need value whose qGamma member is set to Gamma and whose qtimes__atoms member is
 * set to times__atoms.
 *
 * NOTE(review): Gamma is assigned in this function but its local declaration is commented
 * out; presumably it is declared at file scope -- confirm.
 */
Need ompcore(double D[], double x[], double DtX[], double XtX[], double G[], mwSize n, mwSize m, mwSize L,
                 int T, double eps, int gamma_mode, int profile, double msg_delta, int erroromp)
{
  
  profdata pd;
  /* mxArray *Gamma;*/
  mwIndex i, j, signum, pos, *ind, *gammaIr, *gammaJc, gamma_count;
  mwSize allocated_coefs, allocated_cols;
  int DtX_specified, XtX_specified, batchomp, standardomp, *selected_atoms,*times_atoms ;
  double *alpha, *r, *Lchol, *c, *Gsub, *Dsub, sum, *gammaPr, *tempvec1, *tempvec2; 
  double eps2, resnorm, delta, deltaprev, secs_remain;
  int mins_remain, hrs_remain;
  clock_t lastprint_time, starttime;

  /* aggregate return value; filled in just before returning */
  Need my;
  
  /*** status flags ***/
  
  DtX_specified = (DtX!=0);   /* indicates whether D'*x was provided */
  XtX_specified = (XtX!=0);   /* indicates whether sum(x.*x) was provided */
  
  standardomp = (G==0);       /* batch-omp or standard omp are selected depending on availability of G */
  batchomp = !standardomp;
  
  
  
  /*** allocate output matrix ***/
  
  
  if (gamma_mode == FULL_GAMMA) {
    
    /* allocate full matrix of size m X L */
    
   Gamma = mxCreateDoubleMatrix(m, L, mxREAL);
    gammaPr = mxGetPr(Gamma);
    gammaIr = 0;
    gammaJc = 0;
  }
  else {
    
    /* allocate sparse matrix with room for allocated_coefs nonzeros */
    
    /* for error-omp, begin with L*sqrt(n)/2 allocated nonzeros, otherwise allocate L*T nonzeros */
    allocated_coefs = erroromp ? (mwSize)(ceil(L*sqrt((double)n)/2.0) + 1.01) : L*T;
    Gamma = mxCreateSparse(m, L, allocated_coefs, mxREAL);
    gammaPr = mxGetPr(Gamma);
    gammaIr = mxGetIr(Gamma);
    gammaJc = mxGetJc(Gamma);
    gamma_count = 0;
    gammaJc[0] = 0;
  }
  
  
  /*** helper arrays ***/
  
  /* NOTE(review): ind and c hold n entries while the selection loop runs for up to T
     iterations; T is clamped to n only when erroromp is set -- confirm callers keep T <= n. */
  alpha = (double*)mxMalloc(m*sizeof(double));        /* contains D'*residual */
  ind = (mwIndex*)mxMalloc(n*sizeof(mwIndex));        /* indices of selected atoms */
  selected_atoms = (int*)mxMalloc(m*sizeof(int));     /* binary array with 1's for selected atoms */
  times_atoms = (int*)mxMalloc(m*sizeof(int));        /* per-atom counter, incremented when an atom is selected */
  c = (double*)mxMalloc(n*sizeof(double));            /* orthogonal projection result */
  
  /* current number of columns in Dsub / Gsub / Lchol */
  allocated_cols = erroromp ? (mwSize)(ceil(sqrt((double)n)/2.0) + 1.01) : T;
  
  /* Cholesky decomposition of D_I'*D_I */
  Lchol = (double*)mxMalloc(n*allocated_cols*sizeof(double));

  /* temporary vectors for various computations */
  tempvec1 = (double*)mxMalloc(m*sizeof(double));
  tempvec2 = (double*)mxMalloc(m*sizeof(double));
  
  if (batchomp) {
    /* matrix containing G(:,ind) - the columns of G corresponding to the selected atoms, in order of selection */
    Gsub = (double*)mxMalloc(m*allocated_cols*sizeof(double));
  }
  else {
    /* matrix containing D(:,ind) - the selected atoms from D, in order of selection */
    Dsub = (double*)mxMalloc(n*allocated_cols*sizeof(double));
    
    /* stores the residual */
    r = (double*)mxMalloc(n*sizeof(double));        
  }
  
  if (!DtX_specified) {
    /* contains D'*x for the current signal */
    DtX = (double*)mxMalloc(m*sizeof(double));  
  }
  
  
  
  /*** initializations for error omp ***/
  
  if (erroromp) {
    eps2 = eps*eps;        /* compute eps^2 */
    if (T<0 || T>n) {      /* unspecified max atom num - set max atoms to n */
      T = n;
    }
  }
  
  
  
  /*** initialize timers ***/
  
  initprofdata(&pd);             /* initialize profiling counters */
  starttime = clock();           /* record starting time for eta computations */
  lastprint_time = starttime;    /* time of last status display */
  
  
  
  /**********************   perform omp for each signal   **********************/
  
  
  
  for (signum=0; signum<L; ++signum) {
    
    
    /* initialize residual norm and deltaprev for error-omp */
    
    if (erroromp) {
      if (XtX_specified) {
        resnorm = XtX[signum];
      }
      else {
        resnorm = dotprod(x+n*signum, x+n*signum, n);
        addproftime(&pd, XtX_TIME);
      }
      deltaprev = 0;     /* delta tracks the value of gamma'*G*gamma */
    }
    else {
      /* ignore residual norm stopping criterion */
      eps2 = 0;
      resnorm = 1;
    }
    
    
    if (resnorm>eps2 && T>0) {
      
      /* compute DtX */
      
      if (!DtX_specified) {
        matT_vec(1, D, x+n*signum, DtX, n, m);
        addproftime(&pd, DtX_TIME);
      }
      
      
      /* initialize alpha := DtX */
      
      /* the offset is m*signum when DtX was supplied and 0 when it is the local per-signal buffer */
      memcpy(alpha, DtX + m*signum*DtX_specified, m*sizeof(double));
      
      
      /* mark all atoms as unselected */
      
      for (i=0; i<m; ++i) {
        selected_atoms[i] = 0;
      }
      /* the counters are cleared together with the selection flags, i.e. once per signal
         that enters the main loop */
	   for (i=0; i<m; ++i) {
        times_atoms[i] = 0;
      }
      
    }
    

    /* main loop */
    
    i=0;
    while (resnorm>eps2 && i<T) {

      /* index of next atom */
      
      pos = maxabs(alpha, m);
      addproftime(&pd, MAXABS_TIME);
      
      
      /* stop criterion: selected same atom twice, or inner product too small */
      
      if (selected_atoms[pos] || alpha[pos]*alpha[pos]<1e-14) {
        break;
      }
      
      
      /* mark selected atom */
      
      ind[i] = pos;
      selected_atoms[pos] = 1;
	  times_atoms[pos]++;
      
      
      /* matrix reallocation */
      
      /* only error mode can outgrow the initial allocation; otherwise allocated_cols was set to T */
      if (erroromp && i>=allocated_cols) {
        
        allocated_cols = (mwSize)(ceil(allocated_cols*MAT_INC_FACTOR) + 1.01);
        
        Lchol = (double*)mxRealloc(Lchol,n*allocated_cols*sizeof(double));
        
        batchomp ? (Gsub = (double*)mxRealloc(Gsub,m*allocated_cols*sizeof(double))) :
                   (Dsub = (double*)mxRealloc(Dsub,n*allocated_cols*sizeof(double))) ;
      }
      
      
      /* append column to Gsub or Dsub */
      
      if (batchomp) {
        memcpy(Gsub+i*m, G+pos*m, m*sizeof(double));
      }
      else {
        memcpy(Dsub+i*n, D+pos*n, n*sizeof(double));
      }
      
      
      /*** Cholesky update ***/
      
      if (i==0) {
        *Lchol = 1;
      }
      else {
        
        /* incremental Cholesky decomposition: compute next row of Lchol */
        
        if (standardomp) {
          matT_vec(1, Dsub, D+n*pos, tempvec1, n, i);      /* compute tempvec1 := Dsub'*d where d is new atom */
          addproftime(&pd, DtD_TIME);
        }
        else {
          vec_assign(tempvec1, Gsub+i*m, ind, i);          /* extract tempvec1 := Gsub(ind,i) */
        }
        backsubst('L', Lchol, tempvec1, tempvec2, n, i);   /* compute tempvec2 = Lchol \ tempvec1 */
        for (j=0; j<i; ++j) {                              /* write tempvec2 to end of Lchol */
          Lchol[j*n+i] = tempvec2[j];
        }
        
        /* compute Lchol(i,i) */
        sum = 0;
        for (j=0; j<i; ++j) {         /* compute sum of squares of last row without Lchol(i,i) */
          sum += SQR(Lchol[j*n+i]);
        }
        /* note: this break leaves i un-incremented, so the atom recorded in ind[i] above is
           not written to the output */
        if ( (1-sum) <= 1e-14 ) {     /* Lchol(i,i) is zero => selected atoms are dependent */
          break;
        }
        Lchol[i*n+i] = sqrt(1-sum);
      }
      
      addproftime(&pd, LCHOL_TIME);

      /* from here on i is the number of atoms selected so far */
      i++;
      
      
      /* perform orthogonal projection and compute sparse coefficients */
      
      vec_assign(tempvec1, DtX + m*signum*DtX_specified, ind, i);   /* extract tempvec1 = DtX(ind) */
      cholsolve('L', Lchol, tempvec1, c, n, i);                     /* solve LL'c = tempvec1 for c */
      addproftime(&pd, COMPCOEF_TIME);
      

      /* update alpha = D'*residual */
      
      if (standardomp) {
        mat_vec(-1, Dsub, c, r, n, i);             /* compute r := -Dsub*c */
        vec_sum(1, x+n*signum, r, n);              /* compute r := x+r */
        
        
        /*memcpy(r, x+n*signum, n*sizeof(double));   /* assign r := x */
        /*mat_vec1(-1, Dsub, c, 1, r, n, i);         /* compute r := r-Dsub*c */
        
        addproftime(&pd, COMPRES_TIME);
        matT_vec(1, D, r, alpha, n, m);            /* compute alpha := D'*r */
        addproftime(&pd, DtR_TIME);
        
        /* update residual norm */
        if (erroromp) {
          resnorm = dotprod(r, r, n);
          addproftime(&pd, UPDATE_RESNORM_TIME);
        }
      }
      else {
        mat_vec(1, Gsub, c, tempvec1, m, i);                              /* compute tempvec1 := Gsub*c */
        memcpy(alpha, DtX + m*signum*DtX_specified, m*sizeof(double));    /* set alpha = D'*x */
        vec_sum(-1, tempvec1, alpha, m);                                  /* compute alpha := alpha - tempvec1 */
        addproftime(&pd, UPDATE_DtR_TIME);
        
        /* update residual norm */
        if (erroromp) {
          vec_assign(tempvec2, tempvec1, ind, i);      /* assign tempvec2 := tempvec1(ind) */
          delta = dotprod(c,tempvec2,i);               /* compute c'*tempvec2 */
          resnorm = resnorm - delta + deltaprev;       /* residual norm update */
          deltaprev = delta;
          addproftime(&pd, UPDATE_RESNORM_TIME);
        }
      }
    }
    
    
    /*** generate output vector gamma ***/

    if (gamma_mode == FULL_GAMMA) {    /* write the coefs in c to their correct positions in gamma */
      for (j=0; j<i; ++j) {
        gammaPr[m*signum + ind[j]] = c[j];
      }
    }
    else {
      /* sort the coefs by index before writing them to gamma */
      quicksort(ind,c,i);
      addproftime(&pd, INDEXSORT_TIME);
      
      /* gamma is full - reallocate */
      if (gamma_count+i >= allocated_coefs) {
        
        /* grow geometrically until the i new coefficients fit */
        while(gamma_count+i >= allocated_coefs) {
          allocated_coefs = (mwSize)(ceil(GAMMA_INC_FACTOR*allocated_coefs) + 1.01);
        }
        
        mxSetNzmax(Gamma, allocated_coefs);
        mxSetPr(Gamma, mxRealloc(gammaPr, allocated_coefs*sizeof(double)));
        mxSetIr(Gamma, mxRealloc(gammaIr, allocated_coefs*sizeof(mwIndex)));
        
        /* re-read the data pointers after the reallocation */
        gammaPr = mxGetPr(Gamma);
        gammaIr = mxGetIr(Gamma);
      }
      
      /* append coefs to gamma and update the indices */
      for (j=0; j<i; ++j) {
        gammaPr[gamma_count] = c[j];
        gammaIr[gamma_count] = ind[j];
        gamma_count++;
      }
      gammaJc[signum+1] = gammaJc[signum] + i;
    }
    
    
    
    /*** display status messages ***/
    
    if (msg_delta>0 && (clock()-lastprint_time)/(double)CLOCKS_PER_SEC >= msg_delta)
    {
      lastprint_time = clock();
      
      /* estimated remaining time: elapsed time scaled by (signals left / signals done) */
      secs2hms( ((L-signum-1)/(double)(signum+1)) * ((lastprint_time-starttime)/(double)CLOCKS_PER_SEC) ,
        &hrs_remain, &mins_remain, &secs_remain);
      
      mexPrintf("omp: signal %d / %d, estimated remaining time: %02d:%02d:%05.2f\n",        
        signum+1, L, hrs_remain, mins_remain, secs_remain);
      mexEvalString("drawnow;");
    }
    
  }
  
  /* end omp */
  
  
  
  /*** print final messages ***/
  
  if (msg_delta>0) {
    mexPrintf("omp: signal %d / %d\n", signum, L);
  }
  
  if (profile) {
    printprofinfo(&pd, erroromp, batchomp, L);
  }
  
  
  
  /* free memory */
  
  if (!DtX_specified) {
    mxFree(DtX);
  }
  if (standardomp) {
    mxFree(r);
    mxFree(Dsub);
  }
  else {
    mxFree(Gsub);
  }  
  mxFree(tempvec2);
  mxFree(tempvec1);
  mxFree(Lchol);
  mxFree(c);
  mxFree(selected_atoms);
  mxFree(ind);
  mxFree(alpha);

  /* NOTE(review): the counter array allocated and updated above is the local times_atoms
     (single underscore), which is not freed in this function; the name stored below is
     times__atoms (double underscore), which is not declared in this function -- confirm
     which object is meant to be returned. */
  my.qGamma=Gamma;
  my.qtimes__atoms=times__atoms;
  
  /*return Gamma;*/
  return  my;
 
}
Exemplo n.º 4
0
// Take a well-sized step in the specified gradient direction and return the final objective value.
//
// The objective is sampled at w (obj_0), at one *eta-sized step along -grad (obj_1) and at a
// third probe (obj_2): half a step when the full step made things worse, two steps otherwise.
// A Newton-style estimate built from those three samples picks the multiple of *eta that is
// finally applied to w, capped at MAX_STEP_SIZE.
// (Assumes vec_add(dst, src, scale, n) accumulates scale*src into dst and vec_assign(dst, src,
// scale, n) stores scale*src into dst -- TODO confirm against their definitions.)
//
//   w        - weights (num_features entries); updated in place unless the step is rejected
//   grad     - step direction (num_features entries); only read here
//   eta      - in/out base step size: divided by 4 when even the half step is worse than not
//              moving, multiplied by 1.4 when the cap was hit, divided by 1.2 when the full
//              step overshot
//   features, grades, num_samples - forwarded unchanged to compute_objective
//
// Returns the objective at the w left behind: obj_0 when the step is rejected, obj_2 when the
// fitted step turned out worse than the third probe, otherwise the objective at the fitted
// step. If the scratch buffer cannot be allocated, w and *eta are left untouched and the
// current objective is returned.
double take_gradient_step(double *w, double *grad, double *eta, double **features, int *grades, int num_samples, int num_features) {
  // Scratch copy of w used for probing; it holds one entry per feature.
  double *new_w = malloc(num_features * sizeof *new_w);
  double obj_0, obj_1, obj_2, final_obj;
  double f_p, f_pp;
  double step_size;

  // malloc(0) may legitimately return NULL, so only treat NULL as failure for a non-empty request.
  if (new_w == NULL && num_features > 0) {
    printf("WARNING: out of memory; skipping gradient step\n");
    return compute_objective(w, features, grades, num_samples, num_features);
  }

  vec_assign(new_w, w, 1, num_features);
  obj_0 = compute_objective(new_w, features, grades, num_samples, num_features);

  vec_add(new_w, grad, -*eta, num_features);
  obj_1 = compute_objective(new_w, features, grades, num_samples, num_features);

  if (obj_1 > obj_0) {
    printf("WARNING: gradient direction didn't improve things with step size %3.10f\n", *eta);
    // Back off to the half step.
    vec_add(new_w, grad, *eta/2, num_features);
    obj_2 = compute_objective(new_w, features, grades, num_samples, num_features);
    if (obj_2 > obj_0) {
      printf("WARNING: gradient direction really didn't improve things; shrinking step size\n");
      *eta = *eta / 4;
      free(new_w);
      return obj_0;
    }

    // Finite differences over the samples at 0, 0.5 and 1 steps, centred on the half step.
    f_p = obj_1 - obj_0;
    f_pp = 4 * (obj_1 + obj_0 - 2*obj_2);

    if (f_pp != 0) step_size = 0.5 - f_p / f_pp;
    else step_size = 0.5;
  } else {
    // The full step helped: probe the double step as well.
    vec_add(new_w, grad, -*eta, num_features);
    obj_2 = compute_objective(new_w, features, grades, num_samples, num_features);

    // Finite differences over the samples at 0, 1 and 2 steps, centred on the full step.
    f_p = (obj_2 - obj_0) / 2;
    f_pp = obj_0 + obj_2 - 2*obj_1;

    if (f_pp > 0) step_size = 1 - f_p / f_pp;
    else step_size = 2;
  }

  step_size = MIN(step_size, MAX_STEP_SIZE);
  vec_add(w, grad, -*eta*step_size, num_features);
  final_obj = compute_objective(w, features, grades, num_samples, num_features);
  if (final_obj > obj_2) {
    printf("WARNING: final objective wasn't as good as we thought it should be\n");
    // Move w back to the third probe point (0.5 or 2 steps), whose objective is obj_2.
    if (obj_1 > obj_0) {
      vec_add(w, grad, *eta*(step_size-0.5), num_features);
    } else {
      vec_add(w, grad, *eta*(step_size-2), num_features);
    }
    final_obj = obj_2;
  }

  // Adjust step size
  if (step_size == MAX_STEP_SIZE) {
    *eta *= 1.4;
  } else if (obj_1 > obj_0) {
    *eta /= 1.2;
  }

  free(new_w);

  return final_obj;
}