void NeuralNetwork::backprop(const arma::mat& input, const arma::mat& output)
{
    std::vector<arma::mat> gradients;

    // Error term (delta) at the output layer: activation minus target.
    gradients.push_back(m_activationOnLayer[m_numLayers - 1] - output);

    // Backpropagate the error through the hidden layers, skipping the bias column of theta.
    unsigned int prevErrorIndex = 0;
    for (int layer = m_numLayers - 2; layer > 0; --layer)
    {
        arma::mat error;
        error = gradients[prevErrorIndex++] * m_theta[layer].cols(1, m_theta[layer].n_cols - 1);
        error = error % sigmoidGradient(m_partialOnLayer[layer - 1]);
        gradients.push_back(error);
    }

    // Turn each error term into a weight gradient, averaged over the training examples.
    int errorIndex = 0;
    for (int layer = m_numLayers - 2; layer >= 0; --layer)
    {
        gradients[errorIndex] = (1.0 / input.n_rows) * (gradients[errorIndex].t() * m_activationOnLayer[layer]);
        ++errorIndex;
    }

    // Gradients were built from the output layer backwards; put them in layer order.
    std::reverse(gradients.begin(), gradients.end());

    // Add L2 regularization, leaving the bias column (column 0) unregularized.
    for (unsigned int layer = 0; layer < m_numLayers - 1; ++layer)
    {
        int lastCol = gradients[layer].n_cols - 1;
        gradients[layer].cols(1, lastCol) += (m_regFactor / input.n_rows) * m_theta[layer].cols(1, lastCol);
    }

    //checkGradients(input, output, gradients);
    gradientDescent(gradients);
}
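The backprop routine above calls a sigmoidGradient helper that is not part of this listing. A minimal sketch, assuming the values stored in m_partialOnLayer are raw pre-activations and showing the helper as a free function (in the project it is presumably a NeuralNetwork member):

#include <armadillo>

// Hypothetical helper: element-wise sigmoid derivative, sigma'(z) = sigma(z) .* (1 - sigma(z)).
static arma::mat sigmoidGradient(const arma::mat& z)
{
    arma::mat s = 1.0 / (1.0 + arma::exp(-z));  // element-wise logistic sigmoid
    return s % (1.0 - s);                       // '%' is Armadillo's element-wise product
}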
Example #2
int main()
{
	/* populateTrainingSet(), learningRate and gradientDescent() are defined elsewhere
	   in this program (the gradientDescent shown in Example #4 matches this call) */
	populateTrainingSet();
	printf("\nEnter the learning rate: ");
	scanf("%f", &learningRate);
	gradientDescent(0, 0);
	return 0;
}
int main(int argc, char **argv){
	char* filename = argv[1];
	/*This is the number of features provided on the command line*/
	/*The csv should contain values for each x value and the result, per example*/
	int features = atoi(argv[2]);
	int examples = atoi(argv[3]); /*Number of training examples provided*/
	double cost = DBL_MAX; /*Cost for the current hypothesis, set arbitrarily high*/
	/*Values for the coefficients in the hypothesis function*/
	double *theta = malloc(features * sizeof(double));
	double **X = malloc(examples * sizeof(double*));
	for(int i = 0; i < examples; i++){
		X[i] = malloc(features * sizeof(double));
	}
	double *Y = malloc(examples * sizeof(double));
	parse(features, examples, X, Y, filename);
	for(int i = 0; i < features; i++){
		theta[i] = 0;
	}
	theta[0] = 1;
	double **meanAndRange = malloc((features - 1) * sizeof(double*));
	for(int i = 0; i < features - 1; i++){
		meanAndRange[i] = malloc(2 * sizeof(double));
	}
	
	/*meanNormalization(X, Y, meanAndRange, features, examples);*/     

	clock_t begin, end;
	begin = clock();
	double timeElapsed;
	gradientDescent(X, Y, theta, meanAndRange, features, examples);
	int *values = malloc(features * sizeof(int)); /*One slot per coefficient, including the bias term x0*/
	values[0] = 1;
	char val[5];
	/*Print the learned formula*/
	printf("Learned function: %f", theta[0]);
	for(int i = 1; i < features; i++){
		printf(" + %f(x%d)",theta[i], i);
	}
	printf("\n");
	end = clock();
	timeElapsed = (double)(end - begin) / CLOCKS_PER_SEC;
	printf("Elapsed Time: %f\n", timeElapsed);
	/*Obtain experimental values*/
	for(int i = 1; i < features ; i++){
		printf("Value for x%d:", i);
		scanf("%4s", val); /*Bound the read to the size of val*/
		values[i] = atoi(val);
	}
	/*Print out the estimate for given values*/
	float output = 0;
	for(int i = 0; i < features; i++){
		output += values[i] * theta[i];
	}
	printf("\nOutput: %f\n", output);
}
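The main above calls a gradientDescent(X, Y, theta, meanAndRange, features, examples) helper whose body is not part of this listing. A minimal single-process sketch, assuming plain batch gradient descent for linear regression; the learning rate, the iteration cap, and ignoring meanAndRange are assumptions, not taken from the source:

#include <stdlib.h>

/* Hypothetical sketch of the batch update theta_j -= alpha * (1/m) * sum_i (h(x_i) - y_i) * x_ij.
 * The real helper (its learning rate and convergence test) may differ. */
void gradientDescent(double **X, double *Y, double *theta, double **meanAndRange,
                     int features, int examples)
{
	double alpha = 0.01;                          /* assumed learning rate */
	double *grad = malloc(features * sizeof(double));
	for(int iter = 0; iter < 10000; iter++){
		for(int j = 0; j < features; j++){
			grad[j] = 0;
		}
		for(int i = 0; i < examples; i++){
			double h = 0;                         /* hypothesis h(x_i) = theta . x_i */
			for(int j = 0; j < features; j++){
				h += theta[j] * X[i][j];
			}
			for(int j = 0; j < features; j++){
				grad[j] += (h - Y[i]) * X[i][j];
			}
		}
		for(int j = 0; j < features; j++){
			theta[j] -= alpha * grad[j] / examples;
		}
	}
	free(grad);
	(void)meanAndRange;                           /* normalization is commented out in the caller */
}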
Example #4
void gradientDescent(float x0, float x1)
{
	float a = x0, b = x1, temp0, temp1;
	/* Simultaneous update: compute both new coordinates before assigning them */
	temp0 = x0 - (learningRate * der_x0(x0, x1));
	temp1 = x1 - (learningRate * der_x1(x0, x1));
	x0 = temp0;
	x1 = temp1;
	printf("x0=%f\tx1=%f\n", x0, x1);
	getchar(); /* pause so each step can be inspected */
	/* Stop once both coordinates moved by less than 0.001 */
	temp0 = a - x0;
	temp1 = b - x1;
	temp0 = temp0 < 0 ? -temp0 : temp0;
	temp1 = temp1 < 0 ? -temp1 : temp1;
	if(temp1 < .001 && temp0 < .001)
	{
		printf(" %f %f\n ", x0, x1);
		exit(0);
	}
	gradientDescent(x0, x1); /* recurse until convergence */
}
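Examples #2 and #4 rely on a global learningRate, a populateTrainingSet() routine, and the partial-derivative helpers der_x0/der_x1, none of which are shown. A minimal sketch under the assumption that the program minimizes the example cost f(x0, x1) = (x0 - 3)^2 + (x1 - 2)^2; this cost, and the no-op populateTrainingSet, are illustrations only:

/* Hypothetical globals/helpers assumed by Examples #2 and #4 */
float learningRate;                       /* read from stdin in main() */

/* Illustrative cost: f(x0, x1) = (x0 - 3)^2 + (x1 - 2)^2, minimized at (3, 2) */
float der_x0(float x0, float x1) { (void)x1; return 2 * (x0 - 3); } /* df/dx0 */
float der_x1(float x0, float x1) { (void)x0; return 2 * (x1 - 2); } /* df/dx1 */

void populateTrainingSet(void) { /* nothing to load for this analytic cost */ }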
Example #5
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray * prhs[]) {
  /* compute the infimum in CSFM, provided that the W
   * (which is 2 x nPoint x nFrame) is centered
   */
  double *wOri, *q, *res;
  int i, j, k, l, iFrame, jFrame, nFrame, nPoint;
  mxArray *pIsDone;
  const mwSize *dim_array;
  double *errArray, *quaternionArray ;
  char *isDone;
  mxArray *pWPairwiseProd, *pTmp2By2, *pRTransposeR, *pDRTransposeR,
          *pQuaternionTmp, *pGradient, *pWPrimeWPrimeTranspose;
  double *wPairwiseProd, *tmp2By2, *RTransposeR, *dRTransposeR,
          *quaternionTmp, *gradient, *wPrimeWPrimeTranspose;
  
  /* Retrieve the useful data */
  wOri= mxGetPr(prhs[0]);
  dim_array = mxGetDimensions(prhs[0]);
  nPoint = dim_array[1];
  nFrame = dim_array[2];
  
  /* Create the output data */
  plhs[0] = mxCreateDoubleMatrix(nFrame, nFrame, mxREAL);
  errArray = mxGetPr(plhs[0]);
  
  plhs[1] = mxCreateDoubleMatrix(4, nFrame * nFrame, mxREAL);
  quaternionArray = mxGetPr(plhs[1]);
  
  pIsDone = mxCreateLogicalMatrix(nFrame, nFrame);
  isDone = (char*) mxGetLogicals(pIsDone); /* logical-array accessor; mxGetPr is only for double arrays */
  
  for (iFrame = 0, k = 0; iFrame < nFrame; ++iFrame)
    for (jFrame = 0; jFrame < nFrame; ++jFrame, ++k) {
      errArray[k] = 0;
      if (iFrame == jFrame) {
        /* set the rotation to identity */
        isDone[k] = 1;
        quaternionArray[4 * k + 0] = 1;
        for (i = 1; i < 4; ++i)
          quaternionArray[4 * k + i] = 0;
      } else
        isDone[k] = 0;
    }
  
  /* create the pairwise W multiplications between frames */
  pWPairwiseProd = mxCreateDoubleMatrix(2*nFrame, 2*nFrame, mxREAL);
  wPairwiseProd = mxGetPr(pWPairwiseProd);
  pTmp2By2 = mxCreateDoubleMatrix(2, 2, mxREAL);
  tmp2By2 = mxGetPr(pTmp2By2);
  
  for (i = 0; i < nFrame; ++i)
    for (j = 0; j <= i; ++j) {
      matrixMultiply(wOri + i * 2 * nPoint, wOri + j * 2 * nPoint, tmp2By2, 2, nPoint, 2);
      for (k = 0; k < 2; ++k)
        for (l = 0; l < 2; ++l)
          wPairwiseProd[(2*i+k)*2*nFrame + 2*j + l] = tmp2By2[l+2*k];
    }
  
  /* symmetrize the matrix */
  for (i = 0; i < 2*nFrame; ++i )
    for (j = i+1; j < 2*nFrame; ++j )
      wPairwiseProd[i*2*nFrame+j] = wPairwiseProd[j*2*nFrame+i];
  
  /* create some temporary matrices to use as caches */
  pRTransposeR = mxCreateDoubleMatrix(4, 4, mxREAL);
  RTransposeR = mxGetPr(pRTransposeR);
  pDRTransposeR = mxCreateDoubleMatrix(4, 4*4, mxREAL);
  dRTransposeR = mxGetPr(pDRTransposeR);
  pQuaternionTmp = mxCreateDoubleMatrix(4, 1, mxREAL);
  quaternionTmp = mxGetPr(pQuaternionTmp);
  pGradient = mxCreateDoubleMatrix(4, 1, mxREAL);
  gradient = mxGetPr(pGradient);
  pWPrimeWPrimeTranspose = mxCreateDoubleMatrix(4, 4, mxREAL);
  wPrimeWPrimeTranspose = mxGetPr(pWPrimeWPrimeTranspose);
  
  /* optimize each pair with jFrame > iFrame, alternating the jFrame sweep direction on successive iFrame values */
  for (iFrame = 0; iFrame < nFrame; ++iFrame) {
    for (jFrame = iFrame+1; jFrame < nFrame; ++jFrame)
      gradientDescent(wPrimeWPrimeTranspose, iFrame, jFrame, wPairwiseProd, errArray, quaternionArray, isDone, nFrame, nPoint, RTransposeR, dRTransposeR, quaternionTmp, gradient);
    ++iFrame;
    for (jFrame = nFrame - 1; jFrame > iFrame; --jFrame)
      gradientDescent(wPrimeWPrimeTranspose, iFrame, jFrame, wPairwiseProd, errArray, quaternionArray, isDone, nFrame, nPoint, RTransposeR, dRTransposeR, quaternionTmp, gradient);
  }
  
  /* second pass over the pairs with iFrame > jFrame, again alternating the sweep direction */
  for (jFrame = nFrame-1; jFrame > 0; --jFrame) {
    for (iFrame = jFrame+1; iFrame < nFrame; ++iFrame)
      gradientDescent(wPrimeWPrimeTranspose, iFrame, jFrame, wPairwiseProd, errArray, quaternionArray, isDone, nFrame, nPoint, RTransposeR, dRTransposeR, quaternionTmp, gradient);
    
    --jFrame;
    for (iFrame = nFrame - 1; iFrame > jFrame; --iFrame)
      gradientDescent(wPrimeWPrimeTranspose, iFrame, jFrame, wPairwiseProd, errArray, quaternionArray, isDone, nFrame, nPoint, RTransposeR, dRTransposeR, quaternionTmp, gradient);
  }
  
  /* Free memory */
  mxDestroyArray(pIsDone);
  mxDestroyArray(pWPrimeWPrimeTranspose);
  mxDestroyArray(pTmp2By2);
  mxDestroyArray(pWPairwiseProd);
  mxDestroyArray(pRTransposeR);
  mxDestroyArray(pDRTransposeR);
  mxDestroyArray(pQuaternionTmp);
  mxDestroyArray(pGradient);
}
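The MEX function above also relies on a matrixMultiply helper that is not shown. Since both arguments at the call site are 2 x nPoint blocks of the column-major W array and the result is 2 x 2, the helper presumably forms A * B^T; a minimal sketch under that assumption (the rowsA / inner / rowsB argument order is inferred from the call, not confirmed by the source):

/* Hypothetical helper: C = A * B^T for column-major A (rowsA x inner) and B (rowsB x inner).
 * Matches the call matrixMultiply(wOri + ..., wOri + ..., tmp2By2, 2, nPoint, 2). */
static void matrixMultiply(const double *A, const double *B, double *C,
                           int rowsA, int inner, int rowsB) {
  int i, j, k;
  for (i = 0; i < rowsA; ++i)
    for (j = 0; j < rowsB; ++j) {
      double sum = 0.0;
      for (k = 0; k < inner; ++k)
        sum += A[i + k * rowsA] * B[j + k * rowsB]; /* column-major indexing */
      C[i + j * rowsA] = sum;                       /* C is rowsA x rowsB, column-major */
    }
}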
Example #6
void GMExperiment6_1::displayBezierSpline()
{
    //Trying a new way of computing inflection points
    //First look for long straight bits on the curve and add
    //one of the end points to the list of knots
    float inflection_thresh = 1.5f;
    int32 inflection_filter = 4;
    Vector2i inflection_range;
    inflection_range[0] = -1; inflection_range[1]= -1;
    vector<Vector2i> searchRanges;
    vector<int> inflection_idx;

    for(int i = 0; i < curvature.size(); ++i)
    {
        if(std::abs(curvature[i]) < inflection_thresh)
        {
            if(inflection_range[0] < 0)
            {
                inflection_range[0] = i;
            }
        }
        else
        {
            if(inflection_range[0] >= 0) // An inflection range was being observed
            {
                inflection_idx.push_back(inflection_range[0]);
                inflection_range[1] = i;
                //inflection_idx.push_back(inflection_range[1]);
                searchRanges.push_back(inflection_range);
                inflection_range[0] = -1;
                inflection_range[1] = -1;
            }
        }
    }


    //Now add endpoints to the vector and filter out any indices extremely close to each other
    //See if the endpoints already exist. If not, then push them onto the data structure
    if(inflection_idx.empty())
    {
        inflection_idx.push_back(0);
        inflection_idx.push_back(curvature.size()-1);
    }
    else
    {
        if(inflection_idx[0]!=0)
        {
            inflection_idx.insert( inflection_idx.begin(), 0);
        }
        if(inflection_idx[inflection_idx.size()-1] != curvature.size()-1)
        {
            inflection_idx.push_back(curvature.size()-1);
        }
    }

    qDebug()<<"Number of inflection range points "<<inflection_idx.size()<<"\n";

    //Then look for sign changes in the segments not spanned by the long straight curves
    //and add those and sort the vector
    if(!searchRanges.empty())
    {
        if(searchRanges[0][0]!=0)
        {
            searchRanges.insert(searchRanges.begin(),makeVector2i(0,0));
        }
        if(searchRanges[searchRanges.size()-1][1]!=curvature.size()-1)
        {
            searchRanges.push_back(makeVector2i(curvature.size()-1,curvature.size()-1));
        }

        for(int i=0; i<searchRanges.size()-1; i++)
        {
            bool old_sign = curvature[searchRanges[i][1]]>0;
            for(int j=searchRanges[i][1]+1; j<searchRanges[i+1][0]; j++)
            {
                bool new_sign = curvature[j]>0;
                if(new_sign!=old_sign)
                {
                    inflection_idx.push_back(j);
                }
                old_sign=new_sign;
            }
        }
#if 1
    // Drop inflection indices that are closer than inflection_filter samples apart
    for(int i = 0; i < inflection_idx.size()-1; i++)
    {
        if(i < inflection_idx.size()-2)
        {
            if((inflection_idx[i+1] - inflection_idx[i]) < inflection_filter)
            {
                inflection_idx.erase(inflection_idx.begin()+i+1);
            }
        }
        else
        {
            if((inflection_idx[i+1] - inflection_idx[i]) < inflection_filter)
            {
                inflection_idx.erase(inflection_idx.begin()+i);
            }
        }
    }
#endif
    }
    sort(inflection_idx.begin(),inflection_idx.end());
    vector<int> knots;
    knots = inflection_idx;
    //knots.push_back(inflection_idx[0]);
    //knots.push_back(inflection_idx[inflection_idx.size()-1]);

    //for(int i=1; i<inflection_idx.size()-1; i+=2)
    //{ knots.push_back(inflection_idx[i]);
    //}
    //Do the tangent check between inflection points and add to knots
    for(int j=0; j<inflection_idx.size()-1; j++)
    {
            tangent_check(inflection_idx[j],inflection_idx[j+1],knots,smoothData);
    }

    std::sort(knots.begin(), knots.end());

#if 1
    // Likewise drop knots that are closer than knot_filter samples apart
    for(int i = 0; i < knots.size()-1; i++)
    {
        if(i < knots.size()-2)
        {
            if((knots[i+1] - knots[i]) < knot_filter)
            {
                knots.erase(knots.begin()+i+1);
            }
        }
        else
        {
            if((knots[i+1] - knots[i]) < knot_filter)
            {
                knots.erase(knots.begin()+i);
            }
        }
    }
#endif
    output<<"Total Number of control points (knots):"<< knots.size()<<"\n";


    int start = 0;
    int end = knots.size()-1;

    if(segment_id != -1)
    {
        start = segment_id;
        end = segment_id+1;
    }

    for(int j = start; j < end; ++j)
    {
        int i_start = knots[j];
        int i_end = knots[j+1];

        auto p0 = smoothData[i_start];
        auto p3 = smoothData[i_end];

        // estimate derivatives
        vector<Vector2d> d(2);
        d[0] = to(firstDerivative(smoothData, i_start));
        d[0].normalize();
        d[1] = -to(firstDerivative(smoothData, i_end));
        d[1].normalize();

        vector<Vector2f> points;
        for(int k = knots[j]; k <= knots[j+1]; k++)
            points.push_back(smoothData[k]);



//        if(points.size() < 6)
//            continue;

        Vector2d s;
        if(least_squares)
        {
            s = leastSquaresEstimate(points, p0, p3, d[0], d[1]);
        }
        else
        {
            s[0] = sqrt((points[1]-points[0]).getSqrNorm());
            s[1] = sqrt((points[points.size()-1]-points[points.size()-2]).getSqrNorm());
        }

        auto ctrl = s2ctrl(to(p0), to(p3), d, s);

        if(gradient_descent)
        {
            auto new_s = gradientDescent(ctrl, d, points, num_iterations);
            ctrl = s2ctrl(to(p0), to(p3), d, new_s);
        }

        // display the curve!
        auto piece = renderCubicBezier(ctrl, std::max(points.size(), size_t(20)));
        displayCurve(viewer, piece, 2);

        // display control points
        int pt_indices[4];
        if(display_control_points)
        {
            for(int k = 0; k < 4; ++k)
            {
                Point2D pt;
                pt.position = to(ctrl[k]);

                bool is_knot = k == 0 || k == 3;
                pt.size = is_knot ? 8 : 6;
                pt.color = is_knot ? makeVector4f(0, 1.0, 0, 1) :
                                     makeVector4f(1, 0, 1, 1);
                pt_indices[k] = viewer->addPoint(pt);
            }
            Line l;
            l.thickness = 1;
            l.color = makeVector4f(1,1,1,1);
            l.vertices[0] = pt_indices[0];
            l.vertices[1] = pt_indices[1];
            viewer->addLine(l);
            l.vertices[0] = pt_indices[2];
            l.vertices[1] = pt_indices[3];
            viewer->addLine(l);
        }
    }


    viewer->refresh();
}
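displayBezierSpline also leans on an s2ctrl helper that turns the two endpoints, the unit tangents d, and the scale pair s into the four cubic Bezier control points. A minimal sketch under the usual convention ctrl1 = p0 + s[0]*d[0] and ctrl2 = p3 + s[1]*d[1] (d[1] is already negated at the call site); the Vector2d type and this convention are assumptions taken from the call, not from the project's actual helper:

// Hypothetical sketch: build a cubic Bezier's control points from endpoints,
// unit tangent directions, and tangent-length scale factors.
// Matches the call ctrl = s2ctrl(to(p0), to(p3), d, s) in the listing above.
vector<Vector2d> s2ctrl(const Vector2d& p0, const Vector2d& p3,
                        const vector<Vector2d>& d, const Vector2d& s)
{
    vector<Vector2d> ctrl(4);
    ctrl[0] = p0;                  // first knot
    ctrl[1] = p0 + d[0] * s[0];    // inner control point along the start tangent
    ctrl[2] = p3 + d[1] * s[1];    // inner control point along the (negated) end tangent
    ctrl[3] = p3;                  // second knot
    return ctrl;
}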
int main(int argc, char **argv){
	int numProcs, procId;
	MPI_Init(&argc,&argv);
	MPI_Comm_size(MPI_COMM_WORLD, &numProcs);
	MPI_Comm_rank(MPI_COMM_WORLD, &procId);
	char* filename = argv[1];
	/*This is the number of features provided on the command line*/
	/*The csv should contain values for each x value and the result, per example*/
	int features = atoi(argv[2]);
	int examples = atoi(argv[3]); /*Number of training examples provided*/
	int itsOver = 0;
	double cost = DBL_MAX; /*Cost for the current hypothesis, set arbitrarily high*/
	/*Values for the coefficients in the hypothesis function*/
	double *theta = malloc(features * sizeof(double));
	double **X = malloc(examples * sizeof(double*));
	for(int i = 0; i < examples; i++){
		X[i] = malloc(features * sizeof(double));
	}
	double *Y = malloc(examples * sizeof(double));
	parse(features, examples, X, Y, filename);
	for(int i = 0; i < features; i++){
		theta[i] = 0;
	}
	theta[0] = 1;
	double **meanAndRange = malloc((features - 1) * sizeof(double*));
	for(int i = 0; i < features - 1; i++){
		meanAndRange[i] = malloc(2 * sizeof(double));
	}
	
	/*meanNormalization(X, Y, meanAndRange, features, examples);*/  

	double timeElapsed;
	if(procId == 0){
		timeElapsed = -MPI_Wtime();
	}
	double converged = gradientDescent(X, Y, theta, meanAndRange, features, examples, numProcs, procId);
	if(converged == 0){
		itsOver = 1;
		for(int i = 0; i < numProcs; i++){
			if(i != procId){
				MPI_Send(&itsOver, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
			}
		}
		printf("Proc %d learned function: %f", procId, theta[0]);
		fflush(stdout);
		for(int i = 1; i < features; i++){
			printf(" + %f(x%d)",theta[i], i);
		}
		printf("\n");
		fflush(stdout);
	}
	else{
		if(converged == -1){
			/*Skip to finalize*/
		}
		else{
			printf("proc %d: %f\n", procId, converged);
			MPI_Barrier(MPI_COMM_WORLD);
			double min;
			MPI_Allreduce(&converged, &min, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
			if(converged == min){
				/*Print the learned formula*/
				printf("Proc %d learned function: %f", procId, theta[0]);
				for(int i = 1; i < features; i++){
					printf(" + %f(x%d)",theta[i], i);
				}
				printf("\n");
				fflush(stdout);
			}
		}
	}
	/*
	Obtain experimental values
	int *values = malloc((features - 1) * sizeof(int));
	values[0] = 1;
	char val[5];
	for(int i = 1; i < features ; i++){
		printf("Value for x%d:", i);
		scanf("%s", val);
		values[i] = atoi(val);
	}
	Print out the estimate for given values
	float output = 0;
	for(int i = 0; i < features; i++){
		output += values[i] * theta[i];
	}
	printf("\nOutput: %f\n", output);
	*/
	MPI_Barrier(MPI_COMM_WORLD);
	if(procId == 0){
		timeElapsed += MPI_Wtime();
		printf("Elapsed time: %f\n", timeElapsed);
		fflush(stdout);
	}
	MPI_Finalize();
}