Example #1
0
	int CheckMat(const Eigen::ArrayXXd &cost_mat) {
		// コスト行列のサイズを確認
		if( cost_mat.cols() != n_ || cost_mat.rows() != n_)
			return 0;
		else
			return 1;
	}
Example #2
0
int main() {
    distmesh::helper::HighPrecisionTime time;

    Eigen::ArrayXXd boundingBox(2, 2);
    boundingBox << -2.0, -1.0, 2.0, 1.0;

    // radii of ellipse
    Eigen::ArrayXd radii(2);
    radii << 2.0, 1.0;

    // create mesh
    Eigen::ArrayXXd points;
    Eigen::ArrayXXi elements;

    std::tie(points, elements) = distmesh::distmesh(
        distmesh::distanceFunction::elliptical(radii),
        0.2, 1.0, boundingBox);

    // print mesh properties and elapsed time
    std::cout << "Created mesh with " << points.rows() << " points and " << elements.rows() <<
        " elements in " << time.elapsed() * 1e3 << " ms." << std::endl;

    // save mesh to file
    distmesh::helper::savetxt<double>(points, "points.txt");
    distmesh::helper::savetxt<int>(elements, "triangulation.txt");

    // plot mesh using python
    return system("python plot_mesh.py");
}
Eigen::VectorXd banded_spd_solve(const Eigen::ArrayXXd & A, int m,
                                 const Eigen::VectorXd & b)
{
    int n = b.size();
    assert(A.rows() == n);
    assert(A.cols() == m+1);

    Eigen::ArrayXXd U = cholesky_banded(A, m);
    Eigen::ArrayXXd L = band_transpose(U,0,m);
    Eigen::VectorXd y = forward_subst_banded(L,b);
    Eigen::VectorXd x = backward_subst_banded(U,y);

    return x;
}
Eigen::MatrixXd dense_from_band(const Eigen::ArrayXXd & a, int m1, int m2)
{
    int n = a.rows();
    assert( a.cols() == m1+m2+1 );

    Eigen::MatrixXd A = Eigen::MatrixXd::Zero(n,n);
    for (int i=0; i<n; i++) {
        for (int j=0; j<n; j++) {
            if (i-j>m1 || j-i>m2) {
                continue;
            }
            A(i,j) = a(i,m1-i+j);
        }
    }

    return A;
}
void printConfusionMatrix(std::ostream& inOutputStream, const Eigen::ArrayXXd& inConfusionMatrix,
						  const std::vector<std::string>& inLabels, const std::string inSeparator /*= ","*/,
                          const std::string inQuote /*= "\""*/)
{
	inOutputStream << "reference\test" << inSeparator << inQuote;
	copy(inLabels.begin(), inLabels.begin()+inConfusionMatrix.cols()-1, std::ostream_iterator<std::string>(inOutputStream, (inQuote+inSeparator+inQuote).c_str()));
	inOutputStream << inLabels[inConfusionMatrix.cols()-1] << inQuote << endl;
	for (int i = 0; i < inConfusionMatrix.rows(); ++i)
	{
		inOutputStream << inQuote << inLabels[i] << inQuote;
		for (int j = 0; j < inConfusionMatrix.cols(); ++j)
		{
			inOutputStream << inSeparator << std::fixed << std::setprecision(9) << inConfusionMatrix(i,j);
		}
		inOutputStream << endl;
	}
}
Eigen::ArrayXXd band_transpose(const Eigen::ArrayXXd & a, int m1, int m2)
{
    int n = a.rows();
    int m = m1+m2+1;
    assert( a.cols() == m );

    Eigen::ArrayXXd b = Eigen::ArrayXXd::Zero(n,m);

    b.col(m2) = a.col(m1);
    for (int i=1; i<=m1; i++) {
        b.col(m2+i).block(0,0,n-i,1) = a.col(m1-i).block(i,0,n-i,1);
    }
    for (int i=1; i<=m2; i++) {
        b.col(m2-i).block(i,0,n-i,1) = a.col(m1+i).block(0,0,n-i,1);
    }

    return b;
}
std::tuple<int, double, double> laplace2d(const Eigen::ArrayXXd & a,
                                          const Eigen::ArrayXXd & b,
                                          const Eigen::ArrayXXd & c,
                                          const Eigen::ArrayXXd & d,
                                          const Eigen::ArrayXXd & e,
                                          const Eigen::ArrayXXd & f,
                                          Eigen::ArrayXXd * u,
                                          double omega,
                                          double tol,
                                          bool redblack,
                                          bool chebyshev)
{
    assert(omega<2);  // required by convergence
    assert(omega>=1); // enforce over-relaxation
    // estimated jacobi spectral radius
    double rjac = std::sqrt(1.0-(2.0/omega-1.0)*(2.0/omega-1.0));

    const int MAXITS=1000000;

    double anorm=0.0;
    double anormf=0.0;
    double resid=0.0;

    int imax=a.rows();
    int jmax=a.cols();
    assert(imax == b.rows());
    assert(imax == c.rows());
    assert(imax == d.rows());
    assert(imax == e.rows());
    assert(imax == f.rows());
    assert(imax == u->rows());
    assert(jmax == b.cols());
    assert(jmax == c.cols());
    assert(jmax == d.cols());
    assert(jmax == e.cols());
    assert(jmax == f.cols());
    assert(jmax == u->cols());

    // Compute the initial norm of residual and terminate 
    // iteration when norm has been reduced by a tolerance
    // factor. Assume the initial values of u are all zero.
    for (int i=1; i<imax-1; i++) {
        for (int j=1; j<jmax-1; j++) {
            resid = a(i,j)*(*u)(i+1,j)
                  + b(i,j)*(*u)(i-1,j)
                  + c(i,j)*(*u)(i,j+1)
                  + d(i,j)*(*u)(i,j-1)
                  + e(i,j)*(*u)(i,j)
                  - f(i,j);
            anormf += resid*resid;
        }
    }
    anormf = std::sqrt(anormf);

    for (int n=0; n<MAXITS; n++) {
        anorm = 0.0;
        //red-black checkerboard ordering
        if (redblack) {
            int isw=1;
            for (int ipass=0; ipass<2; ipass++) {
                int jsw=isw;
                for (int i=1; i<imax-1; i++) {
                    for (int j=jsw; j<jmax-1; j+=2) {
                        resid = a(i,j)*(*u)(i+1,j)
                              + b(i,j)*(*u)(i-1,j)
                              + c(i,j)*(*u)(i,j+1)
                              + d(i,j)*(*u)(i,j-1)
                              + e(i,j)*(*u)(i,j)
                              - f(i,j);
                        anorm += resid*resid;
                        (*u)(i,j) -= omega*resid/e(i,j);
                    }
                    jsw=3-jsw; //switch between 1 and 2
                }
                isw=3-isw; //switch between 1 and 2

                // chebyshev acceleration
                if (chebyshev) {
                    if (n==0 && ipass==0) {
                        omega = 1.0/(1.0-0.5*rjac*rjac);
                    }
                    else {
                        omega = 1.0/(1.0-0.25*rjac*rjac*omega);
                    }
                }
            }
        }
        //row-major ordering
        else {
            for (int i=1; i<imax-1; i++) {
                for (int j=1; j<jmax-1; j++) {
                    resid = a(i,j)*(*u)(i+1,j)
                          + b(i,j)*(*u)(i-1,j)
                          + c(i,j)*(*u)(i,j+1)
                          + d(i,j)*(*u)(i,j-1)
                          + e(i,j)*(*u)(i,j)
                          - f(i,j);
                    anorm += resid*resid;
                    (*u)(i,j) -= omega*resid/e(i,j);
                }
            }
        }
        
        anorm = std::sqrt(anorm);
        std::cout << "n = " << n
                  << ", rnorm = " << anorm 
                  << ", relative error = " << anorm/anormf 
                  << ", initial norm = " << anormf << std::endl;
        if (anorm <= tol*anormf) {
            return std::make_tuple(n,anorm, anorm/anormf);
        }
    }

    bool MAXITS_EXCEEDED = false;
    assert(!MAXITS_EXCEEDED);

    return std::make_tuple(MAXITS,anormf,1);
}
Example #8
0
// apply the distmesh algorithm
std::tuple<Eigen::ArrayXXd, Eigen::ArrayXXi> distmesh::distmesh(
    Functional const& distanceFunction, double const initialPointDistance,
    Functional const& elementSizeFunction, Eigen::Ref<Eigen::ArrayXXd const> const boundingBox,
    Eigen::Ref<Eigen::ArrayXXd const> const fixedPoints) {
    // determine dimension of mesh
    unsigned const dimension = boundingBox.cols();

    // create initial distribution in bounding box
    Eigen::ArrayXXd points = utils::createInitialPoints(distanceFunction,
        initialPointDistance, elementSizeFunction, boundingBox, fixedPoints);

    // create initial triangulation
    Eigen::ArrayXXi triangulation = triangulation::delaunay(points);

    // create buffer to store old point locations to calculate
    // retriangulation and stop criterion
    Eigen::ArrayXXd retriangulationCriterionBuffer = Eigen::ArrayXXd::Constant(
        points.rows(), points.cols(), INFINITY);
    Eigen::ArrayXXd stopCriterionBuffer = Eigen::ArrayXXd::Zero(
        points.rows(), points.cols());

    // main distmesh loop
    Eigen::ArrayXXi edgeIndices;
    for (unsigned step = 0; step < constants::maxSteps; ++step) {
        // retriangulate if point movement is above threshold
        if ((points - retriangulationCriterionBuffer).square().rowwise().sum().sqrt().maxCoeff() >
            constants::retriangulationThreshold * initialPointDistance) {
            // update triangulation
            triangulation = triangulation::delaunay(points);

            // reject triangles with circumcenter outside of the region
            Eigen::ArrayXXd circumcenter = Eigen::ArrayXXd::Zero(triangulation.rows(), dimension);
            for (int point = 0; point < triangulation.cols(); ++point) {
                circumcenter += utils::selectIndexedArrayElements<double>(
                    points, triangulation.col(point)) / triangulation.cols();
            }
            triangulation = utils::selectMaskedArrayElements<int>(triangulation,
                distanceFunction(circumcenter) < -constants::geometryEvaluationThreshold * initialPointDistance);

            // find unique edge indices
            edgeIndices = utils::findUniqueEdges(triangulation);

            // store current points positions
            retriangulationCriterionBuffer = points;
        }

        // calculate edge vectors and their length
        auto const edgeVector = (utils::selectIndexedArrayElements<double>(points, edgeIndices.col(0)) -
            utils::selectIndexedArrayElements<double>(points, edgeIndices.col(1))).eval();
        auto const edgeLength = edgeVector.square().rowwise().sum().sqrt().eval();

        // evaluate elementSizeFunction at midpoints of edges
        auto const desiredElementSize = elementSizeFunction(0.5 *
            (utils::selectIndexedArrayElements<double>(points, edgeIndices.col(0)) +
            utils::selectIndexedArrayElements<double>(points, edgeIndices.col(1)))).eval();

        // calculate desired edge length
        auto const desiredEdgeLength = (desiredElementSize * (1.0 + 0.4 / std::pow(2.0, dimension - 1)) *
            std::pow((edgeLength.pow(dimension).sum() / desiredElementSize.pow(dimension).sum()),
                1.0 / dimension)).eval();

        // calculate force vector for each edge
        auto const forceVector = (edgeVector.colwise() *
            ((desiredEdgeLength - edgeLength) / edgeLength).max(0.0)).eval();

        // store current points positions
        stopCriterionBuffer = points;

        // move points
        for (int edge = 0; edge < edgeIndices.rows(); ++edge) {
            if (edgeIndices(edge, 0) >= fixedPoints.rows()) {
                points.row(edgeIndices(edge, 0)) += constants::deltaT * forceVector.row(edge);
            }
            if (edgeIndices(edge, 1) >= fixedPoints.rows()) {
                points.row(edgeIndices(edge, 1)) -= constants::deltaT * forceVector.row(edge);
            }
        }

        // project points outside of domain to boundary
        utils::projectPointsToBoundary(distanceFunction, initialPointDistance, points);

        // stop, when maximum points movement is below threshold
        if ((points - stopCriterionBuffer).square().rowwise().sum().sqrt().maxCoeff() <
            constants::pointsMovementThreshold * initialPointDistance) {
            break;
        }
    }

    return std::make_tuple(points, triangulation);
}
int main()
{
	//initialisation
	Eigen::ArrayXXd R = Eigen::ArrayXXd::Zero(FEATURE_SIZE, NO_OF_POSTAGS); //matrix R, dimension: Nf x Nw
	std::default_random_engine generator;
	std::normal_distribution<double> distr(0.0, 0.1);
	//initialise the matrix R with small, random, non-zero numbers
	for (int i = 0; i < R.rows(); i++) {
		for (int j = 0; j < R.cols(); j++) {
			double d = distr(generator);
			//avoid having 0's as the weight, try to get all the coefficients initialised with small numbers
			while (d == 0.0) {
				d = distr(generator);
			}
			R(i, j) = d;
		}
	}
		//initialise the matrix Q, of size feature_size x (no_of_postags * no_of_distance) 
	Eigen::ArrayXXd Q = Eigen::ArrayXXd::Zero(FEATURE_SIZE, NO_OF_POSTAGS);
	for (int i = 0; i < Q.rows(); i++) {
		for (int j = 0; j < Q.cols(); j++) {
			double d = distr(generator);
			//avoid having 0's as the weight, try to get all the coefficients initialised with small numbers
			while (d == 0.0) {
				d = distr(generator);
			}
			Q(i, j) = d;
		}
	}
	//create an array of Ci (where each Ci is a matrix of Nf x Nf that governs the interaction between the ith feature and vn)
	std::vector<Eigen::ArrayXXd> Ci;
	for (int i = 0; i < 5; i++) {
		Eigen::ArrayXXd Ci_temp = Eigen::ArrayXXd::Zero(FEATURE_SIZE, FEATURE_SIZE);
		std::default_random_engine generator;
		std::normal_distribution<double> distr(0.0, 0.1);
		for (int j = 0; j < Ci_temp.rows(); j++) {
			for (int k = 0; k < Ci_temp.cols(); k++) {
				double d = distr(generator);
				//avoid having 0's as the weight, try to get all the coefficients initialised with small numbers
				while (d == 0.0) {
					d = distr(generator);
				}
				Ci_temp(j, k) = d;
			}
		}
		Ci.push_back(Ci_temp);
	}
	//initialise vi. We use a context of 5 (the postag of the head itself, the postag before the modifier, the postag after the modifier, the postag before the head, and the postag after the head)
	//initialise vn. Both vi and vn are column vectors with all 0's and a single 1, which indicates the POS-tag we desire to get
	//FOR NOW, we ignore the term br^transpose * R^transpose*vn, but find out what it is
	//initialise the bias term bv
	Eigen::ArrayXd bv;
	//print the array of biases into an external file
	std::ofstream final_postag_bias("final_postag_bias.txt"); 
	if (!final_postag_bias.is_open()) {
		std::cout << "fail to create final postag bias file" << std::endl;
	} 
	/*std::map<std::string, int> big_pos_tag_map = get_postag_bias(bv, "distance_count.txt", "fine_pos_tags.txt");
	std::cout << "size of big pos tag map = " << big_pos_tag_map.size() << std::endl;
	for (std::map<std::string, int>::iterator it = big_pos_tag_map.begin(); it != big_pos_tag_map.end(); it++) {
		final_postag_bias << "Pos tag " << it->first << " has bias value " << bv(it->second) << std::endl;
	} */

	/* create a pair between each "target pos tag" with a bias value (tested and correct)*/
	//Eigen::ArrayXd word_bias_pair = Eigen::ArrayXd::Zero(NO_OF_POSTAGS_WITHOUT_DUMMY * NO_OF_DISTANCE);
	int i_3 = -1;
	std::ifstream fine_pos_tags("fine_pos_tags.txt");
	std::string temp_string;
	/*
	while (getline(fine_pos_tags, temp_string)) {
		for (int i = -MAX_DISTANCE; i <= MAX_DISTANCE; i++) {
			if (i != 0) {
				if (i == -49 || i == -45 || i == -40 || i == -35 || i == -30 || i == -25 || i == -20 || i == -15 || i == -10 || i == -5 || i == -4 || i == -3 || i == -2 || i == -1 || i == 1 || i == 2 || i == 3 || i == 4 || i == 5 || i == 6 || i == 11 || i == 16 || i == 21 || i == 26 || i == 31 || i == 36|| i == 41 || i == 46) {
					i_3++;
					std::stringstream sstm;
					sstm << temp_string << i;
					if (big_pos_tag_map.find(sstm.str()) == big_pos_tag_map.end()) {
						std::cout << "ERROR" << std::endl;
					} else {
						int pos_tag_idx = big_pos_tag_map[sstm.str()];
						//word_bias_pair(i_3) = bv(pos_tag_idx);
					}
				}
			}
		}
	} */
	/* end */
	//main training part
	std::map<std::string, int> mapping = postag_mapping("fine_pos_tags.txt");
	fine_pos_tags.close();
	//print out both the small and big mapping
	//std::ofstream big_pos_tag_mapping("big_pos_tag_map.txt");
	std::ofstream small_pos_tag_mapping("small_pos_tag_map.txt");
	/*
	for (std::map<std::string, int>::iterator it = big_pos_tag_map.begin(); it != big_pos_tag_map.end(); it++) {
		big_pos_tag_mapping << it->first << " " << it->second << std::endl;
	} */
	for (std::map<std::string, int>::iterator it = mapping.begin(); it != mapping.end(); it++) {
		small_pos_tag_mapping << it->first << " " << it->second << std::endl;
	}
	small_pos_tag_mapping.close();

	//create the pairs
	std::ifstream pairs ("real_pairs_medium.txt");
	//initialise the gradient matrix
	std::vector<Eigen::MatrixXd> dCi;
	for (int i = 0; i < 5; i++) {
		Eigen::MatrixXd temp = Eigen::MatrixXd::Zero(FEATURE_SIZE, FEATURE_SIZE);
		dCi.push_back(temp);
	}
	Eigen::MatrixXd dR = Eigen::MatrixXd::Zero(FEATURE_SIZE, NO_OF_POSTAGS);
	Eigen::MatrixXd dQ = Eigen::MatrixXd::Zero(FEATURE_SIZE, NO_OF_POSTAGS);
	std::ofstream parameters ("parameters_each_epoch_small_trial_1_check_R.txt");
	if (!parameters.is_open()) {
		std::cout << "Error: output file not opening" << std::endl;
	}
	
	//initialise all the temporary variables outside the loop to save time on memory allocation on the heap (expensive)
	//all these temporary variables are reset to zero using .setZero() method at the end of each loop
	Eigen::MatrixXd tempC = Eigen::MatrixXd::Zero(FEATURE_SIZE, FEATURE_SIZE);
	Eigen::MatrixXd temp_R_Model = Eigen::MatrixXd::Zero(FEATURE_SIZE, NO_OF_POSTAGS);
	Eigen::MatrixXd temp_R_Data = Eigen::MatrixXd::Zero(FEATURE_SIZE, NO_OF_POSTAGS);
	Eigen::MatrixXd temp_Q = Eigen::MatrixXd::Zero(FEATURE_SIZE, NO_OF_POSTAGS);
	Eigen::MatrixXd temp_Q_Model = Eigen::MatrixXd::Zero(FEATURE_SIZE, NO_OF_POSTAGS);
	Eigen::MatrixXd temp_Q_Data = Eigen::MatrixXd::Zero(FEATURE_SIZE, NO_OF_POSTAGS);
	Eigen::VectorXd temp_Qw = Eigen::VectorXd::Zero(FEATURE_SIZE); 
	//Eigen::VectorXd dbias = Eigen::VectorXd::Zero(NO_OF_POSTAGS_WITHOUT_DUMMY * NO_OF_DISTANCE);
	//Eigen::VectorXd temp_dbias = Eigen::VectorXd::Zero(NO_OF_POSTAGS_WITHOUT_DUMMY * NO_OF_DISTANCE);
	Eigen::VectorXd tempCr = Eigen::VectorXd::Zero(FEATURE_SIZE);
	Eigen::ArrayXd temp_e = Eigen::ArrayXd::Zero(NO_OF_POSTAGS);
	/*
	Eigen::MatrixXd temp_matrix = Eigen::MatrixXd::Zero(NO_OF_POSTAGS, FEATURE_SIZE);
	Eigen::MatrixXd temp_matrix_2 = Eigen::MatrixXd::Zero(NO_OF_POSTAGS, FEATURE_SIZE); */
	//calculate the normalising constant for the first time
	std::ofstream gradient_check_R("gradient_check_R_big.txt");
	Eigen::MatrixXd curr_dR = Eigen::MatrixXd::Zero(FEATURE_SIZE, NO_OF_POSTAGS); //DELETE THIS LATER
	int no_of_iter = 0;
	for (int i = 0; i < NO_OF_EPOCH; i++) { 
		if (!pairs.is_open()) {
			std::cout << "Fail to open input file" << std::endl;
		} else {
			std::vector<std::string> temp;
			std::string line;
			while (getline(pairs, line)) {
				if (line.size() > 1) {
					no_of_iter++;
					temp = split(line, ' ');
					//initialise an array with the elements as the indices of: postag of the modifier, postag of the head, postag of before modifier, postag of after modifier, postag of before head, postag of after head
					int arr_temp[6] = {mapping[temp[0]], mapping[temp[1]], mapping[temp[2]], mapping[temp[3]], mapping[temp[4]], mapping[temp[5]]};
					if (no_of_iter % 10000 == 0) {
						std::cout << no_of_iter << std::endl;
					}
					//calculate the normalising constant
					double normalising_constant = 0.0;
					for (int iter2 = 0; iter2 < 5; iter2++) {
							tempCr += (Ci[iter2].matrix() * R.col(arr_temp[iter2+1]).matrix()); //prediction
					}
					for (int iter = 0; iter < Q.cols(); iter++) {
						double temp = exp((tempCr.transpose() * Q.col(iter).matrix()));//dot product
						temp_e(iter) = temp; 
						normalising_constant += temp; 
					} 					
					//calculate all the gradient changes for each of the Ci, from dCi[0] to dCi[4]
					//this operation is already efficient
					for (int j = 0; j < 5; j++) {
						for (int k = 0; k < Q.cols(); k++) {
							tempC += (temp_e(k) * (Q.col(k).matrix() * R.col(arr_temp[j+1]).transpose().matrix()));
						}
						//normalise
						tempC /= normalising_constant;
						dCi[j] += (Q.col(arr_temp[0]).matrix() * R.col(arr_temp[j+1]).transpose().matrix() - tempC);
						//reset the matrix tempC to prepare to calculate the next dCi[j]
						tempC.setZero();
					}
					
					//calculate dQ VERIFY
					for (int j = 0; j < 5; j++) {
						temp_Qw += (Ci[j].matrix() * R.col(arr_temp[j+1]).matrix());
					}
					for (int j = 0; j < Q.cols(); j++) {
						temp_Q_Model.col(j) += (temp_e(j) * temp_Qw);
					}
					temp_Q_Model /= normalising_constant;
					temp_Q_Data.col(arr_temp[0]) = temp_Qw;
					dQ += (temp_Q_Data - temp_Q_Model);
					temp_Qw.setZero();
					temp_Q_Model.setZero();
					temp_Q_Data.setZero();

					//calculate dR
					//calculate dR with respect to data
					for (int j = 0; j < 5; j++) {
						temp_R_Data.col(arr_temp[j+1]) += Ci[j].transpose().matrix() * Q.col(arr_temp[0]).matrix();
					}
					//calculate dR with respect to model
					for (int k = 0; k < Q.cols(); k++) {
						for (int j = 0; j < 5; j++) {
							temp_R_Model.col(arr_temp[j+1]) += (temp_e(k) * (Ci[j].transpose().matrix() * Q.col(k).matrix()));
						}
					}
					temp_R_Model /= normalising_constant;
					dR += (temp_R_Data - temp_R_Model);
					if (no_of_iter  % 1000 == 0) {
						curr_dR = temp_R_Data - temp_R_Model;
					}
					temp_R_Data.setZero();
					temp_R_Model.setZero();

					//calculate dbias
					//temp_dbias(arr_temp[0]) = 1.0;
					//dbias += (temp_dbias - temp_e.matrix() / normalising_constant);
					//temp_dbias(arr_temp[0]) = 0.0;

					tempCr.setZero();
					temp_e.setZero();

					if (no_of_iter % 1000 == 0) {
							Eigen::VectorXd temp = Eigen::VectorXd::Zero(FEATURE_SIZE);
							gradient_check_R <<  "current data point = " << no_of_iter <<std::endl;
								for (int j = 0; j < R.rows(); j++) {
									for (int k = 0; k < R.cols(); k++) {
										Eigen::ArrayXXd R_copy = R;
										double result_1 = 0.0;
										double result_2 = 0.0;
										double temp_double = 0.0;
										gradient_check_R << curr_dR(j, k) << " ";
										R_copy(j, k) += EPSILON;
										for (int iter2 = 0; iter2 < 5; iter2++) {
												temp += (Ci[iter2].matrix() * R_copy.col(arr_temp[iter2+1]).matrix());
										}
										result_1 += temp.transpose() * Q.col(arr_temp[0]).matrix();
										for (int l = 0; l < Q.cols(); l++) {
											temp_double += exp(temp.transpose() * Q.col(l).matrix());
										}
										result_1 -= log(temp_double);
										temp_double = 0.0;
										//calculate result_2
										R_copy(j, k) -= (2 * EPSILON);
										temp.setZero();
										for (int iter2 = 0; iter2 < 5; iter2++) {
												temp += (Ci[iter2].matrix() * R_copy.col(arr_temp[iter2 + 1]).matrix());	
										}
										result_2 += temp.transpose() * Q.col(arr_temp[0]).matrix();
										for (int l = 0; l < Q.cols(); l++) {
											temp_double += exp(temp.transpose() * Q.col(l).matrix());
										}
										result_2 -= log(temp_double);
										temp.setZero();
										gradient_check_R << (result_1 - result_2) / (2 * EPSILON) << std::endl;

									}
							}
							curr_dR.setZero();
				}

					//update the gradient if no_of_iter mod mini batch size == 0, reset the gradient matrix to 0

					if (no_of_iter % BATCH_SIZE == 0) {
						double curr_learning_rate = LEARNING_RATE / (1 + no_of_iter * LEARNING_DECAY);
						for (int j = 0; j < 5; j++) {
							Ci[j] = Ci[j].matrix() + curr_learning_rate * dCi[j];
							//reset the matrix dCi[j]
							dCi[j].setZero();
						}
						//word_bias_pair = word_bias_pair + curr_learning_rate * dbias.array();
						R = R.matrix() + curr_learning_rate * dR;
						Q = Q.matrix() + curr_learning_rate * dQ;
						dR.setZero();
						dQ.setZero();
						//dbias.setZero();
					} 
				} 
				temp.clear();
			}
			pairs.close();
		}
		parameters << "Epoch number: " << i+1 << std::endl;
		for (int j = 0; j < 5; j++) {
			parameters << "Matrix C" << j+1 << std::endl; 
			parameters << Ci[j] << std::endl;
			parameters << std::endl;
			parameters << "End of matrix C" << j+1 << std::endl;
		}
		parameters << "Matrix R" << std::endl;
		parameters << R << std::endl;
		parameters << "End of matrix R" << std::endl;
		parameters << "Matrix Q" << std::endl;
		parameters << Q << std::endl;
		parameters << "End of matrix Q" << std::endl;
		//parameters << "Bias vector" << std::endl;
		//parameters << word_bias_pair << std::endl;
		//parameters << "End of bias vector" << std::endl;
		pairs.open("real_pairs_medium.txt");
	} 
	pairs.close();
	parameters.close();
	return 0;
}