C++ (Cpp) ArrayXXd::matrix примеры использования

Язык программирования: C++ (Cpp)

Пространство имен/Пакет: eigen

Класс/Тип: ArrayXXd

Метод/Функция: matrix

Примеров на hotexamples.com: 2

C++ (Cpp) ArrayXXd::matrix - 2 примера найдено. Это лучшие примеры C++ (Cpp) кода для eigen::ArrayXXd::matrix, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

rows(9)

col(9)

cols(8)

block(3)

setZero(3)

matrix(2)

setOnes(2)

cwiseAbs(1)

row(1)

Пример #1

Показать файл

Файл: SPMBgraphsqrt.cpp Проект: cran/huge

//[[Rcpp::export]]
List SPMBgraphsqrt(Eigen::Map<Eigen::MatrixXd> data, NumericVector &lambda, int nlambda, int d, NumericVector &x, IntegerVector &col_cnz, IntegerVector &row_idx)
{

    Eigen::ArrayXd Xb, r, grad, w1, Y, XX, gr;
    Eigen::ArrayXXd X;
    Eigen::MatrixXd tmp_icov;
    tmp_icov.resize(d, d);
    tmp_icov.setZero();
    std::vector<Eigen::MatrixXd > tmp_icov_p;
    tmp_icov_p.clear();
    for(int i = 0; i < nlambda; i++)
      tmp_icov_p.push_back(tmp_icov);
    int n = data.rows();
    X = data;
    XX.resize(d);
    for (int j = 0; j < d; j++)
      XX[j] = (X.col(j)*X.col(j)).sum()/n;
    double prec = 1e-4;
    int max_iter = 1000;
    int num_relaxation_round = 3;
	  int cnz = 0;
    for(int m=0;m<d;m++)
    {
      Xb.resize(n);
      Xb.setZero();
      grad.resize(d);
      grad.setZero();
      gr.resize(d);
      gr.setZero();
      w1.resize(d);
      w1.setZero();
      r.resize(n);
      r.setZero();
      Y = X.col(m);

      Eigen::ArrayXd Xb_master(n);
      Eigen::ArrayXd w1_master(n);
      std::vector<int> actset_indcat(d, 0);
      std::vector<int> actset_indcat_master(d, 0);
      std::vector<int> actset_idx;
      std::vector<double> old_coef(d, 0);
      std::vector<double> grad(d, 0);
      std::vector<double> grad_master(d, 0);

      double a = 0, g = 0, L = 0, sum_r = 0, sum_r2 = 0;
      double tmp_change = 0, local_change = 0;

      r = Y - Xb;
      sum_r = r.sum();
      sum_r2 = r.matrix().dot(r.matrix());
      L = sqrt(sum_r2 / n);

      double dev_thr = fabs(L) * prec;

      //cout<<dev_thr<<endl;


      for(int i = 0; i < d; i++)
      {
        grad[i] = (r * X.col(i)).sum() / (n*L);
      }
      for(int i = 0; i < d; i++) gr[i] = abs(grad[i]);
      w1_master = w1;
      Xb_master = Xb;
      for (int i = 0; i < d; i++) grad_master[i] = gr[i];

      std::vector<double> stage_lambdas(d, 0);

      for(int i=0;i<nlambda;i++)
      {
        double ilambda = lambda[i];
        w1 = w1_master;
        Xb = Xb_master;

        for (int j = 0; j < d; j++)
        {
          gr[j] = grad_master[j];
          actset_indcat[j] = actset_indcat_master[j];
        }

        // init the active set
        double threshold;
        if (i > 0)
          threshold = 2 * lambda[i] - lambda[i - 1];
        else
          threshold = 2 * lambda[i];

        for (int j = 0; j < m; ++j)
        {
          stage_lambdas[j] = lambda[i];

          if (gr[j] > threshold) actset_indcat[j] = 1;
        }
        for (int j = m+1; j < d; ++j)
        {
          stage_lambdas[j] = lambda[i];

          if (gr[j] > threshold) actset_indcat[j] = 1;
        }
        stage_lambdas[m] = lambda[i];
        r = Y - Xb;
        sum_r = r.sum();
        sum_r2 = r.matrix().dot(r.matrix());
        L = sqrt(sum_r2 / n);
        // loop level 0: multistage convex relaxation
        int loopcnt_level_0 = 0;
        int idx;
        double old_w1, updated_coord;
        while (loopcnt_level_0 < num_relaxation_round)
        {
          loopcnt_level_0++;

          // loop level 1: active set update
          int loopcnt_level_1 = 0;
          bool terminate_loop_level_1 = true;
          while (loopcnt_level_1 < max_iter)
          {
            loopcnt_level_1++;
            terminate_loop_level_1 = true;

            for (int j = 0; j < d; j++) old_coef[j] = w1[j];

            // initialize actset_idx
            actset_idx.clear();
            for (int j = 0; j < m; j++)
              if (actset_indcat[j])
              {
                g = 0.0;
                a = 0.0;

                double tmp;

                sum_r2 = r.matrix().dot(r.matrix());
                L = sqrt(sum_r2 / n);

                Eigen::ArrayXd wXX  = (1 - r*r/sum_r2) * X.col(j) * X.col(j);
                g = (wXX * w1[j] + r * X.col(j)).sum()/(n*L);
                a = wXX.sum()/(n*L);

                tmp = w1[j];
                w1[j] = thresholdl1(g, stage_lambdas[j]) / a;

                tmp = w1[j] - tmp;
                // Xb += delta*X[idx*n]
                Xb = Xb + tmp * X.col(j);

                sum_r = 0.0;
                sum_r2 = 0.0;
                // r -= delta*X
                r = r - tmp * X.col(j);
                sum_r = r.sum();

                sum_r2 = r.matrix().dot(r.matrix());
                L = sqrt(sum_r2 / n);

                updated_coord = w1[j];

                if (fabs(updated_coord) > 0) actset_idx.push_back(j);
              }

            for (int j = m+1; j < d; j++)
              if (actset_indcat[j])
              {
                  g = 0.0;
                  a = 0.0;

                  double tmp;

                  sum_r2 = r.matrix().dot(r.matrix());
                  L = sqrt(sum_r2 / n);

                  Eigen::ArrayXd wXX  = (1 - r*r/sum_r2) * X.col(j) * X.col(j);
                  g = (wXX * w1[j] + r * X.col(j)).sum()/(n*L);
                  a = wXX.sum()/(n*L);

                  tmp = w1[j];
                  w1[j] = thresholdl1(g, stage_lambdas[j]) / a;

                  tmp = w1[j] - tmp;
                  // Xb += delta*X[idx*n]
                  Xb = Xb + tmp * X.col(j);

                  sum_r = 0.0;
                  sum_r2 = 0.0;
                  // r -= delta*X
                  r = r - tmp * X.col(j);
                  sum_r = r.sum();

                  sum_r2 = r.matrix().dot(r.matrix());
                  L = sqrt(sum_r2 / n);

                  updated_coord = w1[j];

                  if (fabs(updated_coord) > 0) actset_idx.push_back(j);
                }

              // loop level 2: proximal newton on active set
            int loopcnt_level_2 = 0;
            bool terminate_loop_level_2 = true;
            while (loopcnt_level_2 < max_iter)
            {
              loopcnt_level_2++;
              terminate_loop_level_2 = true;

              for (int k = 0; k < actset_idx.size(); k++)
              {
                  idx = actset_idx[k];

                  old_w1 = w1[idx];
                  g = 0.0;
                  a = 0.0;

                  double tmp;

                  sum_r2 = r.matrix().dot(r.matrix());
                  L = sqrt(sum_r2 / n);

                  Eigen::ArrayXd wXX  = (1 - r*r/sum_r2) * X.col(idx) * X.col(idx);
                  g = (wXX * w1[idx] + r * X.col(idx)).sum()/(n*L);
                  a = wXX.sum()/(n*L);

                  tmp = w1[idx];
                  w1[idx] = thresholdl1(g, stage_lambdas[idx]) / a;

                  tmp = w1[idx] - tmp;
                  // Xb += delta*X[idx*n]
                  Xb = Xb + tmp * X.col(idx);

                  sum_r = 0.0;
                  sum_r2 = 0.0;
                  // r -= delta*X
                  r = r - tmp * X.col(idx);
                  sum_r = r.sum();

                  sum_r2 = r.matrix().dot(r.matrix());
                  L = sqrt(sum_r2 / n);

                  updated_coord = w1[idx];
                  tmp_change = old_w1 - w1[idx];
                  double a =  (X.col(idx) * X.col(idx) * (1 - r * r/(L*L*n))).sum()/(n*L);
                  local_change = a * tmp_change * tmp_change / (2 * L * n);
                  if (local_change > dev_thr)
                    terminate_loop_level_2 = false;
                }
              if (terminate_loop_level_2)
                break;
            }

            terminate_loop_level_1 = true;
              // check stopping criterion 1: fvalue change
            for (int k = 0; k < actset_idx.size(); ++k)
            {
              idx = actset_idx[k];
              tmp_change = old_w1 - w1[idx];
              double a =  (X.col(idx) * X.col(idx) * (1 - r * r/(L*L*n))).sum()/(n*L);
              local_change = a * tmp_change * tmp_change / (2 * L * n);
              if (local_change > dev_thr)
                terminate_loop_level_1 = false;
            }

            r = Y - Xb;
            sum_r = r.sum();
            sum_r2 = r.matrix().dot(r.matrix());
            L = sqrt(sum_r2 / n);

            if (terminate_loop_level_1)
              break;


              // check stopping criterion 2: active set change
            bool new_active_idx = false;
            for (int k = 0; k < m; k++)
              if (actset_indcat[k] == 0)
              {
                grad[idx] = (r * X.col(idx)).sum() / (n*L);
                //cout<<grad[idx];
                gr[k] = fabs(grad[k]);
                if (gr[k] > stage_lambdas[k])
                {
                  actset_indcat[k] = 1;
                  new_active_idx = true;
                }
              }
            for (int k = m+1; k < d; k++)
              if (actset_indcat[k] == 0)
              {
                grad[idx] = (r * X.col(idx)).sum() / (n*L);
                //cout<<grad[idx]
                gr[k] = fabs(grad[k]);
                if (gr[k] > stage_lambdas[k])
                {
                  actset_indcat[k] = 1;
                  new_active_idx = true;
                }
              }
            if(!new_active_idx)
              break;
          }
          if (loopcnt_level_0 == 1)
          {
            for (int j = 0; j < d; j++)
            {
              w1_master[j] = w1[j];

              grad_master[j] = gr[j];
              actset_indcat_master[j] = actset_indcat[j];
            }

            for (int j = 0; j < n; j++) Xb_master[j] = Xb[j];
          }
        }
        for(int j=0;j<actset_idx.size();j++)
        {
          int w_idx = actset_idx[j];
          x[cnz] = w1[w_idx];
          row_idx[cnz] = i*d+w_idx;
          cnz++;
          //cout<<cnz<<"    ";
        }
        double tal = 0;
        Eigen::MatrixXd temp;
        temp.resize(n, 1);
        for(int j = 0; j < n; j++)
        {
          temp(j, 0) = 0;
          for(int k = 0; k < d; k++)
            temp(j, 0) += X.matrix()(j, k)*w1[k];
          temp(j, 0) = Y[j] - temp(j, 0);
        }
        //temp = Y.matrix() - X.matrix().transpose()*w1.matrix();
        for(int j = 0; j < n; j++)
          tal += temp(j, 0)*temp(j, 0);
        tal = sqrt(tal)/sqrt(n);

        tmp_icov = tmp_icov_p[i];
        tmp_icov(m, m) = pow(tal, -2);
        for(int j = 0; j < m; j++)
          tmp_icov(j, m) = -tmp_icov(m, m)*w1[j];
        for(int j = m+1; j < d; j++)
          tmp_icov(j, m) = -tmp_icov(m, m)*w1[j];
        tmp_icov_p[i] = tmp_icov;
      }
      col_cnz[m+1]=cnz;
    }
    for(int i = 0; i < nlambda; i++)
      tmp_icov_p[i] = (tmp_icov_p[i].transpose()+tmp_icov_p[i])/2;
	  return List::create(
	    _["col_cnz"] = col_cnz,
	    _["row_idx"] = row_idx,
	    _["x"] = x,
	    _["icov"] = tmp_icov_p
	  );
}

Пример #2

Показать файл

Файл: test_check_R.cpp Проект: adhigunasurya/Log_Bilinear_Dependency_Parsing

int main()
{
	//initialisation
	Eigen::ArrayXXd R = Eigen::ArrayXXd::Zero(FEATURE_SIZE, NO_OF_POSTAGS); //matrix R, dimension: Nf x Nw
	std::default_random_engine generator;
	std::normal_distribution<double> distr(0.0, 0.1);
	//initialise the matrix R with small, random, non-zero numbers
	for (int i = 0; i < R.rows(); i++) {
		for (int j = 0; j < R.cols(); j++) {
			double d = distr(generator);
			//avoid having 0's as the weight, try to get all the coefficients initialised with small numbers
			while (d == 0.0) {
				d = distr(generator);
			}
			R(i, j) = d;
		}
	}
		//initialise the matrix Q, of size feature_size x (no_of_postags * no_of_distance) 
	Eigen::ArrayXXd Q = Eigen::ArrayXXd::Zero(FEATURE_SIZE, NO_OF_POSTAGS);
	for (int i = 0; i < Q.rows(); i++) {
		for (int j = 0; j < Q.cols(); j++) {
			double d = distr(generator);
			//avoid having 0's as the weight, try to get all the coefficients initialised with small numbers
			while (d == 0.0) {
				d = distr(generator);
			}
			Q(i, j) = d;
		}
	}
	//create an array of Ci (where each Ci is a matrix of Nf x Nf that governs the interaction between the ith feature and vn)
	std::vector<Eigen::ArrayXXd> Ci;
	for (int i = 0; i < 5; i++) {
		Eigen::ArrayXXd Ci_temp = Eigen::ArrayXXd::Zero(FEATURE_SIZE, FEATURE_SIZE);
		std::default_random_engine generator;
		std::normal_distribution<double> distr(0.0, 0.1);
		for (int j = 0; j < Ci_temp.rows(); j++) {
			for (int k = 0; k < Ci_temp.cols(); k++) {
				double d = distr(generator);
				//avoid having 0's as the weight, try to get all the coefficients initialised with small numbers
				while (d == 0.0) {
					d = distr(generator);
				}
				Ci_temp(j, k) = d;
			}
		}
		Ci.push_back(Ci_temp);
	}
	//initialise vi. We use a context of 5 (the postag of the head itself, the postag before the modifier, the postag after the modifier, the postag before the head, and the postag after the head)
	//initialise vn. Both vi and vn are column vectors with all 0's and a single 1, which indicates the POS-tag we desire to get
	//FOR NOW, we ignore the term br^transpose * R^transpose*vn, but find out what it is
	//initialise the bias term bv
	Eigen::ArrayXd bv;
	//print the array of biases into an external file
	std::ofstream final_postag_bias("final_postag_bias.txt"); 
	if (!final_postag_bias.is_open()) {
		std::cout << "fail to create final postag bias file" << std::endl;
	} 
	/*std::map<std::string, int> big_pos_tag_map = get_postag_bias(bv, "distance_count.txt", "fine_pos_tags.txt");
	std::cout << "size of big pos tag map = " << big_pos_tag_map.size() << std::endl;
	for (std::map<std::string, int>::iterator it = big_pos_tag_map.begin(); it != big_pos_tag_map.end(); it++) {
		final_postag_bias << "Pos tag " << it->first << " has bias value " << bv(it->second) << std::endl;
	} */

	/* create a pair between each "target pos tag" with a bias value (tested and correct)*/
	//Eigen::ArrayXd word_bias_pair = Eigen::ArrayXd::Zero(NO_OF_POSTAGS_WITHOUT_DUMMY * NO_OF_DISTANCE);
	int i_3 = -1;
	std::ifstream fine_pos_tags("fine_pos_tags.txt");
	std::string temp_string;
	/*
	while (getline(fine_pos_tags, temp_string)) {
		for (int i = -MAX_DISTANCE; i <= MAX_DISTANCE; i++) {
			if (i != 0) {
				if (i == -49 || i == -45 || i == -40 || i == -35 || i == -30 || i == -25 || i == -20 || i == -15 || i == -10 || i == -5 || i == -4 || i == -3 || i == -2 || i == -1 || i == 1 || i == 2 || i == 3 || i == 4 || i == 5 || i == 6 || i == 11 || i == 16 || i == 21 || i == 26 || i == 31 || i == 36|| i == 41 || i == 46) {
					i_3++;
					std::stringstream sstm;
					sstm << temp_string << i;
					if (big_pos_tag_map.find(sstm.str()) == big_pos_tag_map.end()) {
						std::cout << "ERROR" << std::endl;
					} else {
						int pos_tag_idx = big_pos_tag_map[sstm.str()];
						//word_bias_pair(i_3) = bv(pos_tag_idx);
					}
				}
			}
		}
	} */
	/* end */
	//main training part
	std::map<std::string, int> mapping = postag_mapping("fine_pos_tags.txt");
	fine_pos_tags.close();
	//print out both the small and big mapping
	//std::ofstream big_pos_tag_mapping("big_pos_tag_map.txt");
	std::ofstream small_pos_tag_mapping("small_pos_tag_map.txt");
	/*
	for (std::map<std::string, int>::iterator it = big_pos_tag_map.begin(); it != big_pos_tag_map.end(); it++) {
		big_pos_tag_mapping << it->first << " " << it->second << std::endl;
	} */
	for (std::map<std::string, int>::iterator it = mapping.begin(); it != mapping.end(); it++) {
		small_pos_tag_mapping << it->first << " " << it->second << std::endl;
	}
	small_pos_tag_mapping.close();

	//create the pairs
	std::ifstream pairs ("real_pairs_medium.txt");
	//initialise the gradient matrix
	std::vector<Eigen::MatrixXd> dCi;
	for (int i = 0; i < 5; i++) {
		Eigen::MatrixXd temp = Eigen::MatrixXd::Zero(FEATURE_SIZE, FEATURE_SIZE);
		dCi.push_back(temp);
	}
	Eigen::MatrixXd dR = Eigen::MatrixXd::Zero(FEATURE_SIZE, NO_OF_POSTAGS);
	Eigen::MatrixXd dQ = Eigen::MatrixXd::Zero(FEATURE_SIZE, NO_OF_POSTAGS);
	std::ofstream parameters ("parameters_each_epoch_small_trial_1_check_R.txt");
	if (!parameters.is_open()) {
		std::cout << "Error: output file not opening" << std::endl;
	}
	
	//initialise all the temporary variables outside the loop to save time on memory allocation on the heap (expensive)
	//all these temporary variables are reset to zero using .setZero() method at the end of each loop
	Eigen::MatrixXd tempC = Eigen::MatrixXd::Zero(FEATURE_SIZE, FEATURE_SIZE);
	Eigen::MatrixXd temp_R_Model = Eigen::MatrixXd::Zero(FEATURE_SIZE, NO_OF_POSTAGS);
	Eigen::MatrixXd temp_R_Data = Eigen::MatrixXd::Zero(FEATURE_SIZE, NO_OF_POSTAGS);
	Eigen::MatrixXd temp_Q = Eigen::MatrixXd::Zero(FEATURE_SIZE, NO_OF_POSTAGS);
	Eigen::MatrixXd temp_Q_Model = Eigen::MatrixXd::Zero(FEATURE_SIZE, NO_OF_POSTAGS);
	Eigen::MatrixXd temp_Q_Data = Eigen::MatrixXd::Zero(FEATURE_SIZE, NO_OF_POSTAGS);
	Eigen::VectorXd temp_Qw = Eigen::VectorXd::Zero(FEATURE_SIZE); 
	//Eigen::VectorXd dbias = Eigen::VectorXd::Zero(NO_OF_POSTAGS_WITHOUT_DUMMY * NO_OF_DISTANCE);
	//Eigen::VectorXd temp_dbias = Eigen::VectorXd::Zero(NO_OF_POSTAGS_WITHOUT_DUMMY * NO_OF_DISTANCE);
	Eigen::VectorXd tempCr = Eigen::VectorXd::Zero(FEATURE_SIZE);
	Eigen::ArrayXd temp_e = Eigen::ArrayXd::Zero(NO_OF_POSTAGS);
	/*
	Eigen::MatrixXd temp_matrix = Eigen::MatrixXd::Zero(NO_OF_POSTAGS, FEATURE_SIZE);
	Eigen::MatrixXd temp_matrix_2 = Eigen::MatrixXd::Zero(NO_OF_POSTAGS, FEATURE_SIZE); */
	//calculate the normalising constant for the first time
	std::ofstream gradient_check_R("gradient_check_R_big.txt");
	Eigen::MatrixXd curr_dR = Eigen::MatrixXd::Zero(FEATURE_SIZE, NO_OF_POSTAGS); //DELETE THIS LATER
	int no_of_iter = 0;
	for (int i = 0; i < NO_OF_EPOCH; i++) { 
		if (!pairs.is_open()) {
			std::cout << "Fail to open input file" << std::endl;
		} else {
			std::vector<std::string> temp;
			std::string line;
			while (getline(pairs, line)) {
				if (line.size() > 1) {
					no_of_iter++;
					temp = split(line, ' ');
					//initialise an array with the elements as the indices of: postag of the modifier, postag of the head, postag of before modifier, postag of after modifier, postag of before head, postag of after head
					int arr_temp[6] = {mapping[temp[0]], mapping[temp[1]], mapping[temp[2]], mapping[temp[3]], mapping[temp[4]], mapping[temp[5]]};
					if (no_of_iter % 10000 == 0) {
						std::cout << no_of_iter << std::endl;
					}
					//calculate the normalising constant
					double normalising_constant = 0.0;
					for (int iter2 = 0; iter2 < 5; iter2++) {
							tempCr += (Ci[iter2].matrix() * R.col(arr_temp[iter2+1]).matrix()); //prediction
					}
					for (int iter = 0; iter < Q.cols(); iter++) {
						double temp = exp((tempCr.transpose() * Q.col(iter).matrix()));//dot product
						temp_e(iter) = temp; 
						normalising_constant += temp; 
					} 					
					//calculate all the gradient changes for each of the Ci, from dCi[0] to dCi[4]
					//this operation is already efficient
					for (int j = 0; j < 5; j++) {
						for (int k = 0; k < Q.cols(); k++) {
							tempC += (temp_e(k) * (Q.col(k).matrix() * R.col(arr_temp[j+1]).transpose().matrix()));
						}
						//normalise
						tempC /= normalising_constant;
						dCi[j] += (Q.col(arr_temp[0]).matrix() * R.col(arr_temp[j+1]).transpose().matrix() - tempC);
						//reset the matrix tempC to prepare to calculate the next dCi[j]
						tempC.setZero();
					}
					
					//calculate dQ VERIFY
					for (int j = 0; j < 5; j++) {
						temp_Qw += (Ci[j].matrix() * R.col(arr_temp[j+1]).matrix());
					}
					for (int j = 0; j < Q.cols(); j++) {
						temp_Q_Model.col(j) += (temp_e(j) * temp_Qw);
					}
					temp_Q_Model /= normalising_constant;
					temp_Q_Data.col(arr_temp[0]) = temp_Qw;
					dQ += (temp_Q_Data - temp_Q_Model);
					temp_Qw.setZero();
					temp_Q_Model.setZero();
					temp_Q_Data.setZero();

					//calculate dR
					//calculate dR with respect to data
					for (int j = 0; j < 5; j++) {
						temp_R_Data.col(arr_temp[j+1]) += Ci[j].transpose().matrix() * Q.col(arr_temp[0]).matrix();
					}
					//calculate dR with respect to model
					for (int k = 0; k < Q.cols(); k++) {
						for (int j = 0; j < 5; j++) {
							temp_R_Model.col(arr_temp[j+1]) += (temp_e(k) * (Ci[j].transpose().matrix() * Q.col(k).matrix()));
						}
					}
					temp_R_Model /= normalising_constant;
					dR += (temp_R_Data - temp_R_Model);
					if (no_of_iter  % 1000 == 0) {
						curr_dR = temp_R_Data - temp_R_Model;
					}
					temp_R_Data.setZero();
					temp_R_Model.setZero();

					//calculate dbias
					//temp_dbias(arr_temp[0]) = 1.0;
					//dbias += (temp_dbias - temp_e.matrix() / normalising_constant);
					//temp_dbias(arr_temp[0]) = 0.0;

					tempCr.setZero();
					temp_e.setZero();

					if (no_of_iter % 1000 == 0) {
							Eigen::VectorXd temp = Eigen::VectorXd::Zero(FEATURE_SIZE);
							gradient_check_R <<  "current data point = " << no_of_iter <<std::endl;
								for (int j = 0; j < R.rows(); j++) {
									for (int k = 0; k < R.cols(); k++) {
										Eigen::ArrayXXd R_copy = R;
										double result_1 = 0.0;
										double result_2 = 0.0;
										double temp_double = 0.0;
										gradient_check_R << curr_dR(j, k) << " ";
										R_copy(j, k) += EPSILON;
										for (int iter2 = 0; iter2 < 5; iter2++) {
												temp += (Ci[iter2].matrix() * R_copy.col(arr_temp[iter2+1]).matrix());
										}
										result_1 += temp.transpose() * Q.col(arr_temp[0]).matrix();
										for (int l = 0; l < Q.cols(); l++) {
											temp_double += exp(temp.transpose() * Q.col(l).matrix());
										}
										result_1 -= log(temp_double);
										temp_double = 0.0;
										//calculate result_2
										R_copy(j, k) -= (2 * EPSILON);
										temp.setZero();
										for (int iter2 = 0; iter2 < 5; iter2++) {
												temp += (Ci[iter2].matrix() * R_copy.col(arr_temp[iter2 + 1]).matrix());	
										}
										result_2 += temp.transpose() * Q.col(arr_temp[0]).matrix();
										for (int l = 0; l < Q.cols(); l++) {
											temp_double += exp(temp.transpose() * Q.col(l).matrix());
										}
										result_2 -= log(temp_double);
										temp.setZero();
										gradient_check_R << (result_1 - result_2) / (2 * EPSILON) << std::endl;

									}
							}
							curr_dR.setZero();
				}

					//update the gradient if no_of_iter mod mini batch size == 0, reset the gradient matrix to 0

					if (no_of_iter % BATCH_SIZE == 0) {
						double curr_learning_rate = LEARNING_RATE / (1 + no_of_iter * LEARNING_DECAY);
						for (int j = 0; j < 5; j++) {
							Ci[j] = Ci[j].matrix() + curr_learning_rate * dCi[j];
							//reset the matrix dCi[j]
							dCi[j].setZero();
						}
						//word_bias_pair = word_bias_pair + curr_learning_rate * dbias.array();
						R = R.matrix() + curr_learning_rate * dR;
						Q = Q.matrix() + curr_learning_rate * dQ;
						dR.setZero();
						dQ.setZero();
						//dbias.setZero();
					} 
				} 
				temp.clear();
			}
			pairs.close();
		}
		parameters << "Epoch number: " << i+1 << std::endl;
		for (int j = 0; j < 5; j++) {
			parameters << "Matrix C" << j+1 << std::endl; 
			parameters << Ci[j] << std::endl;
			parameters << std::endl;
			parameters << "End of matrix C" << j+1 << std::endl;
		}
		parameters << "Matrix R" << std::endl;
		parameters << R << std::endl;
		parameters << "End of matrix R" << std::endl;
		parameters << "Matrix Q" << std::endl;
		parameters << Q << std::endl;
		parameters << "End of matrix Q" << std::endl;
		//parameters << "Bias vector" << std::endl;
		//parameters << word_bias_pair << std::endl;
		//parameters << "End of bias vector" << std::endl;
		pairs.open("real_pairs_medium.txt");
	} 
	pairs.close();
	parameters.close();
	return 0;
}