Beispiel #1
0
/* Print a summary of a finished run: its ILR/CLR scores (recomputed after EM
 * where applicable), the ranking score cached on the node, and a per-sequence
 * site score table.
 *   node   - run result; countmat and pswm must have the same span
 *   metric - which score metric ranked this run (PWM ILR is re-verified for ILR)
 *   markov/data/zoops - background model, sequence set, and ZOOPS state used
 *                       by the score computations
 */
void printRunNode(RunNode *node, enum ScoreMetric metric, Markov *markov, Dataset *data, Zoops *zoops) {
	if(DEBUG0) {
		assert(node->countmat->span == node->pswm->span);
	}

	printf("All of the following scores are computed after EM (if applicable).\n");
	if(metric == ILR) {
		//ILR recomputed from the PWM; must agree with the score stored on the node
		double ilr_pwm = computeIlr(markov, data, node->pswm->mat, node->pswm->span);
		printf("Log Markovian-ILR of PWM: %.2lf\n", ilr_pwm);

		if(DEBUG0) {
			assert(fabs(ilr_pwm - node->score) < 0.00000001);
		}
	}

	//ILR from sites
	double ilr_sites = computeIlr(markov, data, zoops, node->sites, node->countmat->span);
	printf("Log Markovian-ILR generated from sites without pseudocount: %.2lf\n", ilr_sites);

	//CLR from sites
	double entscore= computeEntropy(markov, data, zoops, node->sites, node->countmat->span);
	printf("Log Markovian-CLR generated from sites without pseudocount: %.2lf\n", entscore);

	printf("Log(CLR) normalized by number of sequences: %.4lf\n", entscore / data->numseqs);

	//Score that was used as the best run
	printf("Score for ranking runs: %.2lf\n", node->score);
	printf("\n");

	//printf("PWM after EM:\n");
	//printProfile(stdout, node->pswm); //hopefully, no one screwed around with this matrix
	//printf("\n");

	//compute scores for each site; NAN marks sequences with no site (ZOOPS)
	double *scorePerSite = (double*) malloc(sizeof(double) * data->numseqs);
	if(scorePerSite == NULL) {
		//bug fix: original dereferenced the buffer without checking allocation
		fprintf(stderr, "printRunNode: out of memory allocating per-site scores\n");
		exit(EXIT_FAILURE);
	}
	for(int i = 0; i < data->numseqs; i++) {
		if(node->sites[i] >= 0) {
			scorePerSite[i] = computeEntropyPerSite(markov, data, zoops, node->sites, i, node->countmat->span);
		}
		else {
			scorePerSite[i] = NAN;
		}
	}

	printCountmatAndSites(node->countmat, node->sites, scorePerSite, data);

	free(scorePerSite);
}
void Rnn::lineSearch() {
  // Remember the current parameters so they can be restored afterwards.
  Model modelSave(model_);

  double wordEntropy = 0.0;
  double charEntropy = 0.0;

  // Take 20 fixed-size update steps, reporting the blended entropy after each.
  for (int step = 0; step < 20; ++step) {
    model_.update(0.001);
    computeEntropy(wordEntropy, charEntropy);
    const double blended =
        model_.alpha_ * wordEntropy + (1.0 - model_.alpha_) * charEntropy;
    printf("%8.3f ", blended);
  }
  printf("\n");

  // Put the saved parameters back.
  model_.copy(modelSave);
}
Beispiel #3
0
/** ***************************************************************************
 * Metoda vybere nejvhodnější odhad řešení. Nejvodnější řešení jeurčeno pomocí
 * výpočtu entropie. Odhadujeme tedy řešení, které nám přenese nejvíce nové infomace.
 * Proto je odhadnuto řešení s maximáln entropií.
 * @brief EntropySolver::nextTry
 * @return vektor odhadnutého řešení
 */
std::vector<unsigned int> EntropySolver::nextTry(){
   /* if(first == true){ //poprvé zvol náhodný odhad
        first = false;
        lastSolution = solutions.at(rand() % solutions.size());
        return lastSolution;
    }*/ //špatný přístup k randomizaci začátku

    //pro včechny řešení propočti entropii a hledej maximum
    unsigned int indexOfMaximumEntropy = 0;
    double maximumEntropy = 0.0;

    for(unsigned int i = 0; i < solutions.size(); ++i){
        double tmpEntropy = computeEntropy(solutions.at(i));
        if(tmpEntropy > maximumEntropy){
            maximumEntropy = tmpEntropy;
            indexOfMaximumEntropy = i;
        }
    }

    //vyber to s nejvyšší hodnotou
    lastSolution = solutions.at(indexOfMaximumEntropy);
    return lastSolution;
}
// Finite-difference check of the analytic gradient.  For 4 random
// perturbation directions, compares the first-order prediction
// gamma * grad^T delta against the measured entropy change over a range of
// log-spaced step sizes gamma; the printed ratio should approach 1 as
// gamma shrinks (until numerical noise dominates).
void Rnn::gradientCheck() {
  // Save the current model so it can be restored at the end.
  Model modelSave(model_);

  // Entropy of the unperturbed model: the baseline for the differences.
  double initWordEntropy = 0.0;
  double initCharEntropy = 0.0;
  computeEntropy(initWordEntropy, initCharEntropy);
  double initEntropy = model_.alpha_ * initWordEntropy
      + (1.0 - model_.alpha_) * initCharEntropy;
  // printf("%8.3f\n", initEntropy);

  // Storage for the linearized predictions and the measured differences.
  // nSteps is a compile-time constant, so plain stack arrays replace the
  // original new[]/delete[] pair (no leak if computeEntropy ever throws).
  const int nSteps = 30;
  double linearization[nSteps];
  double difference[nSteps];

  // Step sizes are log-spaced from 10^minPow up to 10^maxPow:
  // gamma(i) = c^(i - t), with c and t chosen so the endpoints match.
  double maxPow = 2;
  double minPow = -7;
  double c = pow(10.0, (maxPow - minPow) / (nSteps-1));
  double t = - minPow / log10(c);

  double wordEntropy = 0.0;
  double charEntropy = 0.0;
  // model_.pickDeltas();

  for (int j=0; j<4; j++) {
    // Pick a fresh random perturbation direction for each trial.
    model_.copy(modelSave);
    model_.pickDeltas();

    for (int i=0; i<nSteps; i++) {
      double gamma = pow(c, (double)i-t);
      model_.addDeltas(gamma);
      computeEntropy(wordEntropy, charEntropy);
      double entropy = model_.alpha_ * wordEntropy
          + (1.0 - model_.alpha_) * charEntropy;
      difference[i] = entropy - initEntropy;           // measured change
      linearization[i] = gamma * model_.gradTDelta();  // gradient prediction
      model_.addDeltas(-gamma);                        // undo the perturbation
    }

    // Row 1: linearized predictions; row 2: measured differences;
    // row 3: their ratio.
    for (int i=0; i<nSteps; i++) {
      printf("%+10.2e ", linearization[i]);
    }
    std::cout << std::endl;
    for (int i=0; i<nSteps; i++) {
      printf("%+10.2e ", difference[i]);
    }
    std::cout << std::endl;
    for (int i=0; i<nSteps; i++) {
      printf("%+10.3f ", difference[i] / linearization[i]);
    }
    // std::cout << std::endl;
    std::cout << std::endl;
  }

  // Restore the original model and recompute its entropy.
  model_.copy(modelSave);
  computeEntropy(wordEntropy, charEntropy);
  // checking that the entropy is the same as at the beginning
}
Beispiel #5
0
 /** Get entropy for the i-th chunk in normalized range [0; 1[ */
 static double getChunkEntropy(const Chunk * chunk) {
     // A missing chunk is reported as maximal entropy.
     if (chunk == nullptr) {
         return 1.0;
     }
     // Divide by 8 to normalize — presumably computeEntropy reports bits per
     // byte (max 8); verify against computeEntropy's contract.
     return computeEntropy(chunk->data, chunk->size) / 8.0;
 }
Beispiel #6
0
 /** Get this multichunk's entropy in a normalized range [0; 1[ */
 double getEntropy() const
 {
     const double rawEntropy =
         computeEntropy(chunkArray.getConstBuffer(), chunkArray.getSize());
     // Divide by 8 to normalize — presumably computeEntropy reports bits per
     // byte (max 8); verify against computeEntropy's contract.
     return rawEntropy / 8.0;
 }