コード例 #1
0
ファイル: Genome.cpp プロジェクト: jcnossen/hmmgenefinder
// Counts the genes that lie completely inside the inclusive range
// [start, finish]: a gene qualifies when its smallest index is >= start and
// its largest index is <= finish. The gene container is addressed 1-based.
int Genome::CountGenes( int start, int finish )
{
	int inside = 0;
	for (int idx = 1; idx <= genes.size(); ++idx) {
		const bool begins_in_range = min(genes(idx)->indices) >= start;
		// The upper bound is only inspected once the lower bound holds.
		if (begins_in_range && max(genes(idx)->indices) <= finish)
			++inside;
	}
	return inside;
}
コード例 #2
0
ファイル: dna.cpp プロジェクト: maleadt/genetic
// Insert a gene so that it becomes gene number `index`.
//
// The gene bytes are copied; the caller keeps ownership of `gene`. When the
// DNA already holds genes, the new gene is written followed by a single 0
// byte that separates it from the gene that previously lived at `index`.
//
// Returns true on success and false when the temporary buffer cannot be
// allocated (the DNA is left untouched in that case).
bool DNA::insert_gene(unsigned int index, unsigned char* gene, unsigned int size) {
    unsigned int amountgenes = genes();
    assert(index < amountgenes);

    // Empty DNA (only reachable when asserts are compiled out): the gene
    // becomes the whole buffer and needs no separator.
    if (index == 0 && amountgenes == 0) {
        unsigned char* fresh = (unsigned char*) malloc(size * sizeof(unsigned char));
        if (fresh == 0 && size != 0)
            return false;
        if (size != 0)
            memcpy(fresh, gene, size);
        dataGenes = fresh;
        dataSize = size;
        return true;
    }

    // At least one gene exists: insert "gene + separator" in front of the
    // gene currently at `index`. This also covers a DNA holding exactly one
    // gene, which previously had its buffer overwritten (and leaked).
    unsigned char* gene_mod = (unsigned char*) malloc((size+1) * sizeof(unsigned char));
    if (gene_mod == 0)
        return false;
    if (size != 0)
        memcpy(gene_mod, gene, size);
    gene_mod[size] = 0;

    // Gene 0 always starts at offset 0; other genes are located by lookup.
    unsigned int i_self = (index == 0) ? 0 : gene_start(index);
    insert(i_self, gene_mod, size+1);
    free(gene_mod);

    return true;
}
コード例 #3
0
ファイル: dna.cpp プロジェクト: maleadt/genetic
// Erase a gene
//
// Removes gene `index` together with one neighbouring separator so that the
// remaining genes stay correctly delimited. When the last remaining gene is
// erased, the buffer is released and the DNA is reset to the empty state
// (dataGenes == 0, dataSize == 0). Always returns true.
bool DNA::erase_gene(unsigned int index) {
    unsigned int amountgenes = genes();
    assert(index < amountgenes);

    // Case 1: gene at start
    if (index == 0) {
        if (amountgenes > 1) {
            // Drop everything up to the start of the following gene.
            unsigned int i_next = gene_start(1);
            erase(0, i_next);
        } else {
            free(dataGenes);
            dataGenes = 0;
            dataSize = 0;
        }
    }

    // Case 2: gene at midst
    else if (index < amountgenes-1) {
        unsigned int i_self = gene_start(index);
        unsigned int i_next = gene_start(index+1);
        erase(i_self, i_next);
    }

    // Case 3: gene at end
    else {
        if (amountgenes > 1) {
            // Start at the end of the previous gene so the separator in
            // front of the erased gene goes away as well.
            unsigned int i_prev = gene_end(index-1);
            erase(i_prev, dataSize);
        } else {
            free(dataGenes);
            // Reset the pointer too, so that no dangling pointer is left
            // behind for a later free() or access (matches Case 1).
            dataGenes = 0;
            dataSize = 0;
        }
    }
    return true;
}
コード例 #4
0
ファイル: Genome.cpp プロジェクト: jcnossen/hmmgenefinder
// Returns a boolean value - tells us if we can cut the genome at given
// position.
bool Genome::CanCut(int location) {
	for(int i=1;i<=genes.size();i++) {
		Feature* g = genes(i);
		if (min(g->indices) <= location && location <= max(g->indices))
			return false;
	}
	return true;
}
コード例 #5
0
ファイル: dna.cpp プロジェクト: maleadt/genetic
// Dumps the DNA to std::cout: a header line, the number of genes, and then
// one line per gene listing its bytes as two-digit hexadecimal values.
void DNA::debug() const
{
    // Debug message
    std::cout << "* DNA.debug" << std::endl;

    // The gene count does not change while printing, so query it once.
    const unsigned int amountgenes = genes();
    std::cout << "Contents of DNA object (" << amountgenes << " genes): " << std::endl;

    // std::setfill is sticky; remember the caller's fill character so the
    // zero padding used for the hex bytes does not leak into later output.
    const char previous_fill = std::cout.fill();

    for (unsigned int i = 0; i < amountgenes; i++) {
        std::cout << "\tgene " << i+1 << ":";
        unsigned int start = gene_start(i);
        const unsigned int end = gene_end(i);
        for (; start < end; ++start) {
            // Cast to int so the byte is printed as a number, not a character.
            std::cout << " 0x" << std::hex << std::setfill('0') << std::setw(2)
                      << static_cast<int>(dataGenes[start]) << std::dec;
        }
        std::cout << std::endl;
    }

    std::cout.fill(previous_fill);
}
コード例 #6
0
ファイル: dna.cpp プロジェクト: maleadt/genetic
// Extract a gene
bool DNA::extract_gene(unsigned int index, unsigned char*& gene, unsigned int& size) const {
    unsigned int amountgenes = genes();
    assert(index < amountgenes);
    
    unsigned int i_start = gene_start(index);
    unsigned int i_end = gene_end(index);
    size = i_end-i_start;
    extract(i_start, i_end, gene);

    return true;
}
コード例 #7
0
ファイル: dna.cpp プロジェクト: maleadt/genetic
// Replace a gene
//
// Implemented as "erase, then re-add": the old gene is erased and the new
// one is inserted at the same position, or appended when the replaced gene
// was the last one. Returns false if erasing fails, otherwise the result of
// the insert/append step.
bool DNA::replace_gene(unsigned int index, unsigned char* gene, unsigned int size) {
    unsigned int count_before = genes();
    assert(index < count_before);

    if (erase_gene(index)) {
        // Decide with the gene count from *before* the erase.
        const bool followed_by_gene = index < count_before-1;
        return followed_by_gene ? insert_gene(index, gene, size)
                                : push_back_gene(gene, size);
    }
    return false;
}
コード例 #8
0
  RankTransformer(const std::string& aMatrixDir, const std::string& aOutputfile,
                  bool aQuantileNormalisation = false, bool aUseInverse = false,
                  bool aScramble = false)
    : mMatrixDir(aMatrixDir), mOutputFile(NULL), mData(NULL), mBuf(NULL),
      mRanks(NULL), mInvRanks(NULL), mRankAvgs(NULL), mRankCounts(NULL),
      mQuantileNormalisation(aQuantileNormalisation), mUseInverse(aUseInverse),
      mScramble(aScramble)
  {
    mOutputFile = fopen(aOutputfile.c_str(), "w");

    fs::path data(mMatrixDir);
    if (mUseInverse)
      data /= "inverse_data";
    else
      data /= "data";

    mData = fopen(data.string().c_str(), "r");

    fs::path genes(mMatrixDir);
    // Ugly hack: if we are using the inverted data, we simply swap out the
    // list of genes for the list of arrays, so that nGenes is actually the
    // number of arrays. This means that the normalisation occurs as normal,
    // except for each gene across arrays instead of for each array across
    // genes.
    if (mUseInverse)
      genes /= "arrays";
    else
      genes /= "genes";
    nGenes = 0;
    std::ifstream gs(genes.string().c_str());
    while (gs.good())
    {
      std::string l;
      std::getline(gs, l);
      if (!gs.good())
        break;

      nGenes++;
    }

    mBuf = new double[nGenes];
    mRanks = new double[nGenes];
    if (mQuantileNormalisation)
    {
      mRankAvgs = new double[nGenes];
      mRankCounts = new uint32_t[nGenes];
      memset(mRankAvgs, 0, sizeof(double) * nGenes);
      memset(mRankCounts, 0, sizeof(uint32_t) * nGenes);
    }
    mInvRanks = new uint32_t[nGenes];

    processAllData();
  }
コード例 #9
0
ファイル: dna.cpp プロジェクト: maleadt/genetic
// Add a gene
//
// Appends a copy of `gene` (`size` bytes) behind the existing genes. When
// the DNA already holds genes, a single 0 byte is written in front of the
// new gene to separate it from its predecessor; the first gene is stored
// without a separator.
//
// Returns true on success and false when the temporary buffer cannot be
// allocated (nothing is appended in that case).
bool DNA::push_back_gene(unsigned char* gene, unsigned int size) {
    unsigned int amountgenes = genes();

    if (amountgenes == 0) {
        push_back(gene, size);
        return true;
    }

    unsigned char* gene_mod = (unsigned char*) malloc((size+1) * sizeof(unsigned char));
    if (gene_mod == 0)
        return false;   // avoid writing through a null pointer below

    gene_mod[0] = 0;    // leading separator
    if (size != 0)
        memcpy(gene_mod+1, gene, size);
    push_back(gene_mod, size+1);
    free(gene_mod);

    return true;
}
コード例 #10
0
ファイル: Genome.cpp プロジェクト: jcnossen/hmmgenefinder
// Splits the genome into two parts at a position between two genes so that
// the fraction of genes in the first part is as close as possible to
// wanted_ratio.
//
// Candidate cut positions are the midpoints between consecutive genes. The
// search starts at the gene boundary suggested by wanted_ratio and walks
// outwards in both directions (i downwards, j upwards). Each direction stops
// once impTh cuttable candidates in a row failed to improve on the best
// ratio found so far, or when it runs out of genes.
//
// This relies on the genes being sorted in increasing order of their lower
// index (lower != first).
//
// Returns two subsets: [1, cut] followed by [cut + 1, sequence.size()]. If no
// valid cut is found the cut position stays 0.
mvec<Genome*> Genome::Split( float wanted_ratio, int impTh )
{
	float best_ratio = FLT_MAX;
	int best_position = 0;
	int n = genes.size();

	// Number of consecutive non-improving candidates seen per direction.
	int last_impI = 0;
	int last_impJ = 0;
	int i = (int)(n * wanted_ratio + 0.5f) - 1;
	int j = (int)(n * wanted_ratio + 0.5f);
	while ((i > 0 && last_impI < impTh) || (j < n && last_impJ < impTh))
	{
		// Downward scan. "i < n" keeps genes(i + 1) inside the 1-based
		// container even for ratios above 1.
		if (i > 0 && i < n && last_impI < impTh) {
			Feature* cur = genes(i);
			Feature* next = genes(i + 1);
			int cur_end = max(cur->indices);
			int next_start = min(next->indices);
			int middle = round(0.5 * (cur_end+next_start));
			if (CanCut(middle)) {
				float ratio = CountGenes(1, middle) / (float)n;
				if (fabsf(ratio - wanted_ratio) < fabsf(best_ratio - wanted_ratio)) {
					best_ratio = ratio;
					d_trace("[+] (%d) New best ratio attained - %f\n", i, best_ratio);
					best_position = middle;
					last_impI = 0;
				} else {
					last_impI = last_impI + 1;
				}
			}
		}
		i--;

		// Upward scan. "j >= 1" skips the invalid index 0 that a very
		// small n * wanted_ratio would otherwise produce.
		if (j >= 1 && j < n && last_impJ < impTh) {
			Feature* cur = genes(j);
			Feature* next = genes(j + 1);
			int cur_end = max(cur->indices);
			int next_start = min(next->indices);
			int middle = round(0.5 * (cur_end+next_start));
			if (CanCut(middle)) {
				float ratio = CountGenes(1, middle) / (float) n;
				if (fabsf(ratio - wanted_ratio) < fabsf(best_ratio - wanted_ratio)) {
					best_ratio = ratio;
					// Report the index of this scan (j), not the other one.
					d_trace("[+] (%d) New best ratio attained - %f\n", j, best_ratio);
					best_position = middle;
					last_impJ = 0;
				} else {
					last_impJ = last_impJ + 1;
				}
			}
		}
		j++;
	}

	d_trace("[i] Cutting sequence at %d\n", best_position);
	mvec<Genome*> r;
	r.push_back(GetSubset(1, best_position)); // first part ("train")
	r.push_back(GetSubset(best_position + 1, sequence.size())); // second part ("test")
	return r;
}
コード例 #11
0
ファイル: merz1999.cpp プロジェクト: MQLib/MQLib
// Crossover constructor: HUX (half uniform crossover) of two parents followed
// by a restricted local search.
//
// The child starts as a copy of parent_a. Half of the positions in which the
// parents differ (Hamming distance / 2), picked in random order, are then
// passed to UpdateCutValues(). Although the usual description of HUX
// (http://en.wikipedia.org/wiki/Crossover_(genetic_algorithm)#Uniform_Crossover_and_Half_Uniform_Crossover)
// talks about two children, the paper produces a single one, so every other
// differing position simply keeps parent_a's value.
//
// PAPER: the local search applied to the offspring is restricted to the
// region of the search space defined by the two parents; genes with equal
// values in both parents are not modified during local search.
Merz1999Solution::Merz1999Solution(const QUBOInstance& qi,
                                   const Merz1999Solution& parent_a,
                                   const Merz1999Solution& parent_b,
                                   QUBOHeuristic *heuristic) :
  QUBOSolution(parent_a) {
  // Positions on which both parents agree; the local search must skip them.
  std::vector<bool> shared_by_parents(N_, false);

  // Exactly half of the non-matching positions get swapped.
  int swaps_remaining = parent_a.SymmetricDifference(parent_b) / 2;

  // Visit the positions in a random order.
  std::vector<int> visit_order(N_, 0);
  for (int k = 0; k < N_; ++k) {
    visit_order[k] = k;
  }
  std::random_shuffle(visit_order.begin(), visit_order.end());

  const std::vector<int>& values_a = parent_a.get_assignments();
  const std::vector<int>& values_b = parent_b.get_assignments();
  for (int k = 0; k < N_; ++k) {
    const int var = visit_order[k];
    if (values_a[var] == values_b[var]) {
      shared_by_parents[var] = true;
      continue;
    }
    // The parents differ here: swap while the quota lasts.
    if (swaps_remaining > 0) {
      UpdateCutValues(var);
      --swaps_remaining;
    }
  }

  // Restricted local search: repeatedly take the largest positive entry of
  // diff_weights_ among the positions where the parents differed.
  for (;;) {
    double best_gain = 0.0;
    int best_var = -1;
    for (int var = 0; var < N_; ++var) {
      if (!shared_by_parents[var] && diff_weights_[var] > best_gain) {
        best_gain = diff_weights_[var];
        best_var = var;
      }
    }

    // No more profitable moves.
    if (best_var < 0 || !ImprovingMove(best_var)) {
      break;
    }

    // Update the diff_weights_ variable and objective
    UpdateCutValues(best_var);
  }
}
コード例 #12
0
ファイル: dna.cpp プロジェクト: maleadt/genetic
// Returns the index of the last data part of a gene (exclusive)
// NOTE: this value is not guaranteed to be accessible!
//
// For every gene except the last one this is the position reported by
// separator(index+1); for the last gene (and for an empty DNA) it is
// dataSize.
unsigned int DNA::gene_end(unsigned int index) const {
    const unsigned int amountgenes = genes();

    // Check for "no genes" first: amountgenes-1 would wrap around to a huge
    // unsigned value and send an empty DNA into separator().
    if (amountgenes > 0 && index < amountgenes-1)
        return separator(index+1);

    return dataSize;
}