コード例 #1
0
// Decides whether the sets containing x and y are allowed to be united.
//
// Returns false when the combined size would exceed the hard limit (2500),
// or when one set is already above three quarters of that limit and the
// other one has more than 50 members. Returns true in every other case.
static bool canMerge(const ConcurrentDSU &uf, unsigned x, unsigned y) {
  const size_t hardLimit = 2500;
  const size_t largeSet = hardLimit * 3 / 4;
  const size_t smallSet = 50;

  const size_t sizeX = uf.set_size(x);
  const size_t sizeY = uf.set_size(y);

  // Hard cap: the united set must never grow past the limit.
  if (sizeX + sizeY > hardLimit)
    return false;

  // A set that is already fairly large may only absorb "almost" singletons.
  const bool xLargeYNotSmall = sizeX > largeSet && sizeY > smallSet;
  const bool yLargeXNotSmall = sizeY > largeSet && sizeX > smallSet;
  return !(xLargeYNotSmall || yLargeXNotSmall);
}
コード例 #2
0
// Compares every unordered pair of entries in `block` and unites their sets
// in `uf` when all of the following hold, checked in this order:
//   1. the two entries currently belong to different sets,
//   2. canMerge() allows uniting those sets,
//   3. hamdistKMer() of the two k-mers (called with bound `tau`) is <= tau.
//
// `block` holds indices into `data`; each index is narrowed to unsigned
// before use. The pairwise scan is quadratic in block.size().
static void processBlockQuadratic(ConcurrentDSU  &uf,
                                  const std::vector<size_t> &block,
                                  const KMerData &data,
                                  unsigned tau) {
  const size_t count = block.size();
  for (size_t first = 0; first < count; ++first) {
    unsigned x = static_cast<unsigned>(block[first]);
    hammer::KMer kmerx = data.kmer(x);

    for (size_t second = first + 1; second < count; ++second) {
      unsigned y = static_cast<unsigned>(block[second]);
      hammer::KMer kmery = data.kmer(y);

      // Already in the same set: nothing to do for this pair.
      if (uf.find_set(x) == uf.find_set(y))
        continue;

      // Size policy forbids uniting these two sets.
      if (!canMerge(uf, x, y))
        continue;

      // Distance check comes last, after the two checks above have passed.
      if (hamdistKMer(kmerx, kmery, tau) <= tau)
        uf.unite(x, y);
    }
  }
}
コード例 #3
0
	/**
	 * Builds clusters of reads and returns them as a Clusterization.
	 *
	 * For every read index i, each candidate returned by
	 * read_index_.GetCandidatesFor(i) is examined: if the two reads are in
	 * different sets of clusters_ and seq_comparer_ reports that the sequences
	 * stored for the two set representatives match, the sets are united and the
	 * super string of both sequences is stored under the new representative.
	 *
	 * The outer loop runs under OpenMP. The merge step reads and writes the
	 * shared read_seq_map_, so it is executed inside a named critical section;
	 * only the candidate lookup runs concurrently.
	 * NOTE(review): GetCandidatesFor() is assumed to be safe to call from
	 * several threads at once (it was already called that way) - confirm.
	 *
	 * @return clusterization holding the set representative of every read and,
	 *         for each representative found in read_seq_map_, its sequence and
	 *         set size.
	 */
	Clusterization CostructClusters() {
		InitializeReadSequenceMap();
#pragma omp parallel for
		for(size_t i = 0; i < reads_.size(); i++) {
			auto candidates = read_index_.GetCandidatesFor(i);
			for(auto it = candidates.begin(); it != candidates.end(); ++it) {
				// The lookup of both representatives, the comparison of their
				// sequences, the union and the map update must happen as one
				// unit: otherwise another thread could merge one of the sets or
				// overwrite one of the sequences in between, and concurrent
				// access to read_seq_map_ itself would be a data race.
#pragma omp critical(costruct_clusters_merge)
				{
					size_t cluster1 = clusters_.find_set(i);
					size_t cluster2 = clusters_.find_set(*it);
					if(cluster1 != cluster2) {
						string read_seq1 = read_seq_map_[cluster1];
						string read_seq2 = read_seq_map_[cluster2];
						auto comparison_result = seq_comparer_.SequencesMatch(read_seq1, read_seq2);
						if(comparison_result.match) {
							clusters_.unite(i, *it);
							string superstring = GetSuperString(read_seq1, read_seq2, comparison_result);
							// Store the merged sequence under whichever element
							// became the representative of the united set.
							size_t new_cluster = clusters_.find_set(i);
							read_seq_map_[new_cluster] = superstring;
						}
					}
				}
			}
		}
		cout << clusters_.num_sets() << " clusters were constructed" << endl;

		// Record the representative of every read.
		Clusterization result(reads_);
		for(size_t i = 0; i < reads_.size(); i++)
			result.Add(i, clusters_.find_set(i));

		// Only map entries keyed by a current representative describe a final
		// cluster; entries of absorbed sets are skipped.
		for(auto it = read_seq_map_.begin(); it != read_seq_map_.end(); ++it) {
			size_t cluster_id = clusters_.find_set(it->first);
			if(it->first == cluster_id) {
				result.AddClusterSequence(cluster_id, it->second, clusters_.set_size(cluster_id));
			}
		}

		assert(result.ClustersSize() == clusters_.num_sets());

		return result;

	}
コード例 #4
0
// Returns true when the sets containing x and y together hold fewer than
// 10000 elements, i.e. uniting them keeps the result below that limit.
static bool canMerge(const ConcurrentDSU &uf, unsigned x, unsigned y) {
  const int mergedSizeLimit = 10000;
  const auto mergedSize = uf.set_size(x) + uf.set_size(y);
  return mergedSize < mergedSizeLimit;
}