int GetNaa(double cutoff, int type) { vector<int> tmp = GetSubset(cutoff,type); int naa = 0; for (int i=0; i<Nstate; i++) { naa += tmp[i]; } return naa; }
/// Writes one tab-separated summary line to `os`:
///   weight, the AAset symbols of every state whose entry in
///   GetSubset(c, type) is non-zero, and GetMaxMinor of that subset,
/// terminated by a newline.
void ToStreamSummary(ostream& os, double c, int type) {
    // The weight is emitted before the subset is computed, as in the original.
    os << weight << '\t';

    vector<int> subset = GetSubset(c, type);
    for (int state = 0; state < Nstate; ++state) {
        if (!subset[state]) {
            continue;
        }
        os << AAset[state];
    }

    os << '\t' << GetMaxMinor(subset) << '\n';
}
/// Appends to `Result` one subset of `S` per selection mask.
///
/// The first mask selects the first `Length` positions of `S`; further masks
/// are produced by NextSelection() until it returns false. Presumably this
/// enumerates every `Length`-element combination of `S` -- confirm against
/// NextSelection.
///
/// @param S      Source elements.
/// @param Length Number of elements selected in the initial mask.
/// @param Result Output; subsets are appended, existing content is kept.
///
/// If `Length` exceeds `S.size()` no such selection exists and `Result` is
/// left untouched. With `Length == 0` the loop still runs once with nothing
/// selected.
void AddAllCombinations(const vector<int>& S, size_t Length, vector<vector<int>>& Result) {
    // Guard: marking the first `Length` positions would otherwise write past
    // the end of `selection` (unchecked operator[] -> undefined behaviour).
    if (Length > S.size()) {
        return;
    }

    vector<bool> selection(S.size(), false);
    for (size_t i = 0; i < Length; ++i) {
        selection[i] = true;
    }

    do {
        Result.push_back(GetSubset(S, selection));
    } while (NextSelection(selection));
}
/// Returns the sub-genome spanning the genes at positions `first` .. `last`
/// of `genes`.
///
/// The range handed to GetSubset starts at the smallest index of the first
/// gene and ends at the largest index of the last gene plus one.
/// NOTE(review): ownership of the returned Genome* is not visible here.
Genome* Genome::GetSubsetByGeneIndex( int first, int last )
{
    Feature* firstGene = genes[first];
    Feature* lastGene  = genes[last];

    return GetSubset(
        min(firstGene->indices),
        max(lastGene->indices)+1);
}
/**
 * Splits the genome in two at a position lying between two consecutive genes,
 * choosing the cut whose gene ratio (genes in the first part / all genes) is
 * closest to `wanted_ratio`.
 *
 * The search starts at the gene index nearest to n * wanted_ratio and walks
 * outwards in both directions (index i downwards, index j upwards). For each
 * candidate the midpoint between the end of one gene and the start of the next
 * is tested with CanCut(); cuttable midpoints are scored with CountGenes().
 * A direction stops once it runs out of genes or once `impTh` scored
 * candidates in a row failed to improve on the best ratio found so far.
 *
 * Relies on `genes` being sorted in increasing order of their lower index
 * (lower != first).
 *
 * @param wanted_ratio Target fraction of genes for the first part.
 * @param impTh        Per-direction limit on consecutive non-improving
 *                     candidates.
 * @return Two genomes from GetSubset(): [1, best_position] ("train") and
 *         [best_position + 1, sequence.size()] ("test"). If no cuttable
 *         position was found, best_position stays 0.
 *
 * NOTE(review): ownership of the returned Genome* is not visible here.
 * NOTE(review): when n * wanted_ratio < 0.5 the upward search starts at
 * genes(0); confirm that index is valid for genes(...).
 */
mvec<Genome*> Genome::Split( float wanted_ratio, int impTh )
{
    float best_ratio = FLT_MAX;
    int best_position = 0;
    int n = genes.size();
    int last_impI = 0;   // consecutive non-improving candidates, downward search
    int last_impJ = 0;   // consecutive non-improving candidates, upward search

    int i = static_cast<int>(n * wanted_ratio + 0.5f) - 1;
    int j = static_cast<int>(n * wanted_ratio + 0.5f);

    while ((i > 0 && last_impI < impTh) || (j < n && last_impJ < impTh)) {
        // Downward step: candidate cut between gene i and gene i + 1.
        if (i > 0 && last_impI < impTh) {
            Feature* cur = genes(i);
            Feature* next = genes(i + 1);
            int cur_end = max(cur->indices);
            int next_start = min(next->indices);
            int middle = round(0.5 * (cur_end+next_start));
            if (CanCut(middle)) {
                float ratio = CountGenes(1, middle) / static_cast<float>(n);
                if (fabsf(ratio - wanted_ratio) < fabsf(best_ratio - wanted_ratio)) {
                    best_ratio = ratio;
                    d_trace("[+] (%d) New best ratio attained - %f\n", i, best_ratio);
                    best_position = middle;
                    last_impI = 0;
                } else {
                    last_impI = last_impI + 1;
                }
            }
        }
        i--;

        // Upward step: candidate cut between gene j and gene j + 1.
        if (j < n && last_impJ < impTh) {
            Feature* cur = genes(j);
            Feature* next = genes(j + 1);
            int cur_end = max(cur->indices);
            int next_start = min(next->indices);
            int middle = round(0.5 * (cur_end+next_start));
            if (CanCut(middle)) {
                float ratio = CountGenes(1, middle) / static_cast<float>(n);
                if (fabsf(ratio - wanted_ratio) < fabsf(best_ratio - wanted_ratio)) {
                    best_ratio = ratio;
                    // Report the index actually examined in this branch (j);
                    // i has already been decremented above.
                    d_trace("[+] (%d) New best ratio attained - %f\n", j, best_ratio);
                    best_position = middle;
                    last_impJ = 0;
                } else {
                    last_impJ = last_impJ + 1;
                }
            }
        }
        j++;
    }

    d_trace("[i] Cutting sequence at %d\n", best_position);

    mvec<Genome*> r;

    // First part ("train"). Reference pseudo-code kept from the original:
    //   train.Sequence = g.Sequence(1:best_position);
    //   train.gene = get_all_genes(f, 1, best_position);
    r.push_back(GetSubset(1, best_position));

    // Second part ("test"). Reference pseudo-code kept from the original:
    //   test.Sequence = g.Sequence(best_position + 1:seq_length);
    //   test.gene = get_all_genes(f, best_position + 1, seq_length);
    //   test.gene = shift_genes(test.gene, best_position);
    r.push_back(GetSubset(best_position + 1, sequence.size()));

    return r;
}