// Fetch entry number entry_index from vfile into entry and parse it:
// first the locus-level part, then the per-individual part.
// entry_index must be smaller than vfile.N_entries (checked with Assert).
void GetNext(vcf_file &vfile, uint entry_index, vcf_entry &entry)
{
    Assert(entry_index < vfile.N_entries);

    // Pull the raw text of the requested entry and hand it to the entry object.
    string raw_record;
    vfile.get_vcf_entry(entry_index, raw_record);
    entry.reset(raw_record);

    // Locus-level parse, with the ALT, FILTER and INFO flags all switched on.
    const bool with_alt = true;
    const bool with_filter = true;
    const bool with_info = true;
    entry.parse_basic_entry(with_alt, with_filter, with_info);

    // Individual-level parse, with the FORMAT flag switched on.
    const bool with_format = true;
    entry.parse_full_entry(with_format);
}
// Build the union of the included sites of this file and file2, keyed by (CHROM, POS).
//
// For every key, the mapped pair holds the entry index of that site in this
// file (first) and in file2 (second); -1 marks a file with no included entry
// at that site. Only entries whose include_entry flag is set are considered.
// If one file holds several included entries with the same (CHROM, POS), the
// last one seen replaces the earlier ones.
//
// file2:                     the second VCF file.
// CHROMPOS_to_filepos_pair:  output map; it is not cleared first, so entries
//                            already present are kept (and overwritten or
//                            updated when their key is seen again).
void vcf_file::return_site_union(vcf_file &file2, map<pair<string, int>, pair<int, int> > &CHROMPOS_to_filepos_pair)
{
    typedef map<pair<string, int>, pair<int, int> > site_map;
    string vcf_line;

    // Pass 1: every included site of this file starts out as "file 1 only".
    for (unsigned int s=0; s<N_entries; s++)
    {
        if (!include_entry[s])
            continue;

        get_vcf_entry(s, vcf_line);
        vcf_entry e(N_indv, vcf_line);
        e.parse_basic_entry();

        // Construct the key directly: make_pair<string,int>(lvalue, lvalue)
        // with explicit template arguments does not compile from C++11 on.
        const pair<string, int> site(e.get_CHROM(), e.get_POS());
        CHROMPOS_to_filepos_pair[site] = pair<int, int>(static_cast<int>(s), -1);
    }

    // Pass 2: sites of file2 either complete an existing key or add a
    // "file 2 only" key.
    for (unsigned int s=0; s<file2.N_entries; s++)
    {
        if (!file2.include_entry[s])
            continue;

        file2.get_vcf_entry(s, vcf_line);
        vcf_entry e(file2.N_indv, vcf_line);
        e.parse_basic_entry();

        const pair<string, int> site(e.get_CHROM(), e.get_POS());
        site_map::iterator found = CHROMPOS_to_filepos_pair.find(site);
        if (found != CHROMPOS_to_filepos_pair.end())
            found->second.second = static_cast<int>(s);
        else
            CHROMPOS_to_filepos_pair[site] = pair<int, int>(-1, static_cast<int>(s));
    }
}
void vcf_file::output_sites_in_files(const string &output_file_prefix, vcf_file &diff_vcf_file) { printLOG("Comparing sites in VCF files...\n"); map<pair<string, int>, pair<int, int> > CHROMPOS_to_filepos_pair; map<pair<string, int>, pair<int, int> >::iterator CHROMPOS_to_filepos_pair_it; return_site_union(diff_vcf_file, CHROMPOS_to_filepos_pair); string vcf_line; string CHROM; int POS; string output_file = output_file_prefix + ".diff.sites_in_files"; ofstream sites_in_files(output_file.c_str()); sites_in_files << "CHROM\tPOS\tIN_FILE\tREF\tALT1\tALT2" << endl; int s1, s2; int N_common_SNPs = 0, N_SNPs_file1_only=0, N_SNPs_file2_only=0; for (CHROMPOS_to_filepos_pair_it=CHROMPOS_to_filepos_pair.begin(); CHROMPOS_to_filepos_pair_it!=CHROMPOS_to_filepos_pair.end(); ++CHROMPOS_to_filepos_pair_it) { s1 = CHROMPOS_to_filepos_pair_it->second.first; s2 = CHROMPOS_to_filepos_pair_it->second.second; CHROM = CHROMPOS_to_filepos_pair_it->first.first; POS = CHROMPOS_to_filepos_pair_it->first.second; vcf_entry e1(N_indv); vcf_entry e2(diff_vcf_file.N_indv); // Read entries from file (if available) if (s1 != -1) { get_vcf_entry(s1, vcf_line); e1.reset(vcf_line); } if (s2 != -1) { diff_vcf_file.get_vcf_entry(s2, vcf_line); e2.reset(vcf_line); } e1.parse_basic_entry(true); e2.parse_basic_entry(true); // Set the reference to the non-missing entry (if available) string REF = e1.get_REF(); string REF2 = e2.get_REF(); if ((REF == "N") || (REF == ".")) REF = REF2; if ((REF2 == "N") || (REF2 == ".")) REF2 = REF; if ((REF != REF2) && (REF2 != "N") && (REF != "N") && (REF != ".") && (REF2 != ".")) warning("Non-matching REF at " + CHROM + ":" + int2str(POS) + " " + REF + "/" + REF2 + ". 
Diff results may be unreliable."); sites_in_files << CHROM << "\t" << POS << "\t"; if ((s1 != -1) && (s2 != -1)) { N_common_SNPs++; sites_in_files << "B"; } else if ((s1 != -1) && (s2 == -1)) { N_SNPs_file1_only++; sites_in_files << "1"; } else if ((s1 == -1) && (s2 != -1)) { N_SNPs_file2_only++; sites_in_files << "2"; } else error("SNP in neither file!?"); sites_in_files << "\t" << REF << "\t" << e1.get_ALT() << "\t" << e2.get_ALT() << endl; } sites_in_files.close(); printLOG("Found " + int2str(N_common_SNPs) + " SNPs common to both files.\n"); printLOG("Found " + int2str(N_SNPs_file1_only) + " SNPs only in main file.\n"); printLOG("Found " + int2str(N_SNPs_file2_only) + " SNPs only in second file.\n"); }
void vcf_file::output_switch_error(const string &output_file_prefix, vcf_file &diff_vcf_file) { printLOG("Outputting Phase Switch Errors...\n"); map<pair<string, int>, pair<int, int> > CHROMPOS_to_filepos_pair; map<pair<string, int>, pair<int, int> >::iterator CHROMPOS_to_filepos_pair_it; return_site_union(diff_vcf_file, CHROMPOS_to_filepos_pair); map<string, pair< int, int> > combined_individuals; map<string, pair< int, int> >::iterator combined_individuals_it; return_indv_union(diff_vcf_file, combined_individuals); string CHROM, vcf_line; int POS; int s1, s2, indv1, indv2; string output_file = output_file_prefix + ".diff.switch"; ofstream switcherror(output_file.c_str()); if (!switcherror.is_open()) error("Could not open Switch Error file: " + output_file, 4); switcherror << "CHROM\tPOS\tINDV" << endl; unsigned int N_combined_indv = combined_individuals.size(); vector<int> N_phased_het_sites(N_combined_indv, 0); vector<int> N_switch_errors(N_combined_indv, 0); pair<string, string> missing_genotype(".","."); vector<pair<string, string> > prev_geno_file1(N_combined_indv, missing_genotype); vector<pair<string, string> > prev_geno_file2(N_combined_indv, missing_genotype); pair<string, string> file1_hap1, file1_hap2, file2_hap1; for (CHROMPOS_to_filepos_pair_it=CHROMPOS_to_filepos_pair.begin(); CHROMPOS_to_filepos_pair_it != CHROMPOS_to_filepos_pair.end(); ++CHROMPOS_to_filepos_pair_it) { CHROM = CHROMPOS_to_filepos_pair_it->first.first; POS = CHROMPOS_to_filepos_pair_it->first.second; s1 = CHROMPOS_to_filepos_pair_it->second.first; s2 = CHROMPOS_to_filepos_pair_it->second.second; vcf_entry e1(N_indv); vcf_entry e2(diff_vcf_file.N_indv); // Read entries from file (if available) if (s1 != -1) { get_vcf_entry(s1, vcf_line); e1.reset(vcf_line); } if (s2 != -1) { diff_vcf_file.get_vcf_entry(s2, vcf_line); e2.reset(vcf_line); } e1.parse_basic_entry(true); e2.parse_basic_entry(true); e1.parse_full_entry(true); e1.parse_genotype_entries(true); e2.parse_full_entry(true); 
e2.parse_genotype_entries(true); pair<string, string> genotype1, genotype2; pair<string, string> missing_genotype(".","."); unsigned int N_common_called=0; // Number of genotypes called in both files unsigned int indv_count=0; // Bug fix applied (#3354189) - July 5th 2011 for (combined_individuals_it=combined_individuals.begin(); combined_individuals_it!=combined_individuals.end(); ++combined_individuals_it, indv_count++) { indv1 = combined_individuals_it->second.first; indv2 = combined_individuals_it->second.second; if ((indv1 == -1) || (indv2 == -1)) continue; // Individual not found in one of the files e1.get_indv_GENOTYPE_strings(indv1, genotype1); e2.get_indv_GENOTYPE_strings(indv2, genotype2); if ((genotype1 != missing_genotype) && (genotype2 != missing_genotype)) { // No missing data N_common_called++; if (((genotype1.first == genotype2.first) && (genotype1.second == genotype2.second)) || ((genotype1.first == genotype2.second) && (genotype1.second == genotype2.first)) ) { // Have a matching genotypes in files 1 and 2 if (genotype1.first != genotype1.second) { // It's a heterozgote char phase1, phase2; phase1 = e1.get_indv_PHASE(indv1); phase2 = e2.get_indv_PHASE(indv2); if ((phase1 == '|') && (phase2 == '|')) { // Calculate Phasing error (switch error) N_phased_het_sites[indv_count]++; file1_hap1 = make_pair<string,string>(prev_geno_file1[indv_count].first, genotype1.first); file1_hap2 = make_pair<string,string>(prev_geno_file1[indv_count].second, genotype1.second); file2_hap1 = make_pair<string,string>(prev_geno_file2[indv_count].first, genotype2.first); if ((file2_hap1 != file1_hap1) && (file2_hap1 != file1_hap2)) { // Must be a switch error string indv_id; N_switch_errors[indv_count]++; if (indv1 != -1) indv_id = indv[indv1]; else indv_id = diff_vcf_file.indv[indv2]; switcherror << CHROM << "\t" << POS << "\t" << indv_id << endl; } prev_geno_file1[indv_count] = genotype1; prev_geno_file2[indv_count] = genotype2; } } } } } } switcherror.close(); 
output_file = output_file_prefix + ".diff.indv.switch"; ofstream idiscord(output_file.c_str()); if (!idiscord.is_open()) error("Could not open Individual Discordance File: " + output_file, 3); idiscord << "INDV\tN_COMMON_PHASED_HET\tN_SWITCH\tSWITCH" << endl; unsigned int indv_count=0; double switch_error; string indv_id; for (combined_individuals_it=combined_individuals.begin(); combined_individuals_it!=combined_individuals.end(); ++combined_individuals_it) { indv1 = combined_individuals_it->second.first; indv2 = combined_individuals_it->second.second; if (indv1 != -1) indv_id = indv[indv1]; else indv_id = diff_vcf_file.indv[indv2]; if (N_phased_het_sites[indv_count] > 0) switch_error = double(N_switch_errors[indv_count]) / N_phased_het_sites[indv_count]; else switch_error = 0; idiscord << indv_id << "\t" << N_phased_het_sites[indv_count] << "\t" << N_switch_errors[indv_count] << "\t" << switch_error << endl; indv_count++; } idiscord.close(); }
void vcf_file::output_discordance_matrix(const string &output_file_prefix, vcf_file &diff_vcf_file) { printLOG("Outputting Discordance Matrix\n\tFor bi-allelic loci, called in both files, with matching alleles only...\n"); map<pair<string, int>, pair<int, int> > CHROMPOS_to_filepos_pair; map<pair<string, int>, pair<int, int> >::iterator CHROMPOS_to_filepos_pair_it; return_site_union(diff_vcf_file, CHROMPOS_to_filepos_pair); map<string, pair< int, int> > combined_individuals; map<string, pair< int, int> >::iterator combined_individuals_it; return_indv_union(diff_vcf_file, combined_individuals); string vcf_line; int s1, s2, indv1, indv2; vector<vector<int> > discordance_matrix(4, vector<int>(4, 0)); for (CHROMPOS_to_filepos_pair_it=CHROMPOS_to_filepos_pair.begin(); CHROMPOS_to_filepos_pair_it != CHROMPOS_to_filepos_pair.end(); ++CHROMPOS_to_filepos_pair_it) { s1 = CHROMPOS_to_filepos_pair_it->second.first; s2 = CHROMPOS_to_filepos_pair_it->second.second; vcf_entry e1(N_indv); vcf_entry e2(diff_vcf_file.N_indv); // Read entries from file (if available) if (s1 != -1) { get_vcf_entry(s1, vcf_line); e1.reset(vcf_line); } if (s2 != -1) { diff_vcf_file.get_vcf_entry(s2, vcf_line); e2.reset(vcf_line); } e1.parse_basic_entry(true); e2.parse_basic_entry(true); if ((e1.get_N_alleles() != 2) || (e2.get_N_alleles() != 2)) continue; // Set the reference to the non-missing entry (if available) string REF = e1.get_REF(); string REF2 = e2.get_REF(); if (REF == "N") REF = REF2; if (REF2 == "N") REF2 = REF; if (REF.size() != REF2.size()) continue; if ((REF != REF2) && (REF2 != "N") && (REF != "N")) continue; // Do the alternative alleles match? 
string ALT, ALT2; ALT = e1.get_ALT(); ALT2 = e2.get_ALT(); bool alleles_match = (ALT == ALT2) && (REF == REF2); if (alleles_match == false) continue; e1.parse_full_entry(true); e1.parse_genotype_entries(true); e2.parse_full_entry(true); e2.parse_genotype_entries(true); pair<int,int> geno_ids1, geno_ids2; int N1, N2; for (combined_individuals_it=combined_individuals.begin(); combined_individuals_it!=combined_individuals.end(); ++combined_individuals_it) { indv1 = combined_individuals_it->second.first; indv2 = combined_individuals_it->second.second; if ((indv1 == -1) || (indv2 == -1)) continue; // Individual not found in one of the files // Alleles match, so can compare ids instead of strings e1.get_indv_GENOTYPE_ids(indv1, geno_ids1); e2.get_indv_GENOTYPE_ids(indv2, geno_ids2); if (((geno_ids1.first != -1) && (geno_ids1.second == -1)) || ((geno_ids2.first != -1) && (geno_ids2.second == -1))) { // Haploid one_off_warning("***Warning: Haploid chromosomes not counted!***"); continue; } N1 = geno_ids1.first + geno_ids1.second; N2 = geno_ids2.first + geno_ids2.second; if ((N1 == -1) || (N1 < -2) || (N1 > 2)) error("Unhandled case"); if ((N2 == -1) || (N2 < -2) || (N2 > 2)) error("Unhandled case"); if (N1 == -2) N1 = 3; if (N2 == -2) N2 = 3; discordance_matrix[N1][N2]++; } } string output_file = output_file_prefix + ".diff.discordance_matrix"; ofstream out(output_file.c_str()); if (!out.is_open()) error("Could not open Discordance Matrix File: " + output_file, 3); out << "-\tN_0/0_file1\tN_0/1_file1\tN_1/1_file1\tN_./._file1" << endl; out << "N_0/0_file2\t" << discordance_matrix[0][0] << "\t" << discordance_matrix[1][0] << "\t" << discordance_matrix[2][0] << "\t" << discordance_matrix[3][0] << endl; out << "N_0/1_file2\t" << discordance_matrix[0][1] << "\t" << discordance_matrix[1][1] << "\t" << discordance_matrix[2][1] << "\t" << discordance_matrix[3][1] << endl; out << "N_1/1_file2\t" << discordance_matrix[0][2] << "\t" << discordance_matrix[1][2] << "\t" << 
discordance_matrix[2][2] << "\t" << discordance_matrix[3][2] << endl; out << "N_./._file2\t" << discordance_matrix[0][3] << "\t" << discordance_matrix[1][3] << "\t" << discordance_matrix[2][3] << "\t" << discordance_matrix[3][3] << endl; out.close(); }
void vcf_file::output_discordance_by_site(const string &output_file_prefix, vcf_file &diff_vcf_file) { printLOG("Outputting Discordance By Site...\n"); map<pair<string, int>, pair<int, int> > CHROMPOS_to_filepos_pair; map<pair<string, int>, pair<int, int> >::iterator CHROMPOS_to_filepos_pair_it; return_site_union(diff_vcf_file, CHROMPOS_to_filepos_pair); map<string, pair< int, int> > combined_individuals; map<string, pair< int, int> >::iterator combined_individuals_it; return_indv_union(diff_vcf_file, combined_individuals); string CHROM, vcf_line; int POS; int s1, s2, indv1, indv2; string output_file = output_file_prefix + ".diff.sites"; ofstream diffsites(output_file.c_str()); if (!diffsites.is_open()) error("Could not open Sites Differences File: " + output_file, 3); //diffsites << "CHROM\tPOS\tFILES\tMATCHING_ALT\tN_COMMON_CALLED\tN_DISCORD\tDISCORDANCE\tN_FILE1_NONREF_GENOTYPES\tNON_REF_DISCORDANCE" << endl; diffsites << "CHROM\tPOS\tFILES\tMATCHING_ALLELES\tN_COMMON_CALLED\tN_DISCORD\tDISCORDANCE" << endl; for (CHROMPOS_to_filepos_pair_it=CHROMPOS_to_filepos_pair.begin(); CHROMPOS_to_filepos_pair_it != CHROMPOS_to_filepos_pair.end(); ++CHROMPOS_to_filepos_pair_it) { CHROM = CHROMPOS_to_filepos_pair_it->first.first; POS = CHROMPOS_to_filepos_pair_it->first.second; diffsites << CHROM << "\t" << POS; s1 = CHROMPOS_to_filepos_pair_it->second.first; s2 = CHROMPOS_to_filepos_pair_it->second.second; vcf_entry e1(N_indv); vcf_entry e2(diff_vcf_file.N_indv); bool data_in_both = true; // Read entries from file (if available) if (s1 != -1) { get_vcf_entry(s1, vcf_line); e1.reset(vcf_line); } else data_in_both = false; if (s2 != -1) { diff_vcf_file.get_vcf_entry(s2, vcf_line); e2.reset(vcf_line); } else data_in_both = false; if (data_in_both) diffsites << "\tB"; else if ((s1 != -1) && (s2 == -1)) diffsites << "\t1"; else if ((s1 == -1) && (s2 != -1)) diffsites << "\t2"; else error("Unhandled condition"); e1.parse_basic_entry(true); e2.parse_basic_entry(true); // Set the 
reference to the non-missing entry (if available) string REF = e1.get_REF(); string REF2 = e2.get_REF(); if (REF == "N") REF = REF2; if (REF2 == "N") REF2 = REF; if (REF.size() != REF2.size()) { warning("REF sequences at " + CHROM + ":" + int2str(POS) + " are not comparable. Skipping site"); continue; } if ((REF != REF2) && (REF2 != "N") && (REF != "N")) warning("Non-matching REF " + CHROM + ":" + int2str(POS) + " " + REF + "/" + REF2); // Do the alternative alleles match? string ALT, ALT2; ALT = e1.get_ALT(); ALT2 = e2.get_ALT(); bool alleles_match = ((ALT == ALT2) && (REF == REF2)); diffsites << "\t" << alleles_match; e1.parse_full_entry(true); e1.parse_genotype_entries(true); e2.parse_full_entry(true); e2.parse_genotype_entries(true); pair<string, string> genotype1, genotype2; pair<int,int> geno_ids1, geno_ids2; pair<string, string> missing_genotype(".","."); pair<int, int> missing_id(-1,-1); unsigned int N_common_called=0; // Number of genotypes called in both files unsigned int N_missing_1=0, N_missing_2=0; unsigned int N_discord=0; unsigned int N_concord_non_missing=0; for (combined_individuals_it=combined_individuals.begin(); combined_individuals_it!=combined_individuals.end(); ++combined_individuals_it) { indv1 = combined_individuals_it->second.first; indv2 = combined_individuals_it->second.second; if ((indv1 == -1) || (indv2 == -1)) continue; // Individual not found in one of the files if (alleles_match) { // Alleles match, so can compare ids instead of strings e1.get_indv_GENOTYPE_ids(indv1, geno_ids1); e2.get_indv_GENOTYPE_ids(indv2, geno_ids2); if ((geno_ids1 != missing_id) && (geno_ids2 != missing_id)) { N_common_called++; if (((geno_ids1.first == geno_ids2.first) && (geno_ids1.second == geno_ids2.second)) || ((geno_ids1.first == geno_ids2.second) && (geno_ids1.second == geno_ids2.first)) ) { // Match N_concord_non_missing++; } else { // Mismatch N_discord++; } } else if ((geno_ids1 == missing_id) && (geno_ids2 == missing_id)) { // Both missing 
N_missing_1++; N_missing_2++; } else if (geno_ids1 != missing_id) { // Genotype 1 is not missing, genotype 2 is. N_missing_2++; } else if (geno_ids2 != missing_id) { // Genotype 2 is not missing, genotype 1 is. N_missing_1++; } else error("Unknown condition"); } else { // Alleles don't match, so need to be more careful and compare strings e1.get_indv_GENOTYPE_strings(indv1, genotype1); e2.get_indv_GENOTYPE_strings(indv2, genotype2); if ((genotype1 != missing_genotype) && (genotype2 != missing_genotype)) { // No missing data N_common_called++; if (((genotype1.first == genotype2.first) && (genotype1.second == genotype2.second)) || ((genotype1.first == genotype2.second) && (genotype1.second == genotype2.first)) ) { // Match N_concord_non_missing++; } else { // Mismatch N_discord++; } } else if ((genotype1 == missing_genotype) && (genotype2 == missing_genotype)) { // Both missing N_missing_1++; N_missing_2++; } else if (genotype1 != missing_genotype) { // Genotype 1 is not missing, genotype 2 is. N_missing_2++; } else if (genotype2 != missing_genotype) { // Genotype 2 is not missing, genotype 1 is. N_missing_1++; } else error("Unknown condition"); } } double discordance = N_discord / double(N_common_called); diffsites << "\t" << N_common_called << "\t" << N_discord << "\t" << discordance; diffsites << endl; } diffsites.close(); }
void vcf_file::output_discordance_by_indv(const string &output_file_prefix, vcf_file &diff_vcf_file) { printLOG("Outputting Discordance By Individual...\n"); map<pair<string, int>, pair<int, int> > CHROMPOS_to_filepos_pair; map<pair<string, int>, pair<int, int> >::iterator CHROMPOS_to_filepos_pair_it; return_site_union(diff_vcf_file, CHROMPOS_to_filepos_pair); map<string, pair< int, int> > combined_individuals; map<string, pair< int, int> >::iterator combined_individuals_it; return_indv_union(diff_vcf_file, combined_individuals); map<string, pair<int, int> > indv_sums; string vcf_line, CHROM; int POS; int s1, s2, indv1, indv2; for (CHROMPOS_to_filepos_pair_it=CHROMPOS_to_filepos_pair.begin(); CHROMPOS_to_filepos_pair_it != CHROMPOS_to_filepos_pair.end(); ++CHROMPOS_to_filepos_pair_it) { CHROM = CHROMPOS_to_filepos_pair_it->first.first; POS = CHROMPOS_to_filepos_pair_it->first.second; s1 = CHROMPOS_to_filepos_pair_it->second.first; s2 = CHROMPOS_to_filepos_pair_it->second.second; vcf_entry e1(N_indv); vcf_entry e2(diff_vcf_file.N_indv); // Read entries from file (if available) if (s1 != -1) { get_vcf_entry(s1, vcf_line); e1.reset(vcf_line); } if (s2 != -1) { diff_vcf_file.get_vcf_entry(s2, vcf_line); e2.reset(vcf_line); } e1.parse_basic_entry(true); e2.parse_basic_entry(true); // Set the reference to the non-missing entry (if available) string REF = e1.get_REF(); string REF2 = e2.get_REF(); if (REF == "N") REF = REF2; if (REF2 == "N") REF2 = REF; if (REF.size() != REF2.size()) { warning("REF sequences at " + CHROM + ":" + int2str(POS) + " are not comparable. Skipping site"); continue; } if ((REF != REF2) && (REF2 != "N") && (REF != "N")) warning("Non-matching REF " + CHROM + ":" + int2str(POS) + " " + REF + "/" + REF2); // Do the alternative alleles match? 
string ALT, ALT2; ALT = e1.get_ALT(); ALT2 = e2.get_ALT(); bool alleles_match = (ALT == ALT2) && (REF == REF2); e1.parse_full_entry(true); e1.parse_genotype_entries(true); e2.parse_full_entry(true); e2.parse_genotype_entries(true); pair<string, string> genotype1, genotype2; pair<int,int> geno_ids1, geno_ids2; pair<string, string> missing_genotype(".","."); pair<int, int> missing_id(-1,-1); for (combined_individuals_it=combined_individuals.begin(); combined_individuals_it!=combined_individuals.end(); ++combined_individuals_it) { indv1 = combined_individuals_it->second.first; indv2 = combined_individuals_it->second.second; if ((indv1 == -1) || (indv2 == -1)) continue; // Individual not found in one of the files if (alleles_match) { // Alleles match, so can compare ids instead of strings e1.get_indv_GENOTYPE_ids(indv1, geno_ids1); e2.get_indv_GENOTYPE_ids(indv2, geno_ids2); if ((geno_ids1 != missing_id) && (geno_ids2 != missing_id)) { indv_sums[combined_individuals_it->first].first++; if (((geno_ids1.first == geno_ids2.first) && (geno_ids1.second == geno_ids2.second)) || ((geno_ids1.first == geno_ids2.second) && (geno_ids1.second == geno_ids2.first)) ) { // Match // Don't do anything } else { // Mismatch indv_sums[combined_individuals_it->first].second++; } } else if ((geno_ids1 == missing_id) && (geno_ids2 == missing_id)) { // Both missing // Don't do anything. } else if (geno_ids1 != missing_id) { // Genotype 1 is not missing, genotype 2 is. // Don't do anything. } else if (geno_ids2 != missing_id) { // Genotype 2 is not missing, genotype 1 is. // Don't do anything. 
} else error("Unknown condition"); } else { // Alleles don't match, so need to be more careful and compare strings e1.get_indv_GENOTYPE_strings(indv1, genotype1); e2.get_indv_GENOTYPE_strings(indv2, genotype2); if ((genotype1 != missing_genotype) && (genotype2 != missing_genotype)) { // No missing data indv_sums[combined_individuals_it->first].first++; if (((genotype1.first == genotype2.first) && (genotype1.second == genotype2.second)) || ((genotype1.first == genotype2.second) && (genotype1.second == genotype2.first)) ) { // Match // Don't do anything } else { // Mismatch indv_sums[combined_individuals_it->first].second++; } } else if ((genotype1 == missing_genotype) && (genotype2 == missing_genotype)) { // Both missing // Don't do anything } else if (genotype1 != missing_genotype) { // Genotype 1 is not missing, genotype 2 is. // Don't do anything } else if (genotype2 != missing_genotype) { // Genotype 2 is not missing, genotype 1 is. // Don't do anything } else error("Unknown condition"); } } } string output_file = output_file_prefix + ".diff.indv"; ofstream out(output_file.c_str()); if (!out.is_open()) error("Could not open Sites Differences File: " + output_file, 3); out << "INDV\tN_COMMON_CALLED\tN_DISCORD\tDISCORDANCE" << endl; int N, N_discord; double discordance; for (combined_individuals_it=combined_individuals.begin(); combined_individuals_it!=combined_individuals.end(); ++combined_individuals_it) { out << combined_individuals_it->first; N = indv_sums[combined_individuals_it->first].first; N_discord = indv_sums[combined_individuals_it->first].second; discordance = N_discord / double(N); out << "\t" << N << "\t" << N_discord << "\t" << discordance << endl; } out.close(); }