// Loads entry number `entry_index` of `vfile` into `entry` and parses it:
// first the locus-level fields, then the per-individual fields.
// `entry_index` must be smaller than vfile.N_entries (checked with Assert).
void GetNext(vcf_file &vfile, uint entry_index, vcf_entry &entry)
{
    Assert (entry_index < vfile.N_entries);

    // Pull the raw text of the requested record and hand it to the entry.
    string raw_record;
    vfile.get_vcf_entry(entry_index, raw_record);
    entry.reset(raw_record);

    // Locus-level parse, with ALT, FILTER and INFO all requested.
    entry.parse_basic_entry(/* parse ALT */ true, /* parse FILTER */ true, /* parse INFO */ true);

    // Per-individual parse, with FORMAT requested.
    entry.parse_full_entry(/* parse FORMAT */ true);
}
Example #2
0
// Builds the union of the included sites of this file and `file2`, keyed by
// (CHROM, POS).
//
// For every key the mapped pair is (entry index in this file, entry index in
// file2); -1 marks a site that is absent from the corresponding file. Entries
// whose include_entry flag is false are ignored. When a file holds several
// included entries at the same CHROM/POS, the last one read wins.
// Anything already stored in CHROMPOS_to_filepos_pair is kept (and may be
// overwritten or updated); the map is not cleared here.
void vcf_file::return_site_union(vcf_file &file2, map<pair<string, int>, pair<int, int> > &CHROMPOS_to_filepos_pair)
{
	typedef map<pair<string, int>, pair<int, int> > site_map;

	string vcf_line;

	// Pass 1: sites of this file. The index into file2 is not known yet.
	for (unsigned int s=0; s<N_entries; s++)
	{
		if (include_entry[s] == false)
			continue;

		get_vcf_entry(s, vcf_line);
		vcf_entry e(N_indv, vcf_line);
		e.parse_basic_entry();

		// Construct the pairs directly: make_pair<string,int>(lvalue, lvalue)
		// with explicit template arguments does not compile from C++11 onwards.
		const pair<string, int> site(e.get_CHROM(), e.get_POS());
		CHROMPOS_to_filepos_pair[site] = pair<int, int>(static_cast<int>(s), -1);
	}

	// Pass 2: sites of file2. Either fill in the second index of an existing
	// site, or add a new site that is missing from this file.
	for (unsigned int s=0; s<file2.N_entries; s++)
	{
		if (file2.include_entry[s] == false)
			continue;

		file2.get_vcf_entry(s, vcf_line);
		vcf_entry e(file2.N_indv, vcf_line);
		e.parse_basic_entry();

		const pair<string, int> site(e.get_CHROM(), e.get_POS());
		const int file2_index = static_cast<int>(s);

		// A single insert both looks the key up and adds it when absent.
		pair<site_map::iterator, bool> inserted =
			CHROMPOS_to_filepos_pair.insert(site_map::value_type(site, pair<int, int>(-1, file2_index)));
		if (inserted.second == false)
			inserted.first->second.second = file2_index;
	}
}
Example #3
0
void vcf_file::output_sites_in_files(const string &output_file_prefix, vcf_file &diff_vcf_file)
{
	printLOG("Comparing sites in VCF files...\n");
	map<pair<string, int>, pair<int, int> > CHROMPOS_to_filepos_pair;
	map<pair<string, int>, pair<int, int> >::iterator CHROMPOS_to_filepos_pair_it;
	return_site_union(diff_vcf_file, CHROMPOS_to_filepos_pair);

	string vcf_line;
	string CHROM;
	int POS;

	string output_file = output_file_prefix + ".diff.sites_in_files";
	ofstream sites_in_files(output_file.c_str());
	sites_in_files << "CHROM\tPOS\tIN_FILE\tREF\tALT1\tALT2" << endl;

	int s1, s2;
	int N_common_SNPs = 0, N_SNPs_file1_only=0, N_SNPs_file2_only=0;
	for (CHROMPOS_to_filepos_pair_it=CHROMPOS_to_filepos_pair.begin(); CHROMPOS_to_filepos_pair_it!=CHROMPOS_to_filepos_pair.end(); ++CHROMPOS_to_filepos_pair_it)
	{
		s1 = CHROMPOS_to_filepos_pair_it->second.first;
		s2 = CHROMPOS_to_filepos_pair_it->second.second;

		CHROM = CHROMPOS_to_filepos_pair_it->first.first;
		POS = CHROMPOS_to_filepos_pair_it->first.second;

		vcf_entry e1(N_indv);
		vcf_entry e2(diff_vcf_file.N_indv);

		// Read entries from file (if available)
		if (s1 != -1)
		{
			get_vcf_entry(s1, vcf_line);
			e1.reset(vcf_line);
		}

		if (s2 != -1)
		{
			diff_vcf_file.get_vcf_entry(s2, vcf_line);
			e2.reset(vcf_line);
		}

		e1.parse_basic_entry(true);
		e2.parse_basic_entry(true);

		// Set the reference to the non-missing entry (if available)
		string REF = e1.get_REF();
		string REF2 = e2.get_REF();
		if ((REF == "N") || (REF == "."))
			REF = REF2;
		if ((REF2 == "N") || (REF2 == "."))
			REF2 = REF;

		if ((REF != REF2) && (REF2 != "N") && (REF != "N") && (REF != ".") && (REF2 != "."))
			warning("Non-matching REF at " + CHROM + ":" + int2str(POS) + " " + REF + "/" + REF2 + ". Diff results may be unreliable.");

		sites_in_files << CHROM << "\t" << POS << "\t";
		if ((s1 != -1) && (s2 != -1))
		{
			N_common_SNPs++;
			sites_in_files << "B";
		}
		else if ((s1 != -1) && (s2 == -1))
		{
			N_SNPs_file1_only++;
			sites_in_files << "1";
		}
		else if ((s1 == -1) && (s2 != -1))
		{
			N_SNPs_file2_only++;
			sites_in_files << "2";
		}
		else
			error("SNP in neither file!?");

		sites_in_files << "\t" << REF << "\t" << e1.get_ALT() << "\t" << e2.get_ALT() << endl;
	}

	sites_in_files.close();

	printLOG("Found " + int2str(N_common_SNPs) + " SNPs common to both files.\n");
	printLOG("Found " + int2str(N_SNPs_file1_only) + " SNPs only in main file.\n");
	printLOG("Found " + int2str(N_SNPs_file2_only) + " SNPs only in second file.\n");
}
Example #4
0
// Compares the phasing of this file against diff_vcf_file and writes two
// tab-separated reports:
//
//   <output_file_prefix>.diff.switch       one row (CHROM, POS, INDV) per
//                                          detected switch error
//   <output_file_prefix>.diff.indv.switch  per individual: number of commonly
//                                          phased heterozygous sites, number
//                                          of switch errors, and their ratio
//                                          (0 when there are no such sites)
//
// Only individuals present in both files are compared. A site counts for an
// individual when both genotypes are non-missing, contain the same two alleles
// (in either order), are heterozygous, and are phased ('|') in both files.
// A switch error is recorded when the haplotype formed by the previous counted
// site and this site in diff_vcf_file matches neither haplotype of this file.
// NOTE: the "previous site" genotypes are carried along the whole site map and
// are not reset when CHROM changes.
// Calls error() if either output file cannot be opened.
void vcf_file::output_switch_error(const string &output_file_prefix, vcf_file &diff_vcf_file)
{
	printLOG("Outputting Phase Switch Errors...\n");
	map<pair<string, int>, pair<int, int> > CHROMPOS_to_filepos_pair;
	map<pair<string, int>, pair<int, int> >::iterator CHROMPOS_to_filepos_pair_it;
	return_site_union(diff_vcf_file, CHROMPOS_to_filepos_pair);

	map<string, pair< int, int> > combined_individuals;
	map<string, pair< int, int> >::iterator combined_individuals_it;
	return_indv_union(diff_vcf_file, combined_individuals);

	string CHROM, vcf_line;
	int POS;
	int s1, s2, indv1, indv2;

	string output_file = output_file_prefix + ".diff.switch";
	ofstream switcherror(output_file.c_str());
	if (!switcherror.is_open())
		error("Could not open Switch Error file: " + output_file, 4);
	switcherror << "CHROM\tPOS\tINDV" << endl;

	// Per-individual tallies, indexed by position in combined_individuals.
	unsigned int N_combined_indv = combined_individuals.size();
	vector<int> N_phased_het_sites(N_combined_indv, 0);
	vector<int> N_switch_errors(N_combined_indv, 0);

	// Genotype at the previous counted site, per individual and per file.
	const pair<string, string> missing_genotype(".",".");
	vector<pair<string, string> > prev_geno_file1(N_combined_indv, missing_genotype);
	vector<pair<string, string> > prev_geno_file2(N_combined_indv, missing_genotype);
	pair<string, string> file1_hap1, file1_hap2, file2_hap1;

	for (CHROMPOS_to_filepos_pair_it=CHROMPOS_to_filepos_pair.begin(); CHROMPOS_to_filepos_pair_it != CHROMPOS_to_filepos_pair.end(); ++CHROMPOS_to_filepos_pair_it)
	{
		CHROM = CHROMPOS_to_filepos_pair_it->first.first;
		POS = CHROMPOS_to_filepos_pair_it->first.second;

		// Entry indices in each file; -1 means the site is absent from that file.
		s1 = CHROMPOS_to_filepos_pair_it->second.first;
		s2 = CHROMPOS_to_filepos_pair_it->second.second;

		vcf_entry e1(N_indv);
		vcf_entry e2(diff_vcf_file.N_indv);

		// Read entries from file (if available)
		if (s1 != -1)
		{
			get_vcf_entry(s1, vcf_line);
			e1.reset(vcf_line);
		}

		if (s2 != -1)
		{
			diff_vcf_file.get_vcf_entry(s2, vcf_line);
			e2.reset(vcf_line);
		}

		e1.parse_basic_entry(true);
		e2.parse_basic_entry(true);

		e1.parse_full_entry(true);
		e1.parse_genotype_entries(true);

		e2.parse_full_entry(true);
		e2.parse_genotype_entries(true);

		pair<string, string> genotype1, genotype2;
		unsigned int indv_count=0;

		// Bug fix applied (#3354189) - July 5th 2011
		// indv_count advances in the loop header so that every `continue`
		// below keeps it aligned with the iterator.
		for (combined_individuals_it=combined_individuals.begin();
				combined_individuals_it!=combined_individuals.end();
				++combined_individuals_it, indv_count++)
		{
			indv1 = combined_individuals_it->second.first;
			indv2 = combined_individuals_it->second.second;

			if ((indv1 == -1) || (indv2 == -1))
				continue;	// Individual not found in one of the files

			e1.get_indv_GENOTYPE_strings(indv1, genotype1);
			e2.get_indv_GENOTYPE_strings(indv2, genotype2);

			if ((genotype1 == missing_genotype) || (genotype2 == missing_genotype))
				continue;	// Missing data in at least one file

			const bool same_order = (genotype1.first == genotype2.first) && (genotype1.second == genotype2.second);
			const bool swapped_order = (genotype1.first == genotype2.second) && (genotype1.second == genotype2.first);
			if (!same_order && !swapped_order)
				continue;	// Genotypes differ between the files

			if (genotype1.first == genotype1.second)
				continue;	// Homozygote: carries no phase information

			const char phase1 = e1.get_indv_PHASE(indv1);
			const char phase2 = e2.get_indv_PHASE(indv2);
			if ((phase1 != '|') || (phase2 != '|'))
				continue;	// Not phased in both files

			// Calculate Phasing error (switch error)
			N_phased_het_sites[indv_count]++;
			// Build the pairs directly: make_pair<string,string>(lvalue, lvalue)
			// with explicit template arguments does not compile from C++11 onwards.
			file1_hap1 = pair<string, string>(prev_geno_file1[indv_count].first, genotype1.first);
			file1_hap2 = pair<string, string>(prev_geno_file1[indv_count].second, genotype1.second);
			file2_hap1 = pair<string, string>(prev_geno_file2[indv_count].first, genotype2.first);

			if ((file2_hap1 != file1_hap1) && (file2_hap1 != file1_hap2))
			{	// Must be a switch error
				N_switch_errors[indv_count]++;
				// indv1 is known to be valid here (checked above).
				switcherror << CHROM << "\t" << POS << "\t" << indv[indv1] << endl;
			}
			prev_geno_file1[indv_count] = genotype1;
			prev_geno_file2[indv_count] = genotype2;
		}
	}
	switcherror.close();

	output_file = output_file_prefix + ".diff.indv.switch";
	ofstream idiscord(output_file.c_str());
	if (!idiscord.is_open())
		error("Could not open Individual Discordance File: " + output_file, 3);

	idiscord << "INDV\tN_COMMON_PHASED_HET\tN_SWITCH\tSWITCH" << endl;
	unsigned int indv_count=0;
	double switch_error;
	string indv_id;
	for (combined_individuals_it=combined_individuals.begin();
			combined_individuals_it!=combined_individuals.end();
			++combined_individuals_it, indv_count++)
	{
		indv1 = combined_individuals_it->second.first;
		indv2 = combined_individuals_it->second.second;

		// Individuals found in only one file are still listed; take the
		// name from whichever file has them.
		if (indv1 != -1)
			indv_id = indv[indv1];
		else
			indv_id = diff_vcf_file.indv[indv2];

		if (N_phased_het_sites[indv_count] > 0)
			switch_error = double(N_switch_errors[indv_count]) / N_phased_het_sites[indv_count];
		else
			switch_error = 0;
		idiscord << indv_id << "\t" << N_phased_het_sites[indv_count] << "\t" << N_switch_errors[indv_count] << "\t" << switch_error << endl;
	}
	idiscord.close();
}
Example #5
0
void vcf_file::output_discordance_matrix(const string &output_file_prefix, vcf_file &diff_vcf_file)
{
	printLOG("Outputting Discordance Matrix\n\tFor bi-allelic loci, called in both files, with matching alleles only...\n");
	map<pair<string, int>, pair<int, int> > CHROMPOS_to_filepos_pair;
	map<pair<string, int>, pair<int, int> >::iterator CHROMPOS_to_filepos_pair_it;
	return_site_union(diff_vcf_file, CHROMPOS_to_filepos_pair);

	map<string, pair< int, int> > combined_individuals;
	map<string, pair< int, int> >::iterator combined_individuals_it;
	return_indv_union(diff_vcf_file, combined_individuals);

	string vcf_line;
	int s1, s2, indv1, indv2;

	vector<vector<int> > discordance_matrix(4, vector<int>(4, 0));

	for (CHROMPOS_to_filepos_pair_it=CHROMPOS_to_filepos_pair.begin(); CHROMPOS_to_filepos_pair_it != CHROMPOS_to_filepos_pair.end(); ++CHROMPOS_to_filepos_pair_it)
	{
		s1 = CHROMPOS_to_filepos_pair_it->second.first;
		s2 = CHROMPOS_to_filepos_pair_it->second.second;

		vcf_entry e1(N_indv);
		vcf_entry e2(diff_vcf_file.N_indv);

		// Read entries from file (if available)
		if (s1 != -1)
		{
			get_vcf_entry(s1, vcf_line);
			e1.reset(vcf_line);
		}

		if (s2 != -1)
		{
			diff_vcf_file.get_vcf_entry(s2, vcf_line);
			e2.reset(vcf_line);
		}

		e1.parse_basic_entry(true);
		e2.parse_basic_entry(true);

		if ((e1.get_N_alleles() != 2) || (e2.get_N_alleles() != 2))
			continue;

		// Set the reference to the non-missing entry (if available)
		string REF = e1.get_REF();
		string REF2 = e2.get_REF();
		if (REF == "N")
			REF = REF2;
		if (REF2 == "N")
			REF2 = REF;

		if (REF.size() != REF2.size())
			continue;

		if ((REF != REF2) && (REF2 != "N") && (REF != "N"))
			continue;

		// Do the alternative alleles match?
		string ALT, ALT2;
		ALT = e1.get_ALT();
		ALT2 = e2.get_ALT();

		bool alleles_match = (ALT == ALT2) && (REF == REF2);
		if (alleles_match == false)
			continue;

		e1.parse_full_entry(true);
		e1.parse_genotype_entries(true);

		e2.parse_full_entry(true);
		e2.parse_genotype_entries(true);

		pair<int,int> geno_ids1, geno_ids2;
		int N1, N2;

		for (combined_individuals_it=combined_individuals.begin(); combined_individuals_it!=combined_individuals.end(); ++combined_individuals_it)
		{
			indv1 = combined_individuals_it->second.first;
			indv2 = combined_individuals_it->second.second;

			if ((indv1 == -1) || (indv2 == -1))
				continue;	// Individual not found in one of the files

			// Alleles match, so can compare ids instead of strings
			e1.get_indv_GENOTYPE_ids(indv1, geno_ids1);
			e2.get_indv_GENOTYPE_ids(indv2, geno_ids2);

			if (((geno_ids1.first != -1) && (geno_ids1.second == -1)) ||
				((geno_ids2.first != -1) && (geno_ids2.second == -1)))
			{	// Haploid
				one_off_warning("***Warning: Haploid chromosomes not counted!***");
				continue;
			}

			N1 = geno_ids1.first + geno_ids1.second;
			N2 = geno_ids2.first + geno_ids2.second;

			if ((N1 == -1) || (N1 < -2) || (N1 > 2))
				error("Unhandled case");
			if ((N2 == -1) || (N2 < -2) || (N2 > 2))
				error("Unhandled case");

			if (N1 == -2)
				N1 = 3;

			if (N2 == -2)
				N2 = 3;

			discordance_matrix[N1][N2]++;
		}
	}

	string output_file = output_file_prefix + ".diff.discordance_matrix";
	ofstream out(output_file.c_str());
	if (!out.is_open())
		error("Could not open Discordance Matrix File: " + output_file, 3);

	out << "-\tN_0/0_file1\tN_0/1_file1\tN_1/1_file1\tN_./._file1" << endl;
	out << "N_0/0_file2\t" << discordance_matrix[0][0] << "\t" << discordance_matrix[1][0] << "\t" << discordance_matrix[2][0] << "\t" << discordance_matrix[3][0] << endl;
	out << "N_0/1_file2\t" << discordance_matrix[0][1] << "\t" << discordance_matrix[1][1] << "\t" << discordance_matrix[2][1] << "\t" << discordance_matrix[3][1] << endl;
	out << "N_1/1_file2\t" << discordance_matrix[0][2] << "\t" << discordance_matrix[1][2] << "\t" << discordance_matrix[2][2] << "\t" << discordance_matrix[3][2] << endl;
	out << "N_./._file2\t" << discordance_matrix[0][3] << "\t" << discordance_matrix[1][3] << "\t" << discordance_matrix[2][3] << "\t" << discordance_matrix[3][3] << endl;
	out.close();
}
Example #6
0
void vcf_file::output_discordance_by_site(const string &output_file_prefix, vcf_file &diff_vcf_file)
{
	printLOG("Outputting Discordance By Site...\n");
	map<pair<string, int>, pair<int, int> > CHROMPOS_to_filepos_pair;
	map<pair<string, int>, pair<int, int> >::iterator CHROMPOS_to_filepos_pair_it;
	return_site_union(diff_vcf_file, CHROMPOS_to_filepos_pair);

	map<string, pair< int, int> > combined_individuals;
	map<string, pair< int, int> >::iterator combined_individuals_it;
	return_indv_union(diff_vcf_file, combined_individuals);

	string CHROM, vcf_line;
	int POS;
	int s1, s2, indv1, indv2;

	string output_file = output_file_prefix + ".diff.sites";
	ofstream diffsites(output_file.c_str());
	if (!diffsites.is_open())
		error("Could not open Sites Differences File: " + output_file, 3);
	//diffsites << "CHROM\tPOS\tFILES\tMATCHING_ALT\tN_COMMON_CALLED\tN_DISCORD\tDISCORDANCE\tN_FILE1_NONREF_GENOTYPES\tNON_REF_DISCORDANCE" << endl;
	diffsites << "CHROM\tPOS\tFILES\tMATCHING_ALLELES\tN_COMMON_CALLED\tN_DISCORD\tDISCORDANCE" << endl;

	for (CHROMPOS_to_filepos_pair_it=CHROMPOS_to_filepos_pair.begin(); CHROMPOS_to_filepos_pair_it != CHROMPOS_to_filepos_pair.end(); ++CHROMPOS_to_filepos_pair_it)
	{
		CHROM = CHROMPOS_to_filepos_pair_it->first.first;
		POS = CHROMPOS_to_filepos_pair_it->first.second;

		diffsites << CHROM << "\t" << POS;

		s1 = CHROMPOS_to_filepos_pair_it->second.first;
		s2 = CHROMPOS_to_filepos_pair_it->second.second;

		vcf_entry e1(N_indv);
		vcf_entry e2(diff_vcf_file.N_indv);

		bool data_in_both = true;
		// Read entries from file (if available)
		if (s1 != -1)
		{
			get_vcf_entry(s1, vcf_line);
			e1.reset(vcf_line);
		}
		else
			data_in_both = false;

		if (s2 != -1)
		{
			diff_vcf_file.get_vcf_entry(s2, vcf_line);
			e2.reset(vcf_line);
		}
		else
			data_in_both = false;

		if (data_in_both)
			diffsites << "\tB";
		else if ((s1 != -1) && (s2 == -1))
			diffsites << "\t1";
		else if ((s1 == -1) && (s2 != -1))
			diffsites << "\t2";
		else
			error("Unhandled condition");

		e1.parse_basic_entry(true);
		e2.parse_basic_entry(true);

		// Set the reference to the non-missing entry (if available)
		string REF = e1.get_REF();
		string REF2 = e2.get_REF();
		if (REF == "N")
			REF = REF2;
		if (REF2 == "N")
			REF2 = REF;

		if (REF.size() != REF2.size())
		{
			warning("REF sequences at " + CHROM + ":" + int2str(POS) + " are not comparable. Skipping site");
			continue;
		}

		if ((REF != REF2) && (REF2 != "N") && (REF != "N"))
			warning("Non-matching REF " + CHROM + ":" + int2str(POS) + " " + REF + "/" + REF2);

		// Do the alternative alleles match?
		string ALT, ALT2;
		ALT = e1.get_ALT();
		ALT2 = e2.get_ALT();

		bool alleles_match = ((ALT == ALT2) && (REF == REF2));
		diffsites << "\t" << alleles_match;

		e1.parse_full_entry(true);
		e1.parse_genotype_entries(true);

		e2.parse_full_entry(true);
		e2.parse_genotype_entries(true);

		pair<string, string> genotype1, genotype2;
		pair<int,int> geno_ids1, geno_ids2;
		pair<string, string> missing_genotype(".",".");
		pair<int, int> missing_id(-1,-1);

		unsigned int N_common_called=0;	// Number of genotypes called in both files
		unsigned int N_missing_1=0, N_missing_2=0;
		unsigned int N_discord=0;
		unsigned int N_concord_non_missing=0;

		for (combined_individuals_it=combined_individuals.begin(); combined_individuals_it!=combined_individuals.end(); ++combined_individuals_it)
		{
			indv1 = combined_individuals_it->second.first;
			indv2 = combined_individuals_it->second.second;

			if ((indv1 == -1) || (indv2 == -1))
				continue;	// Individual not found in one of the files

			if (alleles_match)
			{	// Alleles match, so can compare ids instead of strings
				e1.get_indv_GENOTYPE_ids(indv1, geno_ids1);
				e2.get_indv_GENOTYPE_ids(indv2, geno_ids2);

				if ((geno_ids1 != missing_id) && (geno_ids2 != missing_id))
				{
					N_common_called++;
					if (((geno_ids1.first == geno_ids2.first) && (geno_ids1.second == geno_ids2.second)) ||
						((geno_ids1.first == geno_ids2.second) && (geno_ids1.second == geno_ids2.first)) )
					{	// Match
						N_concord_non_missing++;
					}
					else
					{	// Mismatch
						N_discord++;
					}
				}
				else if ((geno_ids1 == missing_id) && (geno_ids2 == missing_id))
				{	// Both missing
					N_missing_1++; N_missing_2++;
				}
				else if (geno_ids1 != missing_id)
				{	// Genotype 1 is not missing, genotype 2 is.
					N_missing_2++;
				}
				else if (geno_ids2 != missing_id)
				{	// Genotype 2 is not missing, genotype 1 is.
					N_missing_1++;
				}
				else
					error("Unknown condition");
			}
			else
			{	// Alleles don't match, so need to be more careful and compare strings
				e1.get_indv_GENOTYPE_strings(indv1, genotype1);
				e2.get_indv_GENOTYPE_strings(indv2, genotype2);

				if ((genotype1 != missing_genotype) && (genotype2 != missing_genotype))
				{	// No missing data
					N_common_called++;
					if (((genotype1.first == genotype2.first) && (genotype1.second == genotype2.second)) ||
						((genotype1.first == genotype2.second) && (genotype1.second == genotype2.first)) )
					{	// Match
						N_concord_non_missing++;
					}
					else
					{	// Mismatch
						N_discord++;
					}
				}
				else if ((genotype1 == missing_genotype) && (genotype2 == missing_genotype))
				{	// Both missing
					N_missing_1++; N_missing_2++;
				}
				else if (genotype1 != missing_genotype)
				{	// Genotype 1 is not missing, genotype 2 is.
					N_missing_2++;
				}
				else if (genotype2 != missing_genotype)
				{	// Genotype 2 is not missing, genotype 1 is.
					N_missing_1++;
				}
				else
					error("Unknown condition");
			}
		}
		double discordance = N_discord / double(N_common_called);
		diffsites << "\t" << N_common_called << "\t" << N_discord << "\t" << discordance;
		diffsites << endl;
	}
	diffsites.close();
}
Example #7
0
void vcf_file::output_discordance_by_indv(const string &output_file_prefix, vcf_file &diff_vcf_file)
{
	printLOG("Outputting Discordance By Individual...\n");
	map<pair<string, int>, pair<int, int> > CHROMPOS_to_filepos_pair;
	map<pair<string, int>, pair<int, int> >::iterator CHROMPOS_to_filepos_pair_it;
	return_site_union(diff_vcf_file, CHROMPOS_to_filepos_pair);

	map<string, pair< int, int> > combined_individuals;
	map<string, pair< int, int> >::iterator combined_individuals_it;
	return_indv_union(diff_vcf_file, combined_individuals);

	map<string, pair<int, int> > indv_sums;

	string vcf_line, CHROM;
	int POS;
	int s1, s2, indv1, indv2;

	for (CHROMPOS_to_filepos_pair_it=CHROMPOS_to_filepos_pair.begin(); CHROMPOS_to_filepos_pair_it != CHROMPOS_to_filepos_pair.end(); ++CHROMPOS_to_filepos_pair_it)
	{
		CHROM = CHROMPOS_to_filepos_pair_it->first.first;
		POS = CHROMPOS_to_filepos_pair_it->first.second;

		s1 = CHROMPOS_to_filepos_pair_it->second.first;
		s2 = CHROMPOS_to_filepos_pair_it->second.second;

		vcf_entry e1(N_indv);
		vcf_entry e2(diff_vcf_file.N_indv);

		// Read entries from file (if available)
		if (s1 != -1)
		{
			get_vcf_entry(s1, vcf_line);
			e1.reset(vcf_line);
		}

		if (s2 != -1)
		{
			diff_vcf_file.get_vcf_entry(s2, vcf_line);
			e2.reset(vcf_line);
		}

		e1.parse_basic_entry(true);
		e2.parse_basic_entry(true);

		// Set the reference to the non-missing entry (if available)
		string REF = e1.get_REF();
		string REF2 = e2.get_REF();
		if (REF == "N")
			REF = REF2;
		if (REF2 == "N")
			REF2 = REF;

		if (REF.size() != REF2.size())
		{
			warning("REF sequences at " + CHROM + ":" + int2str(POS) + " are not comparable. Skipping site");
			continue;
		}

		if ((REF != REF2) && (REF2 != "N") && (REF != "N"))
			warning("Non-matching REF " + CHROM + ":" + int2str(POS) + " " + REF + "/" + REF2);

		// Do the alternative alleles match?
		string ALT, ALT2;
		ALT = e1.get_ALT();
		ALT2 = e2.get_ALT();

		bool alleles_match = (ALT == ALT2) && (REF == REF2);
		e1.parse_full_entry(true);
		e1.parse_genotype_entries(true);

		e2.parse_full_entry(true);
		e2.parse_genotype_entries(true);

		pair<string, string> genotype1, genotype2;
		pair<int,int> geno_ids1, geno_ids2;
		pair<string, string> missing_genotype(".",".");
		pair<int, int> missing_id(-1,-1);

		for (combined_individuals_it=combined_individuals.begin(); combined_individuals_it!=combined_individuals.end(); ++combined_individuals_it)
		{
			indv1 = combined_individuals_it->second.first;
			indv2 = combined_individuals_it->second.second;

			if ((indv1 == -1) || (indv2 == -1))
				continue;	// Individual not found in one of the files

			if (alleles_match)
			{	// Alleles match, so can compare ids instead of strings
				e1.get_indv_GENOTYPE_ids(indv1, geno_ids1);
				e2.get_indv_GENOTYPE_ids(indv2, geno_ids2);

				if ((geno_ids1 != missing_id) && (geno_ids2 != missing_id))
				{
					indv_sums[combined_individuals_it->first].first++;
					if (((geno_ids1.first == geno_ids2.first) && (geno_ids1.second == geno_ids2.second)) ||
						((geno_ids1.first == geno_ids2.second) && (geno_ids1.second == geno_ids2.first)) )
					{	// Match
						// Don't do anything
					}
					else
					{	// Mismatch
						indv_sums[combined_individuals_it->first].second++;
					}
				}
				else if ((geno_ids1 == missing_id) && (geno_ids2 == missing_id))
				{	// Both missing
					// Don't do anything.
				}
				else if (geno_ids1 != missing_id)
				{	// Genotype 1 is not missing, genotype 2 is.
					// Don't do anything.
				}
				else if (geno_ids2 != missing_id)
				{	// Genotype 2 is not missing, genotype 1 is.
					// Don't do anything.
				}
				else
					error("Unknown condition");
			}
			else
			{	// Alleles don't match, so need to be more careful and compare strings
				e1.get_indv_GENOTYPE_strings(indv1, genotype1);
				e2.get_indv_GENOTYPE_strings(indv2, genotype2);

				if ((genotype1 != missing_genotype) && (genotype2 != missing_genotype))
				{	// No missing data
					indv_sums[combined_individuals_it->first].first++;
					if (((genotype1.first == genotype2.first) && (genotype1.second == genotype2.second)) ||
						((genotype1.first == genotype2.second) && (genotype1.second == genotype2.first)) )
					{	// Match
						// Don't do anything
					}
					else
					{	// Mismatch
						indv_sums[combined_individuals_it->first].second++;
					}
				}
				else if ((genotype1 == missing_genotype) && (genotype2 == missing_genotype))
				{	// Both missing
					// Don't do anything
				}
				else if (genotype1 != missing_genotype)
				{	// Genotype 1 is not missing, genotype 2 is.
					// Don't do anything
				}
				else if (genotype2 != missing_genotype)
				{	// Genotype 2 is not missing, genotype 1 is.
					// Don't do anything
				}
				else
					error("Unknown condition");
			}
		}
	}

	string output_file = output_file_prefix + ".diff.indv";
	ofstream out(output_file.c_str());
	if (!out.is_open())
		error("Could not open Sites Differences File: " + output_file, 3);
	out << "INDV\tN_COMMON_CALLED\tN_DISCORD\tDISCORDANCE" << endl;

	int N, N_discord;
	double discordance;
	for (combined_individuals_it=combined_individuals.begin(); combined_individuals_it!=combined_individuals.end(); ++combined_individuals_it)
	{
		out << combined_individuals_it->first;
		N = indv_sums[combined_individuals_it->first].first;
		N_discord = indv_sums[combined_individuals_it->first].second;
		discordance = N_discord / double(N);
		out << "\t" << N << "\t" << N_discord << "\t" << discordance << endl;
	}

	out.close();
}