Exemplo n.º 1
0
/*---------------------------------------------------------------
  Name     : SetJoystick
  Argument : void
  Return   : 0 (succeed), other (failed)
               1 : the joystick device (PORT) could not be opened
               2 : allocation of the axis buffer failed
               3 : allocation of the button buffer failed
               4 : the number of axes could not be queried
               5 : the number of buttons could not be queried
  About    : Set up the Joystick controller.
             Opens PORT read-only, queries the axis/button counts and
             the device name, allocates the global axis/button buffers
             and switches the descriptor to non-blocking mode.
             On every failure path the resources acquired so far are
             released again (descriptor closed, buffers freed).
  Version  : Ver 1.0
  Date     : 2014/03/21
  Author   : Ryodo Tanaka (Kyushu Institute of Technology)
----------------------------------------------------------------- */
int SetJoystick(void)
{
  int flags;

  //File open
  if( (JSfd=open(PORT, O_RDONLY)) == -1){
    printLOG("File Open JoyStick");
    return 1;
  }

  //Get JoyStick information
  //The two counts size the buffers below, so a failed query is fatal
  if( ioctl(JSfd, JSIOCGAXES, &num_of_axis) == -1 ){
    printLOG("ioctl JoyStick axis");
    close(JSfd);
    JSfd = -1;
    return 4;
  }
  if( ioctl(JSfd, JSIOCGBUTTONS, &num_of_buttons) == -1 ){
    printLOG("ioctl JoyStick button");
    close(JSfd);
    JSfd = -1;
    return 5;
  }
  //The name is only used for the message below : best effort, not checked
  ioctl(JSfd, JSIOCGNAME(80), &JSname);

  //Get data space for axis & buttons
  axis = (int*)calloc(num_of_axis, sizeof(int));
  if(!axis){
    printLOG("calloc JoyStick axis");
    close(JSfd);
    JSfd = -1;
    return 2;
  }
  button = (char*)calloc(num_of_buttons, sizeof(char));
  if(!button){
    printLOG("calloc JoyStick button");
    free(axis);
    axis = NULL;
    close(JSfd);
    JSfd = -1;
    return 3;
  }

  //Use non-blocking mode
  //Add O_NONBLOCK to the current status flags instead of replacing them
  flags = fcntl(JSfd, F_GETFL, 0);
  if(flags == -1){
    flags = 0;
  }
  fcntl(JSfd, F_SETFL, flags | O_NONBLOCK);

  printf("%s\tis Connected ...\n", JSname);
  
  return 0;

}
Exemplo n.º 2
0
// Open the VCF input named by `filename`.
//
// Uncompressed mode (`compressed` is false): opens `vcf_in` as a text stream
// and warns when the name ends in ".gz", since that usually means the wrong
// input option was used.
// Compressed mode: allocates the 1 MiB line buffer `gz_readbuffer` and opens
// the file through zlib (`gzvcf_in`); with zlib >= 1.2.4 the internal zlib
// buffer is enlarged as well.
//
// Either mode reports a failed open through error().
void vcf_file::open()
{
	if (!compressed)
	{
		// Only inspect the suffix when the name is long enough: substr() with a
		// start index of size()-3 wraps around (unsigned) for names shorter than
		// three characters and throws std::out_of_range.
		const bool ends_in_gz = (filename.size() >= 3)
				&& (filename.compare(filename.size()-3, 3, ".gz") == 0);
		if (ends_in_gz)
		{
			warning("Filename ends in '.gz'. Shouldn't you be using --gzvcf?\n");
		}
		vcf_in.open(filename.c_str(), ios::in);
		if (!vcf_in.is_open())
			error("Could not open VCF file: " + filename, 0);
	}
	else
	{
		gzMAX_LINE_LEN = 1024*1024;
		gz_readbuffer = new char[gzMAX_LINE_LEN];
		gzvcf_in = gzopen(filename.c_str(), "rb");
		if (gzvcf_in == NULL)
			error("Could not open GZVCF file: " + filename, 0);
#ifdef ZLIB_VERNUM
		string tmp(ZLIB_VERSION);
		printLOG("Using zlib version: " + tmp + "\n");
	#if (ZLIB_VERNUM >= 0x1240)
		gzbuffer(gzvcf_in, gzMAX_LINE_LEN); // Included in zlib v1.2.4 and makes things MUCH faster
	#else
		printLOG("Versions of zlib >= 1.2.4 will be *much* faster when reading zipped VCF files.\n");
	#endif
#endif
	}
}
// Report a fatal error together with two diagnostic values and terminate.
//
// Logs "Error:<err_msg>" and then a "Value1=<value1> Value2=<value2>" line
// through printLOG, and ends the process with exit(error_code).
void error(string err_msg, double value1, double value2, int error_code)
{
	const string headline = "Error:" + err_msg + "\n";
	printLOG(headline);

	stringstream details;
	details << "Value1=" << value1;
	details << " Value2=" << value2;
	details << endl;
	printLOG(details.str());

	exit(error_code);
}
Exemplo n.º 4
0
// Log a warning and keep a running total of how many have been issued.
//
// The message is logged with a trailing newline. The total is kept in a
// function-local static; as soon as it exceeds 1000 the run is aborted
// through error() with code 10.
void counted_warning(string err_msg)
{
	static unsigned int warning_count = 0;

	err_msg += "\n";
	printLOG(err_msg);

	if (++warning_count > 1000)
		error("Stopping at 1000 entry-level warnings", 10);
}
Exemplo n.º 5
0
// Write "<output_file_prefix>.diff.indv_in_files": one line per individual
// found in this file and/or diff_vcf_file, tagged "B" (both files), "1" (only
// this file) or "2" (only diff_vcf_file). The per-category totals are logged
// once the file has been written.
//
// return_indv_union() fills the map; an index of -1 in the pair marks
// "not present in that file".
void vcf_file::output_indv_in_files(const string &output_file_prefix, vcf_file &diff_vcf_file)
{
	printLOG("Comparing individuals in VCF files...\n");

	const string output_file = output_file_prefix + ".diff.indv_in_files";
	ofstream out(output_file.c_str());
	if (!out.is_open())
		error("Could not open Indv Differences File: " + output_file, 3);
	out << "INDV\tFILES" << endl;

	// Individual name -> (index in this file, index in the other file)
	typedef map<string, pair< int, int> > indv_union_map;
	indv_union_map combined_individuals;
	return_indv_union(diff_vcf_file, combined_individuals);
	unsigned int N_combined_indv = combined_individuals.size();

	unsigned int n_in_both = 0;
	unsigned int n_only_first = 0;
	unsigned int n_only_second = 0;

	for (indv_union_map::iterator it = combined_individuals.begin(); it != combined_individuals.end(); ++it)
	{
		const bool in_first = (it->second.first != -1);
		const bool in_second = (it->second.second != -1);

		// Column text for this individual; stays null when neither file has it.
		const char *file_tag = 0;
		if (in_first && in_second)
		{
			n_in_both++;
			file_tag = "\tB";
		}
		else if (in_first)
		{
			n_only_first++;
			file_tag = "\t1";
		}
		else if (in_second)
		{
			n_only_second++;
			file_tag = "\t2";
		}
		else
			error("Unhandled case");

		// endl (flush) is kept on purpose: error() exits without running this
		// stream's destructor, so buffered rows would otherwise be lost.
		if (file_tag != 0)
			out << it->first << file_tag << endl;
	}
	out.close();

	printLOG("N_combined_individuals:\t" + int2str(N_combined_indv) + "\n");
	printLOG("N_individuals_common_to_both_files:\t" + int2str(n_in_both) + "\n");
	printLOG("N_individuals_unique_to_file1:\t" + int2str(n_only_first) + "\n");
	printLOG("N_individuals_unique_to_file2:\t" + int2str(n_only_second) + "\n");
}
// Log a warning only the first time a given message text is seen.
//
// Messages already reported are remembered in a function-local static set;
// repeats of the same text are silently dropped.
void one_off_warning(string err_msg)
{
	static set<string> previous_warnings;

	// insert() reports through .second whether the text was new.
	const bool first_occurrence = previous_warnings.insert(err_msg).second;
	if (first_occurrence)
		printLOG(err_msg + "\n");
}
Exemplo n.º 7
0
/*-----------------------------------------------------------
  Name     : SetLRFShow
  Argument : int id (LRF ID)
               LRF_ALL_ID        : set up every LRF (0 .. NUM_OF_LRF-1)
               0 .. NUM_OF_LRF-1 : set up this single LRF
  Return   : 0 (success) other(failed) 
               1 : id is neither LRF_ALL_ID nor a valid LRF index
             NOTE: a failing cvCreateImage() does not return; the
             process is terminated with exit(1).
  About    : Setup for LRFShow 
             Creates one LRF_WINDOW_SIZE x LRF_WINDOW_SIZE, 8-bit,
             3-channel image per requested LRF and stores it in img[].
  Version  : Ver 1.0
  Date     : 2014/05/25
  Author   : Ryodo Tanaka (Kyushu Institute of Technology)
------------------------------------------------------------*/
int SetLRFShow(const int id)
{
  int i;
  int first, last;

  //Translate the request into an inclusive index range
  if(id == LRF_ALL_ID){
    first = 0;
    last  = NUM_OF_LRF - 1;
  }
  else if(id >= 0 && id < NUM_OF_LRF){
    first = id;
    last  = id;
  }
  else {
    //Reject ids that would index outside of img[]
    printLOG("invalid LRF ID LRFShow");
    return 1;
  }

  for(i=first; i<=last; i++){
    img[i] = cvCreateImage(cvSize(LRF_WINDOW_SIZE,LRF_WINDOW_SIZE), IPL_DEPTH_8U, 3);
    if(!img[i]){
      printLOG("cvCreateImage() LRFShow");
      exit(1);
    }
  }

  return 0;
}
Exemplo n.º 8
0
// Write the retained part of this VCF to "<output_file_prefix>.recode.vcf".
//
// Output consists of every stored meta line, the column header (with a FORMAT
// column and the names of the included individuals when N_indv > 0), and then
// each entry whose include_entry flag is set, re-parsed and printed through
// vcf_entry::print with the given INFO selection.
//
//   INFO_to_keep  - passed through to vcf_entry::print
//   keep_all_INFO - passed through to vcf_entry::print
void vcf_file::print(const string &output_file_prefix, const set<string> &INFO_to_keep, bool keep_all_INFO)
{
	printLOG("Outputting VCF file... ");

	const string output_file = output_file_prefix + ".recode.vcf";
	ofstream out(output_file.c_str());
	if (!out.is_open())
		error("Could not open VCF Output File: " + output_file, 3);

	// Meta information lines
	for (unsigned int m=0; m<meta.size(); m++)
		out << meta[m] << endl;

	// Column header
	out << "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO";
	if (N_indv > 0)
		out << "\tFORMAT";
	for (unsigned int i=0; i<N_indv; i++)
	{
		if (!include_indv[i])
			continue;
		out << "\t" << indv[i];
	}
	out << endl;

	// Data lines
	string vcf_line;
	for (unsigned int s=0; s<N_entries; s++)
	{
		if (!include_entry[s])
			continue;

		get_vcf_entry(s, vcf_line);
		vcf_entry e(N_indv, vcf_line);
		e.parse_basic_entry(true, true, true);
		e.parse_full_entry(true);
		e.parse_genotype_entries(true,true,true,true);
		e.print(out, INFO_to_keep, keep_all_INFO, include_indv, include_genotype[s]);
	}

	out.close();
	printLOG("Done\n");
}
Exemplo n.º 9
0
// Release all four sample buffers (_monoral8, _monoral16, _stereo8,
// _stereo16) and reset the pointers to NULL, so the function can be called
// repeatedly.
//
// Always returns 0. free() cannot throw and accepts a null pointer, so no
// exception handling or per-buffer null check is needed around it.
int WAV::freeWAVData()
{
    // Trace output exists for the 8-bit mono buffer only.
    if (_monoral8 != (Monoral8*) NULL)
    {
        std::cout << "free monoral8" << std::endl;
    }

    free(_monoral8);
    free(_monoral16);
    free(_stereo8);
    free(_stereo16);

    //_dataSize=0;
    //_sampleCount=0;
    _monoral8 = (Monoral8*) NULL;
    _monoral16 = (Monoral16*) NULL;
    _stereo8 = (Stereo8*) NULL;
    _stereo16 = (Stereo16*) NULL;
    return 0;
}
Exemplo n.º 10
0
// Call LD blocks chromosome by chromosome and write them to
// <par::output_file_name>.blocks and <par::output_file_name>.blocks.det.
//
// For every chromosome that has an entry in `scaffold`, each marker pair
// between the scaffold's lstart and lstop indices is scored with
// PairwiseLinkage (calculateLD / calculateCI). Pairs whose confidence bounds
// pass the "strong LD" cut-offs become candidate block end-points. A candidate
// is accepted when neither end-point already belongs to an accepted block and
// more than informFrac of its counted (strong + recombinant) marker pairs are
// strong.
//
// Parameters null1 and null2 are not read anywhere in the body.
// Returns an empty map (see the end of the function); the called blocks are
// only reported through the two output files.
// NOTE(review): the cut-off constants look like the confidence-interval block
// definition used by Haploview (Gabriel et al.) -- confirm before citing.
map<Range,vector<int> > Plink::mkBlks(int null1, int null2 )
{
  

  // First SNP, vector of SNPs (inc. first)
  
  map< int, vector<int> > blocks;
  
  
  // Some constants
  // (cutLowCIVar and maxDist are indexed by the number of markers in a
  //  candidate block; fourGameteCutoff is not referenced below)
  
  const double cutHighCI = 0.98;
  const double cutLowCI = 0.70;
  const double cutLowCIVar [5] = {0,0,0.80,0.50,0.50};
  const double maxDist [5] = {0,0,20000,30000,1000000};
  const double recHighCI = 0.90;
  const double informFrac = 0.95;
  const double fourGameteCutoff = 0.01;
  const double mafThresh = 0.05;
  
  // Set to skip SNPs with low MAFs
  // Uses genome-wide reference number: need to allocate for all SNPs here
  
  vector<bool> skipMarker(nl_all,false);
  for (int x = 0; x < nl_all; x++)
    skipMarker[x] = locus[x]->freq < mafThresh;
  
  // Consider each chromosome one at a time; skip X for now
  
  int startChromosome = locus[ 0 ]->chr;
  int finalChromosome = locus[ nl_all - 1 ]->chr;
  
  for (int chr = startChromosome ; chr <= finalChromosome; chr++)
    {

      // Chromosome codes without a scaffold entry are skipped
      if ( scaffold.find(chr) == scaffold.end() )
	continue;

      // First and last marker index of this chromosome (both inclusive)
      int fromPosition = scaffold[chr].lstart;
      int toPosition = scaffold[chr].lstop;
      
      // nsnps is computed but not read again in this function
      int nsnps = toPosition - fromPosition + 1;
      

      /////////////////////////////////////////////////////////////////////////
      // Make a list of marker pairs in "strong LD", sorted by distance apart 
      
      set<LDPair,Pair_cmp> strongPairs;
      map<int2,DPrime> dpStore;
      
      // These three counters are shadowed by the per-candidate counters
      // declared inside the block-construction loop below and are not
      // otherwise used
      int numStrong = 0; 
      int numRec = 0; 
      int numInGroup = 0;
      
      // Each pair of markers
      
      for (int x = fromPosition; x < toPosition; x++)
	{

	  // Progress display on stderr (overwritten in place via '\r')
 	  if ( ! par::silent )
 	    {
	      std::cerr << "Chromosome " <<  locus[x]->chr
			<< ", position " << locus[x]->bp/1000000.0 
			<< "Mb                \r";
	    }
	  
	  for (int y = x+1; y <= toPosition; y++)
	    {

	      if ( locus[x]->chr != locus[y]->chr ) 
		continue;
	      
	      // NOTE(review): a base-pair difference is compared against a
	      // parameter whose name says kb -- confirm the unit it is stored in
	      if ( ( locus[y]->bp - locus[x]->bp ) > par::disp_r_window_kb )
		{
		  continue;
		}

	      if ( locus[x]->freq == 0 || locus[y]->freq == 0 )
		continue;
	      
	      PairwiseLinkage thisPair(x,y);
	      thisPair.calculateLD();
	      thisPair.calculateCI();
	      
	      double lod = thisPair.lod;
	      double lowCI = thisPair.dp_lower;
	      double highCI = thisPair.dp_upper;
	      
	      // Cache the result so that the block test below can reuse it
	      int2 t(x,y);
	      DPrime d;
	      d.dp = thisPair.dp;
	      d.dpl = lowCI;
	      d.dpu = highCI;
	      d.lod = lod;
	      dpStore.insert( make_pair( t,d ) );
	      
	      // Is this pair in strong LD?
	      if (lod < -90) continue; //missing data
	      
	      if (highCI < cutHighCI || lowCI < cutLowCI) 
		continue; //must pass "strong LD" test
	
	      // Store this pair
	      LDPair p(x,y, abs( locus[x]->bp - locus[y]->bp ) );
	      
	      
	      strongPairs.insert( p );

	    }
	}

      
      // Now we have a list of SNPs in strong LD within this region
      // Now construct blocks based on this
      
      // Marker indices that already belong to an accepted block
      set<int> used;
      
      // #blocks:
      // (blockArray and cnt are declared but not used below)
      vector<vector<int> > blockArray;
      
      int cnt = 0;
      
      // Candidates are visited in reverse Pair_cmp order
      // NOTE(review): presumably that is widest pair first -- confirm
      // against the definition of Pair_cmp
      for ( set<LDPair>::reverse_iterator i = strongPairs.rbegin();
	    i != strongPairs.rend();
	    ++i )
	{

	  // Per-candidate counters (these shadow the outer declarations)
	  int numStrong = 0; 
	  int numRec = 0; 
	  int numInGroup = 0;
	  
	  vector<int> thisBlock;
	  
	  int first = i->s1; 
	  int last = i->s2;
	  long sep = i->dist;
	  

	  // See if this block overlaps with another:
	  
	  if ( used.find(first) != used.end() 
	       || used.find(last)  != used.end() ) 
	    {	      
	      continue;
	    }
	  
	  // Next, count the number of markers in the block.
	  // (nb. assume all SNPs belong)
	  
	  for (int x = first; x <=last ; x++)
	    {
	      if( !skipMarker[x] ) 
		numInGroup++;
	    }


	  // Skip it if it is too long in bases for it's size in markers
	  // (numInGroup < 4 keeps the maxDist index within 0..3)
	  if (numInGroup < 4 && sep > maxDist[numInGroup]) 
	    {
	      continue;
	    }
	  
	  // Add first SNP
	  
	  thisBlock.push_back( first );

	  // Test block: requires 95% of informative markers to be "strong"

	  for (int y = first+1; y <= last; y++)
	    {
	      if (skipMarker[y]) 
		{
		  continue;
		}
	      
	      thisBlock.push_back(y);
	      
	      
	      //loop over columns in row y
	      
	      for (int x = first; x < y; x++)
		{
		  
		  if (skipMarker[x]) 
		    continue;
		  
		  double lod; 
		  double lowCI; 
		  double highCI;
		  
		  map<int2,DPrime>::iterator l = dpStore.find( int2(x,y) );
		  
		  if ( l == dpStore.end() ) 
		    {
		      // Recalculate
		      // (the pair was skipped by one of the filters in the
		      //  pairwise scan above, so nothing was cached for it)
		      PairwiseLinkage thisPair(x,y);
		      thisPair.calculateLD();
		      thisPair.calculateCI();
		      
		      lod = thisPair.lod;
		      lowCI = thisPair.dp_lower;
		      highCI = thisPair.dp_upper;
		    }
		  else
		    {
		      // Get the right bits
		      
		      lod = l->second.lod;
		      lowCI = l->second.dpl;
		      highCI = l->second.dpu;
		    }
		  
		  
		  // Monomorphic marker error
		  if ( lod < -90)
		    continue;   
		  
		  
		  // Skip bad markers
		  if ( lod == 0 && lowCI == 0 && highCI == 0)
		    continue; 
		  
		  // For small blocks use different CI cutoffs
		  // (numInGroup < 5 keeps the cutLowCIVar index within 0..4)
		  
		  if (numInGroup < 5)
		    {
		      if (lowCI > cutLowCIVar[numInGroup] && highCI >= cutHighCI) 
			numStrong++;
		    }
		  else
		    {
		      if (lowCI > cutLowCI &&  highCI >= cutHighCI) 
			numStrong++; //strong LD
		    }
		  
		  if (highCI < recHighCI) 
		    numRec++; //recombination
		  
		}
	    }
	  
	  
	  // Change the definition somewhat for small blocks
	  // (minimum number of counted pairs: 6, 3 or 1 depending on size)
	  
	  if (numInGroup > 3)
	    {
	      if (numStrong + numRec < 6) 
		{
		  continue;
		}
	    }
	  else if (numInGroup > 2)
	    {
	      if (numStrong + numRec < 3) 
		{
		  continue;
		}
	    }
	  else
	    {
	      if (numStrong + numRec < 1) 
		{
		  continue;
		}
	    }
	  

	  // If this qualifies as a block, add to the block list, but in
	  // order by first marker number:
	  // (the checks above guarantee numStrong + numRec >= 1 here, so the
	  //  division cannot be by zero)
	  
	  if ( (double)numStrong/(double)(numStrong + numRec) > informFrac)
	    { 
	      blocks.insert( make_pair( first , thisBlock ));  
	      
	      // Track that these SNPs belong to a block
	      for (int u = first; u <= last; u++)
		used.insert(u);
	    }
	  
	  
	}
      
      
      // Next chromosome
    }


  // Terminate the '\r' progress line
  if ( ! par::silent )
    cerr << "\n";

  map<int,vector<int> >::iterator j = blocks.begin();

  printLOG(int2str( blocks.size() ) 
	   + " blocks called, writing list to [ " 
	   + par::output_file_name + ".blocks ]\n");
  ofstream O1( (par::output_file_name+".blocks").c_str() , ios::out );
  
  printLOG("Writing extra block details to [ " + 
	   par::output_file_name + ".blocks.det ]\n");
  ofstream O2( (par::output_file_name+".blocks.det").c_str() , ios::out );

  O2 << setw(4) << "CHR" << " " 
     << setw(12) << "BP1" << " "
     << setw(12) << "BP2" << " "
     << setw(12) << "KB" << " "
     << setw(6) << "NSNPS" << " "
     << setw(4) << "SNPS" << "\n";

  // One line per block in each file: .blocks gets "* name name ...",
  // .blocks.det gets chromosome, first/last bp, length in kb, marker count
  // and the '|'-separated marker names
  // NOTE(review): marker data is read through PP->locus here but through
  // locus above -- presumably the same array; confirm
  while ( j != blocks.end() )
    {
      O1 << "*";
      vector<int> & b = j->second;
      for (int k=0; k<b.size(); k++)
	O1 << " " << PP->locus[b[k]]->name;
      O1 << "\n";
      
      O2 << setw(4) << PP->locus[b[0]]->chr << " " 
	 << setw(12) << PP->locus[b[0]]->bp << " "
	 << setw(12) << PP->locus[b[b.size()-1]]->bp << " "
	 << setw(12) << (double)(PP->locus[b[b.size()-1]]->bp - PP->locus[b[0]]->bp + 1)/1000.0 << " "
	 << setw(6) << b.size() << " ";
      for (int k=0; k<b.size(); k++)
	{
	  if ( k>0 )
	    O2 << "|" << PP->locus[b[k]]->name;
	  else
	    O2 << PP->locus[b[k]]->name;
	}
      O2 << "\n";

      ++j;

    }
  

  O1.close();
  O2.close();
  

  // List of blocks created here
  // (dummy; not used)

  map<Range,vector<int> > blocks0;
  return blocks0;
  
}
Exemplo n.º 11
0
void vcf_file::output_sites_in_files(const string &output_file_prefix, vcf_file &diff_vcf_file)
{
	printLOG("Comparing sites in VCF files...\n");
	map<pair<string, int>, pair<int, int> > CHROMPOS_to_filepos_pair;
	map<pair<string, int>, pair<int, int> >::iterator CHROMPOS_to_filepos_pair_it;
	return_site_union(diff_vcf_file, CHROMPOS_to_filepos_pair);

	string vcf_line;
	string CHROM;
	int POS;

	string output_file = output_file_prefix + ".diff.sites_in_files";
	ofstream sites_in_files(output_file.c_str());
	sites_in_files << "CHROM\tPOS\tIN_FILE\tREF\tALT1\tALT2" << endl;

	int s1, s2;
	int N_common_SNPs = 0, N_SNPs_file1_only=0, N_SNPs_file2_only=0;
	for (CHROMPOS_to_filepos_pair_it=CHROMPOS_to_filepos_pair.begin(); CHROMPOS_to_filepos_pair_it!=CHROMPOS_to_filepos_pair.end(); ++CHROMPOS_to_filepos_pair_it)
	{
		s1 = CHROMPOS_to_filepos_pair_it->second.first;
		s2 = CHROMPOS_to_filepos_pair_it->second.second;

		CHROM = CHROMPOS_to_filepos_pair_it->first.first;
		POS = CHROMPOS_to_filepos_pair_it->first.second;

		vcf_entry e1(N_indv);
		vcf_entry e2(diff_vcf_file.N_indv);

		// Read entries from file (if available)
		if (s1 != -1)
		{
			get_vcf_entry(s1, vcf_line);
			e1.reset(vcf_line);
		}

		if (s2 != -1)
		{
			diff_vcf_file.get_vcf_entry(s2, vcf_line);
			e2.reset(vcf_line);
		}

		e1.parse_basic_entry(true);
		e2.parse_basic_entry(true);

		// Set the reference to the non-missing entry (if available)
		string REF = e1.get_REF();
		string REF2 = e2.get_REF();
		if ((REF == "N") || (REF == "."))
			REF = REF2;
		if ((REF2 == "N") || (REF2 == "."))
			REF2 = REF;

		if ((REF != REF2) && (REF2 != "N") && (REF != "N") && (REF != ".") && (REF2 != "."))
			warning("Non-matching REF at " + CHROM + ":" + int2str(POS) + " " + REF + "/" + REF2 + ". Diff results may be unreliable.");

		sites_in_files << CHROM << "\t" << POS << "\t";
		if ((s1 != -1) && (s2 != -1))
		{
			N_common_SNPs++;
			sites_in_files << "B";
		}
		else if ((s1 != -1) && (s2 == -1))
		{
			N_SNPs_file1_only++;
			sites_in_files << "1";
		}
		else if ((s1 == -1) && (s2 != -1))
		{
			N_SNPs_file2_only++;
			sites_in_files << "2";
		}
		else
			error("SNP in neither file!?");

		sites_in_files << "\t" << REF << "\t" << e1.get_ALT() << "\t" << e2.get_ALT() << endl;
	}

	sites_in_files.close();

	printLOG("Found " + int2str(N_common_SNPs) + " SNPs common to both files.\n");
	printLOG("Found " + int2str(N_SNPs_file1_only) + " SNPs only in main file.\n");
	printLOG("Found " + int2str(N_SNPs_file2_only) + " SNPs only in second file.\n");
}
Exemplo n.º 12
0
// Read VCF file
//
// Indexes the VCF file and initialises the include masks.
//
// Unless force_write_index is set, the entry index is first read from
// "<filename>.vcfidx". If that is not possible the file is scanned line by
// line: "##" lines go to parse_meta(), the single-"#" header line goes to
// parse_header(), and the file position of every data line is recorded in
// entry_file_locations; the new index is then written out.
//
//   chr         - when non-empty, keep only entries on this chromosome
//   exclude_chr - when non-empty, drop entries on this chromosome
//
// Finally include_indv, include_entry and include_genotype are reset so that
// everything that survived the chromosome filter is included.
void vcf_file::scan_file(const string &chr, const string &exclude_chr, bool force_write_index)
{
	bool filter_by_chr = (chr != "");
	bool exclude_by_chr = (exclude_chr != "");
	string index_filename = filename + ".vcfidx";
	bool could_read_index_file = false;
	if (force_write_index == false)
		could_read_index_file = read_index_file(index_filename);

	if (could_read_index_file == false)
	{
		printLOG("Building new index file.\n");
		// Trackers for the scan only. They are deliberately separate from the
		// CHROM/last_CHROM pair used by the chromosome filter further down,
		// which has to start out with an empty last_CHROM.
		string line, scan_CHROM, scan_last_CHROM = "";
		int POS, last_POS = -1;
		streampos filepos;
		// NOTE(review): peek()'s return type is not visible here; if it returns
		// int, storing it in a char before comparing with EOF is lossy -- confirm.
		char c;
		N_entries=0;
		N_indv = 0;

		while (!feof())
		{
			filepos = get_filepos();
			c = peek();

			if ((c == '\n') || (c == '\r'))
			{	// Blank line
				read_line(line);
				continue;
			}
			else if (c == EOF)
				break;

			if (c == '#')
			{
				read_line(line);
				// Length check first: the line may consist of a lone '#'.
				if ((line.size() > 1) && (line[1] == '#'))
				{	// Meta information
					parse_meta(line);
				}
				else
				{	// Must be header information: #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	(FORMAT	NA00001 NA00002 ... )
					parse_header(line);
				}
			}
			else
			{	// Must be a data line
				read_CHROM_and_POS_and_skip_remainder_of_line(scan_CHROM, POS);
				if (scan_last_CHROM != scan_CHROM)
				{
					printLOG("\tScanning Chromosome: " + scan_CHROM + "\n");
					scan_last_CHROM = scan_CHROM;
					// Positions are only comparable within one chromosome, so
					// forget the last position of the previous chromosome.
					last_POS = -1;
				}
				if (POS == last_POS)
				{
					one_off_warning("\tWarning - file contains entries with the same position. This is not supported by vcftools, and may cause unexpected behaviour.\n");
				}
				last_POS = POS;
				entry_file_locations.push_back(filepos);
				N_entries++;
			}
		}

		write_index_file(index_filename);
	}

	printLOG("File contains " + int2str(N_entries) + " entries and " + int2str(N_indv) + " individuals.\n");
	// Re-parse the stored meta lines from scratch
	vector<string> meta_lines = meta; meta.resize(0);
	for (unsigned int ui=0; ui<meta_lines.size(); ui++)
		parse_meta(meta_lines[ui]);
	has_genotypes = (N_indv > 0);

	bool already_found_required_chr = false;
	bool already_filtered_required_chr = false;
	if ((exclude_by_chr == true) || (filter_by_chr == true))
	{
		printLOG("Filtering by chromosome.\n");
		string CHROM, last_CHROM="";
		for (unsigned int ui=0; ui<N_entries; ui++)
		{
			if (already_found_required_chr == true)
			{	// The wanted chromosome has been passed: drop everything that follows
				printLOG("Skipping Remainder.\n");
				entry_file_locations.erase(entry_file_locations.begin()+ui, entry_file_locations.end());
				break;
			}
			if (already_filtered_required_chr == true)
			{	// The excluded chromosome has been passed: the rest is kept as is
				printLOG("Skipping Remainder.\n");
				break;
			}

			set_filepos(entry_file_locations[ui]);
			read_CHROM_only(CHROM);

			if (last_CHROM != CHROM)
			{
				printLOG("\tChromosome: " + CHROM + "\n");
				if ((filter_by_chr == true) && (last_CHROM == chr))
					already_found_required_chr = true;

				if ((exclude_by_chr == true) && (last_CHROM == exclude_chr))
					already_filtered_required_chr = true;

				last_CHROM = CHROM;
			}
			// Rejected entries are marked with -1 and removed after the loop
			if ((exclude_by_chr == true) && (CHROM == exclude_chr))
			{
				entry_file_locations[ui] = -1;
				continue;
			}
			if ((filter_by_chr == true) && (CHROM != chr))
			{
				entry_file_locations[ui] = -1;
				continue;
			}
		}
		// Sorting moves the -1 markers to the front, where they are popped
		sort(entry_file_locations.begin(), entry_file_locations.end());
		while((entry_file_locations.size() > 0) && (entry_file_locations[0] < 0))
			entry_file_locations.pop_front();

		N_entries = entry_file_locations.size();
		printLOG("Keeping " + int2str(N_entries) + " entries on specified chromosomes.\n");
	}

	include_indv.clear();
	include_indv.resize(N_indv, true);
	include_entry.clear();
	include_entry.resize(N_entries, true);
	include_genotype.clear();
	include_genotype.resize(N_entries, vector<bool>(N_indv, true));
}
// Emit a non-fatal warning: the message is logged through printLOG with a
// newline appended, and execution continues.
void warning(string err_msg)
{
	err_msg += "\n";
	printLOG(err_msg);
}
// Report a fatal error and terminate: logs "Error:<err_msg>" through printLOG
// and ends the process with exit(error_code).
void error(string err_msg, int error_code)
{
	const string log_line = "Error:" + err_msg + "\n";
	printLOG(log_line);
	exit(error_code);
}
Exemplo n.º 15
0
vector_t Plink::glmAssoc(bool print_results, Perm & perm)
{
  
  // The model.cpp functions require a SNP-major structure, if SNP
  // data are being used.  There are some exceptions to this however, 
  // listed below

  if ( par::SNP_major && 
       ! ( par::epi_genebased 
	   || par::set_score 
	   || par::set_step 
	   || par::proxy_glm 
	   || par::dosage_assoc
	   || par::cnv_enrichment_test
	   || par::cnv_glm 
	   || par::score_test 
	   || par::rare_test 
	   || par::gvar ) )
    SNP2Ind();
  


  // Test all SNPs 1 at a time automatically, or is this 
  // a tailored single test?

  int ntests = par::assoc_glm_without_main_snp ? 1 : nl_all;

  vector<double> results(ntests);

  if ( print_results && par::qt && par::multtest )
    tcnt.resize(ntests);

  ofstream ASC;
  if (print_results)  
    {
      string f = par::output_file_name;
      if ( par::bt)
	{
	  f += ".assoc.logistic";
	  printLOG("Writing logistic model association results to [ " 
		   + f + " ] \n");
	}
      else
	{
	  f += ".assoc.linear";
	  printLOG("Writing linear model association results to [ " 
		   + f + " ] \n");
	}

      ASC.open(f.c_str(),ios::out);
      ASC << setw(4) << "CHR" << " " 
	  << setw(par::pp_maxsnp) << "SNP" << " " 
	  << setw(10) << "BP" << " "
	  << setw(4) << "A1" << " "
	  << setw(10) << "TEST" << " "
	  << setw(8) << "NMISS" << " ";
      if ( par::bt && ! par::return_beta )
	ASC << setw(10) << "OR" << " ";
      else
	ASC << setw(10) << "BETA" << " ";
      
      if (par::display_ci)
	ASC << setw(8) << "SE" << " "
	    << setw(8) << string("L"+dbl2str(par::ci_level*100)) << " "
	    << setw(8) << string("U"+dbl2str(par::ci_level*100)) << " ";
      
      ASC << setw(12) << "STAT" << " " 
	  << setw(12) << "P" << " " 	  
	  << "\n";
      ASC.precision(4);
    }
  
  
  /////////////////////////////
  // Determine sex distribution

  int nmales = 0, nfemales = 0;
  for (int i=0; i<n; i++)
    if ( ! sample[i]->missing )
      {
	if ( sample[i]->sex )
	  nmales++;
	else
	  nfemales++;
      }
  
  bool variationInSex = nmales > 0 && nfemales > 0;

  
  //////////////////////////////////////////
  // Iterate over each locus, or just once

  for (int l=0; l<ntests; l++)
    {	

      // Skip possibly (in all-locus mode)
      
      if ( par::adaptive_perm && 
	   ( ! par::assoc_glm_without_main_snp ) && 
	   ( ! perm.snp_test[l]) )
	continue;
      

      //////////////////////////////////////////////////////////
      // X-chromosome, haploid?
      // xchr_model 0: skip non-autosomal SNPs
      
      bool X=false;
      bool automaticSex=false;

      if ( ! par::assoc_glm_without_main_snp )
	{
	  if ( par::xchr_model == 0 )
	    {
	      if ( par::chr_sex[locus[l]->chr] ||
		   par::chr_haploid[locus[l]->chr] )
		continue;
	    }
	  else 
	    if (par::chr_sex[locus[l]->chr]) 
	      X=true;
	}
      

      //////////////////////////////////////////////////////////
      // A new GLM
      
      Model * lm;

       
      //////////////////////////////////////////////////////////
      // Linear or logistic?

      if (par::bt)
	{
	  LogisticModel * m = new LogisticModel(this);
	  lm = m;
	}
      else
	{
	  LinearModel * m = new LinearModel(this);
	  lm = m;
	}
      
  
      //////////////////////////////////////////////////////////
      // A temporary fix

      if ( par::dosage_assoc || 
	   par::cnv_enrichment_test || 
	   par::cnv_glm ||
	   par::score_test || 
	   par::set_score || 
	   par::proxy_glm || 
	   par::gvar || 
	   par::rare_test ) 
 	lm->hasSNPs(false);


      //////////////////////////////////////////////////////////
      // Set missing data

      lm->setMissing();


      //////////////////////////////////////////////////////////
      // Set genetic model

      if ( par::glm_dominant )
	lm->setDominant();
      else if ( par::glm_recessive || par::twoDFmodel_hethom )
	lm->setRecessive();
      
      string mainEffect = "";
      
      bool genotypic = false;

      /////////////////////////////////////////////////
      // Main SNP
      
      if ( ! par::assoc_glm_without_main_snp ) 
	{
	  
	  genotypic = par::chr_haploid[locus[l]->chr] ? false : par::twoDFmodel ;
	  
	  // Models
	  //               AA    AB    BB
	  // Additive       0     1     2

	  // Dominant       0     1     1
	  // Recessive      0     0     1

	  // Genotypic(1)
	  // Additive       0     1     2
	  // Dom Dev.       0     1     0

	  // Genotypic(2)
	  // Homozygote     0     0     1
	  // Heterozygote   0     1     0

	  
	  ////////////////////////////////////////////////////////////
	  // An additive effect? (or single coded effect) of main SNP

	  if ( par::glm_recessive )
	    mainEffect = "REC";
	  else if ( par::glm_dominant ) 
	    mainEffect = "DOM";
	  else if ( par::twoDFmodel_hethom )
	    mainEffect = "HOM";
	  else
	    mainEffect = "ADD";
	  
	  lm->addAdditiveSNP(l); 
	  lm->label.push_back(mainEffect);
	  
	  
 	  //////////////////////////////////////////////////////////
 	  // Or a 2-df additive + dominance model?
	  
 	  if ( genotypic ) 
 	    {
 	      lm->addDominanceSNP(l);
	      
 	      if ( par::twoDFmodel_hethom )
 		lm->label.push_back("HET");
 	      else
 		lm->label.push_back("DOMDEV");
 	    }
	  
	}


      
      //////////////////////////////////////////////////////////
      // Haplotypes: WHAP test (grouped?)
      
      if ( par::chap_test )
	{
	  
	  // Use whap->group (a list of sets) to specify these, from
	  // the current model (either alternate or null)

	  // Start from second category (i.e. first is reference)
	  for (int h=1; h < whap->current->group.size(); h++)
	    {
	      lm->addHaplotypeDosage( whap->current->group[h] );	    
	      lm->label.push_back( "WHAP"+int2str(h+1) );
	    }
	}


      //////////////////////////////////////////////////////////
      // Haplotypes: proxy test
      
      if ( par::proxy_glm )
	{
	  
	  // Unlike WHAP tests, we now will only ever have two
	  // categories; and a single tested coefficient

	  set<int> t1 = haplo->makeSetFromMap(haplo->testSet);
	  lm->addHaplotypeDosage( t1 );
	  lm->label.push_back( "PROXY" );
	    
	}

      if ( par::test_hap_GLM )
	{
	  // Assume model specified in haplotype sets
	  // Either 1 versus all others, or H-1 versus  
	  // terms for omnibus

	  set<int>::iterator i = haplo->sets.begin();
	  while ( i != haplo->sets.end() )
	    {
	      set<int> t;
	      t.insert(*i);
	      lm->addHaplotypeDosage( t );
	      lm->label.push_back( haplo->haplotypeName( *i ) );
	      ++i;
	    }
	}


      
      //////////////////////////////////////////////////////////
      // Conditioning SNPs?
      // (might be X or autosomal, dealth with automatically)
      
      if (par::conditioning_snps)
	{
	  if ( par::chap_test ) 
	    {
	      for (int c=0; c<conditioner.size(); c++)
		{
		  if ( whap->current->masked_conditioning_snps[c] )
		    {
		      lm->addAdditiveSNP(conditioner[c]); 
		      lm->label.push_back(locus[conditioner[c]]->name);
		    }
		}
	    }
	  else
	    {
	      for (int c=0; c<conditioner.size(); c++)
		{
		  lm->addAdditiveSNP(conditioner[c]); 
		  lm->label.push_back(locus[conditioner[c]]->name);
		}
	    }
	}
      


      //////////////////////////////////////////////////////////      
      // Sex-covariate (necessary for X chromosome models, unless
      // explicitly told otherwise)
      
      if ( ( par::glm_sex_effect || ( X && !par::glm_no_auto_sex_effect ) )
	   && variationInSex )
	{
	  automaticSex = true;
	  lm->addSexEffect();
	  lm->label.push_back("SEX");	  
	}
      
  

      //////////////////////////////////////////////////////////
      // Covariates?

      if (par::clist)
	{
	  for (int c=0; c<par::clist_number; c++)
	    {
	      lm->addCovariate(c);
	      lm->label.push_back(clistname[c]);
	    }
	}


      //////////////////////////////////////////////////////////
      // Interactions

      // addInteraction() takes parameter numbers
      // i.e. not covariate codes
      
      // 0 intercept
      // 1 {A}
      //   {D}
      //   {conditioning SNPs}
      //   {sex efffect}
      //   {covariates}

      // Allow for interactions between conditioning SNPs, sex, covariates, etc
      	
      
      ////////////////////////////////////////
      // Basic SNP x covariate interaction? 
      
      // Currently -- do not allow interactions if no main effect 
      // SNP -- i.e. we need a recoding of things here.

      if ( par::simple_interaction && ! par::assoc_glm_without_main_snp )
	{
	  
	  // A, D and haplotypes by conditioning SNPs, sex, covariates
	  
	  int cindex = 2;
	  if ( genotypic )
	    cindex = 3;
	    	  
	  for (int c=0; c<conditioner.size(); c++)
	    {
	      lm->addInteraction(1,cindex);
	      lm->label.push_back(mainEffect+"xCSNP"+int2str(c+1));	  
	      
	      if ( genotypic )
		{
		  lm->addInteraction(2,cindex);
		  if ( par::twoDFmodel_hethom )
		    lm->label.push_back("HETxCSNP"+int2str(c+1));	  
		  else
		    lm->label.push_back("DOMDEVxCSNP"+int2str(c+1));	  
		}
	      
	      cindex++;
	    }

	  if ( automaticSex )
	    {
	      lm->addInteraction(1,cindex);
	      lm->label.push_back(mainEffect+"xSEX");	  
	      
	      if ( genotypic )
		{
		  
		  lm->addInteraction(2,cindex);
		  if ( par::twoDFmodel_hethom )
		    lm->label.push_back("HETxSEX");
		  else
		    lm->label.push_back("DOMDEVxSEX");
		}
	      
	      cindex++;
	    }
	  for (int c=0; c<par::clist_number; c++)
	    {
	      lm->addInteraction(1,cindex);
	      lm->label.push_back(mainEffect+"x"+clistname[c]);	  
	      
	      if ( genotypic )
		{
		  lm->addInteraction(2,cindex);
		  
		  if ( par::twoDFmodel_hethom )		  
		    lm->label.push_back("HETx"+clistname[c]); 
		  else
		    lm->label.push_back("DOMDEVx"+clistname[c]); 
		}
	      
	      cindex++;
	      
	    }
	}
      


      
      //////////////////////////////
      // Fancy X chromosome models
      
      if ( X && automaticSex && par::xchr_model > 2 )
	{
	  
	  // Interaction between allelic term and sex (i.e. 
	  // allow scale of male effect to vary)
	  
	  int sindex = 2;
	  if ( genotypic )
	    sindex++;
	  sindex += conditioner.size();
	  
	  lm->addInteraction(2,sindex);
	  lm->label.push_back("XxSEX");	  
	  
	  // xchr model 3 : test ADD + XxSEX
	  // xchr model 4 : test ADD + DOM + XxSEX
	}

      

      //////////////////////////////
      // Build design matrix

      lm->buildDesignMatrix();
      

      //////////////////////////////
      // Clusters specified?

      if ( par::include_cluster ) 
	{
	  lm->setCluster();
	}
      
      //////////////////////////////////////////////////
      // Fit linear or logistic model (Newton-Raphson)
      
      lm->fitLM();


      ////////////////////////////////////////
      // Check for multi-collinearity

      lm->validParameters();
      

      ////////////////////////////////////////
      // Obtain estimates and statistic

      if (print_results)
	lm->displayResults(ASC,locus[l]);
	//cout << setw(25) << lm->getVar()[1] << " " << lm->isValid() << " " << realnum(lm->getVar()[1]) << endl; //for test purpose only
      


      ////////////////////////////////////////////////
      // Test linear hypothesis (multiple parameters)

      // Perform if:
      //   automatic 2df genotypic test  ( --genotypic )
      //     OR
      //   sex-tests            ( --xchr-model )
      //     OR
      //   test of everything   ( --test-all )
      //     OR
      //   user has specified user-defined test  ( --tests )
      
      if ( ( genotypic && ! par::glm_user_parameters ) 
	   || par::glm_user_test 
	   || par::test_full_model )
	{

	  vector_t h; // dim = number of fixes (to =0)
	  matrix_t H; // row = number of fixes; cols = np
	  int df;
	  string testname;
	  

	  ////////////////////////////////////////////////
	  // Joint test of all parameters

	  if (par::test_full_model) 
	    {
	      df = lm->getNP() - 1;
	      h.resize(df,0);
	      testname = "FULL_"+int2str(df)+"DF";
	      sizeMatrix(H,df,lm->getNP());
	      for (int i=0; i<df; i++)
		H[i][i+1] = 1;
	    }
	  
	  ////////////////////////////////////////////////
	  // Joint test of user-specified parameters
	  
	  else if (par::glm_user_test) 
	    {
	      df = par::test_list.size();
	      h.resize(df,0);
	      testname = "USER_"+int2str(df)+"DF";
	      sizeMatrix(H,df,lm->getNP());
	      for (int i=0; i<df; i++)
		if ( par::test_list[i]<lm->getNP() )
		  H[i][par::test_list[i]] = 1;
	      
	    }
	  
	  ////////////////////////////////////////////////
	  // Joint test of additive and dominant models
	  
	  else if ( genotypic )
	    {
	      testname = "GENO_2DF";
	      df = 2;
	      h.resize(2,0);
	      sizeMatrix(H,2,lm->getNP());
	      H[0][1] = H[1][2] = 1; 	  
	    }
	  
	  else if ( X && par::xchr_model == 3 )	  
	    {
	      testname = "XMOD_2DF";
	    }


	  ////////////////////////////////////////////////
	  // Joint test of all parameters

	  double chisq = lm->isValid() ? lm->linearHypothesis(H,h) : 0;
	  double pvalue = chiprobP(chisq,df);
	  
	  // If filtering p-values
	  if ( (!par::pfilter) || pvalue <= par::pfvalue ) 
	    {	 
	      
	      ASC << setw(4) << locus[l]->chr << " " 
		  << setw(par::pp_maxsnp) << locus[l]->name << " " 
		  << setw(10) << locus[l]->bp << " "		
		  << setw(4) << locus[l]->allele1 << " " 
		  << setw(10) << testname << " "
		  << setw(8) << lm->Ysize() << " " 
		  << setw(10) << "NA" << " ";
	      
	      if (par::display_ci)
		ASC << setw(8) << "NA" << " " 
		    << setw(8) << "NA" << " "
		    << setw(8) << "NA" << " ";
	      
	      if (lm->isValid() && realnum(chisq) )
		ASC << setw(12) << chisq << " " 
		    << setw(12) << pvalue << "\n"; 
	      else
		ASC << setw(12) << "NA" << " " 
		    << setw(12) << "NA" << "\n"; 
	    }


 	}
    
      
      ////////////////////////////////////////
      // Store statistic (1 df chisq), and p-value
      // if need be ( based on value of testParameter )

      if ( ! par::assoc_glm_without_main_snp )
	results[l] = lm->getStatistic();


      if ( par::qt && print_results && par::multtest )
	tcnt[l] = lm->Ysize() - lm->getNP();
      


      //////////////////////////////////////////////
      // Clear up linear model, if no longer needed
      
      if ( par::chap_test || 
	   par::test_hap_GLM || 
	   par::set_step || 
	   par::set_score || 
	   par::proxy_glm || 
	   par::dosage_assoc || 
	   par::cnv_enrichment_test ||	   
	   par::cnv_glm ||
	   par::score_test ||
	   par::gvar || 	   
	   par::rare_test ) 
	{
	  // Responsibility to clear up in parent routine
	  model = lm; 
	}
      else
	{
	  delete lm;	  
	}

      // Flush output buffer
      ASC.flush();


      // Next SNP
    }

  
 
  
  if (print_results)
    ASC.close();
  
  return results;

}
Exemplo n.º 16
0
vector<double> Plink::calcMantelHaenszel_2x2xK(Perm & perm, bool original)
{

  // Cochran-Mantel-Haenszel 2x2xK test (disease x allele x stratum),
  // evaluated for every SNP.
  //
  //   perm     : when par::adaptive_perm is set, SNPs whose
  //              perm.snp_test[] entry is false are skipped and their
  //              result slot keeps its initial value
  //   original : true on the pass over the unpermuted data; only then
  //              are unclustered individuals flagged as missing and the
  //              ".cmh" report written
  //
  // Returns a vector of nl_all entries holding the CMH statistic of
  // each SNP that was tested.

  // Breslow-Day test is switched off unless there are at least 2 strata
  if ( nk < 2 )
    par::breslowday = false;

  ofstream MHOUT;

  if ( original )
    {

      //////////////////////////////////
      // Anybody without a cluster assignment is treated as having a
      // missing phenotype (done once, on the original data)

      for ( vector<Individual*>::iterator p = sample.begin();
	    p != sample.end(); ++p )
	if ( (*p)->sol < 0 )
	  (*p)->missing = true;

      const string report = par::output_file_name + ".cmh";
      MHOUT.open(report.c_str(),ios::out);

      // Header row
      MHOUT << setw(4) << "CHR" << " "
	    << setw(par::pp_maxsnp) << "SNP" << " "
	    << setw(10) << "BP" << " "
	    << setw(4) << "A1" << " "
	    << setw(8) << "MAF" << " "
	    << setw(4) << "A2" << " "
	    << setw(10) << "CHISQ" << " "
	    << setw(10) << "P" << " "
	    << setw(10) << "OR" << " "
	    << setw(10) << "SE" << " "
	    << setw(10) << string("L"+dbl2str(par::ci_level*100)) << " "
	    << setw(10) << string("U"+dbl2str(par::ci_level*100)) << " ";

      if ( par::breslowday )
	MHOUT << setw(10) << "CHISQ_BD" << " "
	      << setw(10) << "P_BD" << " ";

      MHOUT << "\n";

      MHOUT.precision(4);

      printLOG("Cochran-Mantel-Haenszel 2x2xK test, K = " + int2str( nk) + "\n");

      if ( par::breslowday )
	printLOG("Performing Breslow-Day test of homogeneous odds ratios\n");

      printLOG("Writing results to [ " + report + " ]\n");

      if ( par::breslowday && nk>10 )
	printLOG("** Warning ** Breslow-Day statistics require large N per cluster ** \n");

    }


  // Normal quantile used for the confidence interval of the odds ratio
  const double zt = ltqnorm( 1 - (1 - par::ci_level) / 2  ) ;

  vector<double> results(nl_all);

  int l = 0;
  for ( vector<CSNP*>::iterator s = SNP.begin(); s != SNP.end(); ++s, ++l )
    {

      // Adaptive permutation may already have dropped this SNP
      if ( par::adaptive_perm && ! perm.snp_test[l] )
	continue;

      // Per-stratum allele counts: F/T allele in cases and controls
      vector<double> caseF(nk,0);    // cell 11
      vector<double> caseT(nk,0);    // cell 12
      vector<double> ctrlF(nk,0);    // cell 21
      vector<double> ctrlT(nk,0);    // cell 22

      // Margins
      vector<double> caseTot(nk,0);  // disease
      vector<double> ctrlTot(nk,0);  // no disease
      vector<double> totF(nk,0);     // F allele
      vector<double> totT(nk,0);     // T allele
      vector<double> tot(nk,0);      // all alleles


      /////////////////////////
      // Autosomal or haploid?

      const bool X = par::chr_sex[locus[l]->chr];
      const bool haploid = !X && par::chr_haploid[locus[l]->chr];


      ////////////////////////
      // Consider each person

      vector<bool>::iterator g1 = (*s)->one.begin();
      vector<bool>::iterator g2 = (*s)->two.begin();

      for ( vector<Individual*>::iterator gperson = sample.begin();
	    gperson != sample.end(); ++gperson, ++g1, ++g2 )
	{
	  Individual * pperson = (*gperson)->pperson;

	  const bool bit1 = *g1;
	  const bool bit2 = *g2;

	  // Missing phenotype: contributes nothing
	  if ( pperson->missing )
	    continue;

	  // (true,false) encodes a missing genotype
	  if ( bit1 && ! bit2 )
	    continue;

	  // One allele is counted for haploid markers and for X markers
	  // in individuals with the sex flag set; two alleles otherwise
	  const bool single = haploid || ( X && (*gperson)->sex );
	  const int nAlleles = single ? 1 : 2;

	  // Copies of the F allele carried; in the single-copy case a
	  // clear first bit always counts as one F allele
	  int nF;
	  if ( bit1 )
	    nF = 0;                  // TT
	  else if ( single )
	    nF = 1;
	  else
	    nF = bit2 ? 1 : 2;       // FT het : FF hom

	  const int nT = nAlleles - nF;
	  const int k = pperson->sol;

	  if ( pperson->aff )
	    {
	      caseF[k] += nF;
	      caseT[k] += nT;
	      caseTot[k] += nAlleles;
	    }
	  else
	    {
	      ctrlF[k] += nF;
	      ctrlT[k] += nT;
	      ctrlTot[k] += nAlleles;
	    }

	  totF[k] += nF;
	  totT[k] += nT;
	  tot[k] += nAlleles;

	} // count next individual


      // A stratum is only used when it holds at least 2 allele
      // observations; accumulate the CMH statistic and the pieces of
      // the Mantel-Haenszel odds ratio and its SE over those strata

      double CMH = 0;
      double denom = 0;
      double R = 0;
      double S = 0;
      double v1 = 0, v2 = 0, v3 = 0;

      for (int k=0; k<nk; k++)
	{
	  if ( tot[k] < 2 )
	    continue;

	  const double mean11 = ( totF[k] * caseTot[k] ) / tot[k] ;
	  const double var11 = ( totF[k] * totT[k] * caseTot[k] * ctrlTot[k] )
	    / ( tot[k]*tot[k]*(tot[k]-1) );

	  CMH += caseF[k] - mean11;
	  denom += var11;

	  const double rk = (caseF[k]*ctrlT[k]) / tot[k];
	  const double sk = (caseT[k]*ctrlF[k]) / tot[k];
	  R += rk;
	  S += sk;

	  v1 += (1/tot[k]) * ( caseF[k] + ctrlT[k] ) * rk ;
	  v2 += (1/tot[k]) * ( caseT[k] + ctrlF[k] ) * sk ;
	  v3 += (1/tot[k]) * ( ( caseF[k] + ctrlT[k] ) * sk
			       + ( caseT[k] + ctrlF[k] ) * rk );
	}

      CMH *= CMH;
      CMH /= denom;

      // MH odds ratio & CI
      const double OR = R / S ;

      double SE = ( 1/(2*R*R) )  * v1
	+ (1/(2*S*S)) * v2
	+ (1/(2*R*S)) * v3 ;
      SE = sqrt(SE);

      const double OR_lower = exp( log(OR) - zt * SE );
      const double OR_upper = exp( log(OR) + zt * SE );


      if ( original )
	{

	  const double pvalue = chiprobP(CMH,1);

	  // With p-value filtering on, rows above the threshold (or with
	  // a negative p-value) are not reported
	  const bool suppressed =
	    par::pfilter && ( pvalue > par::pfvalue || pvalue < 0 );

	  if ( ! suppressed )
	    {

	      MHOUT << setw(4) << locus[l]->chr << " "
		    << setw(par::pp_maxsnp) << locus[l]->name << " "
		    << setw(10) << locus[l]->bp << " "
		    << setw(4) << locus[l]->allele1 << " "
		    << setw(8) << locus[l]->freq << " "
		    << setw(4) << locus[l]->allele2 << " ";

	      if ( realnum(CMH) )
		MHOUT << setw(10) << CMH << " "
		      << setw(10) << pvalue << " ";
	      else
		MHOUT << setw(10) << "NA" << " "
		      << setw(10) << "NA" << " ";

	      // OR, SE and CI bounds: each printed as NA when not a real number
	      const double estimate[4] = { OR, SE, OR_lower, OR_upper };
	      for (int e=0; e<4; e++)
		{
		  if ( realnum(estimate[e]) )
		    MHOUT << setw(10) << estimate[e] << " ";
		  else
		    MHOUT << setw(10) << "NA" << " ";
		}


	      // Optional Breslow-Day test of homogeneity of odds ratios
	      if ( par::breslowday )
		{

		  double BDX2 = 0;
		  int df = 0;

		  for (int k=0; k<nk; k++)
		    {
		      if ( tot[k] < 2 )
			continue;

		      df++;

		      const double amax = (caseTot[k] < totF[k]) ? caseTot[k] : totF[k];
		      const double bb = ctrlTot[k] + caseTot[k] * OR - totF[k] * (1-OR);
		      const double determ = sqrt(bb*bb + 4*(1-OR) * OR * caseTot[k] * totF[k]);
		      const double as_plus = ( -bb + determ ) / ( 2 - 2 * OR );
		      const double as_minus = ( -bb - determ ) / ( 2 - 2 * OR );

		      // Take the root that is a feasible cell count
		      const double Astar = as_minus <= amax && as_minus >= 0 ? as_minus : as_plus ;
		      const double Bstar = caseTot[k] - Astar;
		      const double Cstar = totF[k] - Astar;
		      const double Dstar = ctrlTot[k] - totF[k] + Astar;
		      const double Var = 1/(1/Astar + 1/Bstar + 1/Cstar + 1/Dstar);

		      BDX2 += ( (caseF[k] - Astar) * ( caseF[k] - Astar ) ) / Var ;
		    }

		  const double BDp = chiprobP( BDX2 , df-1 );

		  if ( BDp > -1 )
		    MHOUT << setw(10) << BDX2 << " "
			  << setw(10) << BDp << " ";
		  else
		    MHOUT << setw(10) << "NA" << " "
			  << setw(10) << "NA" << " ";

		}

	      MHOUT << "\n";

	    }
	}

      // Store for permutation procedure, based on 2x2xK CMH result
      results[l] = CMH;

      // Next SNP
    }


  if ( original )
    MHOUT.close();

  return results;

}
Exemplo n.º 17
0
void Plink::driverSCREEPI()
{

  ///////////////////////////////
  // Gene-based epistasis
  

  //////////////////////////////////////////
  // Case-control samples only

  affCoding(*this);


  //////////////////////////////////////////
  // SNP-major mode analysis

  if (!par::SNP_major)
    Ind2SNP();
  
  //////////////////////////////////////////
  // Requires that sets have been speciefied
  if (par::set_test) readSet();
  else error("Need to specify genes with --set {filename} when using --genepi\n");

    
  //////////////////
  // SET statistics

  Set S(snpset);


  //////////////////////////////////////////////
  // Prune SET (0-sized sets, MAF==0 SNPs, etc) 

  S.pruneSets(*this);
  
  int ns = snpset.size();
  if (ns < 2)
    error("Need to specify at least two fully valid sets\n");


  int n = 0;
  int ncase = 0;
  
  /////////////////////////////////////////////////////////
  // Prune based on VIF

  string original_outfile = par::output_file_name;

  // Case-control? Prune cases and controls together...
  if (!par::epi_caseonly)
  {   
      printLOG("\nConsidering cases and controls: ");
      setFlags(false);
      vector<Individual*>::iterator person = sample.begin();
      while ( person != sample.end() )
      {
	  if ( ! (*person)->missing )
	  {
	      (*person)->flag = true;
	      n++;
	  }
	  person++;
      }
  
      par::output_file_name += ".all";
      S.pruneMC(*this,false,par::vif_threshold);
      //S.pruneMC(*this,false,1000);
  }

  // Case-only? Prune cases only...
  else
  {
      printLOG("\nConsidering cases: ");
      setFlags(false);
      vector<Individual*>::iterator person = sample.begin();
      while ( person != sample.end() )
      {
	  if ( (*person)->aff && ! (*person)->missing )
	  {
	      (*person)->flag = true;
	      ncase++;
	  }
	  person++;
          n++;
      }

      par::output_file_name += ".case";
      S.pruneMC(*this,false,par::vif_threshold);
      //S.pruneMC(*this,false,1000);
  }

  par::output_file_name = original_outfile;

  // Write finalized set
  ofstream SET1, SET2;
  string f = par::output_file_name + ".all.set.in";
  
  printLOG("Writing combined pruned-in set file to [ " + f + " ]\n");
  SET1.open(f.c_str(),ios::out);

  f = par::output_file_name + ".all.set.out";
  printLOG("Writing combined pruned-out set file to [ " + f + " ]\n");
  SET2.open(f.c_str(),ios::out);

  for (int s=0; s<snpset.size(); s++)
  {
      
      int nss = snpset[s].size();
      
      SET1 << setname[s] << "\n";
      SET2 << setname[s] << "\n";
      
      for (int j=0; j<nss; j++)
      {
	  if (S.cur[s][j])
	      SET1 << locus[snpset[s][j]]->name << "\n";
	  else
	      SET2 << locus[snpset[s][j]]->name << "\n";
      }
      
      SET1 << "END\n\n";
      SET2 << "END\n\n";
  }
  
  SET1.close();
  SET2.close();
  

  // Prune empty sets once more:

  S.pruneSets(*this);
  
  ns = snpset.size();
  if (ns < 2)
      error("Need to specify at least two fully valid sets\n");


  ////////////////////////////////
  // Set up permutation structure

  // Specialized (i.e. cannot use Perm class) as this 
  // requires a block-locus permutation

  // First block is fixed
  
  vector<vector<int> > blperm(ns);
  vector<vector<int> > blperm_case(ns);
  vector<vector<int> > blperm_control(ns);

  for (int i=0; i<ns; i++)
  {
      // A slot for each individual per locus
      for (int j=0; j<n; j++)
	  if ( ! sample[j]->missing )
	      blperm[i].push_back(j);
 
      // A slot for each individual per locus
      for (int j=0; j<n; j++)
	  if ( ! sample[j]->missing && sample[j]->aff )
	      blperm_case[i].push_back(j);

      // A slot for each individual per locus
      for (int j=0; j<n; j++)
	  if ( ! sample[j]->missing && !sample[j]->aff )
	      blperm_control[i].push_back(j);
  }


  ////////////////////////////////////////////
  // Open file and print header for results

  ofstream EPI(f.c_str(), ios::out);
  EPI.open(f.c_str(), ios::out);
  EPI.precision(4);


  ////////////////////////////////////////
  // Analysis (calls genepi functions)

  if (!par::epi_caseonly)
      CCA_logit(false,blperm,S,*this);
  else
      CCA_caseonly(false,blperm_case,S,*this);

  if (!par::permute) 
   return;

  if (!par::silent)
    cout << "\n";


} // End of screepi
Exemplo n.º 18
0
// Write a warning to the log; a trailing newline is appended to the message.
void output_log::warning(string err_msg)
{
	err_msg += "\n";
	printLOG(err_msg);
}
Exemplo n.º 19
0
// Log the message prefixed with "Error: " and terminate the process,
// using error_code as the exit status.
void output_log::error(string err_msg, int error_code)
{
	const string log_line = "Error: " + err_msg + "\n";
	printLOG(log_line);
	exit(error_code);
}
Exemplo n.º 20
0
void Plink::calcMH()
{

  ///////////////////////////////////
  // Basic 2 x 2 x K CMH test
  // i.e. Disease x allele x strata
  // is taken care of in assoc.cpp 
  // (i.e. allows for permutation, sets, etc)


  if (!par::SNP_major) Ind2SNP();


  //////////////////////////////////
  // Any individual not assigned to a cluster, 
  // making missing phenotype
  
  vector<Individual*>::iterator person = sample.begin();
  while ( person != sample.end() )
    {
      if ( (*person)->sol < 0 ) 
	(*person)->missing = true;
      person++;
    }


  ///////////////////////////////////
  // Generalized I x J x K CMH test

  // Either ordinal or normal
  // i.e. test strata X SNP controlling for disease
  
  if (par::CMH_test_2 || par::CMH_test_ORD )
    {

      if (par::CMH_test_ORD && !par::bt) 
      error("--mh-ord specified but the phenotype is only binary: use --mh");

      if (nk==1) 
	error("No clusters defined for --mh2 test, i.e. K=1");

      string f = par::output_file_name + ".cmh2";
	if (par::CMH_test_ORD)
        f = par::output_file_name + ".cmh.ord";

      ofstream MHOUT;
      MHOUT.open(f.c_str(),ios::out);
      
      MHOUT << setw(4) << "CHR" << " "
	      << setw(par::pp_maxsnp) << "SNP" << " "
	      << setw(10) << "CHISQ" << " "
	      << setw(10) << "P" << "\n";
	        
      MHOUT.precision(4);
      
      if (par::CMH_test_ORD)
	{
	  printLOG("Cochran-Mantel-Haenszel IxJxK ordinal test, K = " 
		   + int2str(nk) + "\n");
	  printLOG("Testing SNP x ORDINAL DISEASE | STRATUM (option --mh-ord)\n");
	} 
      else 
	{ 
	  printLOG("Cochran-Mantel-Haenszel IxJxK test, K = " 
		   + int2str(nk) + "\n");
	  printLOG("Testing SNP x STRATUM | DISEASE (option --mh2)\n");
	}
      printLOG("Writing results to [ " + f + " ]\n");
      
      vector<CSNP*>::iterator s = SNP.begin();
      int l=0;
      while ( s != SNP.end() )
	{
	  
	  ///////////////////////// 	      
	  // Autosomal or haploid?
	  
	  bool Xchr=false, haploid=false;
	  if (par::chr_sex[locus[l]->chr]) Xchr=true;
	  else if (par::chr_haploid[locus[l]->chr]) haploid=true;
	  
	  if (haploid || Xchr ) 
	    error("--mh2 / --mh-ord cannot handle X/Y markers currently...");
	  
	  vector<int> X(0);  // SNP
	  vector<int> Y(0);  // Cluster
	  vector<int> Z(0);  // Phenotype
	  
	  vector<Individual*>::iterator person = sample.begin();
	  vector<bool>::iterator i1 = (*s)->one.begin();
	  vector<bool>::iterator i2 = (*s)->two.begin();
	  
	  while ( person != sample.end() )
	    {
	      
	      if ((*person)->missing) 
		{
		  // Next person
		  person++;
		  i1++;
		  i2++;
		  continue;
		}            	      
	      
	      // Only consider individuals who have been assigned to a cluster
	      if ( (*person)->sol >= 0 )
		{
		  if ( (!(*i1)) && (!(*i2)) ) 
		    { 
		      X.push_back(1); 
		      X.push_back(1); 
		    }
		 else if ( (!(*i1)) && *i2 ) 
		   { 
		     X.push_back(1); 
		     X.push_back(2); 
		   }
		  else if ( *i1 && *i2 ) 
		    { 
		      X.push_back(2); 
		      X.push_back(2); 
		    }
		  else 
		    {
		      // Next person
		      person++;
		      i1++;
		      i2++;
		      continue;
		    }
		  
		  Y.push_back((*person)->sol);
		  Y.push_back((*person)->sol);
		  
		  if (par::CMH_test_ORD) 
		    Z.push_back( (int)(*person)->phenotype );
		  else 
		    {
		      if ((*person)->phenotype==2) 
			{ 
			  Z.push_back(2); 
			  Z.push_back(2);
			} 
		      else { 
			Z.push_back(1); 
			Z.push_back(1); 
		      }       
		    }
		}
	      
	      // Next person
	      person++;
	      i1++;
	      i2++;
	      
	    }
	  
	  
	  vector<double> res;
	 
	  if ( par::CMH_test_ORD ) 
	    res = calcMantelHaenszel_ORD(X,Z,Y);
	  else 
	    res = calcMantelHaenszel_IxJxK(X,Y,Z);
	  
	  MHOUT << setw(4) << locus[l]->chr << " "
		<< setw(par::pp_maxsnp) << locus[l]->name << " "
		<< setw(10) << res[0] << " "
		<< setw(10) << chiprobP(res[0],res[1]) << "\n";
	  
	  // Next SNP
	  s++;
	  l++;
	}
      
      MHOUT.close();
    }
  
}
Exemplo n.º 21
0
int WAV::Read(const char* WAVFName)
{
    std::ifstream ifs(WAVFName, std::ios::binary);
    //WAVHeader wav;
    //unsigned char header_buf[44];
    if (!ifs)
    {
        std::cerr << "[ERROR] can't open " << WAVFName << std::endl;
        return -1;
    }

    //ifs.read((char*)header_buf, 44);

    //ファイルがRIFF形式であるか;  
    ifs.read(_riffID, 4);
    if (ifs.bad() || strncmp(_riffID, "RIFF", 4) != 0) return -1;
    ifs.read((char*) &_size, 4); // fileSize


    //ファイルがWAVEファイルであるか
    ifs.read((char*) _wavID, 4);
    if (ifs.bad() || strncmp(_wavID, "WAVE", 4) != 0) return -1;

    //fmt のチェック
    ifs.read((char*) _fmtID, 4);
    if (strncmp(_fmtID, "fmt ", 4))
    {
        std::cerr << "fmt not found" << std::endl;
        return -1;
    }
    // fmt チャンクのバイト数
    ifs.read((char*) &_fmtSize, 4);
    if (_fmtSize != 16)
    {
        std::cerr << "not LinearPCM" << std::endl;
        return -1;
    }
    //フォーマットIDから拡張部分までのヘッダ部分を取り込む
    // LinearPCMなので16byte分のデータ読み込む
    ifs.read((char*) &_format, 2); //LinearPCMファイルならば1が入る
    if (_format != 1)
    {
        std::cerr << "not LinearPCM" << std::endl;
        return -1;
    }
    ifs.read((char*) &_channels, 2);
    ifs.read((char*) &_sampleRate, 4);
    ifs.read((char*) &_bytePerSec, 4);
    ifs.read((char*) &_blockSize, 2);
    ifs.read((char*) &_bit, 2);
    //--

    // "data"
    ifs.read((char*) _dataID, 4);
    if (ifs.bad() || strncmp(_dataID, "data", 4) != 0) return -1;
    // 波形データのバイト数
    ifs.read((char*) &_dataSize, 4);

    //モノラルならサンプル数を、ステレオなら左右1サンプルずつの組の数
    _sampleCount = _dataSize / (_channels*(_bit / 8));
    //_monoral8=NULL;
    //_monoral16=NULL;
    //_stereo8=NULL;
    //_stereo16=NULL;
    try
    {
        if (_channels == 1)
        {
            if (_bit == 8)
            {
                if ((_monoral8 = (Monoral8*) malloc(_dataSize)) == NULL)
                {
                    return -1;
                }
                ifs.read((char*) _monoral8, _dataSize);
            }
            else if (_bit == 16)
            {
                if ((_monoral16 = (Monoral16*) malloc(_dataSize)) == NULL)
                {
                    return -1;
                }
                //std::cout << "1block=" << sizeof(Monoral16) << std::endl;
                //std::cout << "samplecount=" << _sampleCount << std::endl;
                //std::cout << "dataSize=1block*sampleCount=" << sizeof(Monoral16)* _sampleCount << "=" <<_dataSize << std::endl;
                ifs.read((char*) _monoral16, _dataSize);

            }
            else
            {
                return -1;
            }
        }
        else   if (_channels == 2)
        {
            // ToDO LR insert data
            if (_bit == 8)
            {
                if ((_stereo8 = (Stereo8*) malloc(_dataSize)) == NULL)
                {
                    return -1;
                }
                ifs.read((char*) _stereo8, _dataSize);
            }
            else if (_bit == 16)
            {
                if ((_stereo16 = (Stereo16*) malloc(_dataSize)) == NULL)
                {
                    return -1;
                }
                ifs.read((char*) _stereo16, _dataSize);
            }
            else
            {
                return -1;
            }
        }
        else
        {
            return -1;
        }
    }
    catch (const char* e)
    {
        printLOG("malloc error");
        printf("%s\n", e);
        exit(-1);
    }


    ifs.close();
    return 0;
}
Exemplo n.º 22
0
// Read VCF file
//
// Scans the file line by line. Meta lines ("##") go to parse_meta(), the
// header line ("#") goes to parse_header(), and for each accepted data line
// the position reported by get_filepos() before the line was read is
// appended to entry_file_locations and N_entries is incremented.
//
//   chr         : if non-empty, only entries whose CHROM equals chr are indexed
//   exclude_chr : if non-empty, entries whose CHROM equals exclude_chr are skipped
//
// NOTE(review): this listing is cut off before the end of the function, so
// only the part visible here is described.
void vcf_file::scan_file(const string &chr, const string &exclude_chr)
{
	printLOG("Scanning " + filename + " ... \n");

	bool filter_by_chr = (chr != "");
	bool exclude_by_chr = (exclude_chr != "");
	string line, tmp;
	N_indv = 0;
	unsigned int N_read = 0;	// number of data lines seen
	istringstream ss;
	string last_CHROM = "";
	N_entries=0;
	string CHROM;
	bool finish = false;
	int last_POS = -1;	// previous position on the current chromosome, -1 = none yet
	int POS;
	streampos filepos;

	while(!feof())
	{
		// Remember where this line starts before consuming it
		filepos = get_filepos();
		read_line(line);

		// Lines of two characters or fewer are ignored
		if (line.length() <= 2)
			continue;

		if (line[0] == '#')
		{
			if (line[1] == '#')
			{	// Meta information
				parse_meta(line);
			}
			else
			{	// Must be header information: #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	(FORMAT	NA00001 NA00002 ... )
				parse_header(line);
			}
		}
		else
		{	// Must be a data line
			// Only the leading CHROM field is extracted here; POS is read later if needed
			ss.clear(); ss.str(line);
			ss >> CHROM;

			N_read++;

			if ((filter_by_chr == true) && (last_CHROM == chr) && (CHROM != chr))
			{	// Presuming the file to be sorted (it should be), we have already found the chromosome we wanted, so there's no need to continue.
				printLOG("\tCompleted reading required chromosome. Skipping remainder of file.\n");
				finish = true;
				break;
			}

			if (CHROM != last_CHROM)
			{
				// New chromosome: log it and restart the sort-order check
				printLOG("Currently scanning CHROM: " + CHROM);
				if ((exclude_by_chr == true) && (CHROM == exclude_chr))
					printLOG(" - excluded.");
				printLOG("\n");
				last_CHROM = CHROM;
				last_POS = -1;
			}

			if ((exclude_by_chr == true) && (CHROM == exclude_chr))
				continue;

			if (filter_by_chr == true)
			{	// For speed, only parse the entry if it's needed
				if (CHROM == chr)
				{
					ss >> POS;
					// Positions must not decrease within a chromosome
					if (POS < last_POS)
						error("VCF file is not sorted at: " + CHROM + ":" + int2str(POS));
					last_POS = POS;
					entry_file_locations.push_back(filepos);
					N_entries++;
				}
			}
			else
			{
				ss >> POS;
				// Positions must not decrease within a chromosome
				if (POS < last_POS)
					error("VCF file is not sorted at: " + CHROM + ":" + int2str(POS));
				last_POS = POS;
				entry_file_locations.push_back(filepos);
				N_entries++;
			}
		}
Exemplo n.º 23
0
void vcf_file::output_discordance_by_site(const string &output_file_prefix, vcf_file &diff_vcf_file)
{
	printLOG("Outputting Discordance By Site...\n");
	map<pair<string, int>, pair<int, int> > CHROMPOS_to_filepos_pair;
	map<pair<string, int>, pair<int, int> >::iterator CHROMPOS_to_filepos_pair_it;
	return_site_union(diff_vcf_file, CHROMPOS_to_filepos_pair);

	map<string, pair< int, int> > combined_individuals;
	map<string, pair< int, int> >::iterator combined_individuals_it;
	return_indv_union(diff_vcf_file, combined_individuals);

	string CHROM, vcf_line;
	int POS;
	int s1, s2, indv1, indv2;

	string output_file = output_file_prefix + ".diff.sites";
	ofstream diffsites(output_file.c_str());
	if (!diffsites.is_open())
		error("Could not open Sites Differences File: " + output_file, 3);
	//diffsites << "CHROM\tPOS\tFILES\tMATCHING_ALT\tN_COMMON_CALLED\tN_DISCORD\tDISCORDANCE\tN_FILE1_NONREF_GENOTYPES\tNON_REF_DISCORDANCE" << endl;
	diffsites << "CHROM\tPOS\tFILES\tMATCHING_ALLELES\tN_COMMON_CALLED\tN_DISCORD\tDISCORDANCE" << endl;

	for (CHROMPOS_to_filepos_pair_it=CHROMPOS_to_filepos_pair.begin(); CHROMPOS_to_filepos_pair_it != CHROMPOS_to_filepos_pair.end(); ++CHROMPOS_to_filepos_pair_it)
	{
		CHROM = CHROMPOS_to_filepos_pair_it->first.first;
		POS = CHROMPOS_to_filepos_pair_it->first.second;

		diffsites << CHROM << "\t" << POS;

		s1 = CHROMPOS_to_filepos_pair_it->second.first;
		s2 = CHROMPOS_to_filepos_pair_it->second.second;

		vcf_entry e1(N_indv);
		vcf_entry e2(diff_vcf_file.N_indv);

		bool data_in_both = true;
		// Read entries from file (if available)
		if (s1 != -1)
		{
			get_vcf_entry(s1, vcf_line);
			e1.reset(vcf_line);
		}
		else
			data_in_both = false;

		if (s2 != -1)
		{
			diff_vcf_file.get_vcf_entry(s2, vcf_line);
			e2.reset(vcf_line);
		}
		else
			data_in_both = false;

		if (data_in_both)
			diffsites << "\tB";
		else if ((s1 != -1) && (s2 == -1))
			diffsites << "\t1";
		else if ((s1 == -1) && (s2 != -1))
			diffsites << "\t2";
		else
			error("Unhandled condition");

		e1.parse_basic_entry(true);
		e2.parse_basic_entry(true);

		// Set the reference to the non-missing entry (if available)
		string REF = e1.get_REF();
		string REF2 = e2.get_REF();
		if (REF == "N")
			REF = REF2;
		if (REF2 == "N")
			REF2 = REF;

		if (REF.size() != REF2.size())
		{
			warning("REF sequences at " + CHROM + ":" + int2str(POS) + " are not comparable. Skipping site");
			continue;
		}

		if ((REF != REF2) && (REF2 != "N") && (REF != "N"))
			warning("Non-matching REF " + CHROM + ":" + int2str(POS) + " " + REF + "/" + REF2);

		// Do the alternative alleles match?
		string ALT, ALT2;
		ALT = e1.get_ALT();
		ALT2 = e2.get_ALT();

		bool alleles_match = ((ALT == ALT2) && (REF == REF2));
		diffsites << "\t" << alleles_match;

		e1.parse_full_entry(true);
		e1.parse_genotype_entries(true);

		e2.parse_full_entry(true);
		e2.parse_genotype_entries(true);

		pair<string, string> genotype1, genotype2;
		pair<int,int> geno_ids1, geno_ids2;
		pair<string, string> missing_genotype(".",".");
		pair<int, int> missing_id(-1,-1);

		unsigned int N_common_called=0;	// Number of genotypes called in both files
		unsigned int N_missing_1=0, N_missing_2=0;
		unsigned int N_discord=0;
		unsigned int N_concord_non_missing=0;

		for (combined_individuals_it=combined_individuals.begin(); combined_individuals_it!=combined_individuals.end(); ++combined_individuals_it)
		{
			indv1 = combined_individuals_it->second.first;
			indv2 = combined_individuals_it->second.second;

			if ((indv1 == -1) || (indv2 == -1))
				continue;	// Individual not found in one of the files

			if (alleles_match)
			{	// Alleles match, so can compare ids instead of strings
				e1.get_indv_GENOTYPE_ids(indv1, geno_ids1);
				e2.get_indv_GENOTYPE_ids(indv2, geno_ids2);

				if ((geno_ids1 != missing_id) && (geno_ids2 != missing_id))
				{
					N_common_called++;
					if (((geno_ids1.first == geno_ids2.first) && (geno_ids1.second == geno_ids2.second)) ||
						((geno_ids1.first == geno_ids2.second) && (geno_ids1.second == geno_ids2.first)) )
					{	// Match
						N_concord_non_missing++;
					}
					else
					{	// Mismatch
						N_discord++;
					}
				}
				else if ((geno_ids1 == missing_id) && (geno_ids2 == missing_id))
				{	// Both missing
					N_missing_1++; N_missing_2++;
				}
				else if (geno_ids1 != missing_id)
				{	// Genotype 1 is not missing, genotype 2 is.
					N_missing_2++;
				}
				else if (geno_ids2 != missing_id)
				{	// Genotype 2 is not missing, genotype 1 is.
					N_missing_1++;
				}
				else
					error("Unknown condition");
			}
			else
			{	// Alleles don't match, so need to be more careful and compare strings
				e1.get_indv_GENOTYPE_strings(indv1, genotype1);
				e2.get_indv_GENOTYPE_strings(indv2, genotype2);

				if ((genotype1 != missing_genotype) && (genotype2 != missing_genotype))
				{	// No missing data
					N_common_called++;
					if (((genotype1.first == genotype2.first) && (genotype1.second == genotype2.second)) ||
						((genotype1.first == genotype2.second) && (genotype1.second == genotype2.first)) )
					{	// Match
						N_concord_non_missing++;
					}
					else
					{	// Mismatch
						N_discord++;
					}
				}
				else if ((genotype1 == missing_genotype) && (genotype2 == missing_genotype))
				{	// Both missing
					N_missing_1++; N_missing_2++;
				}
				else if (genotype1 != missing_genotype)
				{	// Genotype 1 is not missing, genotype 2 is.
					N_missing_2++;
				}
				else if (genotype2 != missing_genotype)
				{	// Genotype 2 is not missing, genotype 1 is.
					N_missing_1++;
				}
				else
					error("Unknown condition");
			}
		}
		double discordance = N_discord / double(N_common_called);
		diffsites << "\t" << N_common_called << "\t" << N_discord << "\t" << discordance;
		diffsites << endl;
	}
	diffsites.close();
}
Exemplo n.º 24
0
// QFAM driver: evaluates every locus through calcQTDT(), writing the
// asymptotic results to "<output>.qfam[.<mode>]" and, when permutation is
// enabled, the empirical p-values to the same name with ".perm" appended.
//
//   perm : permutation engine; its test count and clusters are set here and
//          it is queried afterwards for p-values and replicate counts.
//
// Side effects visible in this function: the data may be converted to
// individual-major storage and par::adaptive_perm is always switched on.
void Plink::perm_testQTDT(Perm & perm)
{

  // QFAM works on individual-major data
  if (par::SNP_major)
    SNP2Ind();

  // Covariates are not supported for now
  if ( par::clist_number > 0 )
    error("Cannot specify covariates with QFAM for now...\n");


  // Permutation indices handed to calcQTDT(): families in their
  // original order, and no family flagged in the within-family vector

  vector<int> pbetween(family.size());
  vector<bool> pwithin(family.size(),false);
  for (int idx = 0; idx < family.size(); ++idx)
    pbetween[idx] = idx;


  // The output suffix reflects which QFAM variant was requested

  string suffix = ".qfam";
  if (par::QFAM_within1) suffix += ".within";
  else if (par::QFAM_within2) suffix += ".parents";
  else if (par::QFAM_between) suffix += ".between";
  else if (par::QFAM_total) suffix += ".total";

  printLOG("Writing QFAM statistics to [ " + par::output_file_name + suffix + " ]\n");

  if (par::permute)
    printLOG("Important: asymptotic p-values not necessarily corrected for family-structure:\n"
             "           use empirical p-values for robust p-values from QFAM\n"
             "           and consult the above file only for parameter estimates\n");
  else
    printLOG("** Warning ** QFAM results require permutation to correct for family structure\n");


  // Header of the main results file

  ofstream QOUT((par::output_file_name+suffix).c_str(),ios::out);
  QOUT.precision(4);
  QOUT << setw(4) << "CHR" << " "
       << setw(par::pp_maxsnp) << "SNP" << " "
       << setw(10) << "BP" << " "
       << setw(4) << "A1" << " "
       << setw(10) << "TEST" << " "
       << setw(8) << "NIND" << " "
       << setw(10) << "BETA" << " ";
  if (par::display_ci)
    QOUT << setw(8) << "SE" << " "
         << setw(8) << "LOWER" << " "
         << setw(8) << "UPPER" << " ";
  QOUT << setw(12) << "STAT" << " "
       << setw(12) << "P\n";


  // Familial clustering: for each individual (in sample order) store
  // the position of its family within the family[] array

  map<Family*,int> family_slot;
  for (int k = 0 ; k < family.size() ; k++)
    family_slot.insert( make_pair( family[k] , k ) );

  vector<int> C;
  for (vector<Individual*>::iterator person = sample.begin();
       person != sample.end();
       person++)
    {
      map<Family*,int>::iterator slot = family_slot.find( (*person)->family );

      if ( slot == family_slot.end() )
        error("Internal error in QFAM, allocating families to individuals...\n");
      else
        C.push_back( slot->second );
    }

  printLOG(int2str(family.size())+" nuclear families in analysis\n");

  if ( family.size()<2 )
    error("Halting: not enough nuclear families for this analysis\n");


  // Original (unpermuted) QFAM pass; adaptive permutation is forced on
  // before the call

  perm.setTests(nl_all);
  perm.setPermClusters(*this);

  par::adaptive_perm = true;

  vector_t orig = calcQTDT(C, QOUT, false, perm, pbetween, pwithin);

  QOUT.close();


  // Without permutation there is nothing further to report

  if ( ! par::permute )
    return;

  // Adaptive permutation will already have been conducted in the
  // calcQTDT() call above (i.e. per-SNP adaptive permutation)

  if (!par::silent)
    cout << "\n\n";


  // Empirical results file

  suffix += ".perm";
  ofstream TDT;
  TDT.open((par::output_file_name+suffix).c_str(),ios::out);
  printLOG("Writing QFAM permutation results to [ "
           + par::output_file_name + suffix + " ] \n");
  TDT.precision(4);

  TDT << setw(4) << "CHR" << " "
      << setw(par::pp_maxsnp) << "SNP" << " ";

  // NOTE(review): the STAT heading is conditional, but every row below
  // carries a statistic column -- confirm par::perm_TDT_basic is always
  // set when this code runs.
  if (par::perm_TDT_basic) TDT << setw(12) << "STAT" << " ";

  TDT << setw(12) << "EMP1" << " ";
  TDT << setw(12) << "NP" << " " << "\n";

  for (int l=0; l<nl_all; l++)
    {
      TDT << setw(4) << locus[l]->chr << " "
          << setw(par::pp_maxsnp) << locus[l]->name << " ";

      // Statistics below -0.5 are reported as NA
      const bool unavailable = orig[l] < -0.5;

      if (unavailable)
        TDT << setw(12) << "NA"  << " "
            << setw(12) << "NA"  << " "
            << setw(12) << "NA";
      else
        TDT << setw(12) << orig[l] << " "
            << setw(12) << perm.pvalue(l) << " "
            << setw(12) << perm.reps_done(l);

      TDT << "\n";
    }

  TDT.close();


  // Adjusted p-values, assumes 1-df chi-squares

  if (par::multtest)
    {
      vector<double> obp;
      obp.reserve(nl_all);
      for (int l=0; l<nl_all; l++)
        obp.push_back(inverse_chiprob(perm.pvalue(l),1));

      multcomp(obp,suffix);
    }

}
Exemplo n.º 25
0
void vcf_file::output_discordance_matrix(const string &output_file_prefix, vcf_file &diff_vcf_file)
{
	printLOG("Outputting Discordance Matrix\n\tFor bi-allelic loci, called in both files, with matching alleles only...\n");
	map<pair<string, int>, pair<int, int> > CHROMPOS_to_filepos_pair;
	map<pair<string, int>, pair<int, int> >::iterator CHROMPOS_to_filepos_pair_it;
	return_site_union(diff_vcf_file, CHROMPOS_to_filepos_pair);

	map<string, pair< int, int> > combined_individuals;
	map<string, pair< int, int> >::iterator combined_individuals_it;
	return_indv_union(diff_vcf_file, combined_individuals);

	string vcf_line;
	int s1, s2, indv1, indv2;

	vector<vector<int> > discordance_matrix(4, vector<int>(4, 0));

	for (CHROMPOS_to_filepos_pair_it=CHROMPOS_to_filepos_pair.begin(); CHROMPOS_to_filepos_pair_it != CHROMPOS_to_filepos_pair.end(); ++CHROMPOS_to_filepos_pair_it)
	{
		s1 = CHROMPOS_to_filepos_pair_it->second.first;
		s2 = CHROMPOS_to_filepos_pair_it->second.second;

		vcf_entry e1(N_indv);
		vcf_entry e2(diff_vcf_file.N_indv);

		// Read entries from file (if available)
		if (s1 != -1)
		{
			get_vcf_entry(s1, vcf_line);
			e1.reset(vcf_line);
		}

		if (s2 != -1)
		{
			diff_vcf_file.get_vcf_entry(s2, vcf_line);
			e2.reset(vcf_line);
		}

		e1.parse_basic_entry(true);
		e2.parse_basic_entry(true);

		if ((e1.get_N_alleles() != 2) || (e2.get_N_alleles() != 2))
			continue;

		// Set the reference to the non-missing entry (if available)
		string REF = e1.get_REF();
		string REF2 = e2.get_REF();
		if (REF == "N")
			REF = REF2;
		if (REF2 == "N")
			REF2 = REF;

		if (REF.size() != REF2.size())
			continue;

		if ((REF != REF2) && (REF2 != "N") && (REF != "N"))
			continue;

		// Do the alternative alleles match?
		string ALT, ALT2;
		ALT = e1.get_ALT();
		ALT2 = e2.get_ALT();

		bool alleles_match = (ALT == ALT2) && (REF == REF2);
		if (alleles_match == false)
			continue;

		e1.parse_full_entry(true);
		e1.parse_genotype_entries(true);

		e2.parse_full_entry(true);
		e2.parse_genotype_entries(true);

		pair<int,int> geno_ids1, geno_ids2;
		int N1, N2;

		for (combined_individuals_it=combined_individuals.begin(); combined_individuals_it!=combined_individuals.end(); ++combined_individuals_it)
		{
			indv1 = combined_individuals_it->second.first;
			indv2 = combined_individuals_it->second.second;

			if ((indv1 == -1) || (indv2 == -1))
				continue;	// Individual not found in one of the files

			// Alleles match, so can compare ids instead of strings
			e1.get_indv_GENOTYPE_ids(indv1, geno_ids1);
			e2.get_indv_GENOTYPE_ids(indv2, geno_ids2);

			if (((geno_ids1.first != -1) && (geno_ids1.second == -1)) ||
				((geno_ids2.first != -1) && (geno_ids2.second == -1)))
			{	// Haploid
				one_off_warning("***Warning: Haploid chromosomes not counted!***");
				continue;
			}

			N1 = geno_ids1.first + geno_ids1.second;
			N2 = geno_ids2.first + geno_ids2.second;

			if ((N1 == -1) || (N1 < -2) || (N1 > 2))
				error("Unhandled case");
			if ((N2 == -1) || (N2 < -2) || (N2 > 2))
				error("Unhandled case");

			if (N1 == -2)
				N1 = 3;

			if (N2 == -2)
				N2 = 3;

			discordance_matrix[N1][N2]++;
		}
	}

	string output_file = output_file_prefix + ".diff.discordance_matrix";
	ofstream out(output_file.c_str());
	if (!out.is_open())
		error("Could not open Discordance Matrix File: " + output_file, 3);

	out << "-\tN_0/0_file1\tN_0/1_file1\tN_1/1_file1\tN_./._file1" << endl;
	out << "N_0/0_file2\t" << discordance_matrix[0][0] << "\t" << discordance_matrix[1][0] << "\t" << discordance_matrix[2][0] << "\t" << discordance_matrix[3][0] << endl;
	out << "N_0/1_file2\t" << discordance_matrix[0][1] << "\t" << discordance_matrix[1][1] << "\t" << discordance_matrix[2][1] << "\t" << discordance_matrix[3][1] << endl;
	out << "N_1/1_file2\t" << discordance_matrix[0][2] << "\t" << discordance_matrix[1][2] << "\t" << discordance_matrix[2][2] << "\t" << discordance_matrix[3][2] << endl;
	out << "N_./._file2\t" << discordance_matrix[0][3] << "\t" << discordance_matrix[1][3] << "\t" << discordance_matrix[2][3] << "\t" << discordance_matrix[3][3] << endl;
	out.close();
}
Exemplo n.º 26
0
void Plink::displayGeneReport()
{

  // Simply read in any generic results file and list of SNPs by
  // ranges (which may be subsetted).
  
  //   if ( false )
  //     readMapFile(par::mapfile,include,include_pos,nl_actual);

  ofstream GREP;

  GREP.open( (par::output_file_name + ".range.report").c_str() , ios::out);
  
  map<string, set<Range> > ranges;
  

  // Read list of ranges
  ranges = readRange( par::greport_gene_list );

  // Filter ranges 
  
  if ( par::greport_subset ) 
    ranges = filterRanges( ranges, par::greport_subset_file );



  // Open a single results file
  
  ifstream RESIN;
  RESIN.open( par::greport_results.c_str() , ios::in );
  
  // Read first (header) row

  char cline[par::MAX_LINE_LENGTH];
  RESIN.getline(cline,par::MAX_LINE_LENGTH,'\n');
  
  string sline = cline;
  if (sline=="") 
    error("Problem reading [ " + par::greport_results + " ]\n");

  string buf; 
  stringstream ss(sline); 
  vector<string> tokens; 
  while (ss >> buf)
    tokens.push_back(buf);

  int chr_column = -1;
  int bp_column = -1;
  int pval_column = -1;
  int snp_column = -1;

  for (int i=0; i<tokens.size(); i++)
    {
      if ( tokens[i] == "CHR" )
	chr_column = i;
      
      if ( tokens[i] == "BP" )
	bp_column = i;	  
      
      if ( tokens[i] == "SNP" )
	snp_column = i;	        

      if ( tokens[i] == "P" )
	pval_column = i;	        
    }


  // Do we have a list of SNPs to specifically extract?

  set<string> extractSNP;
  if ( par::extract_set )
    {
      if ( snp_column == -1 ) 
	error("Did not find a SNP field, so cannot use --extract");

      checkFileExists( par::extract_file );
      PP->printLOG("Only extracting SNPs listed in [ " + par::extract_file + " ]\n");
      ifstream IN(par::extract_file.c_str(), ios::in);
      
      while ( ! IN.eof() )
	{
	  string snpname;
	  IN >> snpname;
	  if ( snpname=="" )
	    continue;
	  extractSNP.insert(snpname);
	}
      IN.close();      
      PP->printLOG("Read " + int2str( extractSNP.size() ) + " SNPs to extract\n");
    }


  if ( chr_column < 0 || bp_column < 0 )
    error("Could not find CHR and BP fields in results file");
  
  map<Range*,vector<string> > annotatedResults;

  string headerline = sline;
  int cnt = 0;
  while ( ! RESIN.eof() )
    {

//       if ( ! par::silent ) 
// 	cout << "Processing results line " << ++cnt << "        \r";

      //      vector<string> tokens = tokenizeLine( RESIN ); 

      char cline[par::MAX_LINE_LENGTH];
      RESIN.getline(cline,par::MAX_LINE_LENGTH,'\n');
      
      string sline = cline;
      if (sline=="") 
	continue;
      
      string buf; 
      stringstream ss(sline); 
      vector<string> tokens; 
      while (ss >> buf)
	tokens.push_back(buf);
    
      if ( tokens.size() <= chr_column ||
	   tokens.size() <= bp_column )
	continue;
      
      // Using a p-value-filtering field? 

      double pvalue = 0;
      if ( pval_column != -1 )
	{
	  if ( tokens.size() <= pval_column )
	    continue;
	  
	  if ( ! from_string<double>( pvalue, tokens[pval_column] , std::dec))
	    continue;
	  
	  if ( par::pfilter && pvalue > par::pfvalue ) 
	    continue;
	  
	}

      if ( par::extract_set ) 
	{
	  if ( tokens.size() <= snp_column )
	    continue;
	  
	  if ( extractSNP.find( tokens[snp_column] ) == extractSNP.end() )
	    continue;
	}

      int thisChr = -1;
      int thisBP = -1;

      if ( ! from_string<int>( thisChr, tokens[chr_column] , std::dec))
	continue;

      if ( ! from_string<int>( thisBP, tokens[bp_column] , std::dec))
	continue;
      
      // Do we need to store this? i.e. what ranges is it actually in?
      // This information is in snp2range

      Range r1(thisChr,thisBP,thisBP,"dummy"); 
      
      set<Range*> implicated = rangeIntersect(r1,ranges);
      set<Range*>::iterator ri = implicated.begin();
      while ( ri != implicated.end() )
	{

	  string distance = dbl2str(( thisBP - ((*ri)->start + par::make_set_border)) /1000.00 , 4 ) + "kb" ;
	  
	  if ( annotatedResults.find( *ri ) == annotatedResults.end() )
	    {
	      vector<string> t(2);
	      t[0] = distance;
	      t[1] = sline;
	      annotatedResults.insert(make_pair( (Range *)(*ri) , t ) );
	    }
	  else
	    {
	      vector<string> & v = annotatedResults.find( *ri )->second;
	      v.push_back(distance);
	      v.push_back(sline);
	    }

	  ++ri;
	}

      // Read next line of results

    }


      
  // Iterate through these -- they will be in genomic order, hopefully
  
  map<string, set<Range> >::iterator ri = ranges.begin();

  while ( ri != ranges.end() )
    {
      set<Range>::iterator si = ri->second.begin();
      

      while ( si != ri->second.end() )
	{
	  
	  bool displayed = false;
	  
	  	  
	  map<Range*,vector<string> >::iterator ari;
	  ari = annotatedResults.find( (Range *)&(*si) );
	  	 	  
	  if ( ari != annotatedResults.end() )
	    {
	      for (int l=0; l< ari->second.size(); l+=2)
		{
		  if ( ! displayed ) 
		    {
		      GREP << ri->first << " -- chr" 
			   << chromosomeName( si->chr ) << ":" 
			   << si->start << ".."
			   << si->stop << " ( "
			   << (si->stop - si->start ) / 1000.00 << "kb ) ";
		      if ( par::make_set_border > 0 ) 
			GREP << " including " << par::make_set_border/1000.00 << "kb border ";		      
		      GREP << "\n\n" 
			   << setw(12) << "DIST" << " "
			   << headerline << "\n";
		      displayed = true;
		    }
		  
		  GREP << setw(12) << ari->second[l] << " "
		       << ari->second[l+1] << "\n";
		}
	    }
	  
	  if ( ! displayed ) 
	    {
	      if ( par::greport_display_empty ) 
		{
		  GREP << ri->first << " -- chr" 
		       << chromosomeName( si->chr ) << ":" 
		       << si->start << ".."
		       << si->stop << " ( "
		       << (si->stop - si->start ) / 1000.00 << "kb ) ";
		  if ( par::make_set_border > 0 ) 
		    GREP << " including " << par::make_set_border/1000.00 << "kb border ";		      
		  GREP << "   { nothing to report }\n\n";
		}
	    }
	  else
	    GREP << "\n\n";
	  
	  ++si;
	}
            
      ++ri;
    }
	 
  RESIN.close(); 
  GREP.close();

  if ( ! par::silent ) 
    cout << "\n";

  printLOG("Writing per-range report to [ " 
	   + par::output_file_name 
	   + ".range.report ]\n");

  shutdown();

}
Exemplo n.º 27
0
// Compare the phasing of this file (file 1) with diff_vcf_file (file 2).
//
// Writes two files:
//   <prefix>.diff.switch      : one line (CHROM, POS, INDV) per switch error
//   <prefix>.diff.indv.switch : per individual, the number of commonly phased
//                               heterozygous sites, the number of switch
//                               errors and their ratio (0 when there are no
//                               such sites)
//
// A site contributes for an individual only if the individual is present in
// both files, both genotypes are non-missing and equal (in either allele
// order), the genotype is heterozygous, and both files report the '|' phase
// character. The previous such genotype of each file is remembered per
// individual; a switch error is recorded when the (previous, current) first
// haplotype of file 2 equals neither haplotype of file 1.
void vcf_file::output_switch_error(const string &output_file_prefix, vcf_file &diff_vcf_file)
{
	printLOG("Outputting Phase Switch Errors...\n");
	map<pair<string, int>, pair<int, int> > CHROMPOS_to_filepos_pair;
	map<pair<string, int>, pair<int, int> >::iterator CHROMPOS_to_filepos_pair_it;
	return_site_union(diff_vcf_file, CHROMPOS_to_filepos_pair);

	map<string, pair< int, int> > combined_individuals;
	map<string, pair< int, int> >::iterator combined_individuals_it;
	return_indv_union(diff_vcf_file, combined_individuals);

	string CHROM, vcf_line;
	int POS;
	int s1, s2, indv1, indv2;

	string output_file = output_file_prefix + ".diff.switch";
	ofstream switcherror(output_file.c_str());
	if (!switcherror.is_open())
		error("Could not open Switch Error file: " + output_file, 4);
	switcherror << "CHROM\tPOS\tINDV" << endl;

	// Per-individual state, indexed by position in combined_individuals
	unsigned int N_combined_indv = combined_individuals.size();
	vector<int> N_phased_het_sites(N_combined_indv, 0);
	vector<int> N_switch_errors(N_combined_indv, 0);

	const pair<string, string> missing_genotype(".",".");
	vector<pair<string, string> > prev_geno_file1(N_combined_indv, missing_genotype);
	vector<pair<string, string> > prev_geno_file2(N_combined_indv, missing_genotype);

	pair<string, string> genotype1, genotype2;

	for (CHROMPOS_to_filepos_pair_it=CHROMPOS_to_filepos_pair.begin(); CHROMPOS_to_filepos_pair_it != CHROMPOS_to_filepos_pair.end(); ++CHROMPOS_to_filepos_pair_it)
	{
		CHROM = CHROMPOS_to_filepos_pair_it->first.first;
		POS = CHROMPOS_to_filepos_pair_it->first.second;

		s1 = CHROMPOS_to_filepos_pair_it->second.first;
		s2 = CHROMPOS_to_filepos_pair_it->second.second;

		vcf_entry e1(N_indv);
		vcf_entry e2(diff_vcf_file.N_indv);

		// Read entries from file (if available)
		if (s1 != -1)
		{
			get_vcf_entry(s1, vcf_line);
			e1.reset(vcf_line);
		}

		if (s2 != -1)
		{
			diff_vcf_file.get_vcf_entry(s2, vcf_line);
			e2.reset(vcf_line);
		}

		e1.parse_basic_entry(true);
		e2.parse_basic_entry(true);

		e1.parse_full_entry(true);
		e1.parse_genotype_entries(true);

		e2.parse_full_entry(true);
		e2.parse_genotype_entries(true);

		unsigned int indv_count=0;

		// Bug fix applied (#3354189) - July 5th 2011
		// indv_count advances with the iterator even for skipped
		// individuals, so it always matches the summary loop below.
		for (combined_individuals_it=combined_individuals.begin();
				combined_individuals_it!=combined_individuals.end();
				++combined_individuals_it, indv_count++)
		{
			indv1 = combined_individuals_it->second.first;
			indv2 = combined_individuals_it->second.second;

			if ((indv1 == -1) || (indv2 == -1))
				continue;	// Individual not found in one of the files

			e1.get_indv_GENOTYPE_strings(indv1, genotype1);
			e2.get_indv_GENOTYPE_strings(indv2, genotype2);

			if ((genotype1 == missing_genotype) || (genotype2 == missing_genotype))
				continue;	// Missing data in at least one file

			bool same_genotype =
				((genotype1.first == genotype2.first) && (genotype1.second == genotype2.second)) ||
				((genotype1.first == genotype2.second) && (genotype1.second == genotype2.first));
			if (!same_genotype)
				continue;	// Genotypes disagree, phase cannot be compared

			if (genotype1.first == genotype1.second)
				continue;	// Homozygote

			char phase1 = e1.get_indv_PHASE(indv1);
			char phase2 = e2.get_indv_PHASE(indv2);
			if ((phase1 != '|') || (phase2 != '|'))
				continue;	// Not phased in both files

			// Calculate Phasing error (switch error)
			N_phased_het_sites[indv_count]++;

			// Haplotypes spanning the previous and the current phased het
			// site. Built by direct construction: make_pair with explicit
			// template arguments rejects lvalues from C++11 onwards.
			const pair<string, string> file1_hap1(prev_geno_file1[indv_count].first, genotype1.first);
			const pair<string, string> file1_hap2(prev_geno_file1[indv_count].second, genotype1.second);
			const pair<string, string> file2_hap1(prev_geno_file2[indv_count].first, genotype2.first);

			if ((file2_hap1 != file1_hap1) && (file2_hap1 != file1_hap2))
			{	// Must be a switch error
				N_switch_errors[indv_count]++;
				// indv1 is a valid index here (checked above)
				switcherror << CHROM << "\t" << POS << "\t" << indv[indv1] << endl;
			}
			prev_geno_file1[indv_count] = genotype1;
			prev_geno_file2[indv_count] = genotype2;
		}
	}
	switcherror.close();

	output_file = output_file_prefix + ".diff.indv.switch";
	ofstream idiscord(output_file.c_str());
	if (!idiscord.is_open())
		error("Could not open Individual Discordance File: " + output_file, 3);

	idiscord << "INDV\tN_COMMON_PHASED_HET\tN_SWITCH\tSWITCH" << endl;
	unsigned int indv_count=0;
	double switch_error;
	string indv_id;
	for (combined_individuals_it=combined_individuals.begin(); combined_individuals_it!=combined_individuals.end(); ++combined_individuals_it)
	{
		indv1 = combined_individuals_it->second.first;
		indv2 = combined_individuals_it->second.second;

		// Individuals found in only one file are still listed
		if (indv1 != -1)
			indv_id = indv[indv1];
		else
			indv_id = diff_vcf_file.indv[indv2];

		if (N_phased_het_sites[indv_count] > 0)
			switch_error = double(N_switch_errors[indv_count]) / N_phased_het_sites[indv_count];
		else
			switch_error = 0;
		idiscord << indv_id << "\t" << N_phased_het_sites[indv_count] << "\t" << N_switch_errors[indv_count] << "\t" << switch_error << endl;

		indv_count++;
	}
	idiscord.close();
}
Exemplo n.º 28
0
void vcf_file::output_discordance_by_indv(const string &output_file_prefix, vcf_file &diff_vcf_file)
{
	printLOG("Outputting Discordance By Individual...\n");
	map<pair<string, int>, pair<int, int> > CHROMPOS_to_filepos_pair;
	map<pair<string, int>, pair<int, int> >::iterator CHROMPOS_to_filepos_pair_it;
	return_site_union(diff_vcf_file, CHROMPOS_to_filepos_pair);

	map<string, pair< int, int> > combined_individuals;
	map<string, pair< int, int> >::iterator combined_individuals_it;
	return_indv_union(diff_vcf_file, combined_individuals);

	map<string, pair<int, int> > indv_sums;

	string vcf_line, CHROM;
	int POS;
	int s1, s2, indv1, indv2;

	for (CHROMPOS_to_filepos_pair_it=CHROMPOS_to_filepos_pair.begin(); CHROMPOS_to_filepos_pair_it != CHROMPOS_to_filepos_pair.end(); ++CHROMPOS_to_filepos_pair_it)
	{
		CHROM = CHROMPOS_to_filepos_pair_it->first.first;
		POS = CHROMPOS_to_filepos_pair_it->first.second;

		s1 = CHROMPOS_to_filepos_pair_it->second.first;
		s2 = CHROMPOS_to_filepos_pair_it->second.second;

		vcf_entry e1(N_indv);
		vcf_entry e2(diff_vcf_file.N_indv);

		// Read entries from file (if available)
		if (s1 != -1)
		{
			get_vcf_entry(s1, vcf_line);
			e1.reset(vcf_line);
		}

		if (s2 != -1)
		{
			diff_vcf_file.get_vcf_entry(s2, vcf_line);
			e2.reset(vcf_line);
		}

		e1.parse_basic_entry(true);
		e2.parse_basic_entry(true);

		// Set the reference to the non-missing entry (if available)
		string REF = e1.get_REF();
		string REF2 = e2.get_REF();
		if (REF == "N")
			REF = REF2;
		if (REF2 == "N")
			REF2 = REF;

		if (REF.size() != REF2.size())
		{
			warning("REF sequences at " + CHROM + ":" + int2str(POS) + " are not comparable. Skipping site");
			continue;
		}

		if ((REF != REF2) && (REF2 != "N") && (REF != "N"))
			warning("Non-matching REF " + CHROM + ":" + int2str(POS) + " " + REF + "/" + REF2);

		// Do the alternative alleles match?
		string ALT, ALT2;
		ALT = e1.get_ALT();
		ALT2 = e2.get_ALT();

		bool alleles_match = (ALT == ALT2) && (REF == REF2);
		e1.parse_full_entry(true);
		e1.parse_genotype_entries(true);

		e2.parse_full_entry(true);
		e2.parse_genotype_entries(true);

		pair<string, string> genotype1, genotype2;
		pair<int,int> geno_ids1, geno_ids2;
		pair<string, string> missing_genotype(".",".");
		pair<int, int> missing_id(-1,-1);

		for (combined_individuals_it=combined_individuals.begin(); combined_individuals_it!=combined_individuals.end(); ++combined_individuals_it)
		{
			indv1 = combined_individuals_it->second.first;
			indv2 = combined_individuals_it->second.second;

			if ((indv1 == -1) || (indv2 == -1))
				continue;	// Individual not found in one of the files

			if (alleles_match)
			{	// Alleles match, so can compare ids instead of strings
				e1.get_indv_GENOTYPE_ids(indv1, geno_ids1);
				e2.get_indv_GENOTYPE_ids(indv2, geno_ids2);

				if ((geno_ids1 != missing_id) && (geno_ids2 != missing_id))
				{
					indv_sums[combined_individuals_it->first].first++;
					if (((geno_ids1.first == geno_ids2.first) && (geno_ids1.second == geno_ids2.second)) ||
						((geno_ids1.first == geno_ids2.second) && (geno_ids1.second == geno_ids2.first)) )
					{	// Match
						// Don't do anything
					}
					else
					{	// Mismatch
						indv_sums[combined_individuals_it->first].second++;
					}
				}
				else if ((geno_ids1 == missing_id) && (geno_ids2 == missing_id))
				{	// Both missing
					// Don't do anything.
				}
				else if (geno_ids1 != missing_id)
				{	// Genotype 1 is not missing, genotype 2 is.
					// Don't do anything.
				}
				else if (geno_ids2 != missing_id)
				{	// Genotype 2 is not missing, genotype 1 is.
					// Don't do anything.
				}
				else
					error("Unknown condition");
			}
			else
			{	// Alleles don't match, so need to be more careful and compare strings
				e1.get_indv_GENOTYPE_strings(indv1, genotype1);
				e2.get_indv_GENOTYPE_strings(indv2, genotype2);

				if ((genotype1 != missing_genotype) && (genotype2 != missing_genotype))
				{	// No missing data
					indv_sums[combined_individuals_it->first].first++;
					if (((genotype1.first == genotype2.first) && (genotype1.second == genotype2.second)) ||
						((genotype1.first == genotype2.second) && (genotype1.second == genotype2.first)) )
					{	// Match
						// Don't do anything
					}
					else
					{	// Mismatch
						indv_sums[combined_individuals_it->first].second++;
					}
				}
				else if ((genotype1 == missing_genotype) && (genotype2 == missing_genotype))
				{	// Both missing
					// Don't do anything
				}
				else if (genotype1 != missing_genotype)
				{	// Genotype 1 is not missing, genotype 2 is.
					// Don't do anything
				}
				else if (genotype2 != missing_genotype)
				{	// Genotype 2 is not missing, genotype 1 is.
					// Don't do anything
				}
				else
					error("Unknown condition");
			}
		}
	}

	string output_file = output_file_prefix + ".diff.indv";
	ofstream out(output_file.c_str());
	if (!out.is_open())
		error("Could not open Sites Differences File: " + output_file, 3);
	out << "INDV\tN_COMMON_CALLED\tN_DISCORD\tDISCORDANCE" << endl;

	int N, N_discord;
	double discordance;
	for (combined_individuals_it=combined_individuals.begin(); combined_individuals_it!=combined_individuals.end(); ++combined_individuals_it)
	{
		out << combined_individuals_it->first;
		N = indv_sums[combined_individuals_it->first].first;
		N_discord = indv_sums[combined_individuals_it->first].second;
		discordance = N_discord / double(N);
		out << "\t" << N << "\t" << N_discord << "\t" << discordance << endl;
	}

	out.close();
}
Exemplo n.º 29
0
void Plink::calcHomog()
{
  
  if (!par::SNP_major) Ind2SNP();

  string f = par::output_file_name + ".homog";
  ofstream MHOUT;
  MHOUT.open(f.c_str(),ios::out);

  MHOUT.precision(4);

  if (nk==0) error("No clusters (K=0)... cannot perform CMH tests");
 
  printLOG("Homogeneity of odds ratio test, K = " + int2str(nk) + "\n");

  if (nk<2) 
    {
      printLOG("** Warning ** less then 2 clusters specified... \n");
      printLOG("              cannot compute between-cluster effects ** \n");
      return;
    }

  if (nk>10) 
    printLOG("** Warning ** statistics can be unreliable if strata have small N ** \n");
  

  printLOG("Writing results to [ " + f + " ]\n");
  
  

  MHOUT << setw(4) << "CHR" << " " 
	<< setw(par::pp_maxsnp) << "SNP" << " " 
	<< setw(4) << "A1" << " "
	<< setw(4) << "A2" << " "
	<< setw(8) << "F_A" << " "
	<< setw(8) << "F_U" << " "
	<< setw(8) << "N_A" << " "
	<< setw(8) << "N_U" << " "
    	<< setw(8) << "TEST" << " "
	<< setw(10) << "CHISQ" << " "
	<< setw(4)  << "DF" << " "
	<< setw(10) << "P" << " "
	<< setw(10) << "OR" << "\n";
  
  
  ///////////////////////////////////
  // Create boolean affection coding

  affCoding(*this);


  //////////////////////////////////
  // Any individual not assigned to a cluster, 
  // making missing phenotype
  
  vector<Individual*>::iterator person = sample.begin();
  while ( person != sample.end() )
    {
      if ( (*person)->sol < 0 ) 
	(*person)->missing = true;
      person++;
    }

  	
  ///////////////////////////////
  // Iterate over SNPs
  
  vector<CSNP*>::iterator s = SNP.begin();
  int l=0;
  
  while ( s != SNP.end() )
    {	
      
      // Uncomment this if we allow permutation for the CMH
      // tests

      // In adaptive mode, possibly skip this test
      //      if (par::adaptive_perm && (!perm.snp_test[l])) 
      //	{
      //	  s++;
      //	  l++;
      //	  continue;
      //	}
      
      // Calculate statistic
      vector<double> n_11(nk,0);
      vector<double> n_12(nk,0);
      vector<double> n_21(nk,0);
      vector<double> n_22(nk,0);

      vector<double> lnOR(nk,0);
      vector<double> SEsq(nk,0);

      ///////////////// 	      
      // Autosomal or haploid?
      
      bool X=false, haploid=false;
      if (par::chr_sex[locus[l]->chr]) X=true;
      else if (par::chr_haploid[locus[l]->chr]) haploid=true;

      /////////////////////////////
      // Iterate over individuals
      
      vector<bool>::iterator i1 = (*s)->one.begin();
      vector<bool>::iterator i2 = (*s)->two.begin();
      vector<Individual*>::iterator gperson = sample.begin();
      

      while ( gperson != sample.end() )
	{
	  
	  // Phenotype for this person (i.e. might be permuted)
	  Individual * pperson = (*gperson)->pperson;
	  
	  // SNP alleles
	  
	  bool s1 = *i1;
	  bool s2 = *i2;

	  int hom = 2;
	  if ( haploid || ( X && (*gperson)->sex ) )
	    hom = 1;
	  
	  // Affected individuals
	  if ( pperson->aff && !pperson->missing )
	    {
	      
	      // Allelic marginal
	      if ( !s1 )
		{
		  if ( !s2 ) // FF hom
		    {
		      n_11[ pperson->sol ] += hom ;
		      
		    }
		  else
		    {
		      n_11[ pperson->sol ]++ ; // FT het
		      n_12[ pperson->sol ]++ ;
		    }
		}
	      else 
		{
		  if ( !s2 ) // FT
		    {
		      gperson++;
		      i1++;
		      i2++;
		      continue;  // skip missing genotypes
		    }
		  else // TT
		    {
		      n_12[ pperson->sol ] += hom ;
		    }
		}
	    }
	  else if ( ! pperson->missing ) // Unaffecteds
	    {
	      // Allelic marginal
	      if ( ! s1 )
		{
		  if ( ! s2 ) // FF
		    {
		      n_21[ pperson->sol ] += hom ;
		    }
	      else
		{
		  n_21[ pperson->sol ] ++ ;
		  n_22[ pperson->sol ] ++ ;
		}
		}
	      else 
		{
		  if ( ! s2 ) // FT
		    {
		      gperson++;
		      i1++;
		      i2++;
		      continue;  // skip missing genotypes
		    }
		  else // TT
		    {
		      n_22[ pperson->sol ] += hom ;
		    }
		}     
	    }
	 
	  // Next individual
	  gperson++;
	  i1++;
	  i2++;
 
	} 
      
      
      // Calculate log(OR) and SE(ln(OR)) for eacsh strata

      double X_total = 0;
      double X_assoc1 = 0;
      double X_assoc2 = 0;
      vector<double> X_indiv(nk,0);
     
      for (int k=0; k<nk; k++)
	{
	  
	  // Add 0.5 to each cell to reduce bias

  	  n_11[k] += 0.5;
  	  n_12[k] += 0.5;
  	  n_21[k] += 0.5;
  	  n_22[k] += 0.5;

	  // ln(OR)
	  
	  lnOR[k] = log ( ( n_11[k] * n_22[k] ) / ( n_12[k] * n_21[k] ) );
	  SEsq[k] =  1/n_11[k] + 1/n_12[k] + 1/n_21[k] + 1/n_22[k] ;
	  
	  X_indiv[k] = (lnOR[k] * lnOR[k]) / SEsq[k];
	  X_total += X_indiv[k];

	  // For the common, strata-adjusted test
	  X_assoc1 += lnOR[k] / SEsq[k];
	  X_assoc2 += 1/ SEsq[k];
	}
      
      // X_total is total chi-square on nk df
      // X_indiv are individual chi-squares, each on 1 df
      // X_homog is test for homogeneity of OR, with nk-1 df 
      // X_assoc is strata-adjusted test, with 1 df

      double X_assoc = (X_assoc1*X_assoc1)/X_assoc2;
      double X_homog = X_total - X_assoc;

      MHOUT << setw(4) << locus[l]->chr << " " 
	    << setw(par::pp_maxsnp) << locus[l]->name << " "
	    << setw(4) << locus[l]->allele1 << " "
	    << setw(4) << locus[l]->allele2 << " "
	    << setw(8) << "NA" << " "
	    << setw(8) << "NA" << " "
	    << setw(8) << "NA" << " "
	    << setw(8) << "NA" << " "	
	    << setw(6) << "TOTAL" << " "
	    << setw(10) << X_total << " "
	    << setw(4) << nk << " "
	    << setw(10) << chiprobP(X_total,nk) << " "
	    << setw(10) << "NA" << "\n";

      MHOUT << setw(4) << locus[l]->chr << " " 
	    << setw(par::pp_maxsnp) << locus[l]->name << " "
	    << setw(4) << locus[l]->allele1 << " "
	    << setw(4) << locus[l]->allele2 << " "
	    << setw(8) << "NA" << " "
	    << setw(8) << "NA" << " "
	    << setw(8) << "NA" << " "
	    << setw(8) << "NA" << " "
	    << setw(6) << "ASSOC" << " "
	    << setw(10) << X_assoc << " "
	    << setw(4) << 1 << " "
	    << setw(10) << chiprobP(X_assoc,1) << " "
	    << setw(10) << "NA" << "\n";

      MHOUT << setw(4) << locus[l]->chr << " " 
	    << setw(par::pp_maxsnp) << locus[l]->name << " "
	    << setw(4) << locus[l]->allele1 << " "
	    << setw(4) << locus[l]->allele2 << " "
	    << setw(8) << "NA" << " "
	    << setw(8) << "NA" << " "
	    << setw(8) << "NA" << " "
	    << setw(8) << "NA" << " "
	    << setw(6) << "HOMOG" << " "
	    << setw(10) << X_homog << " "
	    << setw(4) << nk-1 << " "
	    << setw(10) << chiprobP(X_homog,nk-1) << " "
	    << setw(10) << "NA" << "\n";

      for (int k=0; k<nk; k++)
	{

	  if ( n_11[k] + n_12[k] <= 1.0001 || 
	       n_21[k] + n_22[k] <= 1.0001 ) 
	    {

	      MHOUT << setw(4) << locus[l]->chr << " " 
		    << setw(par::pp_maxsnp) << locus[l]->name << " "
		    << setw(4) << locus[l]->allele1 << " "
		    << setw(4) << locus[l]->allele2 << " "
		    << setw(8) << "NA" << " "
		    << setw(8) << "NA" << " "
		    << setw(8) << n_11[k] + n_12[k] - 1 << " "
		    << setw(8) << n_21[k] + n_22[k] - 1 << " "
		    << setw(6) << kname[k] << " "
		    << setw(10) << "NA" << " "
		    << setw(4) << "NA" << " "
		    << setw(10) << "NA" << " "
		    << setw(10) << "NA" << "\n";
	    }
	  else
	    {
	      
	      MHOUT << setw(4) << locus[l]->chr << " " 
		    << setw(par::pp_maxsnp) << locus[l]->name << " "
		    << setw(4) << locus[l]->allele1 << " "
		    << setw(4) << locus[l]->allele2 << " "
		    << setw(8) << n_11[k]/double(n_11[k]+n_12[k]) << " "
		    << setw(8) << n_21[k]/double(n_21[k]+n_22[k]) << " "
		    << setw(8) << n_11[k] + n_12[k] - 1 << " "
		    << setw(8) << n_21[k] + n_22[k] - 1 << " "
		    << setw(6) << kname[k] << " "
		    << setw(10) << X_indiv[k] << " "
		    << setw(4) << 1 << " "
		    << setw(10) << chiprobP(X_indiv[k],1) <<  " ";
	      double odr = ( n_11[k] * n_22[k] ) / ( n_12[k] * n_21[k] );
	      if ( realnum(odr) )
		MHOUT << setw(10) 
		      << odr << "\n";
	      else
		MHOUT << setw(10) 
		      << "NA" << "\n";
	      
	    }
	}

      
      // Next locus
      s++;
      l++;

    } 

  MHOUT.close();


}