/*--------------------------------------------------------------- Name : SetJoystick Argument : void Return : 0 (succeed), other (failed) About : Set up the Joystick controler Version : Ver 1.0 Date : 2014/03/21 Author : Ryodo Tanaka (Kyushu Institute of Technology) ----------------------------------------------------------------- */ int SetJoystick(void) { //File open if( (JSfd=open(PORT, O_RDONLY)) == -1){ printLOG("File Open JoyStick"); return 1; } //Get JoyStick information ioctl(JSfd, JSIOCGAXES, &num_of_axis); ioctl(JSfd, JSIOCGBUTTONS, &num_of_buttons); ioctl(JSfd, JSIOCGNAME(80), &JSname); //Get data space for axis & buttons axis = (int*)calloc(num_of_axis, sizeof(int)); if(!axis){ printLOG("calloc JoyStick axis"); return 2; } button = (char*)calloc(num_of_buttons, sizeof(char)); if(!button){ printLOG("calloc JoyStick axis"); return 3; } //Use non-blocking mode fcntl(JSfd, F_SETFL, O_NONBLOCK); printf("%s\tis Connected ...\n", JSname); return 0; }
void vcf_file::open() { if (!compressed) { if (filename.substr(filename.size()-3) == ".gz") { warning("Filename ends in '.gz'. Shouldn't you be using --gzvcf?\n"); } vcf_in.open(filename.c_str(), ios::in); if (!vcf_in.is_open()) error("Could not open VCF file: " + filename, 0); } else { gzMAX_LINE_LEN = 1024*1024; gz_readbuffer = new char[gzMAX_LINE_LEN]; gzvcf_in = gzopen(filename.c_str(), "rb"); if (gzvcf_in == NULL) error("Could not open GZVCF file: " + filename, 0); #ifdef ZLIB_VERNUM string tmp(ZLIB_VERSION); printLOG("Using zlib version: " + tmp + "\n"); #if (ZLIB_VERNUM >= 0x1240) gzbuffer(gzvcf_in, gzMAX_LINE_LEN); // Included in zlib v1.2.4 and makes things MUCH faster #else printLOG("Versions of zlib >= 1.2.4 will be *much* faster when reading zipped VCF files.\n"); #endif #endif } }
void error(string err_msg, double value1, double value2, int error_code) { printLOG("Error:" + err_msg + "\n"); stringstream ss; ss << "Value1=" << value1 << " Value2=" << value2 << endl; printLOG(ss.str()); exit(error_code); }
// Log a warning and keep a running total of how many have been issued through
// this function. Once the total exceeds 1000 the run is aborted via error()
// with code 10.
void counted_warning(string err_msg)
{
	static unsigned int n_warnings_seen = 0;

	printLOG(err_msg + "\n");

	if (++n_warnings_seen <= 1000)
		return;

	error("Stopping at 1000 entry-level warnings", 10);
}
void vcf_file::output_indv_in_files(const string &output_file_prefix, vcf_file &diff_vcf_file) { printLOG("Comparing individuals in VCF files...\n"); string output_file = output_file_prefix + ".diff.indv_in_files"; ofstream out(output_file.c_str()); if (!out.is_open()) error("Could not open Indv Differences File: " + output_file, 3); out << "INDV\tFILES" << endl; // Build a list of individuals contained in each file map<string, pair< int, int> > combined_individuals; map<string, pair< int, int> >::iterator combined_individuals_it; return_indv_union(diff_vcf_file, combined_individuals); unsigned int N_combined_indv = combined_individuals.size(); unsigned int N[3]={0,0,0}; for (combined_individuals_it=combined_individuals.begin(); combined_individuals_it!=combined_individuals.end(); ++combined_individuals_it) { if ((combined_individuals_it->second.first != -1) && (combined_individuals_it->second.second != -1)) { N[0]++; out << combined_individuals_it->first << "\tB" << endl; } else if (combined_individuals_it->second.first != -1) { N[1]++; out << combined_individuals_it->first << "\t1" << endl; } else if (combined_individuals_it->second.second != -1) { N[2]++; out << combined_individuals_it->first << "\t2" << endl; } else error("Unhandled case"); } out.close(); printLOG("N_combined_individuals:\t" + int2str(N_combined_indv) + "\n"); printLOG("N_individuals_common_to_both_files:\t" + int2str(N[0]) + "\n"); printLOG("N_individuals_unique_to_file1:\t" + int2str(N[1]) + "\n"); printLOG("N_individuals_unique_to_file2:\t" + int2str(N[2]) + "\n"); }
// Log a warning the first time a given message text is seen; later calls with
// the same text are silent.
void one_off_warning(string err_msg)
{
	static set<string> already_reported;

	// Nothing to do when this exact message has been logged before.
	if (already_reported.count(err_msg) != 0)
		return;

	printLOG(err_msg + "\n");
	already_reported.insert(err_msg);
}
/*----------------------------------------------------------- Name : SetLRFShow Argument : int id (LRF ID) Return : 0 (success) other(failed) About : Setup for LRFShow Version : Ver 1.0 Date : 2014/05/25 Author : Ryodo Tanaka (Kyushu Institute of Technology) ------------------------------------------------------------*/ int SetLRFShow(const int id) { int i; if(id == LRF_ALL_ID){ for(i=0; i<NUM_OF_LRF; i++){ img[i] = cvCreateImage(cvSize(LRF_WINDOW_SIZE,LRF_WINDOW_SIZE), IPL_DEPTH_8U, 3); if(!img[i]){ printLOG("cvCreateImage() LRFShow"); exit(1); } } } else { img[id] = cvCreateImage(cvSize(LRF_WINDOW_SIZE,LRF_WINDOW_SIZE), IPL_DEPTH_8U, 3); if(!img[id]){ printLOG("cvCreateImage() LRFShow"); exit(1); } } return 0; }
void vcf_file::print(const string &output_file_prefix, const set<string> &INFO_to_keep, bool keep_all_INFO) { printLOG("Outputting VCF file... "); unsigned int ui; string output_file = output_file_prefix + ".recode.vcf"; ofstream out(output_file.c_str()); if (!out.is_open()) error("Could not open VCF Output File: " + output_file, 3); for (ui=0; ui<meta.size(); ui++) out << meta[ui] << endl; out << "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"; if (N_indv > 0) out << "\tFORMAT"; for (ui=0; ui<N_indv; ui++) if (include_indv[ui]) out << "\t" << indv[ui]; out << endl; string vcf_line; for (unsigned int s=0; s<N_entries; s++) if (include_entry[s] == true) { get_vcf_entry(s, vcf_line); vcf_entry e(N_indv, vcf_line); e.parse_basic_entry(true, true, true); e.parse_full_entry(true); e.parse_genotype_entries(true,true,true,true); e.print(out, INFO_to_keep, keep_all_INFO, include_indv, include_genotype[s]); } out.close(); printLOG("Done\n"); }
int WAV::freeWAVData() { try { if (_monoral8 != (Monoral8*) NULL) { std::cout << "free monoral8" << std::endl; free(_monoral8); } if (_monoral16 != (Monoral16*) NULL) { free(_monoral16); } if (_stereo8 != (Stereo8*) NULL) { free(_stereo8); } if (_stereo16 != (Stereo16*) NULL) { free(_stereo16); } } catch (const char* e) { printLOG("free error"); printf("%s\n", e); exit(-1); } //_dataSize=0; //_sampleCount=0; _monoral8 = (Monoral8*) NULL; _monoral16 = (Monoral16*) NULL; _stereo8 = (Stereo8*) NULL; _stereo16 = (Stereo16*) NULL; return 0; }
// Call LD blocks chromosome by chromosome and write them to
// "<output_file_name>.blocks" and "<output_file_name>.blocks.det".
//
// For every chromosome present in `scaffold`, all marker pairs inside the
// distance window are tested with PairwiseLinkage; pairs whose D' confidence
// bounds pass the "strong LD" cutoffs become candidate blocks. Candidates are
// then visited in reverse order of the strongPairs set, skipped when either
// end is already part of an accepted block, and accepted when the fraction of
// "strong" pairs among informative pairs exceeds informFrac.
//
// Parameters: null1, null2 are not used by the body.
// Returns:    an empty map (blocks0); the blocks themselves are only written
//             to the two output files.
map<Range,vector<int> > Plink::mkBlks(int null1, int null2 )
{

  // First SNP, vector of SNPs (inc. first)
  map< int, vector<int> > blocks;

  // Some constants
  const double cutHighCI = 0.98;
  const double cutLowCI = 0.70;
  // Indexed by the number of (non-skipped) markers in a candidate block
  const double cutLowCIVar [5] = {0,0,0.80,0.50,0.50};
  const double maxDist [5] = {0,0,20000,30000,1000000};
  const double recHighCI = 0.90;
  const double informFrac = 0.95;
  const double fourGameteCutoff = 0.01;
  const double mafThresh = 0.05;

  // Set to skip SNPs with low MAFs
  // Uses genome-wide reference number: need to allocate for all SNPs here
  vector<bool> skipMarker(nl_all,false);
  for (int x = 0; x < nl_all; x++)
    skipMarker[x] = locus[x]->freq < mafThresh;

  // Consider each chromosome one at a time; skip X for now
  // NOTE(review): locus[0] and locus[nl_all-1] are read unconditionally;
  //               this presumably relies on nl_all > 0 -- confirm at callers.
  int startChromosome = locus[ 0 ]->chr;
  int finalChromosome = locus[ nl_all - 1 ]->chr;

  for (int chr = startChromosome ; chr <= finalChromosome; chr++)
    {

      if ( scaffold.find(chr) == scaffold.end() )
	continue;

      int fromPosition = scaffold[chr].lstart;
      int toPosition = scaffold[chr].lstop;
      // nsnps is computed but not read again in this function
      int nsnps = toPosition - fromPosition + 1;

      /////////////////////////////////////////////////////////////////////////
      // Make a list of marker pairs in "strong LD", sorted by distance apart

      set<LDPair,Pair_cmp> strongPairs;
      // Cache of every pair statistic computed below, keyed by (x,y)
      map<int2,DPrime> dpStore;

      // These three are shadowed by the per-candidate counters declared
      // inside the block-construction loop further down
      int numStrong = 0;
      int numRec = 0;
      int numInGroup = 0;

      // Each pair of markers
      for (int x = fromPosition; x < toPosition; x++)
	{

	  if ( ! par::silent )
	    {
	      std::cerr << "Chromosome " << locus[x]->chr << ", position " << locus[x]->bp/1000000.0 << "Mb \r";
	    }

	  for (int y = x+1; y <= toPosition; y++)
	    {

	      if ( locus[x]->chr != locus[y]->chr )
		continue;

	      // Pair is outside the distance window
	      // NOTE(review): a bp difference is compared against a parameter
	      //               named *_kb -- confirm the units it is stored in.
	      if ( ( locus[y]->bp - locus[x]->bp ) > par::disp_r_window_kb )
		{
		  continue;
		}

	      if ( locus[x]->freq == 0 || locus[y]->freq == 0 )
		continue;

	      PairwiseLinkage thisPair(x,y);
	      thisPair.calculateLD();
	      thisPair.calculateCI();

	      double lod = thisPair.lod;
	      double lowCI = thisPair.dp_lower;
	      double highCI = thisPair.dp_upper;

	      int2 t(x,y);
	      DPrime d;
	      d.dp = thisPair.dp;
	      d.dpl = lowCI;
	      d.dpu = highCI;
	      d.lod = lod;
	      dpStore.insert( make_pair( t,d ) );

	      // Is this pair in strong LD?
	      if (lod < -90) continue; //missing data

	      if (highCI < cutHighCI || lowCI < cutLowCI)
		continue; //must pass "strong LD" test

	      // Store this pair
	      LDPair p(x,y, abs( locus[x]->bp - locus[y]->bp ) );

	      strongPairs.insert( p );

	    }
	}

      // Now we have a list of SNPs in strong LD within this region
      // Now construct blocks based on this

      set<int> used;

      // #blocks:
      // (blockArray and cnt are declared but not used below)
      vector<vector<int> > blockArray;

      int cnt = 0;

      // NOTE(review): the iterator type names set<LDPair> while the container
      //               is set<LDPair,Pair_cmp>; this depends on both sharing
      //               one iterator type in the standard library in use.
      for ( set<LDPair>::reverse_iterator i = strongPairs.rbegin(); i != strongPairs.rend(); ++i )
	{

	  // Per-candidate counters
	  int numStrong = 0;
	  int numRec = 0;
	  int numInGroup = 0;

	  vector<int> thisBlock;

	  int first = i->s1;
	  int last = i->s2;
	  long sep = i->dist;

	  // See if this block overlaps with another:
	  if ( used.find(first) != used.end() || used.find(last) != used.end() )
	    {
	      continue;
	    }

	  // Next, count the number of markers in the block.
	  // (nb. assume all SNPs belong)
	  for (int x = first; x <=last ; x++)
	    {
	      if( !skipMarker[x] )
		numInGroup++;
	    }

	  // Skip it if it is too long in bases for its size in markers
	  if (numInGroup < 4 && sep > maxDist[numInGroup])
	    {
	      continue;
	    }

	  // Add first SNP
	  // (added even when skipMarker[first] is set)
	  thisBlock.push_back( first );

	  // Test block: requires 95% of informative markers to be "strong"
	  for (int y = first+1; y <= last; y++)
	    {
	      if (skipMarker[y])
		{
		  continue;
		}

	      thisBlock.push_back(y);

	      //loop over columns in row y
	      for (int x = first; x < y; x++)
		{

		  if (skipMarker[x])
		    continue;

		  double lod;
		  double lowCI;
		  double highCI;

		  map<int2,DPrime>::iterator l = dpStore.find( int2(x,y) );

		  if ( l == dpStore.end() )
		    {
		      // Recalculate
		      PairwiseLinkage thisPair(x,y);
		      thisPair.calculateLD();
		      thisPair.calculateCI();

		      lod = thisPair.lod;
		      lowCI = thisPair.dp_lower;
		      highCI = thisPair.dp_upper;
		    }
		  else
		    {
		      // Get the right bits
		      lod = l->second.lod;
		      lowCI = l->second.dpl;
		      highCI = l->second.dpu;
		    }

		  // Monomorphic marker error
		  if ( lod < -90)
		    continue;

		  // Skip bad markers
		  if ( lod == 0 && lowCI == 0 && highCI == 0)
		    continue;

		  // For small blocks use different CI cutoffs
		  if (numInGroup < 5)
		    {
		      if (lowCI > cutLowCIVar[numInGroup] && highCI >= cutHighCI)
			numStrong++;
		    }
		  else
		    {
		      if (lowCI > cutLowCI && highCI >= cutHighCI)
			numStrong++; //strong LD
		    }

		  if (highCI < recHighCI)
		    numRec++; //recombination

		}
	    }

	  // Change the definition somewhat for small blocks
	  // (minimum number of informative pairs: 6, 3 or 1)
	  if (numInGroup > 3)
	    {
	      if (numStrong + numRec < 6)
		{
		  continue;
		}
	    }
	  else if (numInGroup > 2)
	    {
	      if (numStrong + numRec < 3)
		{
		  continue;
		}
	    }
	  else
	    {
	      if (numStrong + numRec < 1)
		{
		  continue;
		}
	    }

	  // If this qualifies as a block, add to the block list, but in
	  // order by first marker number:
	  // (the checks above guarantee numStrong + numRec >= 1 here)
	  if ( (double)numStrong/(double)(numStrong + numRec) > informFrac)
	    {
	      blocks.insert( make_pair( first , thisBlock ));

	      // Track that these SNPs belong to a block
	      for (int u = first; u <= last; u++)
		used.insert(u);
	    }

	}

      // Next chromosome
    }

  if ( ! par::silent )
    cerr << "\n";

  map<int,vector<int> >::iterator j = blocks.begin();

  printLOG(int2str( blocks.size() ) + " blocks called, writing list to [ " + par::output_file_name + ".blocks ]\n");
  ofstream O1( (par::output_file_name+".blocks").c_str() , ios::out );

  printLOG("Writing extra block details to [ " + par::output_file_name + ".blocks.det ]\n");
  ofstream O2( (par::output_file_name+".blocks.det").c_str() , ios::out );

  // Header of the detail file
  O2 << setw(4) << "CHR" << " " << setw(12) << "BP1" << " " << setw(12) << "BP2" << " " << setw(12) << "KB" << " " << setw(6) << "NSNPS" << " " << setw(4) << "SNPS" << "\n";

  // NOTE(review): the output below reads loci through PP->locus while the
  //               rest of the function uses this->locus -- presumably PP
  //               refers to the same Plink object; confirm.
  while ( j != blocks.end() )
    {

      // .blocks: "*" followed by the SNP names of the block
      O1 << "*";
      vector<int> & b = j->second;
      for (int k=0; k<b.size(); k++)
	O1 << " " << PP->locus[b[k]]->name;
      O1 << "\n";

      // .blocks.det: chromosome, first/last bp, span in kb, SNP count
      O2 << setw(4) << PP->locus[b[0]]->chr << " " << setw(12) << PP->locus[b[0]]->bp << " " << setw(12) << PP->locus[b[b.size()-1]]->bp << " " << setw(12) << (double)(PP->locus[b[b.size()-1]]->bp - PP->locus[b[0]]->bp + 1)/1000.0 << " " << setw(6) << b.size() << " ";

      // ... followed by the SNP names joined with '|'
      for (int k=0; k<b.size(); k++)
	{
	  if ( k>0 )
	    O2 << "|" << PP->locus[b[k]]->name;
	  else
	    O2 << PP->locus[b[k]]->name;
	}

      O2 << "\n";

      ++j;
    }

  O1.close();
  O2.close();

  // List of blocks created here
  // (dummy; not used)
  map<Range,vector<int> > blocks0;
  return blocks0;

}
void vcf_file::output_sites_in_files(const string &output_file_prefix, vcf_file &diff_vcf_file) { printLOG("Comparing sites in VCF files...\n"); map<pair<string, int>, pair<int, int> > CHROMPOS_to_filepos_pair; map<pair<string, int>, pair<int, int> >::iterator CHROMPOS_to_filepos_pair_it; return_site_union(diff_vcf_file, CHROMPOS_to_filepos_pair); string vcf_line; string CHROM; int POS; string output_file = output_file_prefix + ".diff.sites_in_files"; ofstream sites_in_files(output_file.c_str()); sites_in_files << "CHROM\tPOS\tIN_FILE\tREF\tALT1\tALT2" << endl; int s1, s2; int N_common_SNPs = 0, N_SNPs_file1_only=0, N_SNPs_file2_only=0; for (CHROMPOS_to_filepos_pair_it=CHROMPOS_to_filepos_pair.begin(); CHROMPOS_to_filepos_pair_it!=CHROMPOS_to_filepos_pair.end(); ++CHROMPOS_to_filepos_pair_it) { s1 = CHROMPOS_to_filepos_pair_it->second.first; s2 = CHROMPOS_to_filepos_pair_it->second.second; CHROM = CHROMPOS_to_filepos_pair_it->first.first; POS = CHROMPOS_to_filepos_pair_it->first.second; vcf_entry e1(N_indv); vcf_entry e2(diff_vcf_file.N_indv); // Read entries from file (if available) if (s1 != -1) { get_vcf_entry(s1, vcf_line); e1.reset(vcf_line); } if (s2 != -1) { diff_vcf_file.get_vcf_entry(s2, vcf_line); e2.reset(vcf_line); } e1.parse_basic_entry(true); e2.parse_basic_entry(true); // Set the reference to the non-missing entry (if available) string REF = e1.get_REF(); string REF2 = e2.get_REF(); if ((REF == "N") || (REF == ".")) REF = REF2; if ((REF2 == "N") || (REF2 == ".")) REF2 = REF; if ((REF != REF2) && (REF2 != "N") && (REF != "N") && (REF != ".") && (REF2 != ".")) warning("Non-matching REF at " + CHROM + ":" + int2str(POS) + " " + REF + "/" + REF2 + ". 
Diff results may be unreliable."); sites_in_files << CHROM << "\t" << POS << "\t"; if ((s1 != -1) && (s2 != -1)) { N_common_SNPs++; sites_in_files << "B"; } else if ((s1 != -1) && (s2 == -1)) { N_SNPs_file1_only++; sites_in_files << "1"; } else if ((s1 == -1) && (s2 != -1)) { N_SNPs_file2_only++; sites_in_files << "2"; } else error("SNP in neither file!?"); sites_in_files << "\t" << REF << "\t" << e1.get_ALT() << "\t" << e2.get_ALT() << endl; } sites_in_files.close(); printLOG("Found " + int2str(N_common_SNPs) + " SNPs common to both files.\n"); printLOG("Found " + int2str(N_SNPs_file1_only) + " SNPs only in main file.\n"); printLOG("Found " + int2str(N_SNPs_file2_only) + " SNPs only in second file.\n"); }
// Read VCF file void vcf_file::scan_file(const string &chr, const string &exclude_chr, bool force_write_index) { bool filter_by_chr = (chr != ""); bool exclude_by_chr = (exclude_chr != ""); string index_filename = filename + ".vcfidx"; bool could_read_index_file = false; if (force_write_index == false) could_read_index_file = read_index_file(index_filename); string CHROM, last_CHROM=""; int POS, last_POS = -1; if (could_read_index_file == false) { printLOG("Building new index file.\n"); string line, CHROM, last_CHROM = ""; streampos filepos; char c; N_entries=0; N_indv = 0; while (!feof()) { filepos = get_filepos(); c = peek(); if ((c == '\n') || (c == '\r')) { read_line(line); continue; } else if (c == EOF) break; if (c == '#') { read_line(line); if (line[1] == '#') { // Meta information parse_meta(line); } else { // Must be header information: #CHROM POS ID REF ALT QUAL FILTER INFO (FORMAT NA00001 NA00002 ... ) parse_header(line); } } else { // Must be a data line read_CHROM_and_POS_and_skip_remainder_of_line(CHROM, POS); if (last_CHROM != CHROM) { printLOG("\tScanning Chromosome: " + CHROM + "\n"); last_CHROM = CHROM; } if (POS == last_POS) { one_off_warning("\tWarning - file contains entries with the same position. 
This is not supported by vcftools, and may cause unexpected behaviour.\n"); } last_POS = POS; entry_file_locations.push_back(filepos); N_entries++; } } write_index_file(index_filename); } printLOG("File contains " + int2str(N_entries) + " entries and " + int2str(N_indv) + " individuals.\n"); vector<string> meta_lines = meta; meta.resize(0); for (unsigned int ui=0; ui<meta_lines.size(); ui++) parse_meta(meta_lines[ui]); has_genotypes = (N_indv > 0); bool already_found_required_chr = false; bool already_filtered_required_chr = false; if ((exclude_by_chr == true) || (filter_by_chr == true)) { printLOG("Filtering by chromosome.\n"); for (unsigned int ui=0; ui<N_entries; ui++) { if (already_found_required_chr == true) { printLOG("Skipping Remainder.\n"); entry_file_locations.erase(entry_file_locations.begin()+ui, entry_file_locations.end()); break; } if (already_filtered_required_chr == true) { printLOG("Skipping Remainder.\n"); break; } set_filepos(entry_file_locations[ui]); read_CHROM_only(CHROM); if (last_CHROM != CHROM) { printLOG("\tChromosome: " + CHROM + "\n"); if ((filter_by_chr == true) && (last_CHROM == chr)) already_found_required_chr = true; if ((exclude_by_chr == true) && (last_CHROM == exclude_chr)) already_filtered_required_chr = true; last_CHROM = CHROM; } if ((exclude_by_chr == true) && (CHROM == exclude_chr)) { entry_file_locations[ui] = -1; continue; } if ((filter_by_chr == true) && (CHROM != chr)) { entry_file_locations[ui] = -1; continue; } } sort(entry_file_locations.begin(), entry_file_locations.end()); while((entry_file_locations.size() > 0) && (entry_file_locations[0] < 0)) entry_file_locations.pop_front(); N_entries = entry_file_locations.size(); printLOG("Keeping " + int2str(N_entries) + " entries on specified chromosomes.\n"); } include_indv.clear(); include_indv.resize(N_indv, true); include_entry.clear(); include_entry.resize(N_entries, true); include_genotype.clear(); include_genotype.resize(N_entries, vector<bool>(N_indv, true)); }
// Send a warning message, terminated by a newline, to the log.
void warning(string err_msg)
{
	const string log_line = err_msg + "\n";
	printLOG(log_line);
}
void error(string err_msg, int error_code) { printLOG("Error:" + err_msg + "\n"); exit(error_code); }
// Fit a linear (quantitative trait) or logistic (binary trait) model, either
// once per locus or a single time when par::assoc_glm_without_main_snp is
// set, and optionally write the results to "<output>.assoc.linear" /
// "<output>.assoc.logistic".
//
// print_results : open the results file and print per-model output
// perm          : supplies perm.snp_test[], used to skip loci in adaptive
//                 permutation mode
//
// Returns a vector with one statistic per test (lm->getStatistic()); entries
// for skipped loci keep their initial value.
//
// For the haplotype/set/dosage/CNV/score/gvar/rare-test modes the last fitted
// model is handed over via the member `model` instead of being deleted here.
vector_t Plink::glmAssoc(bool print_results, Perm & perm)
{

  // The model.cpp functions require a SNP-major structure, if SNP
  // data are being used. There are some exceptions to this however,
  // listed below

  if ( par::SNP_major && ! ( par::epi_genebased || par::set_score || par::set_step || par::proxy_glm || par::dosage_assoc || par::cnv_enrichment_test || par::cnv_glm || par::score_test || par::rare_test || par::gvar ) )
    SNP2Ind();

  // Test all SNPs 1 at a time automatically, or is this
  // a tailored single test?

  int ntests = par::assoc_glm_without_main_snp ? 1 : nl_all;

  vector<double> results(ntests);

  if ( print_results && par::qt && par::multtest )
    tcnt.resize(ntests);

  ofstream ASC;
  if (print_results)
    {
      string f = par::output_file_name;
      if ( par::bt)
	{
	  f += ".assoc.logistic";
	  printLOG("Writing logistic model association results to [ " + f + " ] \n");
	}
      else
	{
	  f += ".assoc.linear";
	  printLOG("Writing linear model association results to [ " + f + " ] \n");
	}

      ASC.open(f.c_str(),ios::out);

      // Column header
      ASC << setw(4) << "CHR" << " " << setw(par::pp_maxsnp) << "SNP" << " " << setw(10) << "BP" << " " << setw(4) << "A1" << " " << setw(10) << "TEST" << " " << setw(8) << "NMISS" << " ";

      if ( par::bt && ! par::return_beta )
	ASC << setw(10) << "OR" << " ";
      else
	ASC << setw(10) << "BETA" << " ";

      if (par::display_ci)
	ASC << setw(8) << "SE" << " " << setw(8) << string("L"+dbl2str(par::ci_level*100)) << " " << setw(8) << string("U"+dbl2str(par::ci_level*100)) << " ";

      ASC << setw(12) << "STAT" << " " << setw(12) << "P" << " " << "\n";

      ASC.precision(4);
    }

  /////////////////////////////
  // Determine sex distribution

  int nmales = 0, nfemales = 0;
  for (int i=0; i<n; i++)
    if ( ! sample[i]->missing )
      {
	if ( sample[i]->sex )
	  nmales++;
	else
	  nfemales++;
      }

  // A sex covariate is only added when both sexes are present
  bool variationInSex = nmales > 0 && nfemales > 0;

  //////////////////////////////////////////
  // Iterate over each locus, or just once

  for (int l=0; l<ntests; l++)
    {

      // Skip possibly (in all-locus mode)
      if ( par::adaptive_perm && ( ! par::assoc_glm_without_main_snp ) && ( ! perm.snp_test[l]) )
	continue;

      //////////////////////////////////////////////////////////
      // X-chromosome, haploid?
      // xchr_model 0: skip non-autosomal SNPs

      bool X=false;
      bool automaticSex=false;

      if ( ! par::assoc_glm_without_main_snp )
	{
	  if ( par::xchr_model == 0 )
	    {
	      if ( par::chr_sex[locus[l]->chr] || par::chr_haploid[locus[l]->chr] )
		continue;
	    }
	  else if (par::chr_sex[locus[l]->chr])
	    X=true;
	}

      //////////////////////////////////////////////////////////
      // A new GLM

      Model * lm;

      //////////////////////////////////////////////////////////
      // Linear or logistic?

      if (par::bt)
	{
	  LogisticModel * m = new LogisticModel(this);
	  lm = m;
	}
      else
	{
	  LinearModel * m = new LinearModel(this);
	  lm = m;
	}

      //////////////////////////////////////////////////////////
      // A temporary fix

      if ( par::dosage_assoc || par::cnv_enrichment_test || par::cnv_glm || par::score_test || par::set_score || par::proxy_glm || par::gvar || par::rare_test )
	lm->hasSNPs(false);

      //////////////////////////////////////////////////////////
      // Set missing data

      lm->setMissing();

      //////////////////////////////////////////////////////////
      // Set genetic model

      if ( par::glm_dominant )
	lm->setDominant();
      else if ( par::glm_recessive || par::twoDFmodel_hethom )
	lm->setRecessive();

      string mainEffect = "";

      bool genotypic = false;

      /////////////////////////////////////////////////
      // Main SNP

      if ( ! par::assoc_glm_without_main_snp )
	{

	  // The 2-df model is never used on haploid chromosomes
	  genotypic = par::chr_haploid[locus[l]->chr] ? false : par::twoDFmodel ;

	  // Models
	  //                AA  AB  BB
	  // Additive        0   1   2
	  // Dominant        0   1   1
	  // Recessive       0   0   1
	  // Genotypic(1)
	  //  Additive       0   1   2
	  //  Dom Dev.       0   1   0
	  // Genotypic(2)
	  //  Homozygote     0   0   1
	  //  Heterozygote   0   1   0

	  ////////////////////////////////////////////////////////////
	  // An additive effect? (or single coded effect) of main SNP

	  if ( par::glm_recessive )
	    mainEffect = "REC";
	  else if ( par::glm_dominant )
	    mainEffect = "DOM";
	  else if ( par::twoDFmodel_hethom )
	    mainEffect = "HOM";
	  else
	    mainEffect = "ADD";

	  lm->addAdditiveSNP(l);
	  lm->label.push_back(mainEffect);

	  //////////////////////////////////////////////////////////
	  // Or a 2-df additive + dominance model?

	  if ( genotypic )
	    {
	      lm->addDominanceSNP(l);

	      if ( par::twoDFmodel_hethom )
		lm->label.push_back("HET");
	      else
		lm->label.push_back("DOMDEV");
	    }

	}

      //////////////////////////////////////////////////////////
      // Haplotypes: WHAP test (grouped?)

      if ( par::chap_test )
	{

	  // Use whap->group (a list of sets) to specify these, from
	  // the current model (either alternate or null)

	  // Start from second category (i.e. first is reference)
	  for (int h=1; h < whap->current->group.size(); h++)
	    {
	      lm->addHaplotypeDosage( whap->current->group[h] );
	      lm->label.push_back( "WHAP"+int2str(h+1) );
	    }
	}

      //////////////////////////////////////////////////////////
      // Haplotypes: proxy test

      if ( par::proxy_glm )
	{

	  // Unlike WHAP tests, we now will only ever have two
	  // categories; and a single tested coefficient

	  set<int> t1 = haplo->makeSetFromMap(haplo->testSet);
	  lm->addHaplotypeDosage( t1 );
	  lm->label.push_back( "PROXY" );
	}

      if ( par::test_hap_GLM )
	{

	  // Assume model specified in haplotype sets
	  // Either 1 versus all others, or H-1 versus
	  // terms for omnibus

	  set<int>::iterator i = haplo->sets.begin();
	  while ( i != haplo->sets.end() )
	    {
	      set<int> t;
	      t.insert(*i);
	      lm->addHaplotypeDosage( t );
	      lm->label.push_back( haplo->haplotypeName( *i ) );
	      ++i;
	    }
	}

      //////////////////////////////////////////////////////////
      // Conditioning SNPs?
      // (might be X or autosomal, dealt with automatically)

      if (par::conditioning_snps)
	{
	  if ( par::chap_test )
	    {
	      // Only the conditioning SNPs flagged in the current WHAP model
	      for (int c=0; c<conditioner.size(); c++)
		{
		  if ( whap->current->masked_conditioning_snps[c] )
		    {
		      lm->addAdditiveSNP(conditioner[c]);
		      lm->label.push_back(locus[conditioner[c]]->name);
		    }
		}
	    }
	  else
	    {
	      for (int c=0; c<conditioner.size(); c++)
		{
		  lm->addAdditiveSNP(conditioner[c]);
		  lm->label.push_back(locus[conditioner[c]]->name);
		}
	    }
	}

      //////////////////////////////////////////////////////////
      // Sex-covariate (necessary for X chromosome models, unless
      // explicitly told otherwise)

      if ( ( par::glm_sex_effect || ( X && !par::glm_no_auto_sex_effect ) ) && variationInSex )
	{
	  automaticSex = true;
	  lm->addSexEffect();
	  lm->label.push_back("SEX");
	}

      //////////////////////////////////////////////////////////
      // Covariates?

      if (par::clist)
	{
	  for (int c=0; c<par::clist_number; c++)
	    {
	      lm->addCovariate(c);
	      lm->label.push_back(clistname[c]);
	    }
	}

      //////////////////////////////////////////////////////////
      // Interactions

      // addInteraction() takes parameter numbers
      // i.e. not covariate codes

      // 0 intercept
      // 1 {A}
      //   {D}
      //   {conditioning SNPs}
      //   {sex effect}
      //   {covariates}

      // Allow for interactions between conditioning SNPs, sex, covariates, etc

      ////////////////////////////////////////
      // Basic SNP x covariate interaction?

      // Currently -- do not allow interactions if no main effect
      // SNP -- i.e. we need a recoding of things here.

      if ( par::simple_interaction && ! par::assoc_glm_without_main_snp )
	{

	  // A, D and haplotypes by conditioning SNPs, sex, covariates

	  // cindex walks over the parameter numbers that follow the main
	  // SNP term(s): 2 after {A}, 3 after {A},{D}
	  int cindex = 2;
	  if ( genotypic )
	    cindex = 3;

	  // NOTE(review): this loop runs over conditioner.size() without
	  //               testing par::conditioning_snps -- presumably the
	  //               list is empty when that option is off; confirm.
	  for (int c=0; c<conditioner.size(); c++)
	    {
	      lm->addInteraction(1,cindex);
	      lm->label.push_back(mainEffect+"xCSNP"+int2str(c+1));

	      if ( genotypic )
		{
		  lm->addInteraction(2,cindex);
		  if ( par::twoDFmodel_hethom )
		    lm->label.push_back("HETxCSNP"+int2str(c+1));
		  else
		    lm->label.push_back("DOMDEVxCSNP"+int2str(c+1));
		}

	      cindex++;
	    }

	  if ( automaticSex )
	    {
	      lm->addInteraction(1,cindex);
	      lm->label.push_back(mainEffect+"xSEX");

	      if ( genotypic )
		{
		  lm->addInteraction(2,cindex);
		  if ( par::twoDFmodel_hethom )
		    lm->label.push_back("HETxSEX");
		  else
		    lm->label.push_back("DOMDEVxSEX");
		}

	      cindex++;
	    }

	  for (int c=0; c<par::clist_number; c++)
	    {
	      lm->addInteraction(1,cindex);
	      lm->label.push_back(mainEffect+"x"+clistname[c]);

	      if ( genotypic )
		{
		  lm->addInteraction(2,cindex);

		  if ( par::twoDFmodel_hethom )
		    lm->label.push_back("HETx"+clistname[c]);
		  else
		    lm->label.push_back("DOMDEVx"+clistname[c]);
		}

	      cindex++;
	    }
	}

      //////////////////////////////
      // Fancy X chromosome models

      if ( X && automaticSex && par::xchr_model > 2 )
	{

	  // Interaction between allelic term and sex (i.e.
	  // allow scale of male effect to vary)

	  // sindex = parameter number of the sex term: it follows the main
	  // SNP term(s) and all conditioning SNPs
	  int sindex = 2;
	  if ( genotypic )
	    sindex++;
	  sindex += conditioner.size();

	  lm->addInteraction(2,sindex);
	  lm->label.push_back("XxSEX");

	  // xchr model 3 : test ADD + XxSEX
	  // xchr model 4 : test ADD + DOM + XxSEX
	}

      //////////////////////////////
      // Build design matrix

      lm->buildDesignMatrix();

      //////////////////////////////
      // Clusters specified?

      if ( par::include_cluster )
	{
	  lm->setCluster();
	}

      //////////////////////////////////////////////////
      // Fit linear or logistic model (Newton-Raphson)

      lm->fitLM();

      ////////////////////////////////////////
      // Check for multi-collinearity

      lm->validParameters();

      ////////////////////////////////////////
      // Obtain estimates and statistic

      if (print_results)
	lm->displayResults(ASC,locus[l]);

      //cout << setw(25) << lm->getVar()[1] << " " << lm->isValid() << " " << realnum(lm->getVar()[1]) << endl;
      //for test purpose only

      ////////////////////////////////////////////////
      // Test linear hypothesis (multiple parameters)

      // Perform if:
      //   automatic 2df genotypic test ( --genotypic )
      //     OR
      //   sex-tests ( --xchr-model )
      //     OR
      //   test of everything ( --test-all )
      //     OR
      //   user has specified user-defined test ( --tests )

      if ( ( genotypic && ! par::glm_user_parameters ) || par::glm_user_test || par::test_full_model )
	{

	  vector_t h; // dim = number of fixes (to =0)
	  matrix_t H; // row = number of fixes; cols = np
	  int df;
	  string testname;

	  ////////////////////////////////////////////////
	  // Joint test of all parameters

	  if (par::test_full_model)
	    {
	      // Every parameter except the intercept is fixed to 0
	      df = lm->getNP() - 1;
	      h.resize(df,0);
	      testname = "FULL_"+int2str(df)+"DF";
	      sizeMatrix(H,df,lm->getNP());
	      for (int i=0; i<df; i++)
		H[i][i+1] = 1;
	    }

	  ////////////////////////////////////////////////
	  // Joint test of user-specified parameters

	  else if (par::glm_user_test)
	    {
	      df = par::test_list.size();
	      h.resize(df,0);
	      testname = "USER_"+int2str(df)+"DF";
	      sizeMatrix(H,df,lm->getNP());
	      // Parameter numbers beyond the model size are silently ignored
	      for (int i=0; i<df; i++)
		if ( par::test_list[i]<lm->getNP() )
		  H[i][par::test_list[i]] = 1;
	    }

	  ////////////////////////////////////////////////
	  // Joint test of additive and dominant models

	  else if ( genotypic )
	    {
	      testname = "GENO_2DF";
	      df = 2;
	      h.resize(2,0);
	      sizeMatrix(H,2,lm->getNP());
	      H[0][1] = H[1][2] = 1;
	    }
	  // NOTE(review): given the enclosing condition, one of the three
	  //               branches above is always taken, so this branch
	  //               (which sets neither df nor H) looks unreachable.
	  else if ( X && par::xchr_model == 3 )
	    {
	      testname = "XMOD_2DF";
	    }

	  ////////////////////////////////////////////////
	  // Joint test of all parameters

	  double chisq = lm->isValid() ? lm->linearHypothesis(H,h) : 0;
	  double pvalue = chiprobP(chisq,df);

	  // If filtering p-values
	  // NOTE(review): this row is written to ASC without testing
	  //               print_results; ASC is only opened when
	  //               print_results is true.
	  if ( (!par::pfilter) || pvalue <= par::pfvalue )
	    {

	      ASC << setw(4) << locus[l]->chr << " " << setw(par::pp_maxsnp) << locus[l]->name << " " << setw(10) << locus[l]->bp << " " << setw(4) << locus[l]->allele1 << " " << setw(10) << testname << " " << setw(8) << lm->Ysize() << " " << setw(10) << "NA" << " ";

	      if (par::display_ci)
		ASC << setw(8) << "NA" << " " << setw(8) << "NA" << " " << setw(8) << "NA" << " ";

	      if (lm->isValid() && realnum(chisq) )
		ASC << setw(12) << chisq << " " << setw(12) << pvalue << "\n";
	      else
		ASC << setw(12) << "NA" << " " << setw(12) << "NA" << "\n";
	    }

	}

      ////////////////////////////////////////
      // Store statistic (1 df chisq), and p-value
      // if need be ( based on value of testParameter )

      if ( ! par::assoc_glm_without_main_snp )
	results[l] = lm->getStatistic();

      if ( par::qt && print_results && par::multtest )
	tcnt[l] = lm->Ysize() - lm->getNP();

      //////////////////////////////////////////////
      // Clear up linear model, if no longer needed

      if ( par::chap_test || par::test_hap_GLM || par::set_step || par::set_score || par::proxy_glm || par::dosage_assoc || par::cnv_enrichment_test || par::cnv_glm || par::score_test || par::gvar || par::rare_test )
	{
	  // Responsibility to clear up in parent routine
	  model = lm;
	}
      else
	{
	  delete lm;
	}

      // Flush output buffer
      ASC.flush();

      // Next SNP
    }

  if (print_results)
    ASC.close();

  return results;
}
// Cochran-Mantel-Haenszel 2x2xK test (disease x allele x cluster), one
// statistic per SNP.
//
//   perm     : perm.snp_test[] is consulted when par::adaptive_perm is set;
//              SNPs whose flag is false are skipped.
//   original : when true, individuals with sol < 0 are flagged as missing
//              and a per-SNP report is written to <output_file_name>.cmh
//              (optionally with Breslow-Day columns).
//
// Returns a vector of nl_all CMH statistics; skipped SNPs keep the
// value-initialised 0.
vector<double> Plink::calcMantelHaenszel_2x2xK(Perm & perm, bool original)
{

  // The Breslow-Day test is switched off when there are fewer than 2 clusters
  if ( nk < 2 )
    par::breslowday = false;

  ofstream MHOUT;

  if ( original )
    {

      // Anybody not assigned to a cluster gets a missing phenotype
      // (only needs doing once, on the original pass)
      for ( vector<Individual*>::iterator person = sample.begin();
            person != sample.end();
            ++person )
        {
          if ( (*person)->sol < 0 )
            (*person)->missing = true;
        }

      string f = par::output_file_name + ".cmh";
      MHOUT.open(f.c_str(),ios::out);

      // Column labels for the confidence bounds, e.g. L95 / U95
      const string ci_label = dbl2str(par::ci_level*100);

      MHOUT << setw(4) << "CHR" << " "
            << setw(par::pp_maxsnp) << "SNP" << " "
            << setw(10) << "BP" << " "
            << setw(4) << "A1" << " "
            << setw(8) << "MAF" << " "
            << setw(4) << "A2" << " "
            << setw(10) << "CHISQ" << " "
            << setw(10) << "P" << " "
            << setw(10) << "OR" << " "
            << setw(10) << "SE" << " "
            << setw(10) << string("L"+ci_label) << " "
            << setw(10) << string("U"+ci_label) << " ";

      if ( par::breslowday )
        MHOUT << setw(10) << "CHISQ_BD" << " "
              << setw(10) << "P_BD" << " ";

      MHOUT << "\n";
      MHOUT.precision(4);

      printLOG("Cochran-Mantel-Haenszel 2x2xK test, K = " + int2str( nk) + "\n");

      if ( par::breslowday )
        printLOG("Performing Breslow-Day test of homogeneous odds ratios\n");

      printLOG("Writing results to [ " + f + " ]\n");

      if ( par::breslowday && nk>10 )
        printLOG("** Warning ** Breslow-Day statistics require large N per cluster ** \n");
    }

  // Normal quantile used for the odds-ratio confidence interval
  const double zt = ltqnorm( 1 - (1 - par::ci_level) / 2 ) ;

  vector<double> results(nl_all);

  int l = 0;
  for ( vector<CSNP*>::iterator s = SNP.begin(); s != SNP.end(); ++s, ++l )
    {

      // Adaptive permutation may already have dropped this SNP
      if ( par::adaptive_perm && ! perm.snp_test[l] )
        continue;

      // 2x2 cell counts per cluster (rows: affected / unaffected,
      // columns: F allele / T allele)
      vector<double> n_11(nk,0);
      vector<double> n_12(nk,0);
      vector<double> n_21(nk,0);
      vector<double> n_22(nk,0);

      // Marginals per cluster
      vector<double> n_1X(nk,0); // disease
      vector<double> n_2X(nk,0); // no disease
      vector<double> n_X1(nk,0); // F allele
      vector<double> n_X2(nk,0); // T allele
      vector<double> n_TT(nk,0); // total allele count

      // Sex chromosome, haploid, or (neither flag set) autosomal?
      bool X = false;
      bool haploid = false;
      if ( par::chr_sex[locus[l]->chr] )
        X = true;
      else if ( par::chr_haploid[locus[l]->chr] )
        haploid = true;

      /////////////////////////////
      // Tally every individual

      vector<bool>::iterator i1 = (*s)->one.begin();
      vector<bool>::iterator i2 = (*s)->two.begin();

      for ( vector<Individual*>::iterator gperson = sample.begin();
            gperson != sample.end();
            ++gperson, ++i1, ++i2 )
        {

          Individual * pperson = (*gperson)->pperson;

          const bool s1 = *i1;
          const bool s2 = *i2;

          // Missing phenotype: contributes to no cell
          if ( pperson->missing )
            continue;

          // (true,false) is the missing-genotype code: skip
          if ( s1 && ! s2 )
            continue;

          const int k = pperson->sol;

          // Select the table row by affection status
          vector<double> & cellF  = pperson->aff ? n_11 : n_21;
          vector<double> & cellT  = pperson->aff ? n_12 : n_22;
          vector<double> & rowTot = pperson->aff ? n_1X : n_2X;

          if ( haploid || ( X && (*gperson)->sex ) )
            {
              // One allele copy
              if ( ! s1 )
                {
                  cellF[k] ++ ;
                  n_X1[k] ++ ;
                }
              else
                {
                  cellT[k] ++ ;
                  n_X2[k] ++ ;
                }

              rowTot[k] ++ ;
              n_TT[k] ++ ;
            }
          else
            {
              // Two allele copies
              if ( ! s1 && ! s2 )      // FF hom
                {
                  cellF[k] += 2 ;
                  n_X1[k] += 2 ;
                }
              else if ( ! s1 )         // FT het
                {
                  cellF[k] ++ ;
                  cellT[k] ++ ;
                  n_X1[k] ++ ;
                  n_X2[k] ++ ;
                }
              else                     // TT hom
                {
                  cellT[k] += 2 ;
                  n_X2[k] += 2 ;
                }

              rowTot[k] += 2 ;
              n_TT[k] += 2 ;
            }

        } // next individual


      //////////////////////////////////////////////////////////////
      // Accumulate over clusters; a cluster needs n_TT >= 2 to count

      vector<bool> validK(nk,false);

      double CMH = 0;
      double denom = 0;

      // Mantel-Haenszel odds-ratio sums and the terms of its variance
      double R = 0;
      double S = 0;
      double v1 = 0, v2 = 0, v3 = 0;

      for (int k=0; k<nk; k++)
        {
          if ( n_TT[k] < 2 )
            continue;

          validK[k] = true;

          const double mean_11 = ( n_X1[k] * n_1X[k] ) / n_TT[k] ;
          const double var_11 = ( n_X1[k] * n_X2[k] * n_1X[k] * n_2X[k] )
            / ( n_TT[k]*n_TT[k]*(n_TT[k]-1) );

          CMH += n_11[k] - mean_11;
          denom += var_11;

          const double r2 = (n_11[k]*n_22[k]) / n_TT[k];
          const double s2 = (n_12[k]*n_21[k]) / n_TT[k];

          R += r2;
          S += s2;

          v1 += (1/n_TT[k]) * ( n_11[k] + n_22[k] ) * r2 ;
          v2 += (1/n_TT[k]) * ( n_12[k] + n_21[k] ) * s2 ;
          v3 += (1/n_TT[k]) * ( ( n_11[k] + n_22[k] ) * s2
                                + ( n_12[k] + n_21[k] ) * r2 );
        }

      CMH *= CMH;
      CMH /= denom;

      double OR = R / S ;

      double SE = ( 1/(2*R*R) ) * v1
        + (1/(2*S*S)) * v2
        + (1/(2*R*S)) * v3 ;
      SE = sqrt(SE);

      double OR_lower = exp( log(OR) - zt * SE );
      double OR_upper = exp( log(OR) + zt * SE );


      //////////////////////////////////
      // Report (original data only)

      if ( original )
        {

          double pvalue = chiprobP(CMH,1);

          // With p-value filtering on, rows above the threshold (or with a
          // negative p-value) are not printed
          const bool filtered_out =
            par::pfilter && ( pvalue > par::pfvalue || pvalue < 0 );

          if ( ! filtered_out )
            {

              MHOUT << setw(4) << locus[l]->chr << " "
                    << setw(par::pp_maxsnp) << locus[l]->name << " "
                    << setw(10) << locus[l]->bp << " "
                    << setw(4) << locus[l]->allele1 << " "
                    << setw(8) << locus[l]->freq << " "
                    << setw(4) << locus[l]->allele2 << " ";

              if ( realnum(CMH) )
                MHOUT << setw(10) << CMH << " "
                      << setw(10) << chiprobP(CMH,1) << " ";
              else
                MHOUT << setw(10) << "NA" << " "
                      << setw(10) << "NA" << " ";

              // OR, SE and the confidence bounds share one print rule
              const double or_fields[4] = { OR, SE, OR_lower, OR_upper };
              for (int c=0; c<4; c++)
                {
                  if ( realnum(or_fields[c]) )
                    MHOUT << setw(10) << or_fields[c] << " ";
                  else
                    MHOUT << setw(10) << "NA" << " ";
                }

              // Optional Breslow-Day test of homogeneity of odds ratios
              if ( par::breslowday )
                {
                  double BDX2 = 0;
                  int df = 0;

                  for (int k=0; k<nk; k++)
                    {
                      if ( ! validK[k] )
                        continue;

                      df++;

                      const double amax = (n_1X[k] < n_X1[k]) ? n_1X[k] : n_X1[k];
                      const double bb = n_2X[k] + n_1X[k] * OR - n_X1[k] * (1-OR);
                      const double determ = sqrt(bb*bb + 4*(1-OR) * OR * n_1X[k] * n_X1[k]);
                      const double as_plus = ( -bb + determ ) / ( 2 - 2 * OR );
                      const double as_minus = ( -bb - determ ) / ( 2 - 2 * OR );

                      // Prefer the root that lies within [0, amax]
                      const double Astar =
                        ( as_minus <= amax && as_minus >= 0 ) ? as_minus : as_plus ;
                      const double Bstar = n_1X[k] - Astar;
                      const double Cstar = n_X1[k] - Astar;
                      const double Dstar = n_2X[k] - n_X1[k] + Astar;
                      const double Var = 1/(1/Astar + 1/Bstar + 1/Cstar + 1/Dstar);

                      BDX2 += ( (n_11[k] - Astar) * ( n_11[k] - Astar ) ) / Var ;
                    }

                  double BDp = chiprobP( BDX2 , df-1 );

                  if ( BDp > -1 )
                    MHOUT << setw(10) << BDX2 << " "
                          << setw(10) << BDp << " ";
                  else
                    MHOUT << setw(10) << "NA" << " "
                          << setw(10) << "NA" << " ";
                }

              MHOUT << "\n";
            }
        }

      // Statistic is stored whether or not a row was printed
      // (used by the permutation procedure)
      results[l] = CMH;

    } // next SNP

  if ( original )
    MHOUT.close();

  return results;
}
void Plink::driverSCREEPI() { /////////////////////////////// // Gene-based epistasis ////////////////////////////////////////// // Case-control samples only affCoding(*this); ////////////////////////////////////////// // SNP-major mode analysis if (!par::SNP_major) Ind2SNP(); ////////////////////////////////////////// // Requires that sets have been speciefied if (par::set_test) readSet(); else error("Need to specify genes with --set {filename} when using --genepi\n"); ////////////////// // SET statistics Set S(snpset); ////////////////////////////////////////////// // Prune SET (0-sized sets, MAF==0 SNPs, etc) S.pruneSets(*this); int ns = snpset.size(); if (ns < 2) error("Need to specify at least two fully valid sets\n"); int n = 0; int ncase = 0; ///////////////////////////////////////////////////////// // Prune based on VIF string original_outfile = par::output_file_name; // Case-control? Prune cases and controls together... if (!par::epi_caseonly) { printLOG("\nConsidering cases and controls: "); setFlags(false); vector<Individual*>::iterator person = sample.begin(); while ( person != sample.end() ) { if ( ! (*person)->missing ) { (*person)->flag = true; n++; } person++; } par::output_file_name += ".all"; S.pruneMC(*this,false,par::vif_threshold); //S.pruneMC(*this,false,1000); } // Case-only? Prune cases only... else { printLOG("\nConsidering cases: "); setFlags(false); vector<Individual*>::iterator person = sample.begin(); while ( person != sample.end() ) { if ( (*person)->aff && ! 
(*person)->missing ) { (*person)->flag = true; ncase++; } person++; n++; } par::output_file_name += ".case"; S.pruneMC(*this,false,par::vif_threshold); //S.pruneMC(*this,false,1000); } par::output_file_name = original_outfile; // Write finalized set ofstream SET1, SET2; string f = par::output_file_name + ".all.set.in"; printLOG("Writing combined pruned-in set file to [ " + f + " ]\n"); SET1.open(f.c_str(),ios::out); f = par::output_file_name + ".all.set.out"; printLOG("Writing combined pruned-out set file to [ " + f + " ]\n"); SET2.open(f.c_str(),ios::out); for (int s=0; s<snpset.size(); s++) { int nss = snpset[s].size(); SET1 << setname[s] << "\n"; SET2 << setname[s] << "\n"; for (int j=0; j<nss; j++) { if (S.cur[s][j]) SET1 << locus[snpset[s][j]]->name << "\n"; else SET2 << locus[snpset[s][j]]->name << "\n"; } SET1 << "END\n\n"; SET2 << "END\n\n"; } SET1.close(); SET2.close(); // Prune empty sets once more: S.pruneSets(*this); ns = snpset.size(); if (ns < 2) error("Need to specify at least two fully valid sets\n"); //////////////////////////////// // Set up permutation structure // Specialized (i.e. cannot use Perm class) as this // requires a block-locus permutation // First block is fixed vector<vector<int> > blperm(ns); vector<vector<int> > blperm_case(ns); vector<vector<int> > blperm_control(ns); for (int i=0; i<ns; i++) { // A slot for each individual per locus for (int j=0; j<n; j++) if ( ! sample[j]->missing ) blperm[i].push_back(j); // A slot for each individual per locus for (int j=0; j<n; j++) if ( ! sample[j]->missing && sample[j]->aff ) blperm_case[i].push_back(j); // A slot for each individual per locus for (int j=0; j<n; j++) if ( ! 
sample[j]->missing && !sample[j]->aff ) blperm_control[i].push_back(j); } //////////////////////////////////////////// // Open file and print header for results ofstream EPI(f.c_str(), ios::out); EPI.open(f.c_str(), ios::out); EPI.precision(4); //////////////////////////////////////// // Analysis (calls genepi functions) if (!par::epi_caseonly) CCA_logit(false,blperm,S,*this); else CCA_caseonly(false,blperm_case,S,*this); if (!par::permute) return; if (!par::silent) cout << "\n"; } // End of screepi
// Send a warning to the log. The message is logged as given, followed by
// a newline; execution continues.
void output_log::warning(string err_msg)
{
	err_msg += "\n";
	printLOG(err_msg);
}
void output_log::error(string err_msg, int error_code) { printLOG("Error: " + err_msg + "\n"); exit(error_code); }
void Plink::calcMH() { /////////////////////////////////// // Basic 2 x 2 x K CMH test // i.e. Disease x allele x strata // is taken care of in assoc.cpp // (i.e. allows for permutation, sets, etc) if (!par::SNP_major) Ind2SNP(); ////////////////////////////////// // Any individual not assigned to a cluster, // making missing phenotype vector<Individual*>::iterator person = sample.begin(); while ( person != sample.end() ) { if ( (*person)->sol < 0 ) (*person)->missing = true; person++; } /////////////////////////////////// // Generalized I x J x K CMH test // Either ordinal or normal // i.e. test strata X SNP controlling for disease if (par::CMH_test_2 || par::CMH_test_ORD ) { if (par::CMH_test_ORD && !par::bt) error("--mh-ord specified but the phenotype is only binary: use --mh"); if (nk==1) error("No clusters defined for --mh2 test, i.e. K=1"); string f = par::output_file_name + ".cmh2"; if (par::CMH_test_ORD) f = par::output_file_name + ".cmh.ord"; ofstream MHOUT; MHOUT.open(f.c_str(),ios::out); MHOUT << setw(4) << "CHR" << " " << setw(par::pp_maxsnp) << "SNP" << " " << setw(10) << "CHISQ" << " " << setw(10) << "P" << "\n"; MHOUT.precision(4); if (par::CMH_test_ORD) { printLOG("Cochran-Mantel-Haenszel IxJxK ordinal test, K = " + int2str(nk) + "\n"); printLOG("Testing SNP x ORDINAL DISEASE | STRATUM (option --mh-ord)\n"); } else { printLOG("Cochran-Mantel-Haenszel IxJxK test, K = " + int2str(nk) + "\n"); printLOG("Testing SNP x STRATUM | DISEASE (option --mh2)\n"); } printLOG("Writing results to [ " + f + " ]\n"); vector<CSNP*>::iterator s = SNP.begin(); int l=0; while ( s != SNP.end() ) { ///////////////////////// // Autosomal or haploid? 
bool Xchr=false, haploid=false; if (par::chr_sex[locus[l]->chr]) Xchr=true; else if (par::chr_haploid[locus[l]->chr]) haploid=true; if (haploid || Xchr ) error("--mh2 / --mh-ord cannot handle X/Y markers currently..."); vector<int> X(0); // SNP vector<int> Y(0); // Cluster vector<int> Z(0); // Phenotype vector<Individual*>::iterator person = sample.begin(); vector<bool>::iterator i1 = (*s)->one.begin(); vector<bool>::iterator i2 = (*s)->two.begin(); while ( person != sample.end() ) { if ((*person)->missing) { // Next person person++; i1++; i2++; continue; } // Only consider individuals who have been assigned to a cluster if ( (*person)->sol >= 0 ) { if ( (!(*i1)) && (!(*i2)) ) { X.push_back(1); X.push_back(1); } else if ( (!(*i1)) && *i2 ) { X.push_back(1); X.push_back(2); } else if ( *i1 && *i2 ) { X.push_back(2); X.push_back(2); } else { // Next person person++; i1++; i2++; continue; } Y.push_back((*person)->sol); Y.push_back((*person)->sol); if (par::CMH_test_ORD) Z.push_back( (int)(*person)->phenotype ); else { if ((*person)->phenotype==2) { Z.push_back(2); Z.push_back(2); } else { Z.push_back(1); Z.push_back(1); } } } // Next person person++; i1++; i2++; } vector<double> res; if ( par::CMH_test_ORD ) res = calcMantelHaenszel_ORD(X,Z,Y); else res = calcMantelHaenszel_IxJxK(X,Y,Z); MHOUT << setw(4) << locus[l]->chr << " " << setw(par::pp_maxsnp) << locus[l]->name << " " << setw(10) << res[0] << " " << setw(10) << chiprobP(res[0],res[1]) << "\n"; // Next SNP s++; l++; } MHOUT.close(); } }
int WAV::Read(const char* WAVFName) { std::ifstream ifs(WAVFName, std::ios::binary); //WAVHeader wav; //unsigned char header_buf[44]; if (!ifs) { std::cerr << "[ERROR] can't open " << WAVFName << std::endl; return -1; } //ifs.read((char*)header_buf, 44); //ファイルがRIFF形式であるか; ifs.read(_riffID, 4); if (ifs.bad() || strncmp(_riffID, "RIFF", 4) != 0) return -1; ifs.read((char*) &_size, 4); // fileSize //ファイルがWAVEファイルであるか ifs.read((char*) _wavID, 4); if (ifs.bad() || strncmp(_wavID, "WAVE", 4) != 0) return -1; //fmt のチェック ifs.read((char*) _fmtID, 4); if (strncmp(_fmtID, "fmt ", 4)) { std::cerr << "fmt not found" << std::endl; return -1; } // fmt チャンクのバイト数 ifs.read((char*) &_fmtSize, 4); if (_fmtSize != 16) { std::cerr << "not LinearPCM" << std::endl; return -1; } //フォーマットIDから拡張部分までのヘッダ部分を取り込む // LinearPCMなので16byte分のデータ読み込む ifs.read((char*) &_format, 2); //LinearPCMファイルならば1が入る if (_format != 1) { std::cerr << "not LinearPCM" << std::endl; return -1; } ifs.read((char*) &_channels, 2); ifs.read((char*) &_sampleRate, 4); ifs.read((char*) &_bytePerSec, 4); ifs.read((char*) &_blockSize, 2); ifs.read((char*) &_bit, 2); //-- // "data" ifs.read((char*) _dataID, 4); if (ifs.bad() || strncmp(_dataID, "data", 4) != 0) return -1; // 波形データのバイト数 ifs.read((char*) &_dataSize, 4); //モノラルならサンプル数を、ステレオなら左右1サンプルずつの組の数 _sampleCount = _dataSize / (_channels*(_bit / 8)); //_monoral8=NULL; //_monoral16=NULL; //_stereo8=NULL; //_stereo16=NULL; try { if (_channels == 1) { if (_bit == 8) { if ((_monoral8 = (Monoral8*) malloc(_dataSize)) == NULL) { return -1; } ifs.read((char*) _monoral8, _dataSize); } else if (_bit == 16) { if ((_monoral16 = (Monoral16*) malloc(_dataSize)) == NULL) { return -1; } //std::cout << "1block=" << sizeof(Monoral16) << std::endl; //std::cout << "samplecount=" << _sampleCount << std::endl; //std::cout << "dataSize=1block*sampleCount=" << sizeof(Monoral16)* _sampleCount << "=" <<_dataSize << std::endl; ifs.read((char*) _monoral16, _dataSize); } else { return -1; } } else if 
(_channels == 2) { // ToDO LR insert data if (_bit == 8) { if ((_stereo8 = (Stereo8*) malloc(_dataSize)) == NULL) { return -1; } ifs.read((char*) _stereo8, _dataSize); } else if (_bit == 16) { if ((_stereo16 = (Stereo16*) malloc(_dataSize)) == NULL) { return -1; } ifs.read((char*) _stereo16, _dataSize); } else { return -1; } } else { return -1; } } catch (const char* e) { printLOG("malloc error"); printf("%s\n", e); exit(-1); } ifs.close(); return 0; }
// Read VCF file void vcf_file::scan_file(const string &chr, const string &exclude_chr) { printLOG("Scanning " + filename + " ... \n"); bool filter_by_chr = (chr != ""); bool exclude_by_chr = (exclude_chr != ""); string line, tmp; N_indv = 0; unsigned int N_read = 0; istringstream ss; string last_CHROM = ""; N_entries=0; string CHROM; bool finish = false; int last_POS = -1; int POS; streampos filepos; while(!feof()) { filepos = get_filepos(); read_line(line); if (line.length() <= 2) continue; if (line[0] == '#') { if (line[1] == '#') { // Meta information parse_meta(line); } else { // Must be header information: #CHROM POS ID REF ALT QUAL FILTER INFO (FORMAT NA00001 NA00002 ... ) parse_header(line); } } else { // Must be a data line ss.clear(); ss.str(line); ss >> CHROM; N_read++; if ((filter_by_chr == true) && (last_CHROM == chr) && (CHROM != chr)) { // Presuming the file to be sorted (it should be), we have already found the chromosome we wanted, so there's no need to continue. printLOG("\tCompleted reading required chromosome. Skipping remainder of file.\n"); finish = true; break; } if (CHROM != last_CHROM) { printLOG("Currently scanning CHROM: " + CHROM); if ((exclude_by_chr == true) && (CHROM == exclude_chr)) printLOG(" - excluded."); printLOG("\n"); last_CHROM = CHROM; last_POS = -1; } if ((exclude_by_chr == true) && (CHROM == exclude_chr)) continue; if (filter_by_chr == true) { // For speed, only parse the entry if it's needed if (CHROM == chr) { ss >> POS; if (POS < last_POS) error("VCF file is not sorted at: " + CHROM + ":" + int2str(POS)); last_POS = POS; entry_file_locations.push_back(filepos); N_entries++; } } else { ss >> POS; if (POS < last_POS) error("VCF file is not sorted at: " + CHROM + ":" + int2str(POS)); last_POS = POS; entry_file_locations.push_back(filepos); N_entries++; } }
void vcf_file::output_discordance_by_site(const string &output_file_prefix, vcf_file &diff_vcf_file) { printLOG("Outputting Discordance By Site...\n"); map<pair<string, int>, pair<int, int> > CHROMPOS_to_filepos_pair; map<pair<string, int>, pair<int, int> >::iterator CHROMPOS_to_filepos_pair_it; return_site_union(diff_vcf_file, CHROMPOS_to_filepos_pair); map<string, pair< int, int> > combined_individuals; map<string, pair< int, int> >::iterator combined_individuals_it; return_indv_union(diff_vcf_file, combined_individuals); string CHROM, vcf_line; int POS; int s1, s2, indv1, indv2; string output_file = output_file_prefix + ".diff.sites"; ofstream diffsites(output_file.c_str()); if (!diffsites.is_open()) error("Could not open Sites Differences File: " + output_file, 3); //diffsites << "CHROM\tPOS\tFILES\tMATCHING_ALT\tN_COMMON_CALLED\tN_DISCORD\tDISCORDANCE\tN_FILE1_NONREF_GENOTYPES\tNON_REF_DISCORDANCE" << endl; diffsites << "CHROM\tPOS\tFILES\tMATCHING_ALLELES\tN_COMMON_CALLED\tN_DISCORD\tDISCORDANCE" << endl; for (CHROMPOS_to_filepos_pair_it=CHROMPOS_to_filepos_pair.begin(); CHROMPOS_to_filepos_pair_it != CHROMPOS_to_filepos_pair.end(); ++CHROMPOS_to_filepos_pair_it) { CHROM = CHROMPOS_to_filepos_pair_it->first.first; POS = CHROMPOS_to_filepos_pair_it->first.second; diffsites << CHROM << "\t" << POS; s1 = CHROMPOS_to_filepos_pair_it->second.first; s2 = CHROMPOS_to_filepos_pair_it->second.second; vcf_entry e1(N_indv); vcf_entry e2(diff_vcf_file.N_indv); bool data_in_both = true; // Read entries from file (if available) if (s1 != -1) { get_vcf_entry(s1, vcf_line); e1.reset(vcf_line); } else data_in_both = false; if (s2 != -1) { diff_vcf_file.get_vcf_entry(s2, vcf_line); e2.reset(vcf_line); } else data_in_both = false; if (data_in_both) diffsites << "\tB"; else if ((s1 != -1) && (s2 == -1)) diffsites << "\t1"; else if ((s1 == -1) && (s2 != -1)) diffsites << "\t2"; else error("Unhandled condition"); e1.parse_basic_entry(true); e2.parse_basic_entry(true); // Set the 
reference to the non-missing entry (if available) string REF = e1.get_REF(); string REF2 = e2.get_REF(); if (REF == "N") REF = REF2; if (REF2 == "N") REF2 = REF; if (REF.size() != REF2.size()) { warning("REF sequences at " + CHROM + ":" + int2str(POS) + " are not comparable. Skipping site"); continue; } if ((REF != REF2) && (REF2 != "N") && (REF != "N")) warning("Non-matching REF " + CHROM + ":" + int2str(POS) + " " + REF + "/" + REF2); // Do the alternative alleles match? string ALT, ALT2; ALT = e1.get_ALT(); ALT2 = e2.get_ALT(); bool alleles_match = ((ALT == ALT2) && (REF == REF2)); diffsites << "\t" << alleles_match; e1.parse_full_entry(true); e1.parse_genotype_entries(true); e2.parse_full_entry(true); e2.parse_genotype_entries(true); pair<string, string> genotype1, genotype2; pair<int,int> geno_ids1, geno_ids2; pair<string, string> missing_genotype(".","."); pair<int, int> missing_id(-1,-1); unsigned int N_common_called=0; // Number of genotypes called in both files unsigned int N_missing_1=0, N_missing_2=0; unsigned int N_discord=0; unsigned int N_concord_non_missing=0; for (combined_individuals_it=combined_individuals.begin(); combined_individuals_it!=combined_individuals.end(); ++combined_individuals_it) { indv1 = combined_individuals_it->second.first; indv2 = combined_individuals_it->second.second; if ((indv1 == -1) || (indv2 == -1)) continue; // Individual not found in one of the files if (alleles_match) { // Alleles match, so can compare ids instead of strings e1.get_indv_GENOTYPE_ids(indv1, geno_ids1); e2.get_indv_GENOTYPE_ids(indv2, geno_ids2); if ((geno_ids1 != missing_id) && (geno_ids2 != missing_id)) { N_common_called++; if (((geno_ids1.first == geno_ids2.first) && (geno_ids1.second == geno_ids2.second)) || ((geno_ids1.first == geno_ids2.second) && (geno_ids1.second == geno_ids2.first)) ) { // Match N_concord_non_missing++; } else { // Mismatch N_discord++; } } else if ((geno_ids1 == missing_id) && (geno_ids2 == missing_id)) { // Both missing 
N_missing_1++; N_missing_2++; } else if (geno_ids1 != missing_id) { // Genotype 1 is not missing, genotype 2 is. N_missing_2++; } else if (geno_ids2 != missing_id) { // Genotype 2 is not missing, genotype 1 is. N_missing_1++; } else error("Unknown condition"); } else { // Alleles don't match, so need to be more careful and compare strings e1.get_indv_GENOTYPE_strings(indv1, genotype1); e2.get_indv_GENOTYPE_strings(indv2, genotype2); if ((genotype1 != missing_genotype) && (genotype2 != missing_genotype)) { // No missing data N_common_called++; if (((genotype1.first == genotype2.first) && (genotype1.second == genotype2.second)) || ((genotype1.first == genotype2.second) && (genotype1.second == genotype2.first)) ) { // Match N_concord_non_missing++; } else { // Mismatch N_discord++; } } else if ((genotype1 == missing_genotype) && (genotype2 == missing_genotype)) { // Both missing N_missing_1++; N_missing_2++; } else if (genotype1 != missing_genotype) { // Genotype 1 is not missing, genotype 2 is. N_missing_2++; } else if (genotype2 != missing_genotype) { // Genotype 2 is not missing, genotype 1 is. N_missing_1++; } else error("Unknown condition"); } } double discordance = N_discord / double(N_common_called); diffsites << "\t" << N_common_called << "\t" << N_discord << "\t" << discordance; diffsites << endl; } diffsites.close(); }
// QFAM analysis driver.
//
// Writes the QFAM statistics to <output>.qfam[.within|.parents|.between|.total]
// via calcQTDT and, when par::permute is set, a second file with the same
// name plus ".perm" holding the statistic, perm.pvalue(l) and
// perm.reps_done(l) for each SNP. With par::multtest the values
// inverse_chiprob(perm.pvalue(l),1) are handed to multcomp().
//
// Side effect: par::adaptive_perm is forced to true.
void Plink::perm_testQTDT(Perm & perm)
{

  // Individual-major coding is required here
  if ( par::SNP_major )
    SNP2Ind();

  // Covariates are not supported for now
  if ( par::clist_number > 0 )
    error("Cannot specify covariates with QFAM for now...\n");

  /////////////////////////////
  // Permutation indices: identity ordering between families, no
  // within-family flips

  vector<int> pbetween(family.size());
  vector<bool> pwithin(family.size(),false);
  for (int i=0; i < family.size(); i++)
    pbetween[i] = i;

  ///////////////
  // Output file name suffix depends on the QFAM variant

  string suffix = ".qfam";
  if ( par::QFAM_within1 )
    suffix += ".within";
  else if ( par::QFAM_within2 )
    suffix += ".parents";
  else if ( par::QFAM_between )
    suffix += ".between";
  else if ( par::QFAM_total )
    suffix += ".total";

  printLOG("Writing QFAM statistics to [ " + par::output_file_name + suffix + " ]\n");

  if ( par::permute )
    printLOG("Important: asymptotic p-values not necessarily corrected for family-structure:\n"
             " use empirical p-values for robust p-values from QFAM\n"
             " and consult the above file only for parameter estimates\n");
  else
    printLOG("** Warning ** QFAM results require permutation to correct for family structure\n");

  ofstream QOUT((par::output_file_name+suffix).c_str(),ios::out);
  QOUT.precision(4);

  QOUT << setw(4) << "CHR" << " "
       << setw(par::pp_maxsnp) << "SNP" << " "
       << setw(10) << "BP" << " "
       << setw(4) << "A1" << " "
       << setw(10) << "TEST" << " "
       << setw(8) << "NIND" << " "
       << setw(10) << "BETA" << " ";

  if ( par::display_ci )
    QOUT << setw(8) << "SE" << " "
         << setw(8) << "LOWER" << " "
         << setw(8) << "UPPER" << " ";

  // The newline is part of the padded "P" field
  QOUT << setw(12) << "STAT" << " "
       << setw(12) << "P\n";

  //////////////////////
  // Familial clustering: C[i] is the index within family[] of the family
  // that sample[i] belongs to

  map<Family*,int> family_index;
  for (int fi = 0 ; fi < family.size() ; fi++)
    family_index.insert( make_pair( family[fi] , fi ) );

  vector<int> C;
  for ( vector<Individual*>::iterator person = sample.begin();
        person != sample.end();
        ++person )
    {
      map<Family*,int>::iterator hit = family_index.find( (*person)->family );

      if ( hit != family_index.end() )
        C.push_back( hit->second );
      else
        error("Internal error in QFAM, allocating families to individuals...\n");
    }

  printLOG(int2str(family.size())+" nuclear families in analysis\n");

  if ( family.size()<2 )
    error("Halting: not enough nuclear families for this analysis\n");

  ////////////////////
  // Run original QFAM

  perm.setTests(nl_all);
  perm.setPermClusters(*this);

  // Force adaptive perm
  par::adaptive_perm = true;

  vector_t orig = calcQTDT(C, QOUT, false, perm, pbetween, pwithin);

  QOUT.close();

  ////////////////
  // Nothing further without permutation; the adaptive permutation itself
  // is expected to have run inside the calcQTDT call above

  if ( ! par::permute )
    return;

  if ( ! par::silent )
    cout << "\n\n";

  ////////////////////
  // Display results

  suffix += ".perm";

  ofstream TDT;
  TDT.open((par::output_file_name+suffix).c_str(),ios::out);
  printLOG("Writing QFAM permutation results to [ " + par::output_file_name + suffix + " ] \n");

  TDT.precision(4);

  // NOTE(review): the STAT header is conditional on par::perm_TDT_basic,
  // but every data row below prints three value columns -- confirm.
  TDT << setw(4) << "CHR" << " "
      << setw(par::pp_maxsnp) << "SNP" << " ";

  if ( par::perm_TDT_basic )
    TDT << setw(12) << "STAT" << " ";

  TDT << setw(12) << "EMP1" << " ";
  TDT << setw(12) << "NP" << " " << "\n";

  for (int l=0; l<nl_all; l++)
    {
      TDT << setw(4) << locus[l]->chr << " "
          << setw(par::pp_maxsnp) << locus[l]->name << " ";

      // A statistic below -0.5 is reported as NA in all three columns
      const bool have_stat = ! ( orig[l] < -0.5 );

      if ( have_stat )
        {
          TDT << setw(12) << orig[l] << " "
              << setw(12) << perm.pvalue(l) << " "
              << setw(12) << perm.reps_done(l);
        }
      else
        {
          TDT << setw(12) << "NA" << " "
              << setw(12) << "NA" << " "
              << setw(12) << "NA";
        }

      TDT << "\n";
    }

  TDT.close();

  // Adjusted p-values, assumes 1-df chi-squares
  if ( par::multtest )
    {
      vector<double> obp(0);
      for (int l=0; l<nl_all; l++)
        obp.push_back(inverse_chiprob(perm.pvalue(l),1));
      multcomp(obp,suffix);
    }
}
void vcf_file::output_discordance_matrix(const string &output_file_prefix, vcf_file &diff_vcf_file) { printLOG("Outputting Discordance Matrix\n\tFor bi-allelic loci, called in both files, with matching alleles only...\n"); map<pair<string, int>, pair<int, int> > CHROMPOS_to_filepos_pair; map<pair<string, int>, pair<int, int> >::iterator CHROMPOS_to_filepos_pair_it; return_site_union(diff_vcf_file, CHROMPOS_to_filepos_pair); map<string, pair< int, int> > combined_individuals; map<string, pair< int, int> >::iterator combined_individuals_it; return_indv_union(diff_vcf_file, combined_individuals); string vcf_line; int s1, s2, indv1, indv2; vector<vector<int> > discordance_matrix(4, vector<int>(4, 0)); for (CHROMPOS_to_filepos_pair_it=CHROMPOS_to_filepos_pair.begin(); CHROMPOS_to_filepos_pair_it != CHROMPOS_to_filepos_pair.end(); ++CHROMPOS_to_filepos_pair_it) { s1 = CHROMPOS_to_filepos_pair_it->second.first; s2 = CHROMPOS_to_filepos_pair_it->second.second; vcf_entry e1(N_indv); vcf_entry e2(diff_vcf_file.N_indv); // Read entries from file (if available) if (s1 != -1) { get_vcf_entry(s1, vcf_line); e1.reset(vcf_line); } if (s2 != -1) { diff_vcf_file.get_vcf_entry(s2, vcf_line); e2.reset(vcf_line); } e1.parse_basic_entry(true); e2.parse_basic_entry(true); if ((e1.get_N_alleles() != 2) || (e2.get_N_alleles() != 2)) continue; // Set the reference to the non-missing entry (if available) string REF = e1.get_REF(); string REF2 = e2.get_REF(); if (REF == "N") REF = REF2; if (REF2 == "N") REF2 = REF; if (REF.size() != REF2.size()) continue; if ((REF != REF2) && (REF2 != "N") && (REF != "N")) continue; // Do the alternative alleles match? 
string ALT, ALT2; ALT = e1.get_ALT(); ALT2 = e2.get_ALT(); bool alleles_match = (ALT == ALT2) && (REF == REF2); if (alleles_match == false) continue; e1.parse_full_entry(true); e1.parse_genotype_entries(true); e2.parse_full_entry(true); e2.parse_genotype_entries(true); pair<int,int> geno_ids1, geno_ids2; int N1, N2; for (combined_individuals_it=combined_individuals.begin(); combined_individuals_it!=combined_individuals.end(); ++combined_individuals_it) { indv1 = combined_individuals_it->second.first; indv2 = combined_individuals_it->second.second; if ((indv1 == -1) || (indv2 == -1)) continue; // Individual not found in one of the files // Alleles match, so can compare ids instead of strings e1.get_indv_GENOTYPE_ids(indv1, geno_ids1); e2.get_indv_GENOTYPE_ids(indv2, geno_ids2); if (((geno_ids1.first != -1) && (geno_ids1.second == -1)) || ((geno_ids2.first != -1) && (geno_ids2.second == -1))) { // Haploid one_off_warning("***Warning: Haploid chromosomes not counted!***"); continue; } N1 = geno_ids1.first + geno_ids1.second; N2 = geno_ids2.first + geno_ids2.second; if ((N1 == -1) || (N1 < -2) || (N1 > 2)) error("Unhandled case"); if ((N2 == -1) || (N2 < -2) || (N2 > 2)) error("Unhandled case"); if (N1 == -2) N1 = 3; if (N2 == -2) N2 = 3; discordance_matrix[N1][N2]++; } } string output_file = output_file_prefix + ".diff.discordance_matrix"; ofstream out(output_file.c_str()); if (!out.is_open()) error("Could not open Discordance Matrix File: " + output_file, 3); out << "-\tN_0/0_file1\tN_0/1_file1\tN_1/1_file1\tN_./._file1" << endl; out << "N_0/0_file2\t" << discordance_matrix[0][0] << "\t" << discordance_matrix[1][0] << "\t" << discordance_matrix[2][0] << "\t" << discordance_matrix[3][0] << endl; out << "N_0/1_file2\t" << discordance_matrix[0][1] << "\t" << discordance_matrix[1][1] << "\t" << discordance_matrix[2][1] << "\t" << discordance_matrix[3][1] << endl; out << "N_1/1_file2\t" << discordance_matrix[0][2] << "\t" << discordance_matrix[1][2] << "\t" << 
discordance_matrix[2][2] << "\t" << discordance_matrix[3][2] << endl; out << "N_./._file2\t" << discordance_matrix[0][3] << "\t" << discordance_matrix[1][3] << "\t" << discordance_matrix[2][3] << "\t" << discordance_matrix[3][3] << endl; out.close(); }
void Plink::displayGeneReport() { // Simply read in any generic results file and list of SNPs by // ranges (which may be subsetted). // if ( false ) // readMapFile(par::mapfile,include,include_pos,nl_actual); ofstream GREP; GREP.open( (par::output_file_name + ".range.report").c_str() , ios::out); map<string, set<Range> > ranges; // Read list of ranges ranges = readRange( par::greport_gene_list ); // Filter ranges if ( par::greport_subset ) ranges = filterRanges( ranges, par::greport_subset_file ); // Open a single results file ifstream RESIN; RESIN.open( par::greport_results.c_str() , ios::in ); // Read first (header) row char cline[par::MAX_LINE_LENGTH]; RESIN.getline(cline,par::MAX_LINE_LENGTH,'\n'); string sline = cline; if (sline=="") error("Problem reading [ " + par::greport_results + " ]\n"); string buf; stringstream ss(sline); vector<string> tokens; while (ss >> buf) tokens.push_back(buf); int chr_column = -1; int bp_column = -1; int pval_column = -1; int snp_column = -1; for (int i=0; i<tokens.size(); i++) { if ( tokens[i] == "CHR" ) chr_column = i; if ( tokens[i] == "BP" ) bp_column = i; if ( tokens[i] == "SNP" ) snp_column = i; if ( tokens[i] == "P" ) pval_column = i; } // Do we have a list of SNPs to specifically extract? set<string> extractSNP; if ( par::extract_set ) { if ( snp_column == -1 ) error("Did not find a SNP field, so cannot use --extract"); checkFileExists( par::extract_file ); PP->printLOG("Only extracting SNPs listed in [ " + par::extract_file + " ]\n"); ifstream IN(par::extract_file.c_str(), ios::in); while ( ! IN.eof() ) { string snpname; IN >> snpname; if ( snpname=="" ) continue; extractSNP.insert(snpname); } IN.close(); PP->printLOG("Read " + int2str( extractSNP.size() ) + " SNPs to extract\n"); } if ( chr_column < 0 || bp_column < 0 ) error("Could not find CHR and BP fields in results file"); map<Range*,vector<string> > annotatedResults; string headerline = sline; int cnt = 0; while ( ! RESIN.eof() ) { // if ( ! 
par::silent ) // cout << "Processing results line " << ++cnt << " \r"; // vector<string> tokens = tokenizeLine( RESIN ); char cline[par::MAX_LINE_LENGTH]; RESIN.getline(cline,par::MAX_LINE_LENGTH,'\n'); string sline = cline; if (sline=="") continue; string buf; stringstream ss(sline); vector<string> tokens; while (ss >> buf) tokens.push_back(buf); if ( tokens.size() <= chr_column || tokens.size() <= bp_column ) continue; // Using a p-value-filtering field? double pvalue = 0; if ( pval_column != -1 ) { if ( tokens.size() <= pval_column ) continue; if ( ! from_string<double>( pvalue, tokens[pval_column] , std::dec)) continue; if ( par::pfilter && pvalue > par::pfvalue ) continue; } if ( par::extract_set ) { if ( tokens.size() <= snp_column ) continue; if ( extractSNP.find( tokens[snp_column] ) == extractSNP.end() ) continue; } int thisChr = -1; int thisBP = -1; if ( ! from_string<int>( thisChr, tokens[chr_column] , std::dec)) continue; if ( ! from_string<int>( thisBP, tokens[bp_column] , std::dec)) continue; // Do we need to store this? i.e. what ranges is it actually in? 
// This information is in snp2range Range r1(thisChr,thisBP,thisBP,"dummy"); set<Range*> implicated = rangeIntersect(r1,ranges); set<Range*>::iterator ri = implicated.begin(); while ( ri != implicated.end() ) { string distance = dbl2str(( thisBP - ((*ri)->start + par::make_set_border)) /1000.00 , 4 ) + "kb" ; if ( annotatedResults.find( *ri ) == annotatedResults.end() ) { vector<string> t(2); t[0] = distance; t[1] = sline; annotatedResults.insert(make_pair( (Range *)(*ri) , t ) ); } else { vector<string> & v = annotatedResults.find( *ri )->second; v.push_back(distance); v.push_back(sline); } ++ri; } // Read next line of results } // Iterate through these -- they will be in genomic order, hopefully map<string, set<Range> >::iterator ri = ranges.begin(); while ( ri != ranges.end() ) { set<Range>::iterator si = ri->second.begin(); while ( si != ri->second.end() ) { bool displayed = false; map<Range*,vector<string> >::iterator ari; ari = annotatedResults.find( (Range *)&(*si) ); if ( ari != annotatedResults.end() ) { for (int l=0; l< ari->second.size(); l+=2) { if ( ! displayed ) { GREP << ri->first << " -- chr" << chromosomeName( si->chr ) << ":" << si->start << ".." << si->stop << " ( " << (si->stop - si->start ) / 1000.00 << "kb ) "; if ( par::make_set_border > 0 ) GREP << " including " << par::make_set_border/1000.00 << "kb border "; GREP << "\n\n" << setw(12) << "DIST" << " " << headerline << "\n"; displayed = true; } GREP << setw(12) << ari->second[l] << " " << ari->second[l+1] << "\n"; } } if ( ! displayed ) { if ( par::greport_display_empty ) { GREP << ri->first << " -- chr" << chromosomeName( si->chr ) << ":" << si->start << ".." << si->stop << " ( " << (si->stop - si->start ) / 1000.00 << "kb ) "; if ( par::make_set_border > 0 ) GREP << " including " << par::make_set_border/1000.00 << "kb border "; GREP << " { nothing to report }\n\n"; } } else GREP << "\n\n"; ++si; } ++ri; } RESIN.close(); GREP.close(); if ( ! 
par::silent ) cout << "\n"; printLOG("Writing per-range report to [ " + par::output_file_name + ".range.report ]\n"); shutdown(); }
void vcf_file::output_switch_error(const string &output_file_prefix, vcf_file &diff_vcf_file) { printLOG("Outputting Phase Switch Errors...\n"); map<pair<string, int>, pair<int, int> > CHROMPOS_to_filepos_pair; map<pair<string, int>, pair<int, int> >::iterator CHROMPOS_to_filepos_pair_it; return_site_union(diff_vcf_file, CHROMPOS_to_filepos_pair); map<string, pair< int, int> > combined_individuals; map<string, pair< int, int> >::iterator combined_individuals_it; return_indv_union(diff_vcf_file, combined_individuals); string CHROM, vcf_line; int POS; int s1, s2, indv1, indv2; string output_file = output_file_prefix + ".diff.switch"; ofstream switcherror(output_file.c_str()); if (!switcherror.is_open()) error("Could not open Switch Error file: " + output_file, 4); switcherror << "CHROM\tPOS\tINDV" << endl; unsigned int N_combined_indv = combined_individuals.size(); vector<int> N_phased_het_sites(N_combined_indv, 0); vector<int> N_switch_errors(N_combined_indv, 0); pair<string, string> missing_genotype(".","."); vector<pair<string, string> > prev_geno_file1(N_combined_indv, missing_genotype); vector<pair<string, string> > prev_geno_file2(N_combined_indv, missing_genotype); pair<string, string> file1_hap1, file1_hap2, file2_hap1; for (CHROMPOS_to_filepos_pair_it=CHROMPOS_to_filepos_pair.begin(); CHROMPOS_to_filepos_pair_it != CHROMPOS_to_filepos_pair.end(); ++CHROMPOS_to_filepos_pair_it) { CHROM = CHROMPOS_to_filepos_pair_it->first.first; POS = CHROMPOS_to_filepos_pair_it->first.second; s1 = CHROMPOS_to_filepos_pair_it->second.first; s2 = CHROMPOS_to_filepos_pair_it->second.second; vcf_entry e1(N_indv); vcf_entry e2(diff_vcf_file.N_indv); // Read entries from file (if available) if (s1 != -1) { get_vcf_entry(s1, vcf_line); e1.reset(vcf_line); } if (s2 != -1) { diff_vcf_file.get_vcf_entry(s2, vcf_line); e2.reset(vcf_line); } e1.parse_basic_entry(true); e2.parse_basic_entry(true); e1.parse_full_entry(true); e1.parse_genotype_entries(true); e2.parse_full_entry(true); 
e2.parse_genotype_entries(true); pair<string, string> genotype1, genotype2; pair<string, string> missing_genotype(".","."); unsigned int N_common_called=0; // Number of genotypes called in both files unsigned int indv_count=0; // Bug fix applied (#3354189) - July 5th 2011 for (combined_individuals_it=combined_individuals.begin(); combined_individuals_it!=combined_individuals.end(); ++combined_individuals_it, indv_count++) { indv1 = combined_individuals_it->second.first; indv2 = combined_individuals_it->second.second; if ((indv1 == -1) || (indv2 == -1)) continue; // Individual not found in one of the files e1.get_indv_GENOTYPE_strings(indv1, genotype1); e2.get_indv_GENOTYPE_strings(indv2, genotype2); if ((genotype1 != missing_genotype) && (genotype2 != missing_genotype)) { // No missing data N_common_called++; if (((genotype1.first == genotype2.first) && (genotype1.second == genotype2.second)) || ((genotype1.first == genotype2.second) && (genotype1.second == genotype2.first)) ) { // Have a matching genotypes in files 1 and 2 if (genotype1.first != genotype1.second) { // It's a heterozgote char phase1, phase2; phase1 = e1.get_indv_PHASE(indv1); phase2 = e2.get_indv_PHASE(indv2); if ((phase1 == '|') && (phase2 == '|')) { // Calculate Phasing error (switch error) N_phased_het_sites[indv_count]++; file1_hap1 = make_pair<string,string>(prev_geno_file1[indv_count].first, genotype1.first); file1_hap2 = make_pair<string,string>(prev_geno_file1[indv_count].second, genotype1.second); file2_hap1 = make_pair<string,string>(prev_geno_file2[indv_count].first, genotype2.first); if ((file2_hap1 != file1_hap1) && (file2_hap1 != file1_hap2)) { // Must be a switch error string indv_id; N_switch_errors[indv_count]++; if (indv1 != -1) indv_id = indv[indv1]; else indv_id = diff_vcf_file.indv[indv2]; switcherror << CHROM << "\t" << POS << "\t" << indv_id << endl; } prev_geno_file1[indv_count] = genotype1; prev_geno_file2[indv_count] = genotype2; } } } } } } switcherror.close(); 
output_file = output_file_prefix + ".diff.indv.switch"; ofstream idiscord(output_file.c_str()); if (!idiscord.is_open()) error("Could not open Individual Discordance File: " + output_file, 3); idiscord << "INDV\tN_COMMON_PHASED_HET\tN_SWITCH\tSWITCH" << endl; unsigned int indv_count=0; double switch_error; string indv_id; for (combined_individuals_it=combined_individuals.begin(); combined_individuals_it!=combined_individuals.end(); ++combined_individuals_it) { indv1 = combined_individuals_it->second.first; indv2 = combined_individuals_it->second.second; if (indv1 != -1) indv_id = indv[indv1]; else indv_id = diff_vcf_file.indv[indv2]; if (N_phased_het_sites[indv_count] > 0) switch_error = double(N_switch_errors[indv_count]) / N_phased_het_sites[indv_count]; else switch_error = 0; idiscord << indv_id << "\t" << N_phased_het_sites[indv_count] << "\t" << N_switch_errors[indv_count] << "\t" << switch_error << endl; indv_count++; } idiscord.close(); }
void vcf_file::output_discordance_by_indv(const string &output_file_prefix, vcf_file &diff_vcf_file) { printLOG("Outputting Discordance By Individual...\n"); map<pair<string, int>, pair<int, int> > CHROMPOS_to_filepos_pair; map<pair<string, int>, pair<int, int> >::iterator CHROMPOS_to_filepos_pair_it; return_site_union(diff_vcf_file, CHROMPOS_to_filepos_pair); map<string, pair< int, int> > combined_individuals; map<string, pair< int, int> >::iterator combined_individuals_it; return_indv_union(diff_vcf_file, combined_individuals); map<string, pair<int, int> > indv_sums; string vcf_line, CHROM; int POS; int s1, s2, indv1, indv2; for (CHROMPOS_to_filepos_pair_it=CHROMPOS_to_filepos_pair.begin(); CHROMPOS_to_filepos_pair_it != CHROMPOS_to_filepos_pair.end(); ++CHROMPOS_to_filepos_pair_it) { CHROM = CHROMPOS_to_filepos_pair_it->first.first; POS = CHROMPOS_to_filepos_pair_it->first.second; s1 = CHROMPOS_to_filepos_pair_it->second.first; s2 = CHROMPOS_to_filepos_pair_it->second.second; vcf_entry e1(N_indv); vcf_entry e2(diff_vcf_file.N_indv); // Read entries from file (if available) if (s1 != -1) { get_vcf_entry(s1, vcf_line); e1.reset(vcf_line); } if (s2 != -1) { diff_vcf_file.get_vcf_entry(s2, vcf_line); e2.reset(vcf_line); } e1.parse_basic_entry(true); e2.parse_basic_entry(true); // Set the reference to the non-missing entry (if available) string REF = e1.get_REF(); string REF2 = e2.get_REF(); if (REF == "N") REF = REF2; if (REF2 == "N") REF2 = REF; if (REF.size() != REF2.size()) { warning("REF sequences at " + CHROM + ":" + int2str(POS) + " are not comparable. Skipping site"); continue; } if ((REF != REF2) && (REF2 != "N") && (REF != "N")) warning("Non-matching REF " + CHROM + ":" + int2str(POS) + " " + REF + "/" + REF2); // Do the alternative alleles match? 
string ALT, ALT2; ALT = e1.get_ALT(); ALT2 = e2.get_ALT(); bool alleles_match = (ALT == ALT2) && (REF == REF2); e1.parse_full_entry(true); e1.parse_genotype_entries(true); e2.parse_full_entry(true); e2.parse_genotype_entries(true); pair<string, string> genotype1, genotype2; pair<int,int> geno_ids1, geno_ids2; pair<string, string> missing_genotype(".","."); pair<int, int> missing_id(-1,-1); for (combined_individuals_it=combined_individuals.begin(); combined_individuals_it!=combined_individuals.end(); ++combined_individuals_it) { indv1 = combined_individuals_it->second.first; indv2 = combined_individuals_it->second.second; if ((indv1 == -1) || (indv2 == -1)) continue; // Individual not found in one of the files if (alleles_match) { // Alleles match, so can compare ids instead of strings e1.get_indv_GENOTYPE_ids(indv1, geno_ids1); e2.get_indv_GENOTYPE_ids(indv2, geno_ids2); if ((geno_ids1 != missing_id) && (geno_ids2 != missing_id)) { indv_sums[combined_individuals_it->first].first++; if (((geno_ids1.first == geno_ids2.first) && (geno_ids1.second == geno_ids2.second)) || ((geno_ids1.first == geno_ids2.second) && (geno_ids1.second == geno_ids2.first)) ) { // Match // Don't do anything } else { // Mismatch indv_sums[combined_individuals_it->first].second++; } } else if ((geno_ids1 == missing_id) && (geno_ids2 == missing_id)) { // Both missing // Don't do anything. } else if (geno_ids1 != missing_id) { // Genotype 1 is not missing, genotype 2 is. // Don't do anything. } else if (geno_ids2 != missing_id) { // Genotype 2 is not missing, genotype 1 is. // Don't do anything. 
} else error("Unknown condition"); } else { // Alleles don't match, so need to be more careful and compare strings e1.get_indv_GENOTYPE_strings(indv1, genotype1); e2.get_indv_GENOTYPE_strings(indv2, genotype2); if ((genotype1 != missing_genotype) && (genotype2 != missing_genotype)) { // No missing data indv_sums[combined_individuals_it->first].first++; if (((genotype1.first == genotype2.first) && (genotype1.second == genotype2.second)) || ((genotype1.first == genotype2.second) && (genotype1.second == genotype2.first)) ) { // Match // Don't do anything } else { // Mismatch indv_sums[combined_individuals_it->first].second++; } } else if ((genotype1 == missing_genotype) && (genotype2 == missing_genotype)) { // Both missing // Don't do anything } else if (genotype1 != missing_genotype) { // Genotype 1 is not missing, genotype 2 is. // Don't do anything } else if (genotype2 != missing_genotype) { // Genotype 2 is not missing, genotype 1 is. // Don't do anything } else error("Unknown condition"); } } } string output_file = output_file_prefix + ".diff.indv"; ofstream out(output_file.c_str()); if (!out.is_open()) error("Could not open Sites Differences File: " + output_file, 3); out << "INDV\tN_COMMON_CALLED\tN_DISCORD\tDISCORDANCE" << endl; int N, N_discord; double discordance; for (combined_individuals_it=combined_individuals.begin(); combined_individuals_it!=combined_individuals.end(); ++combined_individuals_it) { out << combined_individuals_it->first; N = indv_sums[combined_individuals_it->first].first; N_discord = indv_sums[combined_individuals_it->first].second; discordance = N_discord / double(N); out << "\t" << N << "\t" << N_discord << "\t" << discordance << endl; } out.close(); }
void Plink::calcHomog() { if (!par::SNP_major) Ind2SNP(); string f = par::output_file_name + ".homog"; ofstream MHOUT; MHOUT.open(f.c_str(),ios::out); MHOUT.precision(4); if (nk==0) error("No clusters (K=0)... cannot perform CMH tests"); printLOG("Homogeneity of odds ratio test, K = " + int2str(nk) + "\n"); if (nk<2) { printLOG("** Warning ** less then 2 clusters specified... \n"); printLOG(" cannot compute between-cluster effects ** \n"); return; } if (nk>10) printLOG("** Warning ** statistics can be unreliable if strata have small N ** \n"); printLOG("Writing results to [ " + f + " ]\n"); MHOUT << setw(4) << "CHR" << " " << setw(par::pp_maxsnp) << "SNP" << " " << setw(4) << "A1" << " " << setw(4) << "A2" << " " << setw(8) << "F_A" << " " << setw(8) << "F_U" << " " << setw(8) << "N_A" << " " << setw(8) << "N_U" << " " << setw(8) << "TEST" << " " << setw(10) << "CHISQ" << " " << setw(4) << "DF" << " " << setw(10) << "P" << " " << setw(10) << "OR" << "\n"; /////////////////////////////////// // Create boolean affection coding affCoding(*this); ////////////////////////////////// // Any individual not assigned to a cluster, // making missing phenotype vector<Individual*>::iterator person = sample.begin(); while ( person != sample.end() ) { if ( (*person)->sol < 0 ) (*person)->missing = true; person++; } /////////////////////////////// // Iterate over SNPs vector<CSNP*>::iterator s = SNP.begin(); int l=0; while ( s != SNP.end() ) { // Uncomment this if we allow permutation for the CMH // tests // In adaptive mode, possibly skip this test // if (par::adaptive_perm && (!perm.snp_test[l])) // { // s++; // l++; // continue; // } // Calculate statistic vector<double> n_11(nk,0); vector<double> n_12(nk,0); vector<double> n_21(nk,0); vector<double> n_22(nk,0); vector<double> lnOR(nk,0); vector<double> SEsq(nk,0); ///////////////// // Autosomal or haploid? 
bool X=false, haploid=false; if (par::chr_sex[locus[l]->chr]) X=true; else if (par::chr_haploid[locus[l]->chr]) haploid=true; ///////////////////////////// // Iterate over individuals vector<bool>::iterator i1 = (*s)->one.begin(); vector<bool>::iterator i2 = (*s)->two.begin(); vector<Individual*>::iterator gperson = sample.begin(); while ( gperson != sample.end() ) { // Phenotype for this person (i.e. might be permuted) Individual * pperson = (*gperson)->pperson; // SNP alleles bool s1 = *i1; bool s2 = *i2; int hom = 2; if ( haploid || ( X && (*gperson)->sex ) ) hom = 1; // Affected individuals if ( pperson->aff && !pperson->missing ) { // Allelic marginal if ( !s1 ) { if ( !s2 ) // FF hom { n_11[ pperson->sol ] += hom ; } else { n_11[ pperson->sol ]++ ; // FT het n_12[ pperson->sol ]++ ; } } else { if ( !s2 ) // FT { gperson++; i1++; i2++; continue; // skip missing genotypes } else // TT { n_12[ pperson->sol ] += hom ; } } } else if ( ! pperson->missing ) // Unaffecteds { // Allelic marginal if ( ! s1 ) { if ( ! s2 ) // FF { n_21[ pperson->sol ] += hom ; } else { n_21[ pperson->sol ] ++ ; n_22[ pperson->sol ] ++ ; } } else { if ( ! 
s2 ) // FT { gperson++; i1++; i2++; continue; // skip missing genotypes } else // TT { n_22[ pperson->sol ] += hom ; } } } // Next individual gperson++; i1++; i2++; } // Calculate log(OR) and SE(ln(OR)) for eacsh strata double X_total = 0; double X_assoc1 = 0; double X_assoc2 = 0; vector<double> X_indiv(nk,0); for (int k=0; k<nk; k++) { // Add 0.5 to each cell to reduce bias n_11[k] += 0.5; n_12[k] += 0.5; n_21[k] += 0.5; n_22[k] += 0.5; // ln(OR) lnOR[k] = log ( ( n_11[k] * n_22[k] ) / ( n_12[k] * n_21[k] ) ); SEsq[k] = 1/n_11[k] + 1/n_12[k] + 1/n_21[k] + 1/n_22[k] ; X_indiv[k] = (lnOR[k] * lnOR[k]) / SEsq[k]; X_total += X_indiv[k]; // For the common, strata-adjusted test X_assoc1 += lnOR[k] / SEsq[k]; X_assoc2 += 1/ SEsq[k]; } // X_total is total chi-square on nk df // X_indiv are individual chi-squares, each on 1 df // X_homog is test for homogeneity of OR, with nk-1 df // X_assoc is strata-adjusted test, with 1 df double X_assoc = (X_assoc1*X_assoc1)/X_assoc2; double X_homog = X_total - X_assoc; MHOUT << setw(4) << locus[l]->chr << " " << setw(par::pp_maxsnp) << locus[l]->name << " " << setw(4) << locus[l]->allele1 << " " << setw(4) << locus[l]->allele2 << " " << setw(8) << "NA" << " " << setw(8) << "NA" << " " << setw(8) << "NA" << " " << setw(8) << "NA" << " " << setw(6) << "TOTAL" << " " << setw(10) << X_total << " " << setw(4) << nk << " " << setw(10) << chiprobP(X_total,nk) << " " << setw(10) << "NA" << "\n"; MHOUT << setw(4) << locus[l]->chr << " " << setw(par::pp_maxsnp) << locus[l]->name << " " << setw(4) << locus[l]->allele1 << " " << setw(4) << locus[l]->allele2 << " " << setw(8) << "NA" << " " << setw(8) << "NA" << " " << setw(8) << "NA" << " " << setw(8) << "NA" << " " << setw(6) << "ASSOC" << " " << setw(10) << X_assoc << " " << setw(4) << 1 << " " << setw(10) << chiprobP(X_assoc,1) << " " << setw(10) << "NA" << "\n"; MHOUT << setw(4) << locus[l]->chr << " " << setw(par::pp_maxsnp) << locus[l]->name << " " << setw(4) << locus[l]->allele1 << " " << 
setw(4) << locus[l]->allele2 << " " << setw(8) << "NA" << " " << setw(8) << "NA" << " " << setw(8) << "NA" << " " << setw(8) << "NA" << " " << setw(6) << "HOMOG" << " " << setw(10) << X_homog << " " << setw(4) << nk-1 << " " << setw(10) << chiprobP(X_homog,nk-1) << " " << setw(10) << "NA" << "\n"; for (int k=0; k<nk; k++) { if ( n_11[k] + n_12[k] <= 1.0001 || n_21[k] + n_22[k] <= 1.0001 ) { MHOUT << setw(4) << locus[l]->chr << " " << setw(par::pp_maxsnp) << locus[l]->name << " " << setw(4) << locus[l]->allele1 << " " << setw(4) << locus[l]->allele2 << " " << setw(8) << "NA" << " " << setw(8) << "NA" << " " << setw(8) << n_11[k] + n_12[k] - 1 << " " << setw(8) << n_21[k] + n_22[k] - 1 << " " << setw(6) << kname[k] << " " << setw(10) << "NA" << " " << setw(4) << "NA" << " " << setw(10) << "NA" << " " << setw(10) << "NA" << "\n"; } else { MHOUT << setw(4) << locus[l]->chr << " " << setw(par::pp_maxsnp) << locus[l]->name << " " << setw(4) << locus[l]->allele1 << " " << setw(4) << locus[l]->allele2 << " " << setw(8) << n_11[k]/double(n_11[k]+n_12[k]) << " " << setw(8) << n_21[k]/double(n_21[k]+n_22[k]) << " " << setw(8) << n_11[k] + n_12[k] - 1 << " " << setw(8) << n_21[k] + n_22[k] - 1 << " " << setw(6) << kname[k] << " " << setw(10) << X_indiv[k] << " " << setw(4) << 1 << " " << setw(10) << chiprobP(X_indiv[k],1) << " "; double odr = ( n_11[k] * n_22[k] ) / ( n_12[k] * n_21[k] ); if ( realnum(odr) ) MHOUT << setw(10) << odr << "\n"; else MHOUT << setw(10) << "NA" << "\n"; } } // Next locus s++; l++; } MHOUT.close(); }