void Bam2FastQ::closeFiles() { // NULL out any duplicate file pointers // so files are only closed once. if(myFirstFile == myUnpairedFile) { myFirstFile = NULL; } if(mySecondFile == myUnpairedFile) { mySecondFile = NULL; } if(mySecondFile == myFirstFile) { mySecondFile = NULL; } if(myUnpairedFile != NULL) { ifclose(myUnpairedFile); myUnpairedFile = NULL; } if(myFirstFile != NULL) { ifclose(myFirstFile); myFirstFile = NULL; } if(mySecondFile != NULL) { ifclose(mySecondFile); mySecondFile = NULL; } }
void Bam2FastQ::closeFiles() { // NULL out any duplicate file pointers // so files are only closed once. if(myFirstFile == myUnpairedFile) { myFirstFile = NULL; } if(mySecondFile == myUnpairedFile) { mySecondFile = NULL; } if(mySecondFile == myFirstFile) { mySecondFile = NULL; } if(myUnpairedFile != NULL) { ifclose(myUnpairedFile); myUnpairedFile = NULL; } if(myFirstFile != NULL) { ifclose(myFirstFile); myFirstFile = NULL; } if(mySecondFile != NULL) { ifclose(mySecondFile); mySecondFile = NULL; } if(myFqList != NULL) { ifclose(myFqList); myFqList = NULL; } // Loop through the fastq map and close those files. for (OutFastqMap::iterator it=myOutFastqs.begin(); it!=myOutFastqs.end(); ++it) { ifclose(it->second); it->second = NULL; } myOutFastqs.clear(); }
// Close a FastQFile. FastQStatus::Status FastQFile::closeFile() { int closeStatus = 0; // Success. // If a file has been opened, close it. if(myFile != NULL) { // Close the file. closeStatus = ifclose(myFile); myFile = NULL; } if(closeStatus == 0) { // Success - either there wasn't a file to close or it was closed // successfully. return(FastQStatus::FASTQ_SUCCESS); } else { std::string errorMessage = "Failed to close file: "; errorMessage += myFileName.c_str(); logMessage(errorMessage.c_str()); return(FastQStatus::FASTQ_CLOSE_ERROR); } }
void testReadLine() { IFILE filePtr = ifopen("testFiles/testFile.txt", "rb"); assert(filePtr != NULL); String line = ""; line.ReadLine(filePtr); assert(line == " Hello, I am a testFile. "); line.Trim(); assert(line == "Hello, I am a testFile."); // Does not compile in current version, but compiles in old verison. // This can be added back in to ensure that it will catch the difference // in return value for ReadLine (now: int; used to be: string&) // testMethod(line.ReadLine(filePtr)); line.ReadLine(filePtr); assert(temp1 == 0); testMethod(line); assert(temp1 == 1); // line.ReadLine(filePtr).Trim(); line.ReadLine(filePtr); line.Trim(); assert(line == "ThirdLine."); ifclose(filePtr); }
// Reset variables for each file. void GlfFile::resetFile() { // Close the file. if (myFilePtr != NULL) { // If we already have an open file, close it. // First check to see if an end record needs to be written, which // is the case if the state is RECORD. if(myNextSection == RECORD) { if(!writeRecord(myEndMarker)) { // Failed to write the end marker record. myStatus.setStatus(GlfStatus::FAIL_IO, "Failed to write end of chromosome/section marker."); throw(GlfException(myStatus)); } } ifclose(myFilePtr); myFilePtr = NULL; } myIsOpenForRead = false; myIsOpenForWrite = false; myRecordCount = 0; myStatus = GlfStatus::SUCCESS; myNextSection = HEADER; }
void StringHash::ReadLinesFromFile(const char * filename) { IFILE f = ifopen(filename, "rb"); if (f == NULL) return; ReadLinesFromFile(f); ifclose(f); }
// Release everything the BedFile owns: the bim, fam and bed file handles
// and the bed read buffer.
BedFile::~BedFile() {
  if ( iBimFile != NULL ) {
    ifclose(iBimFile);
  }
  iBimFile = NULL;

  if ( iFamFile != NULL ) {
    ifclose(iFamFile);
  }
  iFamFile = NULL;

  if ( iFile != NULL ) {
    ifclose(iFile);
  }

  // delete[] on a null pointer is a no-op, so no explicit check is needed.
  delete[] pBedBuffer;
}
void Imputation::PrintInfoFile(HaplotypeSet &rHap,HaplotypeSet &tHap, ImputationStatistics &stats) { cout<<endl<<" Writing summary (.info) files ... "<<endl; IFILE info = ifopen(outFile + ".info", "wb"); ifprintf(info, "SNP\tREF(0)\tALT(1)\tALT_Frq\tMAF\tAvgCall\tRsq\tGenotyped\tLooRsq\tEmpR\tEmpRsq\tDose0\tDose1\n"); int i=0; for (int index =0; index < rHap.RefTypedTotalCount; index++) { if(rHap.RefTypedIndex[index]==-1) { if(i>=rHap.PrintStartIndex && i <= rHap.PrintEndIndex) { ifprintf(info, "%s\t%s\t%s\t%.5f\t%.5f\t%.5f\t%.5f\t", RsId? rHap.VariantList[i].rsid.c_str(): rHap.VariantList[i].name.c_str(), rHap.VariantList[i].refAlleleString.c_str(), rHap.VariantList[i].altAlleleString.c_str(), stats.AlleleFrequency(i), stats.AlleleFrequency(i) > 0.5 ? 1.0 - stats.AlleleFrequency(i) : stats.AlleleFrequency(i), stats.AverageCallScore(i), stats.Rsq(i)); if (!tHap.missing[i]) { ifprintf(info, "Genotyped\t%.3f\t%.3f\t%.5f\t%.5f\t%.5f\n", stats.LooRsq(i), stats.EmpiricalR(i), stats.EmpiricalRsq(i), stats.LooMajorDose(i), stats.LooMinorDose(i)); } else ifprintf(info, "Imputed\t-\t-\t-\t-\t-\n"); } i++; } else { variant ThisTypedVariant =tHap.TypedOnlyVariantList[rHap.RefTypedIndex[index]]; ifprintf(info, "%s\t%s\t%s\t%.5f\t%.5f\t-\t-\tTyped_Only\t-\t-\t-\t-\t-\n", RsId? ThisTypedVariant.rsid.c_str(): ThisTypedVariant.name.c_str(), ThisTypedVariant.refAlleleString.c_str(), ThisTypedVariant.altAlleleString.c_str(), tHap.AlleleFreq[rHap.RefTypedIndex[index]], tHap.AlleleFreq[rHap.RefTypedIndex[index]] > 0.5 ? 1.0 - tHap.AlleleFreq[rHap.RefTypedIndex[index]] : tHap.AlleleFreq[rHap.RefTypedIndex[index]]); } } ifclose(info); cout<<endl<<" Summary information written to : "<<outFile<<".info"<<endl; }
void Imputation::FlushPartialVcf(HaplotypeSet &rHap,HaplotypeSet &tHap,HaplotypeSet &PartialDosage, string &filename,int &Index) { string tempFileIndex(outFile),tempFileIndex1(outFile); IFILE vcfdosepartial = ifopen(filename.c_str(), "wb", InputFile::BGZF); for(int hapId=0;hapId<(int)PartialDosage.individualName.size();hapId++) { ifprintf(vcfdosepartial,"\t%s",PartialDosage.individualName[hapId].c_str()); } ifprintf(vcfdosepartial,"\n"); int i=0; for (int index =0; index < rHap.RefTypedTotalCount; index++) { if(rHap.RefTypedIndex[index]==-1) { if(i>=rHap.PrintStartIndex && i <= rHap.PrintEndIndex) { bool majorIsReference=false; if(!rHap.major[i]) majorIsReference=true; if(!tHap.AllMaleTarget) PartialDosage.PrintDosageForVcfOutputForID(vcfdosepartial,i, majorIsReference,rHap.VariantList[i].refAllele); else PartialDosage.PrintDosageForVcfOutputForIDMaleSamples(vcfdosepartial,i, majorIsReference,rHap.VariantList[i].refAllele); ifprintf(vcfdosepartial,"\n"); } i++; } else { if(!tHap.AllMaleTarget) PartialDosage.PrintDosageGWASOnlyForVcfOutputForID (tHap,vcfdosepartial,rHap.RefTypedIndex[index]); else PartialDosage.PrintDosageGWASOnlyForVcfOutputForIDMaleSamples (tHap,vcfdosepartial,rHap.RefTypedIndex[index]); ifprintf(vcfdosepartial,"\n"); } } ifclose(vcfdosepartial); }
// Destructor: make sure an open file does not outlive the object.
VcfFile::~VcfFile()
{
    const bool fileIsOpen = (myFilePtr != NULL);
    if (fileIsOpen)
    {
        ifclose(myFilePtr);
        myFilePtr = NULL;
    }
}
void GCContent::LoadRegions(String & regionsFile, GenomeSequence &genome, bool invertRegion) { if(regionsFile.Length()==0) return; if(genome.sequenceLength()==0) error("No reference genome loaded!\n"); IFILE fhRegions; fhRegions = ifopen(regionsFile.c_str(),"r"); if(fhRegions==NULL) error("Open regions file %s failed!\n", regionsFile.c_str()); regionIndicator.resize(genome.sequenceLength()); StringArray tokens; String buffer; int len; fprintf(stderr, "Loading region list..."); while (!ifeof(fhRegions)){ buffer.ReadLine(fhRegions); if (buffer.IsEmpty() || buffer[0] == '#') continue; tokens.AddTokens(buffer, WHITESPACE); if(tokens.Length() < 3) continue; genomeIndex_t startGenomeIndex = 0; int chromosomeIndex = tokens[1].AsInteger(); // use chromosome name (token[0]) and position (token[1]) to query genome index. startGenomeIndex = genome.getGenomePosition(tokens[0].c_str(), chromosomeIndex); if(startGenomeIndex >= regionIndicator.size() ) { //fprintf(stderr, "WARNING: region list section %s position %u is not found in the reference and skipped...\n", tokens[0].c_str(), chromosomeIndex); continue; } len = tokens[2].AsInteger() - tokens[1].AsInteger() + 1; for(uint32_t i=startGenomeIndex; i<startGenomeIndex+len; i++) regionIndicator[i] = true; tokens.Clear(); buffer.Clear(); } if (invertRegion) { fprintf(stderr, " invert region..."); for (uint32_t i = 0; i < regionIndicator.size(); i++) { regionIndicator[i] = !regionIndicator[i]; } } ifclose(fhRegions); fprintf(stderr, "DONE!\n"); }
// Write a two-column, tab-separated table (marker name, error rate) with a
// header line to the named file.  Does nothing if the file cannot be opened.
void MarkovParameters::WriteErrorRates(StringArray & markerNames, const char * filename)
   {
   IFILE output = ifopen(filename, "wb");

   if (output != NULL)
      {
      ifprintf(output, "MarkerName\tErrorRate\n");

      int marker = 0;
      while (marker < markers)
         {
         ifprintf(output, "%s\t%.5g\n", (const char *) markerNames[marker], E[marker]);
         ++marker;
         }

      ifclose(output);
      }
   }
// Reposition an open file at its beginning and re-read the header.
// If the header cannot be read the file is closed.  endOfSection is set
// whenever a file was open on entry.
void glfHandler::Rewind()
   {
   if (!isOpen())
      return;

   ifrewind(handle);

   const bool headerOk = ReadHeader();
   if (!headerOk)
      ifclose(handle);

   endOfSection = true;
   }
// Load aliases from the named file via the IFILE overload.
// Returns false if the file could not be opened, true otherwise.
bool StringAlias::ReadFromFile(const char * filename)
   {
   IFILE input = ifopen(filename, "rt");
   const bool opened = (input != NULL);

   if (opened)
      {
      ReadFromFile(input);
      ifclose(input);
      }

   return opened;
   }
// Write a two-column, tab-separated table ("<marker>-<next marker>", switch
// rate) with a header line to the named file, one row per interval between
// adjacent markers.  Does nothing if the file cannot be opened.
void MarkovParameters::WriteCrossoverRates(StringArray & markerNames, const char * filename)
   {
   IFILE output = ifopen(filename, "wb");

   if (output != NULL)
      {
      ifprintf(output, "Interval\tSwitchRate\n");

      // left/right are the two markers bounding each interval.
      for (int left = 0, right = 1; right < markers; left++, right++)
         ifprintf(output, "%s-%s\t%.5g\n",
                  (const char *) markerNames[left],
                  (const char *) markerNames[right],
                  R[left]);

      ifclose(output);
      }
   }
void VcfFile::reset() { // Reset the child class. resetFile(); // Close the file. if (myFilePtr != NULL) { // If we already have an open file, close it. ifclose(myFilePtr); myFilePtr = NULL; } myNumRecords = 0; }
/*
 * Read the data section of an ATF file into hdr->AS.rawdata.
 *
 * Does nothing if rawdata is already allocated.  Otherwise seeks to
 * hdr->HeadLen and reads the file line by line; for each line, hdr->NS
 * numbers are parsed and stored as doubles at
 *     rawdata + row*hdr->AS.bpb + hdr->CHANNEL[k].bi .
 * The buffer is grown on demand.  On return the file is closed, hdr->NRec
 * holds the number of rows read, and AS.first/AS.length describe the whole
 * record range.
 */
EXTERN_C void sread_atf(HDRTYPE* hdr) {

	if (VERBOSE_LEVEL>6) fprintf(stdout,"SREAD ATF [%i,%i]\n",(unsigned)hdr->NRec, (unsigned)hdr->SPR);

	if (hdr->AS.rawdata != NULL) return;

	//if (hdr->NRec * hdr->SPR > 0) hdr->AS.rawdata = malloc(hdr->NRec * hdr->SPR * hdr->AS.bpb);

	ifseek(hdr, hdr->HeadLen, SEEK_SET);

	/* getline() owns and resizes this buffer; it is released once, after the loop */
	size_t ll = 0;
	char *line = NULL;

	if (VERBOSE_LEVEL>6) fprintf(stdout,"SREAD ATF\n");

	size_t ln = 0;
	/* logical NOT: the former bitwise "~ifeof()" was non-zero for both 0 and 1 */
	while (!ifeof(hdr)) {
		ssize_t nc = getline(&line, &ll, hdr->FILE.FID);
		if (nc < 0) break;

		if (VERBOSE_LEVEL>8) fprintf(stdout,"SREAD ATF 2 %i\t<%s>\n",(unsigned)ln,line );

		/* grow the sample buffer before writing row ln */
		if ((hdr->NRec * hdr->SPR) <= (ln+1) ) {
			hdr->NRec = max(1024, ln*2);
			hdr->AS.rawdata = realloc(hdr->AS.rawdata, hdr->NRec * hdr->SPR * hdr->AS.bpb);
		}

		if (VERBOSE_LEVEL>8) fprintf(stdout,"SREAD ATF 4 %i\t<%s>\n",(unsigned)ln,line );

		/*
		 * Parse directly from the line: strtod() skips the separating
		 * tabs/blanks itself and returns where it stopped.  (Tokenising
		 * with strtok() first cut the line after the first field, so all
		 * further channels were read from an empty string.)
		 */
		char *str = line;
		typeof(hdr->NS) k;
		for (k = 0; k < hdr->NS; k++) {
			*(double*)(hdr->AS.rawdata + ln*hdr->AS.bpb + hdr->CHANNEL[k].bi) = strtod(str, &str);
		}

		if (VERBOSE_LEVEL>8) fprintf(stdout,"SREAD ATF 6 %i\t<%s>\n",(unsigned)ln,line );

		ln++;
	}
	free(line);
	ifclose(hdr);

	hdr->NRec = ln;
	hdr->AS.first = 0;
	hdr->AS.length = hdr->NRec;
}
// Reset variables for each file. void SamFile::resetFile() { // Close the file. if (myFilePtr != NULL) { // If we already have an open file, close it. ifclose(myFilePtr); myFilePtr = NULL; } if(myInterfacePtr != NULL) { delete myInterfacePtr; myInterfacePtr = NULL; } myIsOpenForRead = false; myIsOpenForWrite = false; myHasHeader = false; mySortedType = UNSORTED; myPrevReadName.Clear(); myPrevCoord = -1; myPrevRefID = 0; myRecordCount = 0; myStatus = SamStatus::SUCCESS; // Reset indexed bam values. myIsBamOpenForRead = false; myRefID = BamIndex::REF_ID_ALL; myStartPos = -1; myEndPos = -1; myNewSection = false; myOverlapSection = true; myCurrentChunkEnd = 0; myChunksToRead.clear(); if(myBamIndex != NULL) { delete myBamIndex; myBamIndex = NULL; } // If statistics are being generated, reset them. if(myStatistics != NULL) { myStatistics->reset(); } myRefName.clear(); }
void put_cde_slots(IFILE *file, int ifaxq)				/*;put_cde_slots*/
{
	/* Remember where the slot information starts. */
	long	dpos = iftell(file);

	/* Slot counts first ... */
	putnum(file, "n-code_slots", tup_size(CODE_SLOTS));
	putnum(file, "n-data-slots", tup_size(DATA_SLOTS));
	putnum(file, "n-exception-slots", tup_size(EXCEPTION_SLOTS));

	/* ... then the slot tables themselves. */
	put_slot(file, CODE_SLOTS);
	put_slot(file, DATA_SLOTS);
	put_slot(file, EXCEPTION_SLOTS);

	/* Record the offset of the information just written in the file
	 * header field, then close the file (presumably the close is what
	 * stores the header word at the start of the file -- confirm in
	 * ifclose).
	 */
	file->fh_slots = dpos;
	ifclose(file);
}
// Close the input file and discard everything parsed so far: the owned
// individual and marker objects, the meta key/value lists and the line
// counter.
void VcfFile::reset() {
  if ( iFile != NULL ) {
    ifclose(iFile);
  }
  iFile = NULL;

  // The vectors own their elements, so free each one before clearing.
  for(size_t ind = 0, nInds = vpVcfInds.size(); ind < nInds; ++ind) {
    delete vpVcfInds[ind];
  }
  vpVcfInds.clear();

  for(size_t marker = 0, nMarkers = vpVcfMarkers.size(); marker < nMarkers; ++marker) {
    delete vpVcfMarkers[marker];
  }
  vpVcfMarkers.clear();

  asMetaKeys.Clear();
  asMetaValues.Clear();
  nNumLines = 0;
}
// Open the named file for reading and parse its header.
// isStub records whether the open itself failed.  If the header cannot be
// read the file is closed again.  Returns true only when the handle is
// still valid afterwards.
bool glfHandler::Open(const String & filename)
   {
   handle = ifopen(filename, "rb");

   // A handler without a file behind it is a stub.
   isStub = (handle == NULL);
   if (isStub)
      return false;

   const bool headerOk = ReadHeader();
   if (!headerOk)
      ifclose(handle);

   endOfSection = true;

   return handle != NULL;
   }
bool FilterStat::writeMergedVcf(const char* outFile) { IFILE oFile = ifopen(outFile,"wb"); if ( oFile == NULL ) { Logger::gLogger->error("Cannot open output file %s",outFile); } VcfFile vcf; vcf.setSiteOnly(false); vcf.setParseValues(true); vcf.openForRead(sAnchorVcf.c_str(),1); vcf.printVCFHeader(oFile); VcfMarker* pMarker; String STC, STR; for( int i=0; vcf.iterateMarker(); ++i ) { pMarker = vcf.getLastMarker(); int c[FILTER_STAT_COUNTS]; for(int j=0; j < FILTER_STAT_COUNTS; ++j) { c[j] = vCounts[FILTER_STAT_COUNTS*i+j]; } STC.printf("%d,%d,%d,%d,%d,%d",c[0],c[1],c[2],c[3],c[4],c[5]); if ( ( c[0]+c[1] > 4 ) && ( c[1]+c[3] > 4 ) && ( c[0]+c[2] > 4 ) && ( c[1]+c[3] > 4 ) ) { STR.printf("%.2lf",((c[0]+.5)*(c[3]+.5)-(c[1]+.5)*(c[2]+.5))/sqrt((c[0]+c[1]+1.)*(c[2]+c[3]+1.)*(c[0]+c[2]+1.)*(c[1]+c[3]+1.))); } else { STR = "0"; } pMarker->asInfoKeys.Add("STC"); pMarker->asInfoKeys.Add("STR"); pMarker->asInfoValues.Add(STC); pMarker->asInfoValues.Add(STR); pMarker->printVCFMarker(oFile,false); } ifclose(oFile); return true; }
// Read & parse the specified index file. SamStatus::Status BamIndex::readIndex(const char* filename) { // Reset the index from anything that may previously be set. resetIndex(); IFILE indexFile = ifopen(filename, "rb"); // Failed to open the index file. if(indexFile == NULL) { return(SamStatus::FAIL_IO); } // generate the bam index structure. // Read the magic string. char magic[4]; if(ifread(indexFile, magic, 4) != 4) { // Failed to read the magic ifclose(indexFile); return(SamStatus::FAIL_IO); } // If this is not an index file, set num references to 0. if (magic[0] != 'B' || magic[1] != 'A' || magic[2] != 'I' || magic[3] != 1) { // Not a BAM Index file. ifclose(indexFile); return(SamStatus::FAIL_PARSE); } // It is a bam index file. // Read the number of reference sequences. if(ifread(indexFile, &n_ref, 4) != 4) { // Failed to read. ifclose(indexFile); return(SamStatus::FAIL_IO); } // Size the references. myRefs.resize(n_ref); for(int refIndex = 0; refIndex < n_ref; refIndex++) { // Read each reference. Reference* ref = &(myRefs[refIndex]); // Read the number of bins. if(ifread(indexFile, &(ref->n_bin), 4) != 4) { // Failed to read the number of bins. // Return failure. ifclose(indexFile); return(SamStatus::FAIL_PARSE); } // If there are no bins, then there are no // mapped/unmapped reads. if(ref->n_bin == 0) { ref->n_mapped = 0; ref->n_unmapped = 0; } // Resize the bins so they can be indexed by bin number. ref->bins.resize(ref->n_bin + 1); // Read each bin. for(int binIndex = 0; binIndex < ref->n_bin; binIndex++) { uint32_t binNumber; // Read in the bin number. if(ifread(indexFile, &(binNumber), 4) != 4) { // Failed to read the bin number. // Return failure. ifclose(indexFile); return(SamStatus::FAIL_IO); } // Add the bin to the reference and get the // pointer back so the values can be set in it. Bin* binPtr = &(ref->bins[binIndex]); binPtr->bin = binNumber; // Read in the number of chunks. 
if(ifread(indexFile, &(binPtr->n_chunk), 4) != 4) { // Failed to read number of chunks. // Return failure. ifclose(indexFile); return(SamStatus::FAIL_IO); } // Read in the chunks. // Allocate space for the chunks. uint32_t sizeOfChunkList = binPtr->n_chunk * sizeof(Chunk); binPtr->chunks = (Chunk*)malloc(sizeOfChunkList); if(ifread(indexFile, binPtr->chunks, sizeOfChunkList) != sizeOfChunkList) { // Failed to read the chunks. // Return failure. ifclose(indexFile); return(SamStatus::FAIL_IO); } // Determine the min/max for this bin if it is not the max bin. if(binPtr->bin != MAX_NUM_BINS) { for(int i = 0; i < binPtr->n_chunk; i++) { if(binPtr->chunks[i].chunk_beg < ref->minChunkOffset) { ref->minChunkOffset = binPtr->chunks[i].chunk_beg; } if(binPtr->chunks[i].chunk_end > ref->maxChunkOffset) { ref->maxChunkOffset = binPtr->chunks[i].chunk_end; } if(binPtr->chunks[i].chunk_end > maxOverallOffset) { maxOverallOffset = binPtr->chunks[i].chunk_end; } } } else { // Mapped/unmapped are the last chunk of the // MAX BIN ref->n_mapped = binPtr->chunks[binPtr->n_chunk - 1].chunk_beg; ref->n_unmapped = binPtr->chunks[binPtr->n_chunk - 1].chunk_end; } } // Read the number of intervals. if(ifread(indexFile, &(ref->n_intv), 4) != 4) { // Failed to read, set to 0. ref->n_intv = 0; // Return failure. ifclose(indexFile); return(SamStatus::FAIL_IO); } // Allocate space for the intervals and read them. uint32_t linearIndexSize = ref->n_intv * sizeof(uint64_t); ref->ioffsets = (uint64_t*)malloc(linearIndexSize); if(ifread(indexFile, ref->ioffsets, linearIndexSize) != linearIndexSize) { // Failed to read the linear index. // Return failure. ifclose(indexFile); return(SamStatus::FAIL_IO); } } int32_t numUnmapped = 0; if(ifread(indexFile, &numUnmapped, sizeof(int32_t)) == sizeof(int32_t)) { myUnMappedNumReads = numUnmapped; } // Successfully read the bam index file. ifclose(indexFile); return(SamStatus::SUCCESS); }
int main(int argc, char ** argv) { setbuf(stdout, NULL); time_t start = time(NULL); printf("MiniMac - Imputation into phased haplotypes\n" "(c) 2011 Goncalo Abecasis\n"); #ifdef __VERSION__ printf("VERSION 5.0\n"); #else printf("UNDOCUMENTED RELEASE\n"); #endif int rounds = 5, states = 200, cpus = 0; bool em = false, gzip = false, phased = false; String referenceHaplotypes, referenceSnps; String haplotypes, snps; String prefix("minimac"); String firstMarker, lastMarker; String recombinationRates, errorRates; BEGIN_LONG_PARAMETERS(longParameters) LONG_PARAMETER_GROUP("Reference Haplotypes") LONG_STRINGPARAMETER("refHaps", &referenceHaplotypes) LONG_STRINGPARAMETER("refSnps", &referenceSnps) LONG_PARAMETER_GROUP("Target Haplotypes") LONG_STRINGPARAMETER("haps", &haplotypes) LONG_STRINGPARAMETER("snps", &snps) LONG_PARAMETER_GROUP("Starting Parameters") LONG_STRINGPARAMETER("rec", &recombinationRates) LONG_STRINGPARAMETER("erate", &errorRates) LONG_PARAMETER_GROUP("Parameter Fitting") LONG_INTPARAMETER("rounds", &rounds) LONG_INTPARAMETER("states", &states) LONG_PARAMETER("em", &em) LONG_PARAMETER_GROUP("Output Files") LONG_STRINGPARAMETER("prefix", &prefix) LONG_PARAMETER("phased", &phased) LONG_PARAMETER("gzip", &gzip) // LONG_PARAMETER_GROUP("Clipping Window") // LONG_STRINGPARAMETER("start", &firstMarker) // LONG_STRINGPARAMETER("stop", &lastMarker) #ifdef _OPENMP LONG_PARAMETER_GROUP("Multi-Threading") LONG_INTPARAMETER("cpus", &cpus) #endif END_LONG_PARAMETERS(); ParameterList pl; pl.Add(new LongParameters("Command Line Options", longParameters)); pl.Read(argc, argv); pl.Status(); #ifdef _OPENMP if (cpus > 0) omp_set_num_threads(cpus); #endif // Read marker list printf("Reading Reference Marker List ...\n"); StringArray refMarkerList; refMarkerList.Read(referenceSnps); // Index markers StringIntHash referenceHash; for (int i = 0; i < refMarkerList.Length(); i++) referenceHash.Add(refMarkerList[i].Trim(), i); printf(" %d Markers in Reference Haplotypes...\n\n", 
refMarkerList.Length()); // Load reference haplotypes printf("Loading reference haplotypes ...\n"); HaplotypeSet reference; reference.markerCount = refMarkerList.Length(); reference.LoadHaplotypes(referenceHaplotypes); printf(" %d Reference Haplotypes Loaded ...\n\n", reference.count); // Read framework marker list printf("Reading Framework Marker List ...\n"); StringArray markerList; markerList.Read(snps); ClipReference(reference, refMarkerList, referenceHash, markerList, firstMarker, lastMarker); // Crossref Marker Names to Reference Panel Positions IntArray markerIndex; markerIndex.Dimension(markerList.Length()); int matches = 0; for (int i = 0; i < markerList.Length(); i++) { markerIndex[i] = referenceHash.Integer(markerList[i].Trim()); if (markerIndex[i] >= 0) matches++; } printf(" %d Markers in Framework Haplotypes Overlap Reference ...\n", matches); if (matches == 0) error("No markers overlap between target and reference\n" "Please check correct reference is being used and markers are named consistently"); printf(" %d Other Markers in Framework Haplotypes Discarded ...\n\n", markerList.Length() - matches); // Check for flips in reference vs. 
target haplotypes int flips = 0; int previous = -1; for (int i = 0; i < markerIndex.Length(); i++) if (markerIndex[i] >= 0) if (markerIndex[i] < previous) { if (flips++ < 10) printf(" -> Marker %s precedes %s in reference, but follows it in target\n", (const char *) refMarkerList[previous], (const char *) markerList[i]); previous = markerIndex[i]; } if (flips > 10) printf(" -> %d Additional Marker Order Changes Not Listed\n", flips - 10); if (flips) printf(" %d Marker Pairs Change Order in Target vs Framework Haplotypes\n", flips); // Load target haplotypes printf("Loading target haplotypes ...\n"); HaplotypeSet target; target.markerCount = markerList.Length(); target.LoadHaplotypes(haplotypes, true); reference.CalculateFrequencies(); target.CalculateFrequencies(); target.CompareFrequencies(reference, markerIndex, markerList); printf(" %d Target Haplotypes Loaded ...\n\n", target.count); int startIndex = firstMarker.IsEmpty() ? 0 : referenceHash.Integer(firstMarker); int stopIndex = lastMarker.IsEmpty() ? reference.markerCount - 1 : referenceHash.Integer(lastMarker); if (startIndex < 0 || stopIndex < 0) error("Clipping requested, but no position available for one of the endpoints"); printf("Setting up Markov Model...\n\n"); // Setup Markov Model MarkovParameters mp; mp.Allocate(reference.markerCount); if (rounds > 0) printf("Initializing Model Parameters (using %s and up to %d haplotypes)\n", em ? 
"E-M" : "MCMC", states); // Simple initial estimates of error and recombination rate for (int i = 0; i < reference.markerCount; i++) mp.E[i] = 0.01; for (int i = 0; i < reference.markerCount - 1; i++) mp.R[i] = 0.001; if (mp.ReadErrorRates(errorRates)) printf(" Updated error rates using data in %s ...\n", (const char *) errorRates); if (mp.ReadCrossoverRates(recombinationRates)) printf(" Updated recombination rates using %s ...\n", (const char *) recombinationRates); // Parameter estimation loop for (int round = 0; round < rounds; round++) { printf(" Round %d of Parameter Refinement ...\n", round + 1); int iterations = states < reference.count ? states : reference.count; MarkovModel original; original.CopyParameters(mp); #pragma omp parallel for for (int i = 0; i < iterations; i++) { MarkovModel mm; mm.Allocate(reference.markerCount, reference.count - 1); mm.CopyParameters(original); // Reference leave one out (loo) panel char ** reference_loo = new char * [reference.count - 1]; for (int in = 0, out = 0; in < reference.count; in++) if (in != i) reference_loo[out++] = reference.haplotypes[in]; mm.WalkLeft(reference.haplotypes[i], reference_loo, reference.freq); if (em) mm.CountExpected(reference.haplotypes[i], reference_loo, reference.freq); else { #pragma omp critical { mm.ProfileModel(reference.haplotypes[i], reference_loo, reference.freq); } } delete [] reference_loo; #pragma omp critical mp += mm; } if (round >= rounds / 2) { int iterations = states < target.count ? 
states : target.count; #pragma omp parallel for for (int i = 0; i < iterations; i++) { MarkovModel mm; mm.Allocate(reference.markerCount, reference.count); mm.CopyParameters(original); // Padded version of target haplotype, including missing sites char * padded = new char [reference.markerCount]; for (int k = 0; k < reference.markerCount; k++) padded[k] = 0; // Copy current haplotype into padded vector for (int j = 0; j < target.markerCount; j++) if (markerIndex[j] >= 0) padded[markerIndex[j]] = target.haplotypes[i][j]; mm.WalkLeft(padded, reference.haplotypes, reference.freq); if (em) mm.CountExpected(padded, reference.haplotypes, reference.freq); else { #pragma omp critical { mm.ProfileModel(padded, reference.haplotypes, reference.freq); } } delete [] padded; #pragma omp critical mp += mm; } } mp.UpdateModel(); double crossovers = 0; for (int i = 0; i < reference.markerCount - 1; i++) crossovers += mp.R[i]; double errors = 0; for (int i = 0; i < reference.markerCount; i++) { double heterozygosity = 1.0 - square(reference.freq[1][i]) - square(reference.freq[2][i]) - square(reference.freq[3][i]) - square(reference.freq[4][i]); errors += mp.E[i] * heterozygosity; } errors /= reference.markerCount + 1e-30; printf(" %.0f mosaic crossovers expected per haplotype\n", crossovers); printf(" %.1f%% of crossovers are due to reference flips\n", mp.empiricalFlipRate * 100.); printf(" %.3g errors in mosaic expected per marker\n", errors); } if (rounds > 0) { printf(" Saving estimated parameters for future use ...\n"); mp.WriteParameters(refMarkerList, prefix, gzip); } printf("\n"); // List the major allele at each location reference.ListMajorAlleles(); printf("Generating Draft .info File ...\n\n"); // Output some basic information IFILE info = ifopen(prefix + ".info.draft", "wt"); ifprintf(info, "SNP\tAl1\tAl2\tFreq1\tGenotyped\n"); for (int i = 0, j = 0; i <= stopIndex; i++) if (i >= startIndex) ifprintf(info, "%s\t%s\t%s\t%.4f\t%s\n", (const char *) refMarkerList[i], 
reference.MajorAlleleLabel(i), reference.MinorAlleleLabel(i), reference.freq[reference.major[i]][i], j < markerIndex.Length() && i == markerIndex[j] ? (j++, "Genotyped") : "-"); else if (j < markerIndex.Length() && i == markerIndex[j]) j++; ifclose(info); printf("Imputing Genotypes ...\n"); IFILE dosages = ifopen(prefix + ".dose" + (gzip ? ".gz" : ""), "wt"); IFILE hapdose, haps; if (phased) { hapdose = ifopen(prefix + ".hapDose" + (gzip ? ".gz" : ""), "wt"); haps = ifopen(prefix + ".haps" + (gzip ? ".gz" : ""), "wt"); } ImputationStatistics stats(reference.markerCount); // Impute each haplotype #pragma omp parallel for for (int i = 0; i < target.count; i++) { if (i != 0 && target.labels[i] == target.labels[i-1]) continue; MarkovModel mm; mm.Allocate(reference.markerCount, reference.count); mm.ClearImputedDose(); mm.CopyParameters(mp); // Padded version of target haplotype, including missing sites char * padded = new char [reference.markerCount]; for (int j = 0; j < reference.markerCount; j++) padded[j] = 0; int k = i; do { printf(" Processing Haplotype %d of %d ...\n", k + 1, target.count); // Copy current haplotype into padded vector for (int j = 0; j < target.markerCount; j++) if (markerIndex[j] >= 0) padded[markerIndex[j]] = target.haplotypes[k][j]; mm.WalkLeft(padded, reference.haplotypes, reference.freq); mm.Impute(reference.major, padded, reference.haplotypes, reference.freq); #pragma omp critical { stats.Update(mm.imputedHap, mm.leaveOneOut, padded, reference.major); } #pragma omp critical if (phased) { ifprintf(hapdose, "%s\tHAPLO%d", (const char *) target.labels[i], k - i + 1); ifprintf(haps, "%s\tHAPLO%d", (const char *) target.labels[i], k - i + 1); for (int j = startIndex; j <= stopIndex; j++) { ifprintf(hapdose, "\t%.3f", mm.imputedHap[j]); ifprintf(haps, "%s%c", j % 8 == 0 ? 
" " : "", mm.imputedAlleles[j]); } ifprintf(hapdose, "\n"); ifprintf(haps, "\n"); } k++; } while (k < target.count && target.labels[k] == target.labels[i]); printf(" Outputting Individual %s ...\n", (const char *) target.labels[i]); #pragma omp critical { ifprintf(dosages, "%s\tDOSE", (const char *) target.labels[i]); for (int j = startIndex; j <= stopIndex; j++) ifprintf(dosages, "\t%.3f", mm.imputedDose[j]); ifprintf(dosages, "\n"); } delete [] padded; } ifclose(dosages); if (phased) { ifclose(hapdose); ifclose(haps); } // Output some basic information info = ifopen(prefix + ".info" + (gzip ? ".gz" : ""), "wt"); ifprintf(info, "SNP\tAl1\tAl2\tFreq1\tMAF\tAvgCall\tRsq\tGenotyped\tLooRsq\tEmpR\tEmpRsq\tDose1\tDose2\n"); // Padded version of target haplotype, including missing sites char * padded = new char [reference.markerCount]; for (int k = 0; k < reference.markerCount; k++) padded[k] = 0; // Mark genotyped SNPs in padded vector for (int j = 0; j < target.markerCount; j++) if (markerIndex[j] >= 0) padded[markerIndex[j]] = 1; for (int i = startIndex; i <= stopIndex; i++) { ifprintf(info, "%s\t%s\t%s\t%.5f\t%.5f\t%.5f\t%.5f\t", (const char *) refMarkerList[i], reference.MajorAlleleLabel(i), reference.MinorAlleleLabel(i), stats.AlleleFrequency(i), stats.AlleleFrequency(i) > 0.5 ? 1.0 - stats.AlleleFrequency(i) : stats.AlleleFrequency(i), stats.AverageCallScore(i), stats.Rsq(i)); if (padded[i]) ifprintf(info, "Genotyped\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f\n", stats.LooRsq(i), stats.EmpiricalR(i), stats.EmpiricalRsq(i), stats.LooMajorDose(i), stats.LooMinorDose(i)); else ifprintf(info, "-\t-\t-\t-\t-\t-\n"); } ifclose(info); delete [] padded; time_t stop = time(NULL); int seconds = stop - start; printf("\nRun completed in %d hours, %d mins, %d seconds on %s\n\n", seconds / 3600, (seconds % 3600) / 60, seconds % 60, ctime(&stop)); }
void Imputation::MergeFinalVcfAllVariants(HaplotypeSet &rHap,HaplotypeSet &tHap,ImputationStatistics &stats,int MaxIndex) { cout<<" ------------------------------------------------------------------------------"<<endl; cout<<" FINAL VCF MERGE "<<endl; cout<<" ------------------------------------------------------------------------------"<<endl; printf("\n Merging partial VCF files to final output VCF File : %s ",(outFile + ".dose.vcf" + (gzip ? ".gz" : "")).c_str() ); cout<<endl<<endl; IFILE vcfdosepartial = ifopen(outFile + ".dose.vcf" + (gzip ? ".gz" : ""), "a", gzip ?InputFile::BGZF:InputFile::UNCOMPRESSED); vector<IFILE> vcfdosepartialList(MaxIndex); for(int i=1;i<=MaxIndex;i++) { string tempFileIndex(outFile); stringstream strs; strs<<(i); tempFileIndex+=(".dose.vcf.part." + (string)(strs.str())+(gzip ? ".gz" : "")); vcfdosepartialList[i-1] = ifopen(tempFileIndex.c_str(), "r"); } string line; for(int i=1;i<=MaxIndex;i++) { line.clear(); vcfdosepartialList[i-1]->readLine(line); ifprintf(vcfdosepartial,"%s",line.c_str()); } int i=0; for (int index =0; index < rHap.RefTypedTotalCount; index++) { //abort(); if(index%10000==0) { printf(" Merging marker %d of %d [%.1f%%] to VCF File ...", index + 1, rHap.RefTypedTotalCount,100*(double)(index + 1)/(int)rHap.RefTypedTotalCount); cout<<endl; } if(rHap.RefTypedIndex[index]==-1) { if(i>=rHap.PrintStartIndex && i <= rHap.PrintEndIndex) { ifprintf(vcfdosepartial,"\n%s\t%d\t%s\t%s\t%s\t.\tPASS\tMAF=%.5f;R2=%.5f", rHap.VariantList[i].chr.c_str(),rHap.VariantList[i].bp, RsId?rHap.VariantList[i].rsid.c_str():rHap.VariantList[i].name.c_str(),rHap.VariantList[i].refAlleleString.c_str(), rHap.VariantList[i].altAlleleString.c_str(),stats.AlleleFrequency(i) > 0.5 ? 
1.0 - stats.AlleleFrequency(i) : stats.AlleleFrequency(i),stats.Rsq(i)); if(!tHap.missing[i]) ifprintf(vcfdosepartial,";ER2=%.5f",stats.EmpiricalRsq(i)); ifprintf(vcfdosepartial,"\t%s",GT?(DS?(GP?"GT:DS:GP":"GT:DS"):(GP?"GT:GP":"GT")):(DS?(GP?"DS:GP":"DS"):(GP?"GP":""))); for(int j=1;j<=MaxIndex;j++) { string tempFileIndex(outFile); stringstream strs; strs<<(j); tempFileIndex+=(".dose.vcf.part." + (string)(strs.str()) +(gzip ? ".gz" : "")); line.clear(); vcfdosepartialList[j-1]->readLine(line); ifprintf(vcfdosepartial,"%s",line.c_str()); } } i++; } else { variant ThisTypedVariant =tHap.TypedOnlyVariantList[rHap.RefTypedIndex[index]]; ifprintf(vcfdosepartial,"\n%s\t%d\t%s\t%s\t%s\t.\tPASS\t", ThisTypedVariant.chr.c_str(), ThisTypedVariant.bp, RsId? ThisTypedVariant.rsid.c_str():ThisTypedVariant.name.c_str(), ThisTypedVariant.refAlleleString.c_str(), ThisTypedVariant.altAlleleString.c_str()); ifprintf(vcfdosepartial,"GENOTYPED_ONLY;AN=%d;MAF=%.5f", tHap.TotalSample[rHap.RefTypedIndex[index]], tHap.AlleleFreq[rHap.RefTypedIndex[index]]); //cout<<rHap.RefTypedIndex[index]<<" " <<tHap.TotalSample[rHap.RefTypedIndex[index]]<<" " << tHap.AlleleFreq[rHap.RefTypedIndex[index]]/(double)tHap.TotalSample[rHap.RefTypedIndex[index]]<< endl; ifprintf(vcfdosepartial,"\t%s",GT?(DS?(GP?"GT:DS:GP":"GT:DS"):(GP?"GT:GP":"GT")):(DS?(GP?"DS:GP":"DS"):(GP?"GP":""))); for(int j=1;j<=MaxIndex;j++) { string tempFileIndex(outFile); stringstream strs; strs<<(j); tempFileIndex+=(".dose.vcf.part." + (string)(strs.str()) +(gzip ? ".gz" : "")); line.clear(); vcfdosepartialList[j-1]->readLine(line); ifprintf(vcfdosepartial,"%s",line.c_str()); } // ifprintf(vcfdosepartial,"\n"); } } for(int i=1;i<=MaxIndex;i++) { ifclose(vcfdosepartialList[i-1]); string tempFileIndex(outFile); stringstream strs; strs<<(i); tempFileIndex+=(".dose.vcf.part." + (string)(strs.str())+ (gzip ? ".gz" : "")); remove(tempFileIndex.c_str()); } ifclose(vcfdosepartial); printf("\n Merging Finished ..." ); cout<<endl <<endl; }
int Stats::execute(int argc, char **argv) { // Extract command line arguments. String inFile = ""; String indexFile = ""; bool basic = false; bool noeof = false; bool params = false; bool qual = false; bool phred = false; int maxNumReads = -1; bool unmapped = false; String pBaseQC = ""; String cBaseQC = ""; String regionList = ""; int excludeFlags = 0; int requiredFlags = 0; bool withinRegion = false; int minMapQual = 0; String dbsnp = ""; PosList *dbsnpListPtr = NULL; bool baseSum = false; int bufferSize = PileupHelper::DEFAULT_WINDOW_SIZE; ParameterList inputParameters; BEGIN_LONG_PARAMETERS(longParameterList) LONG_PARAMETER_GROUP("Required Parameters") LONG_STRINGPARAMETER("in", &inFile) LONG_PARAMETER_GROUP("Types of Statistics") LONG_PARAMETER("basic", &basic) LONG_PARAMETER("qual", &qual) LONG_PARAMETER("phred", &phred) LONG_STRINGPARAMETER("pBaseQC", &pBaseQC) LONG_STRINGPARAMETER("cBaseQC", &cBaseQC) LONG_PARAMETER_GROUP("Optional Parameters") LONG_INTPARAMETER("maxNumReads", &maxNumReads) LONG_PARAMETER("unmapped", &unmapped) LONG_STRINGPARAMETER("bamIndex", &indexFile) LONG_STRINGPARAMETER("regionList", ®ionList) LONG_INTPARAMETER("excludeFlags", &excludeFlags) LONG_INTPARAMETER("requiredFlags", &requiredFlags) LONG_PARAMETER("noeof", &noeof) LONG_PARAMETER("params", ¶ms) LONG_PARAMETER_GROUP("Optional phred/qual Only Parameters") LONG_PARAMETER("withinRegion", &withinRegion) LONG_PARAMETER_GROUP("Optional BaseQC Only Parameters") LONG_PARAMETER("baseSum", &baseSum) LONG_INTPARAMETER("bufferSize", &bufferSize) LONG_INTPARAMETER("minMapQual", &minMapQual) LONG_STRINGPARAMETER("dbsnp", &dbsnp) END_LONG_PARAMETERS(); inputParameters.Add(new LongParameters ("Input Parameters", longParameterList)); inputParameters.Read(argc-1, &(argv[1])); // If no eof block is required for a bgzf file, set the bgzf file type to // not look for it. if(noeof) { // Set that the eof block is not required. 
BgzfFileType::setRequireEofBlock(false); } // Check to see if the in file was specified, if not, report an error. if(inFile == "") { usage(); inputParameters.Status(); // In file was not specified but it is mandatory. std::cerr << "--in is a mandatory argument for stats, " << "but was not specified" << std::endl; return(-1); } // Use the index file if unmapped or regionList is not empty. bool useIndex = (unmapped|| (!regionList.IsEmpty())); // IndexFile is required, so check to see if it has been set. if(useIndex && (indexFile == "")) { // In file was not specified, so set it to the in file // + ".bai" indexFile = inFile + ".bai"; } //////////////////////////////////////// // Setup in case pileup is used. Pileup<PileupElementBaseQCStats> pileup(bufferSize); // Initialize start/end positions. myStartPos = 0; myEndPos = -1; // Open the output qc file if applicable. IFILE baseQCPtr = NULL; if(!pBaseQC.IsEmpty() && !cBaseQC.IsEmpty()) { usage(); inputParameters.Status(); // Cannot specify both types of baseQC. std::cerr << "Cannot specify both --pBaseQC & --cBaseQC." << std::endl; return(-1); } else if(!pBaseQC.IsEmpty()) { baseQCPtr = ifopen(pBaseQC, "w"); PileupElementBaseQCStats::setPercentStats(true); } else if(!cBaseQC.IsEmpty()) { baseQCPtr = ifopen(cBaseQC, "w"); PileupElementBaseQCStats::setPercentStats(false); } if(baseQCPtr != NULL) { PileupElementBaseQCStats::setOutputFile(baseQCPtr); PileupElementBaseQCStats::printHeader(); } if((baseQCPtr != NULL) || baseSum) { PileupElementBaseQCStats::setMapQualFilter(minMapQual); PileupElementBaseQCStats::setBaseSum(baseSum); } if(params) { inputParameters.Status(); } // Open the file for reading. SamFile samIn; if(!samIn.OpenForRead(inFile)) { fprintf(stderr, "%s\n", samIn.GetStatusMessage()); return(samIn.GetStatus()); } samIn.SetReadFlags(requiredFlags, excludeFlags); // Set whether or not basic statistics should be generated. samIn.GenerateStatistics(basic); // Read the sam header. 
SamFileHeader samHeader; if(!samIn.ReadHeader(samHeader)) { fprintf(stderr, "%s\n", samIn.GetStatusMessage()); return(samIn.GetStatus()); } // Open the bam index file for reading if we are // doing unmapped reads (also set the read section). if(useIndex) { samIn.ReadBamIndex(indexFile); if(unmapped) { samIn.SetReadSection(-1); } if(!regionList.IsEmpty()) { myRegionList = ifopen(regionList, "r"); } } ////////////////////////// // Read dbsnp if specified and doing baseQC if(((baseQCPtr != NULL) || baseSum) && (!dbsnp.IsEmpty())) { // Read the dbsnp file. IFILE fdbSnp; fdbSnp = ifopen(dbsnp,"r"); // Determine how many entries. const SamReferenceInfo& refInfo = samHeader.getReferenceInfo(); int maxRefLen = 0; for(int i = 0; i < refInfo.getNumEntries(); i++) { int refLen = refInfo.getReferenceLength(i); if(refLen >= maxRefLen) { maxRefLen = refLen + 1; } } dbsnpListPtr = new PosList(refInfo.getNumEntries(),maxRefLen); if(fdbSnp==NULL) { std::cerr << "Open dbSNP file " << dbsnp.c_str() << " failed!\n"; } else if(dbsnpListPtr == NULL) { std::cerr << "Failed to init the memory allocation for the dbsnpList.\n"; } else { // Read the dbsnp file. StringArray tokens; String buffer; int position = 0; int refID = 0; // Loop til the end of the file. while (!ifeof(fdbSnp)) { // Read the next line. buffer.ReadLine(fdbSnp); // If it does not have at least 2 columns, // continue to the next line. if (buffer.IsEmpty() || buffer[0] == '#') continue; tokens.AddTokens(buffer); if(tokens.Length() < 2) continue; if(!tokens[1].AsInteger(position)) { std::cerr << "Improperly formatted region line, start position " << "(2nd column) is not an integer: " << tokens[1] << "; Skipping to the next line.\n"; continue; } // Look up the reference name. 
refID = samHeader.getReferenceID(tokens[0]); if(refID != SamReferenceInfo::NO_REF_ID) { // Reference id was found, so add it to the dbsnp dbsnpListPtr->addPosition(refID, position); } tokens.Clear(); buffer.Clear(); } } ifclose(fdbSnp); } // Read the sam records. SamRecord samRecord; int numReads = 0; ////////////////////// // Setup in case doing a quality count. // Quality histogram. const int MAX_QUAL = 126; const int START_QUAL = 33; uint64_t qualCount[MAX_QUAL+1]; for(int i = 0; i <= MAX_QUAL; i++) { qualCount[i] = 0; } const int START_PHRED = 0; const int PHRED_DIFF = START_QUAL - START_PHRED; const int MAX_PHRED = MAX_QUAL - PHRED_DIFF; uint64_t phredCount[MAX_PHRED+1]; for(int i = 0; i <= MAX_PHRED; i++) { phredCount[i] = 0; } int refPos = 0; Cigar* cigarPtr = NULL; char cigarChar = '?'; // Exclude clips from the qual/phred counts if unmapped reads are excluded. bool qualExcludeClips = excludeFlags & SamFlag::UNMAPPED; ////////////////////////////////// // When not reading by sections, getNextSection returns true // the first time, then false the next time. while(getNextSection(samIn)) { // Keep reading records from the file until SamFile::ReadRecord // indicates to stop (returns false). while(((maxNumReads < 0) || (numReads < maxNumReads)) && samIn.ReadRecord(samHeader, samRecord)) { // Another record was read, so increment the number of reads. ++numReads; // See if the quality histogram should be genereated. if(qual || phred) { // Get the quality. const char* qual = samRecord.getQuality(); // Check for no quality ('*'). if((qual[0] == '*') && (qual[1] == 0)) { // This record does not have a quality string, so no // quality processing is necessary. } else { int index = 0; cigarPtr = samRecord.getCigarInfo(); cigarChar = '?'; refPos = samRecord.get0BasedPosition(); if(!qualExcludeClips && (cigarPtr != NULL)) { // Offset the reference position by any soft clips // by subtracting the queryIndex of this start position. 
// refPos is now the start position of the clips. refPos -= cigarPtr->getQueryIndex(0); } while(qual[index] != 0) { // Skip this quality if it is clipped and we are skipping clips. if(cigarPtr != NULL) { cigarChar = cigarPtr->getCigarCharOpFromQueryIndex(index); } if(qualExcludeClips && Cigar::isClip(cigarChar)) { // Skip a clipped quality. ++index; // Increment the position. continue; } if(withinRegion && (myEndPos != -1) && (refPos >= myEndPos)) { // We have hit the end of the region, stop processing this // quality string. break; } if(withinRegion && (refPos < myStartPos)) { // This position is not in the target. ++index; // Update the position if this is found in the reference or a clip. if(Cigar::foundInReference(cigarChar) || Cigar::isClip(cigarChar)) { ++refPos; } continue; } // Check for valid quality. if((qual[index] < START_QUAL) || (qual[index] > MAX_QUAL)) { if(qual) { std::cerr << "Invalid Quality found: " << qual[index] << ". Must be between " << START_QUAL << " and " << MAX_QUAL << ".\n"; } if(phred) { std::cerr << "Invalid Phred Quality found: " << qual[index] - PHRED_DIFF << ". Must be between " << START_QUAL << " and " << MAX_QUAL << ".\n"; } // Skip an invalid quality. ++index; // Update the position if this is found in the reference or a clip. if(Cigar::foundInReference(cigarChar) || Cigar::isClip(cigarChar)) { ++refPos; } continue; } // Increment the count for this quality. ++(qualCount[(int)(qual[index])]); ++(phredCount[(int)(qual[index]) - PHRED_DIFF]); // Update the position if this is found in the reference or a clip. if(Cigar::foundInReference(cigarChar) || Cigar::isClip(cigarChar)) { ++refPos; } ++index; } } } // Check the next thing to do for the read. if((baseQCPtr != NULL) || baseSum) { // Pileup the bases for this read. pileup.processAlignmentRegion(samRecord, myStartPos, myEndPos, dbsnpListPtr); } } // Done with a section, move on to the next one. // New section, so flush the pileup. 
pileup.flushPileup(); } // Flush the rest of the pileup. if((baseQCPtr != NULL) || baseSum) { // Pileup the bases. pileup.processAlignmentRegion(samRecord, myStartPos, myEndPos, dbsnpListPtr); PileupElementBaseQCStats::printSummary(); ifclose(baseQCPtr); } std::cerr << "Number of records read = " << samIn.GetCurrentRecordCount() << std::endl; if(basic) { std::cerr << std::endl; samIn.PrintStatistics(); } // Print the quality stats. if(qual) { std::cerr << std::endl; std::cerr << "Quality\tCount\n"; for(int i = START_QUAL; i <= MAX_QUAL; i++) { std::cerr << i << "\t" << qualCount[i] << std::endl; } } // Print the phred quality stats. if(phred) { std::cerr << std::endl; std::cerr << "Phred\tCount\n"; for(int i = START_PHRED; i <= MAX_PHRED; i++) { std::cerr << i << "\t" << phredCount[i] << std::endl; } } SamStatus::Status status = samIn.GetStatus(); if(status == SamStatus::NO_MORE_RECS) { // A status of NO_MORE_RECS means that all reads were successful. status = SamStatus::SUCCESS; } return(status); }
// Impute every target haplotype against the reference panel and write the
// enabled outputs (.hapDose/.hapLabel, .dose, partial VCF files, info file).
//
//   tHap   - target (study) haplotypes.
//   rHap   - reference haplotypes.
//   Golden - not referenced anywhere in this function body.
//
// The per-haplotype work runs inside an OpenMP parallel loop; updates to the
// shared statistics, the output files and the VCF bookkeeping counters are
// wrapped in "omp critical" sections.
void Imputation::performImputation(HaplotypeSet &tHap,HaplotypeSet &rHap, String Golden)
{
    // Local copy of the reference panel's end points; the group loops below
    // iterate over consecutive pairs of entries.
    vector<int> optStructure=rHap.optEndPoints;

    int time_prev = time(0),time_load,vcfSampleIndex=0;;

    includeGwas=true;
    // includeGwas was just set to true, so the last argument evaluates to 0.
    MarkovParameters* MP=createEstimates(rHap,tHap,rHap.optEndPoints,1-includeGwas);

    cout<<" ------------------------------------------------------------------------------"<<endl;
    cout<<" MAIN IMPUTATION "<<endl;
    cout<<" ------------------------------------------------------------------------------"<<endl;

    ImputationStatistics stats(rHap.numMarkers );
    IFILE dosages=NULL, hapdose=NULL, haps=NULL,vcfdosepartial=NULL;

    // Buffer that collects per-sample dosages until they are flushed to a
    // partial VCF file.
    HaplotypeSet DosageForVcfPartial;
    DosageForVcfPartial.unphasedOutput=unphasedOutput;
    DosageForVcfPartial.TypedOnly=tHap.TypedOnly;
    DosageForVcfPartial.GWASOnlycounter=tHap.GWASOnlycounter;

    if(tHap.TypedOnly)
    {
        printf("\n Calculating Allele Frequency for Typed-Only variants ... ");
        cout<<endl;
        tHap.CalculateGWASOnlyFreq();
    }

    cout << "\n Starting Imputation ...";
    printf("\n\n Setting up Markov Model for Imputation ...");
    cout<<endl<<endl;

    // Haplotype-level outputs are only opened for phased output.
    if (phased && !unphasedOutput)
    {
        hapdose = ifopen(outFile + ".hapDose" + (gzip ? ".gz" : ""), "wb", gzip ?InputFile::BGZF:InputFile::UNCOMPRESSED);
        haps = ifopen(outFile + ".hapLabel" + (gzip ? ".gz" : ""), "wb", gzip ?InputFile::BGZF:InputFile::UNCOMPRESSED);
    }

    // maxVcfSample : samples buffered per partial VCF file (capped at the
    //                number of target samples).
    // NumVcfCreated: samples saved into the buffer so far.
    // NumVcfWritten: advanced by maxVcfSample each time a new part is started.
    // NovcfParts   : index of the partial VCF file currently being filled.
    int maxVcfSample=200,NumVcfWritten=0,NumVcfCreated=0,NovcfParts=1;

    if((maxVcfSample)>=tHap.numSamples)
        maxVcfSample=tHap.numSamples;

    if(vcfOutput)
    {
        // Write the VCF meta lines and the start of the header line (through
        // the FORMAT column, without a trailing newline), then close the file.
        vcfdosepartial = ifopen(outFile + ".dose.vcf" + (gzip ? ".gz" : ""), "wb", gzip ?InputFile::BGZF:InputFile::UNCOMPRESSED);
        ifprintf(vcfdosepartial,"##fileformat=VCFv4.1\n");
        time_t t = time(0);
        struct tm * now = localtime( & t );
        ifprintf(vcfdosepartial,"##filedate=%d.%d.%d\n",(now->tm_year + 1900),(now->tm_mon + 1) ,now->tm_mday);
        ifprintf(vcfdosepartial,"##source=Minimac3\n");
        if(GT)
            ifprintf(vcfdosepartial,"##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n");
        if(tHap.AllMaleTarget)
        {
            if(DS)
                ifprintf(vcfdosepartial,"##FORMAT=<ID=DS,Number=1,Type=Float,Description=\"Estimated Alternate Allele Dosage (For Male Chr: X) : [P(Alt Allele)]\">\n");
            if(GP)
                ifprintf(vcfdosepartial,"##FORMAT=<ID=GP,Number=2,Type=Float,Description=\"Estimated Posterior Probabilities for Genotypes 0 and 1 (For Male Chr: X) \">\n");
        }
        else
        {
            if(DS)
                ifprintf(vcfdosepartial,"##FORMAT=<ID=DS,Number=1,Type=Float,Description=\"Estimated Alternate Allele Dosage : [P(0/1)+2*P(1/1)]\">\n");
            if(GP)
                ifprintf(vcfdosepartial,"##FORMAT=<ID=GP,Number=3,Type=Float,Description=\"Estimated Posterior Probabilities for Genotypes 0/0, 0/1 and 1/1 \">\n");
        }

        ifprintf(vcfdosepartial,"##INFO=<ID=MAF,Number=1,Type=Float,Description=\"Estimated Alternate Allele Frequency\">\n");
        ifprintf(vcfdosepartial,"##INFO=<ID=R2,Number=1,Type=Float,Description=\"Estimated Imputation Accuracy\">\n");
        ifprintf(vcfdosepartial,"##INFO=<ID=ER2,Number=1,Type=Float,Description=\"Empirical (Leave-One-Out) R-square (available only for genotyped variants)\">\n");
        ifprintf(vcfdosepartial,"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT");
        ifclose(vcfdosepartial);

        // Size the buffer for the first part.
        if(!tHap.AllMaleTarget)
            DosageForVcfPartial.InitializePartialDosageForVcfOutput((2*maxVcfSample),rHap.numMarkers,format);
        else
            DosageForVcfPartial.InitializePartialDosageForVcfOutputMaleSamples(maxVcfSample<MaxSample?maxVcfSample:MaxSample,rHap.numMarkers,format);
    }

    if(doseOutput)
        dosages = ifopen(outFile + ".dose" + (gzip ? ".gz" : ""), "wb",(gzip ? InputFile::BGZF:InputFile::UNCOMPRESSED) );

    #pragma omp parallel for
    for(int hapId=0;hapId<MaxSample;hapId++)
    {
        // Odd haplotype indices are skipped here (they are processed by the
        // do-while loop of the preceding even index), except when the
        // chromosome is "X" and all targets are male.
        if (hapId %2==1)
        {
            if(rHap.finChromosome!="X")
                continue;
            else if(!tHap.AllMaleTarget)
                continue;
        }

        vector<float> foldedProb,recomProb,noRecomProb, rightProb,probAlleleNoStandardize(8,0.0),tempDoseHap1;
        vector<bool> tempHap(rHap.numMarkers),tempMissHap(rHap.numMarkers);
        vector<bool> tempDoseAlleleHap1;

        // Each iteration has its own model, initialised from the shared
        // parameter estimates.
        MarkovModel MM(tHap,rHap,tHap.missing,rHap.major);
        MM.CopyParameters(MP);

        int hapIdIndiv=hapId;

        // Runs once for hapId and, unless all targets are male, once more for
        // hapId+1 (the loop continues only while the next index is odd).
        do{
            MM.initializeMatrices(tHap,rHap,optStructure,rHap.ReducedStructureInfo);

            printf(" Processing Haplotype %d of %d ...", hapIdIndiv + 1, MaxSample);
            cout<<endl;

            MM.ThisHapId=hapIdIndiv;

            // First pass: groups in increasing order.
            for(int group=1;group<(int)optStructure.size();group++)
            {
                MM.foldProbabilities(foldedProb,group-1,rHap.ReducedStructureInfo[group-1],0,refCount);
                MM.leftNoRecoProb[group-1][0]=foldedProb;

                // Only for the first group, and only when marker 0 is not
                // flagged missing for the panel or for this haplotype.
                if(group==1 && !tHap.missing[0])
                    if(!tHap.getMissingScaffoldedHaplotype(hapIdIndiv,0))
                    {
                        Condition(rHap,0,foldedProb,MM.leftNoRecoProb[group-1][0],MM.Error[0],
                                  tHap.getScaffoldedHaplotype(hapIdIndiv,0)? rHap.AlleleFreq[0] : 1-rHap.AlleleFreq[0],
                                  tHap.getScaffoldedHaplotype(hapIdIndiv,0),MM.backgroundError,
                                  foldedProb.size(),rHap.ReducedStructureInfo[0]);
                    }

                MM.WalkLeft(tHap,hapIdIndiv,MM.leftProb[group-1],MM.leftNoRecoProb[group-1],
                            foldedProb,optStructure[group-1],optStructure[group],
                            rHap.ReducedStructureInfo[group-1],rHap.AlleleFreq);

                splitFoldedProb(recomProb,MM.leftProb[group-1][optStructure[group]-optStructure[group-1]],MM.leftNoRecoProb[group-1][optStructure[group]-optStructure[group-1]]);

                MM.unfoldProbabilities(group-1,recomProb,MM.leftNoRecoProb[group-1][optStructure[group]-optStructure[group-1]],foldedProb,0,rHap.ReducedStructureInfo,refCount);
            }

            // Second pass: groups in decreasing order, calling Impute.
            for(int group=optStructure.size()-1;group>0;group--)
            {
                MM.foldProbabilities(foldedProb,group-1,rHap.ReducedStructureInfo[group-1],1,refCount);
                rightProb=foldedProb;
                noRecomProb=foldedProb;

                MM.Impute(tHap,foldedProb,hapIdIndiv,MM.leftProb[group-1],MM.leftNoRecoProb[group-1],rightProb,noRecomProb,MM.junctionLeftProb[group-1],
                          MM.junctionRightProb[group],optStructure[group-1], optStructure[group],rHap.ReducedStructureInfo[group-1],1,rHap.AlleleFreq);

                splitFoldedProb(recomProb,rightProb,noRecomProb);
                MM.unfoldProbabilities(group-1,recomProb,noRecomProb,foldedProb,1,rHap.ReducedStructureInfo,refCount);
            }

            // Copy this haplotype's alleles and missing flags for the
            // statistics update.
            for(int jjj=0;jjj<rHap.numMarkers;jjj++)
            {
                tempHap[jjj]=tHap.getScaffoldedHaplotype(hapIdIndiv,jjj);
                tempMissHap[jjj]=tHap.getMissingScaffoldedHaplotype(hapIdIndiv,jjj);
            }

            if(vcfOutput)
            {
                // Keep the even-indexed haplotype's result; the model's own
                // members are overwritten by the next pass of this loop.
                if(hapIdIndiv%2==0)
                {
                    tempDoseHap1= MM.imputedHap;
                    tempDoseAlleleHap1= MM.imputedAlleleNumber;
                }
            }

            // stats is shared by all threads.
            #pragma omp critical
            {
                stats.Update(MM.imputedHap, MM.leaveOneOut,tempHap,tempMissHap,rHap.major);
            }

            // hapdose / haps are shared output files.
            #pragma omp critical
            if (phased && !unphasedOutput)
            {
                PrintHaplotypeData(rHap, tHap, hapdose, haps,
                                   MM.imputedHap, MM.imputedAlleleNumber,
                                   hapIdIndiv, tHap.AllMaleTarget?hapId:hapId/2);
            }

            if(tHap.AllMaleTarget)
                break;
            hapIdIndiv++;
        }while(hapIdIndiv<MaxSample && hapIdIndiv%2==1);

        // dosages is a shared output file.
        #pragma omp critical
        if(doseOutput)
        {
            PrintDosageData(rHap, tHap, dosages, MM.imputedDose, tHap.AllMaleTarget?hapId:hapId/2);
        }

        // The VCF buffer and its counters are shared, so the whole
        // save-and-maybe-flush step is one critical section.
        #pragma omp critical
        if(vcfOutput)
        {
            printf(" Saving Individual %s for VCF File...\n", tHap.individualName[tHap.AllMaleTarget?hapId:hapId/2].c_str());

            // The slot in the buffer is the number of samples saved since the
            // current part was started.
            if(!tHap.AllMaleTarget)
                DosageForVcfPartial.SaveDosageForVcfOutputSampleWise(NumVcfCreated-NumVcfWritten,
                                                                     tHap.individualName[tHap.AllMaleTarget?hapId:hapId/2],
                                                                     tempDoseHap1,MM.imputedHap,
                                                                     tempDoseAlleleHap1,MM.imputedAlleleNumber);
            else
                DosageForVcfPartial.SaveDosageForVcfOutputSampleWiseChrX(NumVcfCreated-NumVcfWritten,
                                                                         tHap.individualName[tHap.AllMaleTarget?hapId:hapId/2],
                                                                         MM.imputedHap,
                                                                         MM.imputedAlleleNumber);

            if(DosageForVcfPartial.TypedOnly)
            {
                DosageForVcfPartial.SaveIndexForGWASOnlyForVcfOutput(NumVcfCreated-NumVcfWritten, tHap.AllMaleTarget?hapId:hapId/2);
            }

            NumVcfCreated++;
            vcfSampleIndex++;

            // Flush when the buffer holds maxVcfSample samples or when the
            // last sample has been saved.
            if(NumVcfCreated%maxVcfSample==0 || NumVcfCreated==(tHap.AllMaleTarget?MaxSample:MaxSample/2))
            {
                string PartialVcfFileName(outFile),tempFileIndex1(outFile);
                stringstream strs;
                strs<<(NovcfParts);
                PartialVcfFileName+=(".dose.vcf.part." + (string)(strs.str()) +(gzip ? ".gz" : ""));

                if(!tHap.AllMaleTarget)
                    printf("\n --->>> Saving samples %d-%d in VCF file : %s ...\n\n",
                           (NumVcfWritten)+1,(MaxSample/2<(NumVcfWritten+maxVcfSample)?MaxSample/2:(NumVcfWritten+maxVcfSample)),
                           PartialVcfFileName.c_str());
                else
                    printf("\n --->>> Saving samples %d-%d in VCF file : %s ...\n\n",
                           (NumVcfWritten)+1,(MaxSample<(NumVcfWritten+maxVcfSample)?MaxSample:(NumVcfWritten+maxVcfSample)),
                           PartialVcfFileName.c_str());

                //if(NovcfParts==2)
                // abort();

                FlushPartialVcf(rHap,tHap,DosageForVcfPartial,PartialVcfFileName,NovcfParts);

                // More samples remain: start the next part and re-size the
                // buffer for the smaller of a full part and what is left.
                if(NumVcfCreated<(tHap.AllMaleTarget?MaxSample:MaxSample/2))
                {
                    NovcfParts++;
                    NumVcfWritten+=maxVcfSample;

                    //int gg=maxVcfSample<(((tHap.AllMaleTarget?MaxSample:MaxSample/2))-NumVcfWritten)?
                    //2*maxVcfSample:2*(((tHap.AllMaleTarget?MaxSample:MaxSample/2))-NumVcfWritten);
                    //
                    //
                    //abort();

                    if(!tHap.AllMaleTarget)
                        DosageForVcfPartial.InitializePartialDosageForVcfOutput(maxVcfSample<(MaxSample/2-NumVcfWritten)?2*maxVcfSample:2*(MaxSample/2-NumVcfWritten),rHap.numMarkers,format);
                    else
                        DosageForVcfPartial.InitializePartialDosageForVcfOutputMaleSamples(maxVcfSample<(MaxSample-NumVcfWritten)?maxVcfSample:(MaxSample-NumVcfWritten),rHap.numMarkers,format);
                }
            }
        }
    }

    cout<<endl<<" Imputation Finished ... "<<endl;

    // Close the outputs that were opened above and report where they went.
    if (phased && !unphasedOutput)
    {
        ifclose(hapdose);
        ifclose(haps);

        cout<<endl<<" Haplotype Dosage information written to : "<< outFile + ".hapDose" + (gzip ? ".gz" : "")<<endl;
        cout<<endl<<" Haplotype Allele information written to : "<< outFile + ".hapLabel" + (gzip ? ".gz" : "")<<endl;
    }

    if(doseOutput)
    {
        ifclose(dosages);
        cout<<endl<<" Dosage information written to : "<< outFile + ".dose" + (gzip ? ".gz" : "")<<endl;
    }

    PrintInfoFile(rHap,tHap,stats);

    time_load = time(0) - time_prev;
    cout << "\n Time taken for imputation = " << time_load << " seconds."<<endl<<endl;

    // Combine the header written above with the NovcfParts partial files.
    if(vcfOutput)
        MergeFinalVcfAllVariants(rHap,tHap,stats,NovcfParts);
}
// Close the underlying file handle; does nothing when no file is open.
void glfHandler::Close()
{
    if (!isOpen())
    {
        return;
    }

    ifclose(handle);
}
// main function of verifyBamID int execute(int argc, char** argv) { printf("verifyBamID %s -- verify identity and purity of sequence data\n" "(c) 2010-2014 Hyun Min Kang, Goo Jun, and Goncalo Abecasis\n\n", VERSION); VerifyBamIDArgs args; ParameterList pl; BEGIN_LONG_PARAMETERS(longParameters) LONG_PARAMETER_GROUP("Input Files") LONG_STRINGPARAMETER("vcf",&args.sVcfFile) LONG_STRINGPARAMETER("bam",&args.sBamFile) LONG_STRINGPARAMETER("subset",&args.sSubsetInds) LONG_STRINGPARAMETER("smID",&args.sSMID) LONG_PARAMETER_GROUP("VCF analysis options") LONG_DOUBLEPARAMETER("genoError",&args.genoError) LONG_DOUBLEPARAMETER("minAF",&args.minAF) LONG_DOUBLEPARAMETER("minCallRate",&args.minCallRate) LONG_PARAMETER_GROUP("Individuals to compare with chip data") EXCLUSIVE_PARAMETER("site",&args.bSiteOnly) EXCLUSIVE_PARAMETER("self",&args.bSelfOnly) EXCLUSIVE_PARAMETER("best",&args.bFindBest) LONG_PARAMETER_GROUP("Chip-free optimization options") EXCLUSIVE_PARAMETER("free-none",&args.bFreeNone) EXCLUSIVE_PARAMETER("free-mix",&args.bFreeMixOnly) EXCLUSIVE_PARAMETER("free-refBias",&args.bFreeRefBiasOnly) EXCLUSIVE_PARAMETER("free-full",&args.bFreeFull) LONG_PARAMETER_GROUP("With-chip optimization options") EXCLUSIVE_PARAMETER("chip-none",&args.bChipNone) EXCLUSIVE_PARAMETER("chip-mix",&args.bChipMixOnly) EXCLUSIVE_PARAMETER("chip-refBias",&args.bChipRefBiasOnly) EXCLUSIVE_PARAMETER("chip-full",&args.bChipFull) LONG_PARAMETER_GROUP("BAM analysis options") LONG_PARAMETER("ignoreRG",&args.bIgnoreRG) LONG_PARAMETER("ignoreOverlapPair",&args.bIgnoreOverlapPair) LONG_PARAMETER("noEOF",&args.bNoEOF) LONG_PARAMETER("precise",&args.bPrecise) LONG_INTPARAMETER("minMapQ",&args.minMapQ) LONG_INTPARAMETER("maxDepth",&args.maxDepth) LONG_INTPARAMETER("minQ",&args.minQ) LONG_INTPARAMETER("maxQ",&args.maxQ) LONG_DOUBLEPARAMETER("grid",&args.grid) LONG_PARAMETER_GROUP("Modeling Reference Bias") LONG_DOUBLEPARAMETER("refRef",&args.pRefRef) LONG_DOUBLEPARAMETER("refHet",&args.pRefHet) 
LONG_DOUBLEPARAMETER("refAlt",&args.pRefAlt) LONG_PARAMETER_GROUP("Output options") LONG_STRINGPARAMETER("out",&args.sOutFile) LONG_PARAMETER("verbose",&args.bVerbose) LONG_PHONEHOME(VERSION) END_LONG_PARAMETERS(); pl.Add(new LongParameters("Available Options",longParameters)); pl.Read(argc, argv); pl.Status(); // check the validity of input files if ( args.sVcfFile.IsEmpty() ) { error("--vcf [vcf file] required"); } if ( args.sBamFile.IsEmpty() ) { error("--bam [bam file] is required"); } if ( args.sOutFile.IsEmpty() ) { error("--out [output prefix] is required"); } Logger::gLogger = new Logger((args.sOutFile + ".log").c_str(), args.bVerbose); if ( ! ( args.bSiteOnly || args.bSelfOnly || args.bFindBest ) ) { warning("--self option was autotomatically turned on by default. Specify --best option if you wanted to check across all possible samples in the VCF"); args.bSelfOnly = true; } if ( ( args.maxDepth > 20 ) && ( !args.bPrecise ) ) { warning("--precise option is not turned on at --maxDepth %d : may be prone to precision errors",args.maxDepth); } if ( ( args.bChipRefBiasOnly ) && ( !args.bSelfOnly ) ) { error("--self must be set for --chip-refBias to work. Skipping.."); } // check timestamp time_t t; time(&t); Logger::gLogger->writeLog("Analysis started on %s",ctime(&t)); // load arguments VerifyBamID vbid(&args); // load input VCF and BAM files Logger::gLogger->writeLog("Opening Input Files"); vbid.loadFiles(args.sBamFile.c_str(), args.sVcfFile.c_str()); // Check which genotype-free method is used if ( args.bFreeNone ) { // if no genotype-free mode is tested. skip it // do nothing for genotype-free estimation Logger::gLogger->writeLog("Skipping chip-free estimation of sample mixture"); } else if ( args.bFreeMixOnly ) { // only mixture is estimated. 
// genotype-free method Logger::gLogger->writeLog("Performing chip-free estimation of sample mixture at fixed reference bias parameters (%lf, %lf, %lf)",args.pRefRef,args.pRefHet,args.pRefAlt); // scan across multiple readgroups for(int rg=-1; rg < vbid.nRGs - (int)args.bIgnoreRG; ++rg) { VerifyBamID::mixLLK mix(&vbid); mix.OptimizeLLK(rg); Logger::gLogger->writeLog("Optimal per-sample fMix = %lf, LLK0 = %lf, LLK1 = %lf\n",mix.fMix,mix.llk0,mix.llk1); vbid.mixOut.llk0s[rg+1] = mix.llk0; vbid.mixOut.llk1s[rg+1] = mix.llk1; vbid.mixOut.fMixs[rg+1] = mix.fMix; } //vbid.mixRefHet = 0.5; //vbid.mixRefAlt = 0.00; } else if ( args.bFreeRefBiasOnly ) { Logger::gLogger->writeLog("Performing chip-free estimation of reference-bias without sample mixture"); for(int rg=-1; rg < vbid.nRGs - (int)args.bIgnoreRG; ++rg) { VerifyBamID::refBiasMixLLKFunc myFunc(&vbid, rg); AmoebaMinimizer myMinimizer; Vector startingPoint(2); startingPoint[0] = 0; // pRefHet = 0.5 startingPoint[1] = -4.595; // pRefAlt = 0.01 myMinimizer.func = &myFunc; myMinimizer.Reset(2); myMinimizer.point = startingPoint; myMinimizer.Minimize(1e-6); double pRefHet = VerifyBamID::invLogit(myMinimizer.point[0]); double pRefAlt = VerifyBamID::invLogit(myMinimizer.point[1]); Logger::gLogger->writeLog("Reference Bias Estimated as ( Pr[refBase|HET] = %lf, Pr[refBase|ALT] = %lf) with LLK = %lf at readGroup %d",pRefHet,pRefAlt,myMinimizer.fmin,rg); //vbid.setRefBiasParams(1.0, pRefHet, pRefAlt); vbid.mixOut.llk0s[rg+1] = myFunc.llk0; vbid.mixOut.llk1s[rg+1] = myFunc.llk1; vbid.mixOut.refHets[rg+1] = myFunc.pRefHet; vbid.mixOut.refAlts[rg+1] = myFunc.pRefAlt; } } else if ( args.bFreeFull ) { Logger::gLogger->writeLog("Performing chip-free estimation of reference-bias and sample mixture together"); for(int rg = -1; rg < vbid.nRGs - args.bIgnoreRG; ++rg) { VerifyBamID::fullMixLLKFunc myFunc(&vbid, rg); AmoebaMinimizer myMinimizer; Vector startingPoint(3); startingPoint[0] = -3.91; // start with fMix = 0.01 startingPoint[1] = 
0; // pRefHet = 0.5 startingPoint[2] = -4.595; // pRefAlt = 0.01 myMinimizer.func = &myFunc; myMinimizer.Reset(3); myMinimizer.point = startingPoint; myMinimizer.Minimize(1e-6); double fMix = VerifyBamID::invLogit(myMinimizer.point[0]); if ( fMix > 0.5 ) fMix = 1.-fMix; double pRefHet = VerifyBamID::invLogit(myMinimizer.point[1]); double pRefAlt = VerifyBamID::invLogit(myMinimizer.point[2]); Logger::gLogger->writeLog("Optimal per-sample fMix = %lf\n",fMix); Logger::gLogger->writeLog("Reference Bias Estimated as ( Pr[refBase|HET] = %lf, Pr[refBase|ALT] = %lf) with LLK = %lf",pRefHet,pRefAlt,myMinimizer.fmin); //vbid.setRefBiasParams(1.0, pRefHet, pRefAlt); vbid.mixOut.llk0s[rg+1] = myFunc.llk0; vbid.mixOut.llk1s[rg+1] = myFunc.llk1; vbid.mixOut.fMixs[rg+1] = myFunc.fMix; vbid.mixOut.refHets[rg+1] = myFunc.pRefHet; vbid.mixOut.refAlts[rg+1] = myFunc.pRefAlt; } } Logger::gLogger->writeLog("calculating depth distribution"); vbid.calculateDepthDistribution(args.maxDepth, vbid.mixOut); Logger::gLogger->writeLog("finished calculating depth distribution"); std::vector<int> bestInds(vbid.nRGs+1,-1); std::vector<int> selfInds(vbid.nRGs+1,-1); if ( args.bChipNone ) { // do nothing Logger::gLogger->writeLog("Skipping with-chip estimation of sample mixture"); } else if ( args.bChipMixOnly ) { Logger::gLogger->writeLog("Performing with-chip estimation of sample mixture at fixed reference bias parameter (%lf, %lf, %lf)",args.pRefRef,args.pRefHet,args.pRefAlt); for(int rg=-1; rg < (vbid.nRGs - (int)args.bIgnoreRG); ++rg) { double maxIBD = -1; VerifyBamID::ibdLLK ibd(&vbid); for(int i=0; i < (int)vbid.pGenotypes->indids.size(); ++i) { double fIBD = ibd.OptimizeLLK(i, rg); Logger::gLogger->writeLog("Comparing with individual %s.. 
Optimal fIBD = %lf, LLK0 = %lf, LLK1 = %lf for readgroup %d",vbid.pGenotypes->indids[i].c_str(),fIBD, ibd.llk0, ibd.llk1, rg); if ( maxIBD < fIBD ) { bestInds[rg+1] = i; vbid.bestOut.llk0s[rg+1] = ibd.llk0; vbid.bestOut.llk1s[rg+1] = ibd.llk1; vbid.bestOut.fMixs[rg+1] = 1-ibd.fIBD; maxIBD = ibd.fIBD; } if ( ( (rg < 0) && (vbid.pPile->sBamSMID == vbid.pGenotypes->indids[i] ) ) || ( ( rg >= 0 ) && ( vbid.pPile->vsSMIDs[rg] == vbid.pGenotypes->indids[i]) ) ) { selfInds[rg+1] = i; vbid.selfOut.llk0s[rg+1] = ibd.llk0; vbid.selfOut.llk1s[rg+1] = ibd.llk1; vbid.selfOut.fMixs[rg+1] = 1-ibd.fIBD; } } if ( bestInds[rg+1] >= 0 ) { Logger::gLogger->writeLog("Best Matching Individual is %s with IBD = %lf",vbid.pGenotypes->indids[bestInds[rg+1]].c_str(),maxIBD); vbid.calculateDepthByGenotype(bestInds[rg+1],rg,vbid.bestOut); } if ( selfInds[rg+1] >= 0 ) { Logger::gLogger->writeLog("Self Individual is %s with IBD = %lf",vbid.pGenotypes->indids[selfInds[rg+1]].c_str(),vbid.selfOut.fMixs[rg+1]); vbid.calculateDepthByGenotype(selfInds[rg+1],rg,vbid.selfOut); } } } else if ( args.bChipRefBiasOnly ) { Logger::gLogger->writeLog("Performing with-chip estimation of reference-bias without sample mixture"); if ( args.bSelfOnly ) { for(int rg=-1; rg < (vbid.nRGs - (int)args.bIgnoreRG); ++rg) { VerifyBamID::refBiasIbdLLKFunc myFunc(&vbid, rg); AmoebaMinimizer myMinimizer; Vector startingPoint(2); startingPoint[0] = 0; // pRefHet = 0.5 startingPoint[1] = -4.595; // pRefAlt = 0.01 myMinimizer.func = &myFunc; myMinimizer.Reset(2); myMinimizer.point = startingPoint; myMinimizer.Minimize(1e-6); double pRefHet = VerifyBamID::invLogit(myMinimizer.point[0]); double pRefAlt = VerifyBamID::invLogit(myMinimizer.point[1]); Logger::gLogger->writeLog("Reference Bias Estimated as ( Pr[refBase|HET] = %lf, Pr[refBase|ALT] = %lf) with LLK = %lf",pRefHet,pRefAlt,myMinimizer.fmin); //vbid.setRefBiasParams(1.0, pRefHet, pRefAlt); vbid.selfOut.llk0s[rg+1] = myFunc.llk0; vbid.selfOut.llk1s[rg+1] = myFunc.llk1; 
vbid.selfOut.refHets[rg+1] = myFunc.pRefHet; vbid.selfOut.refAlts[rg+1] = myFunc.pRefAlt; vbid.calculateDepthByGenotype(0,rg,vbid.selfOut); } } else { Logger::gLogger->warning("--self must be set for --chip-refBias to work. Skipping.."); } } else if ( args.bChipFull ) { Logger::gLogger->writeLog("Performing with-chip estimation of reference-bias and sample mixture together"); for(int rg=-1; rg < (vbid.nRGs - (int)args.bIgnoreRG); ++rg) { double maxIBD = -1; for(int i=0; i < (int)vbid.pGenotypes->indids.size(); ++i) { VerifyBamID::fullIbdLLKFunc myFunc(&vbid,i,rg); AmoebaMinimizer myMinimizer; Vector startingPoint(3); startingPoint[0] = 3.91; // start with fIBD = 0.99 startingPoint[1] = 0; // pRefHet = 0.5 startingPoint[2] = -4.595; // pRefAlt = 0.01 myMinimizer.func = &myFunc; myFunc.indIdx = i; myMinimizer.Reset(3); myMinimizer.point = startingPoint; myMinimizer.Minimize(1e-6); double fIBD = VerifyBamID::invLogit(myMinimizer.point[0]); double pRefHet = VerifyBamID::invLogit(myMinimizer.point[1]); double pRefAlt = VerifyBamID::invLogit(myMinimizer.point[2]); Logger::gLogger->writeLog("Comparing with individual %s.. 
Optimal fIBD = %lf, LLK0 = %lf, LLK1 = %lf for readgroup %d",vbid.pGenotypes->indids[i].c_str(), fIBD, myFunc.llk0, myFunc.llk1, rg); //Logger::gLogger->writeLog("Optimal per-sample fIBD = %lf, ",fIBD); Logger::gLogger->writeLog("Reference Bias Estimated as ( Pr[refBase|HET] = %lf, Pr[refBase|ALT] = %lf ) with LLK = %lf",pRefHet,pRefAlt,myMinimizer.fmin); if ( maxIBD < fIBD ) { bestInds[rg+1] = i; maxIBD = fIBD; vbid.bestOut.llk0s[rg+1] = myFunc.llk0; vbid.bestOut.llk1s[rg+1] = myFunc.llk1; vbid.bestOut.fMixs[rg+1] = 1.-myFunc.fIBD; vbid.bestOut.refHets[rg+1] = myFunc.pRefHet; vbid.bestOut.refAlts[rg+1] = myFunc.pRefAlt; } if ( ( (rg < 0) && (vbid.pPile->sBamSMID == vbid.pGenotypes->indids[i] ) ) || ( ( rg >= 0 ) && ( vbid.pPile->vsSMIDs[rg] == vbid.pGenotypes->indids[i]) ) ) { selfInds[rg+1] = i; vbid.selfOut.llk0s[rg+1] = myFunc.llk0; vbid.selfOut.llk1s[rg+1] = myFunc.llk1; vbid.selfOut.fMixs[rg+1] = 1.-myFunc.fIBD; vbid.selfOut.refHets[rg+1] = myFunc.pRefHet; vbid.selfOut.refAlts[rg+1] = myFunc.pRefAlt; vbid.calculateDepthByGenotype(i, rg, vbid.selfOut); } } //vbid.setRefBiasParams(1.0, pRefHet, pRefAlt); if ( bestInds[rg+1] >= 0 ) { Logger::gLogger->writeLog("Best Matching Individual is %s with IBD = %lf",vbid.pGenotypes->indids[bestInds[rg+1]].c_str(),maxIBD); vbid.calculateDepthByGenotype(bestInds[rg+1], rg, vbid.bestOut); } if ( selfInds[rg+1] >= 0 ) { Logger::gLogger->writeLog("Self Individual is %s with IBD = %lf",vbid.pGenotypes->indids[selfInds[rg+1]].c_str(),vbid.selfOut.fMixs[rg+1]); vbid.calculateDepthByGenotype(selfInds[rg+1],rg,vbid.selfOut); } } } // PRINT OUTPUT FILE - ".selfSM" // [SEQ_ID] : SAMPLE ID in the sequence file // [CHIP_ID] : SAMPLE ID in the chip file (NA if not available) // [#SNPS] : Number of markers evaluated // [#READS] : Number of reads evaluated // [AVG_DP] : Mean depth // [FREEMIX] : Chip-free estimated alpha (% MIX in 0-1 scale), NA if unavailable // [FREELK1] : Chip-free log-likelihood at estimated alpha // [FREELK0] : 
Chip-free log-likelihood at 0% contamination // [CHIPIBD] : With-chip estimated alpha (% MIX in 0-1 scale) // [CHIPLK1] : With-chip log-likelihood at estimated alpha // [CHIPLK0] : With-chip log-likelihood at 0% contamination // [DPREF] : Depth at reference site in the chip // [RDPHET] : Relative depth at HET site in the chip // [RDPALT] : Relative depth at HOMALT site in the chip // [FREE_RF] : Pr(Ref|Ref) site estimated without chip data // [FREE_RH] : Pr(Ref|Het) site estimated without chip data // [FREE_RA] : Pr(Ref|Alt) site estimated without chip data // [CHIP_RF] : Pr(Ref|Ref) site estimated with chip data // [CHIP_RH] : Pr(Ref|Het) site estimated with chip data // [CHIP_RA] : Pr(Ref|Alt) site estimated with chip data // [DPREF] : Depth at reference alleles // [RDPHET] : Relative depth at heterozygous alleles // [RDPALT] : Relative depth at hom-alt alleles String selfSMFN = args.sOutFile + ".selfSM"; String bestSMFN = args.sOutFile + ".bestSM"; String selfRGFN = args.sOutFile + ".selfRG"; String bestRGFN = args.sOutFile + ".bestRG"; String dpSMFN = args.sOutFile + ".depthSM"; String dpRGFN = args.sOutFile + ".depthRG"; IFILE selfSMF = ifopen(selfSMFN,"wb"); IFILE bestSMF = (args.bFindBest ? ifopen(bestSMFN,"wb") : NULL); IFILE selfRGF = (args.bIgnoreRG ? NULL : ifopen(selfRGFN,"wb")); IFILE bestRGF = (args.bFindBest && !args.bIgnoreRG) ? ifopen(bestRGFN,"wb") : NULL; IFILE dpSMF = ifopen(dpSMFN,"wb"); IFILE dpRGF = (args.bIgnoreRG ? 
NULL : ifopen(dpRGFN,"wb")); if ( selfSMF == NULL ) { Logger::gLogger->error("Cannot write to %s",selfSMF); } if ( args.bFindBest && ( bestSMF == NULL ) ) { Logger::gLogger->error("Cannot write to %s",bestSMF); } if ( dpSMF == NULL ) { Logger::gLogger->error("Cannot write to %s",dpSMF); } ifprintf(dpSMF,"#RG\tDEPTH\t#SNPs\t%%SNPs\t%%CUMUL\n"); int nCumMarkers = 0; for(int i=args.maxDepth; i >= 0; --i) { nCumMarkers += vbid.mixOut.depths[i]; ifprintf(dpSMF,"ALL\t%d\t%d\t%.5lf\t%.5lf\n",i, vbid.mixOut.depths[i],(double) vbid.mixOut.depths[i]/(double)vbid.nMarkers,(double)nCumMarkers/(double)vbid.nMarkers); } ifclose(dpSMF); if ( dpRGF != NULL ) { ifprintf(dpRGF,"#RG\tDEPTH\t#SNPs\t%%SNPs\t%%CUMUL\n"); for(int rg=0; rg < (vbid.nRGs - (int)args.bIgnoreRG); ++rg) { const char* rgID = vbid.pPile->vsRGIDs[rg].c_str(); int nMarkers = 0; for(int i=args.maxDepth; i >= 0; --i) { nMarkers += vbid.mixOut.depths[(rg+1)*(args.maxDepth+1) + i]; } nCumMarkers = 0; for(int i=args.maxDepth; i >= 0; --i) { int d = vbid.mixOut.depths[(rg+1)*(args.maxDepth+1) + i]; nCumMarkers += d; ifprintf(dpRGF,"%s\t%d\t%d\t%.5lf\t%.5lf\n",rgID,i,d,(double)d/(double)vbid.nMarkers,(double)nCumMarkers/(double)nMarkers); } } ifclose(dpRGF); } const char* headers[] = {"#SEQ_ID","RG","CHIP_ID","#SNPS","#READS","AVG_DP","FREEMIX","FREELK1","FREELK0","FREE_RH","FREE_RA","CHIPMIX","CHIPLK1","CHIPLK0","CHIP_RH","CHIP_RA","DPREF","RDPHET","RDPALT"}; int nheaders = sizeof(headers)/sizeof(headers[0]); for(int i=0; i < nheaders; ++i) { ifprintf(selfSMF,"%s%s",i>0 ? "\t" : "",headers[i]); } ifprintf(selfSMF,"\n"); ifprintf(selfSMF,"%s\tALL",vbid.pPile->sBamSMID.c_str()); ifprintf(selfSMF,"\t%s",selfInds[0] >= 0 ? 
vbid.pGenotypes->indids[selfInds[0]].c_str() : "NA"); ifprintf(selfSMF,"\t%d\t%d\t%.2lf",vbid.nMarkers,vbid.mixOut.numReads[0],(double)vbid.mixOut.numReads[0]/(double)vbid.nMarkers); if ( args.bFreeNone ) { ifprintf(selfSMF,"\tNA\tNA\tNA\tNA\tNA"); } else if ( args.bFreeMixOnly ) { ifprintf(selfSMF,"\t%.5lf\t%.2lf\t%.2lf\tNA\tNA",vbid.mixOut.fMixs[0],vbid.mixOut.llk1s[0],vbid.mixOut.llk0s[0]); } else if ( args.bFreeRefBiasOnly ) { ifprintf(selfSMF,"\tNA\t%.2lf\t%.2lf\t%.5lf\t%.5lf",vbid.mixOut.llk1s[0],vbid.mixOut.llk0s[0],vbid.mixOut.refHets[0],vbid.mixOut.refAlts[0]); } else if ( args.bFreeFull ) { ifprintf(selfSMF,"\t%.5lf\t%.2lf\t%.2lf\t%.5lf\t%.5lf",vbid.mixOut.fMixs[0],vbid.mixOut.llk1s[0],vbid.mixOut.llk0s[0],vbid.mixOut.refHets[0],vbid.mixOut.refAlts[0]); } else { error("Invalid option in handling bFree"); } if ( args.bChipNone || bestInds[0] < 0 ) { ifprintf(selfSMF,"\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA"); } else if ( args.bChipMixOnly ) { ifprintf(selfSMF,"\t%.5lf\t%.2lf\t%.2lf\tNA\tNA\t%.3lf\t%.4lf\t%.4lf",vbid.selfOut.fMixs[0],vbid.selfOut.llk1s[0],vbid.selfOut.llk0s[0],(double)vbid.selfOut.numReads[1]/vbid.selfOut.numGenos[1], (double)vbid.selfOut.numReads[2]*vbid.selfOut.numGenos[1]/vbid.selfOut.numReads[1]/vbid.selfOut.numGenos[2], (double)vbid.selfOut.numReads[3]*vbid.selfOut.numGenos[1]/vbid.selfOut.numReads[1]/vbid.selfOut.numGenos[3]); } else if ( args.bChipMixOnly ) { ifprintf(selfSMF,"\tNA\t%.2lf\t%.2lf\t%.5lf\t%.5lf\t%.3lf\t%.4lf\t%.4lf",vbid.selfOut.llk1s[0], vbid.selfOut.llk0s[0], vbid.selfOut.refHets[0], vbid.selfOut.refAlts[0], (double)vbid.selfOut.numReads[1]/vbid.selfOut.numGenos[1], (double)vbid.selfOut.numReads[2]*vbid.selfOut.numGenos[1]/vbid.selfOut.numReads[1]/vbid.selfOut.numGenos[2], (double)vbid.selfOut.numReads[3]*vbid.selfOut.numGenos[1]/vbid.selfOut.numReads[1]/vbid.selfOut.numGenos[3]); } else if ( args.bChipFull ) { ifprintf(selfSMF,"\t%.5lf\t%.2lf\t%.2lf\t%.5lf\t%.5lf\t%.3lf\t%.4lf\t%.4lf", vbid.selfOut.fMixs[0], 
vbid.selfOut.llk1s[0], vbid.selfOut.llk0s[0], vbid.selfOut.refHets[0], vbid.selfOut.refAlts[0], (double)vbid.selfOut.numReads[1]/vbid.selfOut.numGenos[1], (double)vbid.selfOut.numReads[2]*vbid.selfOut.numGenos[1]/vbid.selfOut.numReads[1]/vbid.selfOut.numGenos[2], (double)vbid.selfOut.numReads[3]*vbid.selfOut.numGenos[1]/vbid.selfOut.numReads[1]/vbid.selfOut.numGenos[3]); } else { error("Invalid option in handling bChip"); } ifprintf(selfSMF,"\n"); ifclose(selfSMF); if ( bestSMF != NULL ) { for(int i=0; i < nheaders; ++i) { ifprintf(bestSMF,"%s%s",i>0 ? "\t" : "",headers[i]); } ifprintf(bestSMF,"\n"); ifprintf(bestSMF,"%s\tALL",vbid.pPile->sBamSMID.c_str()); ifprintf(bestSMF,"\t%s",bestInds[0] >= 0 ? vbid.pGenotypes->indids[bestInds[0]].c_str() : "NA"); ifprintf(bestSMF,"\t%d\t%d\t%.2lf",vbid.nMarkers,vbid.mixOut.numReads[0],(double)vbid.mixOut.numReads[0]/(double)vbid.nMarkers); if ( args.bFreeNone ) { ifprintf(bestSMF,"\tNA\tNA\tNA\tNA\tNA"); } else if ( args.bFreeMixOnly ) { ifprintf(bestSMF,"\t%.5lf\t%.2lf\t%.2lf\tNA\tNA",vbid.mixOut.fMixs[0],vbid.mixOut.llk1s[0],vbid.mixOut.llk0s[0]); } else if ( args.bFreeRefBiasOnly ) { ifprintf(bestSMF,"\tNA\t%.2lf\t%.2lf\t%.5lf\t%.5lf",vbid.mixOut.llk1s[0],vbid.mixOut.llk0s[0],vbid.mixOut.refHets[0],vbid.mixOut.refAlts[0]); } else if ( args.bFreeFull ) { ifprintf(bestSMF,"\t%.5lf\t%.2lf\t%.2lf\t%.5lf\t%.5lf",vbid.mixOut.fMixs[0],vbid.mixOut.llk1s[0],vbid.mixOut.llk0s[0],vbid.mixOut.refHets[0],vbid.mixOut.refAlts[0]); } else { error("Invalid option in handling bFree"); } if ( args.bChipNone || bestInds[0] < 0 ) { ifprintf(bestSMF,"\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA"); } else if ( args.bChipMixOnly ) { ifprintf(bestSMF,"\t%.5lf\t%.2lf\t%.2lf\tNA\tNA\t%.3lf\t%.4lf\t%.4lf",vbid.bestOut.fMixs[0],vbid.bestOut.llk1s[0],vbid.bestOut.llk0s[0],(double)vbid.bestOut.numReads[1]/vbid.bestOut.numGenos[1], (double)vbid.bestOut.numReads[2]*vbid.bestOut.numGenos[1]/vbid.bestOut.numReads[1]/vbid.bestOut.numGenos[2], 
(double)vbid.bestOut.numReads[3]*vbid.bestOut.numGenos[1]/vbid.bestOut.numReads[1]/vbid.bestOut.numGenos[3]); } else if ( args.bChipMixOnly ) { ifprintf(bestSMF,"\tNA\t%.2lf\t%.2lf\t%.5lf\t%.5lf\t%.3lf\t%.4lf\t%.4lf",vbid.bestOut.llk1s[0], vbid.bestOut.llk0s[0], vbid.bestOut.refHets[0], vbid.bestOut.refAlts[0], (double)vbid.bestOut.numReads[1]/vbid.bestOut.numGenos[1], (double)vbid.bestOut.numReads[2]*vbid.bestOut.numGenos[1]/vbid.bestOut.numReads[1]/vbid.bestOut.numGenos[2], (double)vbid.bestOut.numReads[3]*vbid.bestOut.numGenos[1]/vbid.bestOut.numReads[1]/vbid.bestOut.numGenos[3]); } else if ( args.bChipFull ) { ifprintf(bestSMF,"\t%.5lf\t%.2lf\t%.2lf\t%.5lf\t%.5lf\t%.3lf\t%.4lf\t%.4lf", vbid.bestOut.fMixs[0], vbid.bestOut.llk1s[0], vbid.bestOut.llk0s[0], vbid.bestOut.refHets[0], vbid.bestOut.refAlts[0], (double)vbid.bestOut.numReads[1]/vbid.bestOut.numGenos[1], (double)vbid.bestOut.numReads[2]*vbid.bestOut.numGenos[1]/vbid.bestOut.numReads[1]/vbid.bestOut.numGenos[2], (double)vbid.bestOut.numReads[3]*vbid.bestOut.numGenos[1]/vbid.bestOut.numReads[1]/vbid.bestOut.numGenos[3]); } else { error("Invalid option in handling bChip"); } ifprintf(bestSMF,"\n"); ifclose(bestSMF); } if ( selfRGF != NULL ) { for(int i=0; i < nheaders; ++i) { ifprintf(selfRGF,"%s%s",i>0 ? "\t" : "",headers[i]); } ifprintf(selfRGF,"\n"); for(int rg=0; rg < vbid.nRGs; ++rg) { ifprintf(selfRGF,"%s\t%s",vbid.pPile->sBamSMID.c_str(),vbid.pPile->vsRGIDs[rg].c_str()); ifprintf(selfRGF,"\t%s",bestInds[rg] >= 0 ? 
vbid.pGenotypes->indids[bestInds[rg]].c_str() : "NA"); ifprintf(selfRGF,"\t%d\t%d\t%.2lf",vbid.nMarkers,vbid.mixOut.numReads[(rg+1)*4],(double)vbid.mixOut.numReads[(rg+1)*4]/(double)vbid.mixOut.numGenos[(rg+1)*4]); if ( args.bFreeNone ) { ifprintf(selfRGF,"\tNA\tNA\tNA\tNA\tNA"); } else if ( args.bFreeMixOnly ) { ifprintf(selfRGF,"\t%.5lf\t%.2lf\t%.2lf\tNA\tNA",vbid.mixOut.fMixs[rg+1],vbid.mixOut.llk1s[rg+1],vbid.mixOut.llk0s[rg+1]); } else if ( args.bFreeRefBiasOnly ) { ifprintf(selfRGF,"\tNA\t%.2lf\t%.2lf\t%.5lf\t%.5lf",vbid.mixOut.llk1s[rg+1],vbid.mixOut.llk0s[rg+1],vbid.mixOut.refHets[rg+1],vbid.mixOut.refAlts[rg+1]); } else if ( args.bFreeFull ) { ifprintf(selfRGF,"\t%.5lf\t%.2lf\t%.2lf\t%.5lf\t%.5lf",vbid.mixOut.fMixs[rg+1],vbid.mixOut.llk1s[rg+1],vbid.mixOut.llk0s[rg+1],vbid.mixOut.refHets[rg+1],vbid.mixOut.refAlts[rg+1]); } else { error("Invalid option in handling bFree"); } if ( args.bChipNone || bestInds[0] < 0 ) { ifprintf(selfRGF,"\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA"); } else if ( args.bChipMixOnly ) { ifprintf(selfRGF,"\t%.5lf\t%.2lf\t%.2lf\tNA\tNA\t%.3lf\t%.4lf\t%.4lf",vbid.selfOut.fMixs[rg+1], vbid.selfOut.llk1s[rg+1], vbid.selfOut.llk0s[rg+1], (double)vbid.selfOut.numReads[(rg+1)*4+1]/vbid.selfOut.numGenos[(rg+1)*4+1], (double)vbid.selfOut.numReads[(rg+1)*4+2]*vbid.selfOut.numGenos[(rg+1)*4+1]/vbid.selfOut.numReads[(rg+1)*4+1]/vbid.selfOut.numGenos[(rg+1)*4+2], (double)vbid.selfOut.numReads[(rg+1)*4+3]*vbid.selfOut.numGenos[(rg+1)*4+1]/vbid.selfOut.numReads[(rg+1)*4+1]/vbid.selfOut.numGenos[(rg+1)*4+3]); } else if ( args.bChipMixOnly ) { ifprintf(selfRGF,"\tNA\t%.2lf\t%.2lf\t%.5lf\t%.5lf\t%.3lf\t%.4lf\t%.4lf",vbid.selfOut.llk1s[rg+1], vbid.selfOut.llk0s[rg+1], vbid.selfOut.refHets[rg+1], vbid.selfOut.refAlts[rg+1], (double)vbid.selfOut.numReads[(rg+1)*4+1]/vbid.selfOut.numGenos[(rg+1)*4+1], (double)vbid.selfOut.numReads[(rg+1)*4+2]*vbid.selfOut.numGenos[(rg+1)*4+1]/vbid.selfOut.numReads[(rg+1)*4]/vbid.selfOut.numGenos[(rg+1)*4+2], 
(double)vbid.selfOut.numReads[(rg+1)*4+3]*vbid.selfOut.numGenos[(rg+1)*4+1]/vbid.selfOut.numReads[(rg+1)*4+1]/vbid.selfOut.numGenos[(rg+1)*4+3]); } else if ( args.bChipFull ) { ifprintf(selfRGF,"\t%.5lf\t%.2lf\t%.2lf\t%.5lf\t%.5lf\t%.3lf\t%.4lf\t%.4lf", vbid.selfOut.fMixs[rg+1], vbid.selfOut.llk1s[rg+1], vbid.selfOut.llk0s[rg+1], vbid.selfOut.refHets[rg+1], vbid.selfOut.refAlts[rg+1], (double)vbid.selfOut.numReads[(rg+1)*4+1]/vbid.selfOut.numGenos[(rg+1)*4+1], (double)vbid.selfOut.numReads[(rg+1)*4+2]*vbid.selfOut.numGenos[(rg+1)*4+1]/vbid.selfOut.numReads[(rg+1)*4+1]/vbid.selfOut.numGenos[(rg+1)*4+2], (double)vbid.selfOut.numReads[(rg+1)*4+3]*vbid.selfOut.numGenos[(rg+1)*4+1]/vbid.selfOut.numReads[(rg+1)*4+1]/vbid.selfOut.numGenos[(rg+1)*4+3]); } else { error("Invalid option in handling bChip"); } ifprintf(selfRGF,"\n"); } ifclose(selfRGF); } if ( bestRGF != NULL ) { for(int i=0; i < nheaders; ++i) { ifprintf(bestRGF,"%s%s",i>0 ? "\t" : "",headers[i]); } ifprintf(bestRGF,"\n"); for(int rg=0; rg < vbid.nRGs; ++rg) { ifprintf(bestRGF,"%s\t%s",vbid.pPile->sBamSMID.c_str(),vbid.pPile->vsRGIDs[rg].c_str()); ifprintf(bestRGF,"\t%s",bestInds[rg] >= 0 ? 
vbid.pGenotypes->indids[bestInds[rg]].c_str() : "NA"); ifprintf(bestRGF,"\t%d\t%d\t%.2lf",vbid.nMarkers,vbid.mixOut.numReads[(rg+1)*4],(double)vbid.mixOut.numReads[(rg+1)*4]/(double)vbid.mixOut.numGenos[(rg+1)*4]); if ( args.bFreeNone ) { ifprintf(bestRGF,"\tNA\tNA\tNA\tNA\tNA"); } else if ( args.bFreeMixOnly ) { ifprintf(bestRGF,"\t%.5lf\t%.2lf\t%.2lf\tNA\tNA",vbid.mixOut.fMixs[rg+1],vbid.mixOut.llk1s[rg+1],vbid.mixOut.llk0s[rg+1]); } else if ( args.bFreeRefBiasOnly ) { ifprintf(bestRGF,"\tNA\t%.2lf\t%.2lf\t%.5lf\t%.5lf",vbid.mixOut.llk1s[rg+1],vbid.mixOut.llk0s[rg+1],vbid.mixOut.refHets[rg+1],vbid.mixOut.refAlts[rg+1]); } else if ( args.bFreeFull ) { ifprintf(bestRGF,"\t%.5lf\t%.2lf\t%.2lf\t%.5lf\t%.5lf",vbid.mixOut.fMixs[rg+1],vbid.mixOut.llk1s[rg+1],vbid.mixOut.llk0s[rg+1],vbid.mixOut.refHets[rg+1],vbid.mixOut.refAlts[rg+1]); } else { error("Invalid option in handling bFree"); } if ( args.bChipNone || bestInds[0] < 0 ) { ifprintf(bestRGF,"\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA"); } else if ( args.bChipMixOnly ) { ifprintf(bestRGF,"\t%.5lf\t%.2lf\t%.2lf\tNA\tNA\t%.3lf\t%.4lf\t%.4lf",vbid.bestOut.fMixs[rg+1], vbid.bestOut.llk1s[rg+1], vbid.bestOut.llk0s[rg+1], (double)vbid.bestOut.numReads[(rg+1)*4+1]/vbid.bestOut.numGenos[(rg+1)*4+1], (double)vbid.bestOut.numReads[(rg+1)*4+2]*vbid.bestOut.numGenos[(rg+1)*4+1]/vbid.bestOut.numReads[(rg+1)*4+1]/vbid.bestOut.numGenos[(rg+1)*4+2], (double)vbid.bestOut.numReads[(rg+1)*4+3]*vbid.bestOut.numGenos[(rg+1)*4+1]/vbid.bestOut.numReads[(rg+1)*4+1]/vbid.bestOut.numGenos[(rg+1)*4+3]); } else if ( args.bChipMixOnly ) { ifprintf(bestRGF,"\tNA\t%.2lf\t%.2lf\t%.5lf\t%.5lf\t%.3lf\t%.4lf\t%.4lf",vbid.bestOut.llk1s[rg+1], vbid.bestOut.llk0s[rg+1], vbid.bestOut.refHets[rg+1], vbid.bestOut.refAlts[rg+1], (double)vbid.bestOut.numReads[(rg+1)*4+1]/vbid.bestOut.numGenos[(rg+1)*4+1], (double)vbid.bestOut.numReads[(rg+1)*4+2]*vbid.bestOut.numGenos[(rg+1)*4+1]/vbid.bestOut.numReads[(rg+1)*4]/vbid.bestOut.numGenos[(rg+1)*4+2], 
(double)vbid.bestOut.numReads[(rg+1)*4+3]*vbid.bestOut.numGenos[(rg+1)*4+1]/vbid.bestOut.numReads[(rg+1)*4+1]/vbid.bestOut.numGenos[(rg+1)*4+3]); } else if ( args.bChipFull ) { ifprintf(bestRGF,"\t%.5lf\t%.2lf\t%.2lf\t%.5lf\t%.5lf\t%.3lf\t%.4lf\t%.4lf", vbid.bestOut.fMixs[rg+1], vbid.bestOut.llk1s[rg+1], vbid.bestOut.llk0s[rg+1], vbid.bestOut.refHets[rg+1], vbid.bestOut.refAlts[rg+1], (double)vbid.bestOut.numReads[(rg+1)*4+1]/vbid.bestOut.numGenos[(rg+1)*4+1], (double)vbid.bestOut.numReads[(rg+1)*4+2]*vbid.bestOut.numGenos[(rg+1)*4+1]/vbid.bestOut.numReads[(rg+1)*4+1]/vbid.bestOut.numGenos[(rg+1)*4+2], (double)vbid.bestOut.numReads[(rg+1)*4+3]*vbid.bestOut.numGenos[(rg+1)*4+1]/vbid.bestOut.numReads[(rg+1)*4+1]/vbid.bestOut.numGenos[(rg+1)*4+3]); } else { error("Invalid option in handling bChip"); } ifprintf(bestRGF,"\n"); } ifclose(bestRGF); } time(&t); Logger::gLogger->writeLog("Analysis finished on %s",ctime(&t)); return 0; }
void main(int argc, char **argv) { int c; int lib_opt = FALSE; int errflg = 0; extern int optind; extern char *optarg; char *libname,*fname; char *t_name; #ifdef IBM_PC fprintf(stderr, "NYU Ada/Ed Librarian Version 1.11.2\n"); fprintf(stderr, "Copyright (C) 1985-1992 by New York University.\n"); #endif #ifndef IBM_PC while((c = getopt(argc,argv,"l:"))!=EOF) { #else while((c = getopt(argc,argv,"L:l:"))!=EOF) { if (isupper(c)) c = tolower(c); #endif switch(c) { case 'l': /* specify library name */ lib_opt = TRUE; libname = strjoin(optarg,""); break; case '?': errflg++; } } fname = (char *)0; if (optind < argc) fname = argv[optind]; if (!lib_opt && fname == (char *)0) { fname = getenv("ADALIB"); if (fname!= (char *)0) { #ifdef IBM_PC fprintf(stderr, "L"); #else fprintf(stderr, "l"); #endif fprintf(stderr,"ibrary defined by ADALIB: %s\n", fname); } } if ((!lib_opt && fname == (char *)0) || errflg) { fprintf(stderr, "Usage: adalib [-l library]\n"); exit(1); } if (!lib_opt) { libname = emalloc(strlen(fname) + 1); strcpy(libname, fname); } t_name = libset(libname); LIBFILE = ifopen(LIBFILENAME, "", "r", 0); load_library(); exit(0); } static void load_library() /*;load_library*/ { /* * retrieve information from LIBFILE * Called only if lib_option and not newlib. 
*/ int i, j, n, m, unumber, nodes, symbols; int comp_status, unit_count, cur_level; char *comp_date, *status_str, *uname, *aisname, *tmp_str; char *main_string; int is_main, empty_unit_slots, parent; int ignore; unit_count = getnum(LIBFILE, "lib-unit-count"); n = getnum(LIBFILE, "lib-n"); empty_unit_slots = getnum(LIBFILE, "lib-empty-slots"); tmp_str = getstr(LIBFILE, "lib-tmp-str"); for (i = 1; i <= unit_count; i++) { uname = getstr(LIBFILE, "lib-unit-name"); unumber = getnum(LIBFILE, "lib-unit-number"); aisname = getstr(LIBFILE, "lib-ais-name"); comp_date = getstr(LIBFILE, "unit-date"); symbols = getnum(LIBFILE, "lib-symbols"); nodes = getnum(LIBFILE, "lib-nodes"); is_main = getnum(LIBFILE, "lib-is-main"); if (is_main) { if (streq(unit_name_type(uname), "ma")) main_string = "(Interface)"; else main_string = " (Main) "; } else { main_string = ""; } comp_status = getnum(LIBFILE, "lib-status"); status_str = (comp_status) ? "active " : "obsolete"; printf("%8s %11s %-15s %s\n", status_str, main_string, convert_date(comp_date), formatted_name(uname)); } printf("\n"); n = getnum(LIBFILE, "lib-n"); if (n) { printf("stubs \n\n"); for (i = 1; i <= n; i++) { uname = getstr(LIBFILE, "lib-unit-name"); aisname = getstr(LIBFILE, "lib-ais-name"); parent = getnum(LIBFILE, "lib-parent"); cur_level = getnum(LIBFILE, "lib-cur-level"); m = getnum(LIBFILE, "stub-file-size"); for (j = 1; j <= m; j++) ignore = getnum(LIBFILE, "stub-file"); printf("%s\n", formatted_stub(uname)); } printf("\n"); } ifclose(LIBFILE); return; #ifdef TBSL n = getnum(LIBFILE, "precedes-map-size"); printf("precedes map\n"); for (i = 1; i <= n; i += 2) { dom = getnum(LIBFILE, "precedes-map-dom"); m = getnum(LIBFILE, "precedes-map-nelt"); printf(" %4d:", dom); for (j = 1; j <= m; j++) { range = getnum(LIBFILE, "precedes-map-ent"); printf(" %4d", range); } printf("\n"); } n = getnum(LIBFILE, "compilation_table_size"); if (n) { printf("\ncompilation table\n"); for (i = 1; i <= n; i++) { unum = (int) getnum(LIBFILE, 
"compilation-table-ent"); printf(" %d\n", unum); } printf("\n"); } /* late_instances */ n = getnum(LIBFILE, "late-instances-size"); if (n) { printf("late instances\n"); for (i = 1; i <= n; i++) { str = (char *) getstr(LIBFILE, "late-instances-str"); printf(" %s\n", str); } } /* current code segment */ n = getnum(LIBFILE, "unit-size"); printf("\ncurrent code segments\n"); printf(" unit cs\n"); for (i = 1; i <= n; i++) { cs = getnum(LIBFILE, "current-code-segment"); if (cs) printf(" %d: %d\n", i, cs); } /* local reference maps */ n = getnum(LIBFILE, "unit-size"); get_local_ref_maps(LIBFILE, n); cde_pos = get_cde_slots(LIBFILE, axq); /* could free axq_data_slots, etc., but keep for now */ /* read out LIB_STUB map (always empty for now) */ ifclose(LIBFILE); return; #endif }