// Open a glf file for writing with the specified filename. bool GlfFile::openForWrite(const char * filename, bool compressed) { // Reset for any previously operated on files. resetFile(); if(compressed) { myFilePtr = ifopen(filename, "wb", InputFile::BGZF); } else { myFilePtr = ifopen(filename, "wb", InputFile::UNCOMPRESSED); } if (myFilePtr == NULL) { std::string errorMessage = "Failed to Open "; errorMessage += filename; errorMessage += " for writing"; myStatus.setStatus(GlfStatus::FAIL_IO, errorMessage.c_str()); throw(GlfException(myStatus)); return(false); } myIsOpenForWrite = true; // Successfully opened the file. myStatus = GlfStatus::SUCCESS; return(true); }
// Reads three lines from testFiles/testFile.txt with String::ReadLine and
// checks the contents of the first and third, before and after Trim().
// The second line is only used to check that temp1 goes from 0 to 1 across
// the call to testMethod(line); both names are defined outside this function.
void testReadLine()
{
    IFILE filePtr = ifopen("testFiles/testFile.txt", "rb");
    assert(filePtr != NULL);

    String line = "";
    line.ReadLine(filePtr);

    // The first line is expected to keep its surrounding whitespace.
    assert(line == " Hello, I am a testFile. ");

    line.Trim();
    assert(line == "Hello, I am a testFile.");

    // Does not compile in current version, but compiles in old version.
    // This can be added back in to ensure that it will catch the difference
    // in return value for ReadLine (now: int; used to be: string&)
    //    testMethod(line.ReadLine(filePtr));
    line.ReadLine(filePtr);
    assert(temp1 == 0);
    testMethod(line);
    assert(temp1 == 1);

    // Chained form that relied on the old return type:
    //    line.ReadLine(filePtr).Trim();
    line.ReadLine(filePtr);
    line.Trim();

    assert(line == "ThirdLine.");

    ifclose(filePtr);
}
int dhcp ( struct net_device *netdev ) { uint8_t *chaddr; uint8_t hlen; uint16_t flags; int rc; /* Check we can open the interface first */ if ( ( rc = ifopen ( netdev ) ) != 0 ) return rc; /* Wait for link-up */ if ( ( rc = iflinkwait ( netdev, LINK_WAIT_MS ) ) != 0 ) return rc; /* Perform DHCP */ chaddr = dhcp_chaddr ( netdev, &hlen, &flags ); printf ( "DHCP (%s ", netdev->name ); while ( hlen-- ) printf ( "%02x%c", *(chaddr++), ( hlen ? ':' : ')' ) ); if ( ( rc = start_dhcp ( &monojob, netdev ) ) == 0 ) { rc = monojob_wait ( "" ); } else if ( rc > 0 ) { printf ( " using cached\n" ); rc = 0; } return rc; }
void StringHash::ReadLinesFromFile(const char * filename) { IFILE f = ifopen(filename, "rb"); if (f == NULL) return; ReadLinesFromFile(f); ifclose(f); }
void VcfFile::openForRead(const char* filename, int nbuf) { reset(); iFile = ifopen(filename,"rb"); if ( iFile == NULL ) { throw VcfFileException("Failed opening file %s - %s",filename, strerror(errno)); } nBuffers = nbuf; nNumMarkers = 0; nHead = 0; if ( nBuffers == 0 ) { // infinite buffer size // do not set size of markers } else { vpVcfMarkers.resize( nBuffers ); for(int i=0; i < nBuffers; ++i) { VcfMarker* p = new VcfMarker; vpVcfMarkers[i] = p; } } parseMeta(); parseHeader(); if ( bUpgrade ) { upgradeMetaLines(); } }
void Imputation::PrintInfoFile(HaplotypeSet &rHap,HaplotypeSet &tHap, ImputationStatistics &stats) { cout<<endl<<" Writing summary (.info) files ... "<<endl; IFILE info = ifopen(outFile + ".info", "wb"); ifprintf(info, "SNP\tREF(0)\tALT(1)\tALT_Frq\tMAF\tAvgCall\tRsq\tGenotyped\tLooRsq\tEmpR\tEmpRsq\tDose0\tDose1\n"); int i=0; for (int index =0; index < rHap.RefTypedTotalCount; index++) { if(rHap.RefTypedIndex[index]==-1) { if(i>=rHap.PrintStartIndex && i <= rHap.PrintEndIndex) { ifprintf(info, "%s\t%s\t%s\t%.5f\t%.5f\t%.5f\t%.5f\t", RsId? rHap.VariantList[i].rsid.c_str(): rHap.VariantList[i].name.c_str(), rHap.VariantList[i].refAlleleString.c_str(), rHap.VariantList[i].altAlleleString.c_str(), stats.AlleleFrequency(i), stats.AlleleFrequency(i) > 0.5 ? 1.0 - stats.AlleleFrequency(i) : stats.AlleleFrequency(i), stats.AverageCallScore(i), stats.Rsq(i)); if (!tHap.missing[i]) { ifprintf(info, "Genotyped\t%.3f\t%.3f\t%.5f\t%.5f\t%.5f\n", stats.LooRsq(i), stats.EmpiricalR(i), stats.EmpiricalRsq(i), stats.LooMajorDose(i), stats.LooMinorDose(i)); } else ifprintf(info, "Imputed\t-\t-\t-\t-\t-\n"); } i++; } else { variant ThisTypedVariant =tHap.TypedOnlyVariantList[rHap.RefTypedIndex[index]]; ifprintf(info, "%s\t%s\t%s\t%.5f\t%.5f\t-\t-\tTyped_Only\t-\t-\t-\t-\t-\n", RsId? ThisTypedVariant.rsid.c_str(): ThisTypedVariant.name.c_str(), ThisTypedVariant.refAlleleString.c_str(), ThisTypedVariant.altAlleleString.c_str(), tHap.AlleleFreq[rHap.RefTypedIndex[index]], tHap.AlleleFreq[rHap.RefTypedIndex[index]] > 0.5 ? 1.0 - tHap.AlleleFreq[rHap.RefTypedIndex[index]] : tHap.AlleleFreq[rHap.RefTypedIndex[index]]); } } ifclose(info); cout<<endl<<" Summary information written to : "<<outFile<<".info"<<endl; }
void Imputation::FlushPartialVcf(HaplotypeSet &rHap,HaplotypeSet &tHap,HaplotypeSet &PartialDosage, string &filename,int &Index) { string tempFileIndex(outFile),tempFileIndex1(outFile); IFILE vcfdosepartial = ifopen(filename.c_str(), "wb", InputFile::BGZF); for(int hapId=0;hapId<(int)PartialDosage.individualName.size();hapId++) { ifprintf(vcfdosepartial,"\t%s",PartialDosage.individualName[hapId].c_str()); } ifprintf(vcfdosepartial,"\n"); int i=0; for (int index =0; index < rHap.RefTypedTotalCount; index++) { if(rHap.RefTypedIndex[index]==-1) { if(i>=rHap.PrintStartIndex && i <= rHap.PrintEndIndex) { bool majorIsReference=false; if(!rHap.major[i]) majorIsReference=true; if(!tHap.AllMaleTarget) PartialDosage.PrintDosageForVcfOutputForID(vcfdosepartial,i, majorIsReference,rHap.VariantList[i].refAllele); else PartialDosage.PrintDosageForVcfOutputForIDMaleSamples(vcfdosepartial,i, majorIsReference,rHap.VariantList[i].refAllele); ifprintf(vcfdosepartial,"\n"); } i++; } else { if(!tHap.AllMaleTarget) PartialDosage.PrintDosageGWASOnlyForVcfOutputForID (tHap,vcfdosepartial,rHap.RefTypedIndex[index]); else PartialDosage.PrintDosageGWASOnlyForVcfOutputForIDMaleSamples (tHap,vcfdosepartial,rHap.RefTypedIndex[index]); ifprintf(vcfdosepartial,"\n"); } } ifclose(vcfdosepartial); }
int init_predef()									/*;init_predef*/
{
	/* Opens the "predef" library file with PREDEF selected as the current
	 * library, restores the prior library, and returns whatever read_lib()
	 * reports (the number of units read).
	 */
	extern char *PREDEFNAME;
	char *prior_lib;

	prior_lib = libset(PREDEFNAME);	/* set PREDEF library as library */
	LIBFILE = ifopen("predef", "lib", "r", 0);
	(void) libset(prior_lib);	/* restore prior library; result not needed */

	return(read_lib());
}
void GCContent::LoadRegions(String & regionsFile, GenomeSequence &genome, bool invertRegion) { if(regionsFile.Length()==0) return; if(genome.sequenceLength()==0) error("No reference genome loaded!\n"); IFILE fhRegions; fhRegions = ifopen(regionsFile.c_str(),"r"); if(fhRegions==NULL) error("Open regions file %s failed!\n", regionsFile.c_str()); regionIndicator.resize(genome.sequenceLength()); StringArray tokens; String buffer; int len; fprintf(stderr, "Loading region list..."); while (!ifeof(fhRegions)){ buffer.ReadLine(fhRegions); if (buffer.IsEmpty() || buffer[0] == '#') continue; tokens.AddTokens(buffer, WHITESPACE); if(tokens.Length() < 3) continue; genomeIndex_t startGenomeIndex = 0; int chromosomeIndex = tokens[1].AsInteger(); // use chromosome name (token[0]) and position (token[1]) to query genome index. startGenomeIndex = genome.getGenomePosition(tokens[0].c_str(), chromosomeIndex); if(startGenomeIndex >= regionIndicator.size() ) { //fprintf(stderr, "WARNING: region list section %s position %u is not found in the reference and skipped...\n", tokens[0].c_str(), chromosomeIndex); continue; } len = tokens[2].AsInteger() - tokens[1].AsInteger() + 1; for(uint32_t i=startGenomeIndex; i<startGenomeIndex+len; i++) regionIndicator[i] = true; tokens.Clear(); buffer.Clear(); } if (invertRegion) { fprintf(stderr, " invert region..."); for (uint32_t i = 0; i < regionIndicator.size(); i++) { regionIndicator[i] = !regionIndicator[i]; } } ifclose(fhRegions); fprintf(stderr, "DONE!\n"); }
// Writes a two-column table (MarkerName, ErrorRate) with one row per
// marker, taking the rate for marker i from E[i]. Does nothing if
// `filename` cannot be opened.
void MarkovParameters::WriteErrorRates(StringArray & markerNames, const char * filename)
{
    IFILE table = ifopen(filename, "wb");

    if (table != NULL)
    {
        ifprintf(table, "MarkerName\tErrorRate\n");

        int m = 0;
        while (m < markers)
        {
            ifprintf(table, "%s\t%.5g\n", (const char *) markerNames[m], E[m]);
            ++m;
        }

        ifclose(table);
    }
}
bool StringAlias::ReadFromFile(const char * filename) { IFILE input = ifopen(filename, "rt"); if (input == NULL) return false; ReadFromFile(input); ifclose(input); return true; }
// Writes a two-column table (Interval, SwitchRate) with one row per pair of
// adjacent markers, labelled "<left>-<right>" and taking the rate for
// interval i from R[i]. Does nothing if `filename` cannot be opened.
void MarkovParameters::WriteCrossoverRates(StringArray & markerNames, const char * filename)
{
    IFILE table = ifopen(filename, "wb");

    if (table != NULL)
    {
        ifprintf(table, "Interval\tSwitchRate\n");

        // There is one interval fewer than there are markers.
        int left = 0;
        while (left < markers - 1)
        {
            ifprintf(table, "%s-%s\t%.5g\n",
                     (const char *) markerNames[left],
                     (const char *) markerNames[left + 1],
                     R[left]);
            ++left;
        }

        ifclose(table);
    }
}
// Creates `filename` for writing and writes the header.
// Returns false and marks the handler as a stub when the file cannot be
// opened; otherwise returns whether the handle is still non-NULL after
// WriteHeader().
bool glfHandler::Create(const String & filename)
{
    isStub = false;
    handle = ifopen(filename, "wb");

    if (handle != NULL)
    {
        WriteHeader();
        return handle != NULL;
    }

    // Nothing could be opened: this handler is only a stub.
    isStub = true;
    return false;
}
bool glfHandler::Create(const String & filename) { isStub = false; // glf is in BGZF format. handle = ifopen(filename, "wb", InputFile::BGZF); if (handle == NULL) { isStub = true; return false; } WriteHeader(); return handle != NULL; }
// Opens `filename` for reading and reads the header.
// Returns false and marks the handler as a stub when the file cannot be
// opened. When the header cannot be read the file is closed, and the
// result is whether `handle` is non-NULL afterwards.
bool glfHandler::Open(const String & filename)
{
    isStub = false;
    handle = ifopen(filename, "rb");

    if (handle == NULL)
    {
        isStub = true;
        return false;
    }

    const bool headerOk = ReadHeader();
    if (!headerOk)
    {
        // NOTE(review): the return value below only reports this failure if
        // ifclose() resets `handle` to NULL - confirm against its declaration.
        ifclose(handle);
    }

    endOfSection = true;

    return handle != NULL;
}
void VerifyBamID::loadSubsetInds(const char* subsetFile) { if ( ( pPile == NULL ) && ( pGenotypes == NULL ) ) { if ( subsetInds.size() > 0 ) { Logger::gLogger->error("VerifyBamID::loadSubsetInds() called multiple times"); } IFILE f = ifopen(subsetFile,"rb"); String line; StringArray tok; while( line.ReadLine(f) > 0 ) { tok.ReplaceTokens(line,"\t \n\r"); subsetInds.push_back(tok[0].c_str()); } } else { Logger::gLogger->error("VerifyBamID::loadSubsetInds() called after VerifyBamID::loadFiles()"); } }
/**
 * Configure a network device via DHCP
 *
 * Opens the device, waits for link-up, prints the device name and
 * link-layer address, then starts DHCP and waits for it to complete.
 *
 * @v netdev		Network device
 * @ret rc		Return status code
 */
int dhcp ( struct net_device *netdev ) {
	int rc;

	/* Check we can open the interface first */
	rc = ifopen ( netdev );
	if ( rc != 0 )
		return rc;

	/* Wait for link-up */
	rc = iflinkwait ( netdev, LINK_WAIT_MS );
	if ( rc != 0 )
		return rc;

	/* Perform DHCP */
	printf ( "DHCP (%s %s)", netdev->name,
		 netdev->ll_protocol->ntoa ( netdev->ll_addr ) );
	rc = start_dhcp ( &monojob, netdev );
	if ( rc != 0 )
		return rc;

	return monojob_wait ( "" );
}
int main() { String fn = "/home/zhanxw/compareMapSoft/index/mapreads/chr1.fa"; IFILE file = ifopen(fn.c_str(), "r"); int totalChar = 0; String line; int freq[256] = {0}; while (!ifeof(file)){ line.ReadLine(file); totalChar += line.Length(); for (int i = 0; i < line.Length(); i++) freq[(unsigned int) line[i]]++; } printf("A frequency: %d (%f)\n", freq[(int)'A'], (float)freq[(int)'A']/totalChar); printf("T frequency: %d (%f)\n", freq[(int)'T'], (float)freq[(int)'T']/totalChar); printf("G frequency: %d (%f)\n", freq[(int)'G'], (float)freq[(int)'G']/totalChar); printf("C frequency: %d (%f)\n", freq[(int)'C'], (float)freq[(int)'C']/totalChar); }
bool VcfFile::open(const char* filename, const char* mode, InputFile::ifileCompression compressionMode) { // Reset for any previously operated on files. reset(); myFilePtr = ifopen(filename, mode, compressionMode); if(myFilePtr == NULL) { std::string errorMessage = "Failed to Open "; errorMessage += filename; errorMessage += " for "; errorMessage += mode; myStatus.setStatus(StatGenStatus::FAIL_IO, errorMessage.c_str()); return(false); } return(true); }
bool FilterStat::writeMergedVcf(const char* outFile) { IFILE oFile = ifopen(outFile,"wb"); if ( oFile == NULL ) { Logger::gLogger->error("Cannot open output file %s",outFile); } VcfFile vcf; vcf.setSiteOnly(false); vcf.setParseValues(true); vcf.openForRead(sAnchorVcf.c_str(),1); vcf.printVCFHeader(oFile); VcfMarker* pMarker; String STC, STR; for( int i=0; vcf.iterateMarker(); ++i ) { pMarker = vcf.getLastMarker(); int c[FILTER_STAT_COUNTS]; for(int j=0; j < FILTER_STAT_COUNTS; ++j) { c[j] = vCounts[FILTER_STAT_COUNTS*i+j]; } STC.printf("%d,%d,%d,%d,%d,%d",c[0],c[1],c[2],c[3],c[4],c[5]); if ( ( c[0]+c[1] > 4 ) && ( c[1]+c[3] > 4 ) && ( c[0]+c[2] > 4 ) && ( c[1]+c[3] > 4 ) ) { STR.printf("%.2lf",((c[0]+.5)*(c[3]+.5)-(c[1]+.5)*(c[2]+.5))/sqrt((c[0]+c[1]+1.)*(c[2]+c[3]+1.)*(c[0]+c[2]+1.)*(c[1]+c[3]+1.))); } else { STR = "0"; } pMarker->asInfoKeys.Add("STC"); pMarker->asInfoKeys.Add("STR"); pMarker->asInfoValues.Add(STC); pMarker->asInfoValues.Add(STR); pMarker->printVCFMarker(oFile,false); } ifclose(oFile); return true; }
// Open a glf file for reading with the specified filename. bool GlfFile::openForRead(const char * filename) { // Reset for any previously operated on files. resetFile(); myFilePtr = ifopen(filename, "rb"); if (myFilePtr == NULL) { std::string errorMessage = "Failed to Open "; errorMessage += filename; errorMessage += " for reading"; myStatus.setStatus(GlfStatus::FAIL_IO, errorMessage.c_str()); throw(GlfException(myStatus)); return(false); } myIsOpenForRead = true; // Successfully opened the file. myStatus = GlfStatus::SUCCESS; return(true); }
// Open a FastQFile. FastQStatus::Status FastQFile::openFile(const char* fileName, BaseAsciiMap::SPACE_TYPE spaceType) { // reset the member data. reset(); myBaseComposition.resetBaseMapType(); myBaseComposition.setBaseMapType(spaceType); myQualPerCycle.clear(); myCountPerCycle.clear(); FastQStatus::Status status = FastQStatus::FASTQ_SUCCESS; // Close the file if there is already one open - checked by close. status = closeFile(); if(status == FastQStatus::FASTQ_SUCCESS) { // Successfully closed a previously opened file if there was one. // Open the file myFile = ifopen(fileName, "rt"); myFileName = fileName; if(myFile == NULL) { // Failed to open the file. status = FastQStatus::FASTQ_OPEN_ERROR; } } if(status != FastQStatus::FASTQ_SUCCESS) { // Failed to open the file. std::string errorMessage = "ERROR: Failed to open file: "; errorMessage += fileName; logMessage(errorMessage.c_str()); } return(status); }
/* Opens fileName for the stream reader and fills its buffer.
 *
 * The open call is chosen at build time: gdFsOpen when DREAMCAST is
 * defined, otherwise ifopen when USE_IFOPEN is defined, otherwise fopen.
 *
 * Returns 1 on success and 0 if the file could not be opened. When fatal
 * is non-zero a failed open prints a message and exits with -1 instead.
 */
char streamReader_open(streamReader *pThis, const char *fileName, int fatal)
{
#if defined(DREAMCAST)
    pThis->fileHandle = gdFsOpen(fileName, NULL);
#elif defined(USE_IFOPEN)
    pThis->fileHandle = ifopen(fileName, "rb");
#else
    pThis->fileHandle = fopen(fileName, "rb");
#endif

    if (!pThis->fileHandle)
    {
        if (fatal)
        {
            printf("FATAL: Can't find %s\n", fileName);
            exit(-1);
        }
        return 0;
    }

    /* Start at the first sector and fill the buffer. */
    pThis->currentSector = 0;
    streamReader_feedBuffer(pThis);
    return 1;
}
void VerifyBamID::printPerMarkerInfo(const char* filename, int indIdx) { IFILE oFile = ifopen(filename,"wb"); int nMarkers = (int)(pGenotypes->chroms.size()); char base, a1, a2; ifprintf(oFile,"#CHROM\tPOS\tA1\tA2\tAF\tGENO\t#REF\t#ALT\t#OTHERS\tBASES\tQUALS\tMAPQS\n"); for(int i=0; i < nMarkers; ++i) { int counts[3] = {0,0,0}; std::vector<char> bases; std::vector<char> quals; std::vector<char> mqs; ifprintf(oFile,"%s\t%d\t%c\t%c\t%.4lf\t",pGenotypes->chroms[i].c_str(),pGenotypes->positions[i],pGenotypes->refBases[i],pGenotypes->altBases[i],pGenotypes->alleleFrequencies[i]); int geno = pGenotypes->getGenotype(indIdx,i); switch(geno) { case 0: // MISSING ifprintf(oFile,"./."); break; case 1: // HOMREF; ifprintf(oFile,"0/0"); break; case 2: // HET; ifprintf(oFile,"0/1"); break; case 3: // HOMALT; ifprintf(oFile,"1/1"); break; default: Logger::gLogger->error("Unrecognized genotype %d at ind %d, marker %d",indIdx,i); } a1 = pGenotypes->refBases[i]; a2 = pGenotypes->altBases[i]; for(int j=(int)pPile->nBegins[i]; j < (int)pPile->nEnds[i]; ++j) { // obtain b (base), (error), and readgroup info base = pPile->cBases[j]; if ( base == a1 ) { ++counts[0]; } else if ( base == a2 ) { ++counts[1]; } else { ++counts[2]; } bases.push_back(base); quals.push_back(pPile->cQuals[j]); mqs.push_back(((uint8_t)(pPile->cMapQs[j]) > 90) ? '~' : static_cast<char>(pPile->cMapQs[j]+33)); } ifprintf(oFile,"\t%d\t%d\t%d\t%.3lf\t",counts[0],counts[1],counts[2],(counts[0]+counts[1] == 0) ? 0.5 : (double)counts[0]/(double)(counts[0]+counts[1])); ifprintf(oFile,"\t"); for(int j=0; j < (int)bases.size(); ++j) ifprintf(oFile,"%c",bases[j]); ifprintf(oFile,"\t"); for(int j=0; j < (int)quals.size(); ++j) ifprintf(oFile,"%c",quals[j]); ifprintf(oFile,"\t"); for(int j=0; j < (int)mqs.size(); ++j) ifprintf(oFile,"%c",mqs[j]); ifprintf(oFile,"\n"); } }
// Read & parse the specified index file. StatGenStatus::Status Tabix::readIndex(const char* filename) { // Reset the index from anything that may previously be set. resetIndex(); IFILE indexFile = ifopen(filename, "rb"); // Failed to open the index file. if(indexFile == NULL) { return(StatGenStatus::FAIL_IO); } // read the tabix index structure. // Read the magic string. char magic[4]; if(ifread(indexFile, magic, 4) != 4) { // Failed to read the magic return(StatGenStatus::FAIL_IO); } // If this is not an index file, set num references to 0. if (magic[0] != 'T' || magic[1] != 'B' || magic[2] != 'I' || magic[3] != 1) { // Not a Tabix Index file. return(StatGenStatus::FAIL_PARSE); } // It is a tabix index file. // Read the number of reference sequences. if(ifread(indexFile, &n_ref, 4) != 4) { // Failed to read. return(StatGenStatus::FAIL_IO); } // Size the references. myRefs.resize(n_ref); // Read the Format configuration. if(ifread(indexFile, &myFormat, sizeof(myFormat)) != sizeof(myFormat)) { // Failed to read. return(StatGenStatus::FAIL_IO); } // Read the length of the chromosome names. uint32_t l_nm; if(ifread(indexFile, &l_nm, sizeof(l_nm)) != sizeof(l_nm)) { // Failed to read. return(StatGenStatus::FAIL_IO); } // Read the chromosome names. myChromNamesBuffer = new char[l_nm]; if(ifread(indexFile, myChromNamesBuffer, l_nm) != l_nm) { return(StatGenStatus::FAIL_IO); } myChromNamesVector.resize(n_ref); // Parse out the chromosome names. bool prevNull = true; int chromIndex = 0; for(uint32_t i = 0; i < l_nm; i++) { if(chromIndex >= n_ref) { // already set the pointer for the last chromosome name, // so stop looping. break; } if(prevNull == true) { myChromNamesVector[chromIndex++] = myChromNamesBuffer + i; prevNull = false; } if(myChromNamesBuffer[i] == '\0') { prevNull = true; } } for(int refIndex = 0; refIndex < n_ref; refIndex++) { // Read each reference. Reference* ref = &(myRefs[refIndex]); // Resize the bins so they can be indexed by bin number. 
ref->bins.resize(MAX_NUM_BINS + 1); // Read the number of bins. if(ifread(indexFile, &(ref->n_bin), 4) != 4) { // Failed to read the number of bins. // Return failure. return(StatGenStatus::FAIL_PARSE); } // Read each bin. for(int binIndex = 0; binIndex < ref->n_bin; binIndex++) { uint32_t binNumber; // Read in the bin number. if(ifread(indexFile, &(binNumber), 4) != 4) { // Failed to read the bin number. // Return failure. return(StatGenStatus::FAIL_IO); } // Add the bin to the reference and get the // pointer back so the values can be set in it. Bin* binPtr = &(ref->bins[binNumber]); binPtr->bin = binNumber; // Read in the number of chunks. if(ifread(indexFile, &(binPtr->n_chunk), 4) != 4) { // Failed to read number of chunks. // Return failure. return(StatGenStatus::FAIL_IO); } // Read in the chunks. // Allocate space for the chunks. uint32_t sizeOfChunkList = binPtr->n_chunk * sizeof(Chunk); binPtr->chunks = (Chunk*)malloc(sizeOfChunkList); if(ifread(indexFile, binPtr->chunks, sizeOfChunkList) != sizeOfChunkList) { // Failed to read the chunks. // Return failure. return(StatGenStatus::FAIL_IO); } } // Read the number of intervals. if(ifread(indexFile, &(ref->n_intv), 4) != 4) { // Failed to read, set to 0. ref->n_intv = 0; // Return failure. return(StatGenStatus::FAIL_IO); } // Allocate space for the intervals and read them. uint32_t linearIndexSize = ref->n_intv * sizeof(uint64_t); ref->ioffsets = (uint64_t*)malloc(linearIndexSize); if(ifread(indexFile, ref->ioffsets, linearIndexSize) != linearIndexSize) { // Failed to read the linear index. // Return failure. return(StatGenStatus::FAIL_IO); } } // Successfully read teh bam index file. return(StatGenStatus::SUCCESS); }
// Runs the main imputation over every target haplotype and writes the
// requested output files.
//   tHap   - target (study) haplotypes
//   rHap   - reference panel, including its compressed block structure
//            (optEndPoints / ReducedStructureInfo)
//   Golden - not referenced anywhere in this function
//
// Sequence, as visible in this function:
//   1. Estimate the model parameters with createEstimates().
//   2. Open the .hapDose/.hapLabel and .dose outputs when requested, and
//      write (then immediately close) the header of <outFile>.dose.vcf.
//   3. In an OpenMP parallel loop, run the left walk and the imputation pass
//      over every block for each haplotype, update the statistics and emit
//      the per-sample output. VCF dosages are buffered and flushed to
//      numbered ".dose.vcf.part.N" files every maxVcfSample samples.
//   4. Close the outputs, write the .info file and, for VCF output, call
//      MergeFinalVcfAllVariants().
//
// NOTE(review): the MarkovParameters object returned by createEstimates() is
// not released in this function - confirm who owns it.
void Imputation::performImputation(HaplotypeSet &tHap,HaplotypeSet &rHap, String Golden)
{

    vector<int> optStructure=rHap.optEndPoints;

    // vcfSampleIndex is incremented below but never read in this function.
    int time_prev = time(0),time_load,vcfSampleIndex=0;;

    includeGwas=true;
    MarkovParameters* MP=createEstimates(rHap,tHap,rHap.optEndPoints,1-includeGwas);

    cout<<" ------------------------------------------------------------------------------"<<endl;
    cout<<" MAIN IMPUTATION "<<endl;
    cout<<" ------------------------------------------------------------------------------"<<endl;

    ImputationStatistics stats(rHap.numMarkers );
    IFILE dosages=NULL, hapdose=NULL, haps=NULL,vcfdosepartial=NULL;
    // Buffer holding the dosages of the samples that are waiting to be
    // flushed to the next partial VCF file.
    HaplotypeSet DosageForVcfPartial;
    DosageForVcfPartial.unphasedOutput=unphasedOutput;
    DosageForVcfPartial.TypedOnly=tHap.TypedOnly;
    DosageForVcfPartial.GWASOnlycounter=tHap.GWASOnlycounter;

    if(tHap.TypedOnly)
    {
        printf("\n Calculating Allele Frequency for Typed-Only variants ... ");
        cout<<endl;
        tHap.CalculateGWASOnlyFreq();
    }

    cout << "\n Starting Imputation ...";
    printf("\n\n Setting up Markov Model for Imputation ...");
    cout<<endl<<endl;

    if (phased && !unphasedOutput)
    {
        hapdose = ifopen(outFile + ".hapDose" + (gzip ? ".gz" : ""), "wb", gzip ?InputFile::BGZF:InputFile::UNCOMPRESSED);
        haps = ifopen(outFile + ".hapLabel" + (gzip ? ".gz" : ""), "wb", gzip ?InputFile::BGZF:InputFile::UNCOMPRESSED);
    }

    // At most maxVcfSample samples are buffered per partial VCF file
    // (200, or fewer when the target has fewer samples).
    int maxVcfSample=200,NumVcfWritten=0,NumVcfCreated=0,NovcfParts=1;

    if((maxVcfSample)>=tHap.numSamples)
        maxVcfSample=tHap.numSamples;

    if(vcfOutput)
    {
        // Only the VCF header is written here; the file is closed again
        // before the sample columns exist.
        vcfdosepartial = ifopen(outFile + ".dose.vcf" + (gzip ? ".gz" : ""), "wb", gzip ?InputFile::BGZF:InputFile::UNCOMPRESSED);
        ifprintf(vcfdosepartial,"##fileformat=VCFv4.1\n");
        time_t t = time(0);
        struct tm * now = localtime( & t );
        ifprintf(vcfdosepartial,"##filedate=%d.%d.%d\n",(now->tm_year + 1900),(now->tm_mon + 1) ,now->tm_mday);
        ifprintf(vcfdosepartial,"##source=Minimac3\n");
        if(GT)
            ifprintf(vcfdosepartial,"##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n");
        if(tHap.AllMaleTarget)
        {
            if(DS)
                ifprintf(vcfdosepartial,"##FORMAT=<ID=DS,Number=1,Type=Float,Description=\"Estimated Alternate Allele Dosage (For Male Chr: X) : [P(Alt Allele)]\">\n");
            if(GP)
                ifprintf(vcfdosepartial,"##FORMAT=<ID=GP,Number=2,Type=Float,Description=\"Estimated Posterior Probabilities for Genotypes 0 and 1 (For Male Chr: X) \">\n");
        }
        else
        {
            if(DS)
                ifprintf(vcfdosepartial,"##FORMAT=<ID=DS,Number=1,Type=Float,Description=\"Estimated Alternate Allele Dosage : [P(0/1)+2*P(1/1)]\">\n");
            if(GP)
                ifprintf(vcfdosepartial,"##FORMAT=<ID=GP,Number=3,Type=Float,Description=\"Estimated Posterior Probabilities for Genotypes 0/0, 0/1 and 1/1 \">\n");
        }

        ifprintf(vcfdosepartial,"##INFO=<ID=MAF,Number=1,Type=Float,Description=\"Estimated Alternate Allele Frequency\">\n");
        ifprintf(vcfdosepartial,"##INFO=<ID=R2,Number=1,Type=Float,Description=\"Estimated Imputation Accuracy\">\n");
        ifprintf(vcfdosepartial,"##INFO=<ID=ER2,Number=1,Type=Float,Description=\"Empirical (Leave-One-Out) R-square (available only for genotyped variants)\">\n");
        ifprintf(vcfdosepartial,"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT");
        ifclose(vcfdosepartial);

        // Size the buffer for the first batch of samples.
        if(!tHap.AllMaleTarget)
            DosageForVcfPartial.InitializePartialDosageForVcfOutput((2*maxVcfSample),rHap.numMarkers,format);
        else
            DosageForVcfPartial.InitializePartialDosageForVcfOutputMaleSamples(maxVcfSample<MaxSample?maxVcfSample:MaxSample,rHap.numMarkers,format);
    }

    if(doseOutput)
        dosages = ifopen(outFile + ".dose" + (gzip ? ".gz" : ""), "wb",(gzip ? InputFile::BGZF:InputFile::UNCOMPRESSED) );

    #pragma omp parallel for
    for(int hapId=0;hapId<MaxSample;hapId++)
    {
        // Odd indices are handled by the do-while of the preceding even
        // index; they start their own iteration only for an all-male
        // chromosome X target.
        if (hapId %2==1)
        {
            if(rHap.finChromosome!="X")
                continue;
            else if(!tHap.AllMaleTarget)
                continue;
        }

        vector<float> foldedProb,recomProb,noRecomProb, rightProb,probAlleleNoStandardize(8,0.0),tempDoseHap1;
        vector<bool> tempHap(rHap.numMarkers),tempMissHap(rHap.numMarkers);
        vector<bool> tempDoseAlleleHap1;

        // Each iteration builds its own model and copies the estimated
        // parameters into it.
        MarkovModel MM(tHap,rHap,tHap.missing,rHap.major);
        MM.CopyParameters(MP);

        int hapIdIndiv=hapId;

        do{

            MM.initializeMatrices(tHap,rHap,optStructure,rHap.ReducedStructureInfo);
            printf(" Processing Haplotype %d of %d ...", hapIdIndiv + 1, MaxSample);
            cout<<endl;

            MM.ThisHapId=hapIdIndiv;

            // First pass: walk left to right over the blocks, storing the
            // left probabilities of every block.
            for(int group=1;group<(int)optStructure.size();group++)
            {

                MM.foldProbabilities(foldedProb,group-1,rHap.ReducedStructureInfo[group-1],0,refCount);
                MM.leftNoRecoProb[group-1][0]=foldedProb;

                // In the first block, condition on the first marker when it
                // is observed for this haplotype.
                if(group==1 && !tHap.missing[0])
                    if(!tHap.getMissingScaffoldedHaplotype(hapIdIndiv,0))
                    {
                        Condition(rHap,0,foldedProb,MM.leftNoRecoProb[group-1][0],MM.Error[0],
                                  tHap.getScaffoldedHaplotype(hapIdIndiv,0)? rHap.AlleleFreq[0] : 1-rHap.AlleleFreq[0],
                                  tHap.getScaffoldedHaplotype(hapIdIndiv,0),MM.backgroundError,
                                  foldedProb.size(),rHap.ReducedStructureInfo[0]);
                    }

                MM.WalkLeft(tHap,hapIdIndiv,MM.leftProb[group-1],MM.leftNoRecoProb[group-1],
                            foldedProb,optStructure[group-1],optStructure[group],
                            rHap.ReducedStructureInfo[group-1],rHap.AlleleFreq);

                splitFoldedProb(recomProb,MM.leftProb[group-1][optStructure[group]-optStructure[group-1]],MM.leftNoRecoProb[group-1][optStructure[group]-optStructure[group-1]]);

                MM.unfoldProbabilities(group-1,recomProb,MM.leftNoRecoProb[group-1][optStructure[group]-optStructure[group-1]],foldedProb,0,rHap.ReducedStructureInfo,refCount);

            }

            // Second pass: go over the blocks from last to first and impute.
            for(int group=optStructure.size()-1;group>0;group--)
            {

                MM.foldProbabilities(foldedProb,group-1,rHap.ReducedStructureInfo[group-1],1,refCount);
                rightProb=foldedProb;
                noRecomProb=foldedProb;

                MM.Impute(tHap,foldedProb,hapIdIndiv,MM.leftProb[group-1],MM.leftNoRecoProb[group-1],rightProb,noRecomProb,MM.junctionLeftProb[group-1],
                          MM.junctionRightProb[group],optStructure[group-1], optStructure[group],rHap.ReducedStructureInfo[group-1],1,rHap.AlleleFreq);

                splitFoldedProb(recomProb,rightProb,noRecomProb);
                MM.unfoldProbabilities(group-1,recomProb,noRecomProb,foldedProb,1,rHap.ReducedStructureInfo,refCount);
            }

            // Copy the observed alleles and missing flags of this haplotype
            // for the statistics update.
            for(int jjj=0;jjj<rHap.numMarkers;jjj++)
            {
                tempHap[jjj]=tHap.getScaffoldedHaplotype(hapIdIndiv,jjj);
                tempMissHap[jjj]=tHap.getMissingScaffoldedHaplotype(hapIdIndiv,jjj);
            }

            // Keep the result of the even haplotype so it can be stored
            // together with the following odd one.
            if(vcfOutput)
            {
                if(hapIdIndiv%2==0)
                {
                    tempDoseHap1= MM.imputedHap;
                    tempDoseAlleleHap1= MM.imputedAlleleNumber;
                }
            }

            #pragma omp critical
            {
                stats.Update(MM.imputedHap, MM.leaveOneOut,tempHap,tempMissHap,rHap.major);
            }

            #pragma omp critical
            if (phased && !unphasedOutput)
            {

                PrintHaplotypeData(rHap, tHap, hapdose, haps,
                                   MM.imputedHap, MM.imputedAlleleNumber,
                                   hapIdIndiv, tHap.AllMaleTarget?hapId:hapId/2);
            }

            // All-male targets are processed one haplotype per iteration.
            if(tHap.AllMaleTarget)
                break;
            hapIdIndiv++;
        }while(hapIdIndiv<MaxSample && hapIdIndiv%2==1);

        #pragma omp critical
        if(doseOutput)
        {
            PrintDosageData(rHap, tHap, dosages, MM.imputedDose, tHap.AllMaleTarget?hapId:hapId/2);
        }

        // The counters NumVcfCreated, NumVcfWritten and NovcfParts are
        // shared between iterations and only modified inside this section.
        #pragma omp critical
        if(vcfOutput)
        {

            printf(" Saving Individual %s for VCF File...\n", tHap.individualName[tHap.AllMaleTarget?hapId:hapId/2].c_str());
            if(!tHap.AllMaleTarget)
                DosageForVcfPartial.SaveDosageForVcfOutputSampleWise(NumVcfCreated-NumVcfWritten,
                        tHap.individualName[tHap.AllMaleTarget?hapId:hapId/2],
                        tempDoseHap1,MM.imputedHap,
                        tempDoseAlleleHap1,MM.imputedAlleleNumber);
            else
                DosageForVcfPartial.SaveDosageForVcfOutputSampleWiseChrX(NumVcfCreated-NumVcfWritten,
                        tHap.individualName[tHap.AllMaleTarget?hapId:hapId/2],
                        MM.imputedHap,
                        MM.imputedAlleleNumber);

            if(DosageForVcfPartial.TypedOnly)
            {
                DosageForVcfPartial.SaveIndexForGWASOnlyForVcfOutput(NumVcfCreated-NumVcfWritten,
                        tHap.AllMaleTarget?hapId:hapId/2);
            }

            NumVcfCreated++;
            vcfSampleIndex++;

            // Flush when the buffer is full or the last sample was stored.
            if(NumVcfCreated%maxVcfSample==0 || NumVcfCreated==(tHap.AllMaleTarget?MaxSample:MaxSample/2))
            {

                string PartialVcfFileName(outFile),tempFileIndex1(outFile);
                stringstream strs;
                strs<<(NovcfParts);
                PartialVcfFileName+=(".dose.vcf.part." + (string)(strs.str()) +(gzip ? ".gz" : ""));
                if(!tHap.AllMaleTarget)
                    printf("\n --->>> Saving samples %d-%d in VCF file : %s ...\n\n",
                           (NumVcfWritten)+1,(MaxSample/2<(NumVcfWritten+maxVcfSample)?MaxSample/2:(NumVcfWritten+maxVcfSample)),
                           PartialVcfFileName.c_str());
                else
                    printf("\n --->>> Saving samples %d-%d in VCF file : %s ...\n\n",
                           (NumVcfWritten)+1,(MaxSample<(NumVcfWritten+maxVcfSample)?MaxSample:(NumVcfWritten+maxVcfSample)),
                           PartialVcfFileName.c_str());

                //if(NovcfParts==2)
                //    abort();

                FlushPartialVcf(rHap,tHap,DosageForVcfPartial,PartialVcfFileName,NovcfParts);

                // More samples to come: start the next part and resize the
                // buffer for the samples that remain.
                if(NumVcfCreated<(tHap.AllMaleTarget?MaxSample:MaxSample/2))
                {
                    NovcfParts++;
                    NumVcfWritten+=maxVcfSample;

                    //int gg=maxVcfSample<(((tHap.AllMaleTarget?MaxSample:MaxSample/2))-NumVcfWritten)?
                    //2*maxVcfSample:2*(((tHap.AllMaleTarget?MaxSample:MaxSample/2))-NumVcfWritten);
                    //
                    //
                    //abort();

                    if(!tHap.AllMaleTarget)
                        DosageForVcfPartial.InitializePartialDosageForVcfOutput(maxVcfSample<(MaxSample/2-NumVcfWritten)?2*maxVcfSample:2*(MaxSample/2-NumVcfWritten),rHap.numMarkers,format);
                    else
                        DosageForVcfPartial.InitializePartialDosageForVcfOutputMaleSamples(maxVcfSample<(MaxSample-NumVcfWritten)?maxVcfSample:(MaxSample-NumVcfWritten),rHap.numMarkers,format);
                }
            }
        }
    }

    cout<<endl<<" Imputation Finished ... "<<endl;

    if (phased && !unphasedOutput)
    {
        ifclose(hapdose);
        ifclose(haps);

        cout<<endl<<" Haplotype Dosage information written to : "<< outFile + ".hapDose" + (gzip ? ".gz" : "")<<endl;
        cout<<endl<<" Haplotype Allele information written to : "<< outFile + ".hapLabel" + (gzip ? ".gz" : "")<<endl;
    }

    if(doseOutput)
    {
        ifclose(dosages);
        cout<<endl<<" Dosage information written to : "<< outFile + ".dose" + (gzip ? ".gz" : "")<<endl;
    }

    PrintInfoFile(rHap,tHap,stats);

    time_load = time(0) - time_prev;
    cout << "\n Time taken for imputation = " << time_load << " seconds."<<endl<<endl;

    // Combine the header written above with the partial VCF files.
    if(vcfOutput)
        MergeFinalVcfAllVariants(rHap,tHap,stats,NovcfParts);

}
// Writes one record as a FastQ entry and releases the record to the pool.
//   samRec      - record to write
//   filePtr     - destination file; replaced by a per-read-group file when
//                 mySplitRG is set
//   fileNameExt - file name extension identifying which output this is
//                 (compared against myFirstFileNameExt / mySecondFileNameExt)
//   readNameExt - suffix appended to the read name in the output
// Throws std::runtime_error when no destination file is available.
//
// NOTE(review): all working strings are function-local statics, so their
// storage is shared by every call to this function.
void Bam2FastQ::writeFastQ(SamRecord& samRec, IFILE filePtr,
                           const std::string& fileNameExt, const char* readNameExt)
{
    static int16_t flag;
    static std::string sequence;
    static String quality;
    static std::string rg;
    static std::string rgFastqExt;
    static std::string rgListStr;
    static std::string fileName;
    static std::string fq2;

    if(mySplitRG)
    {
        // Output files are looked up by read group plus extension and
        // created the first time a combination is seen.
        rg = samRec.getString("RG").c_str();
        rgFastqExt = rg + fileNameExt;

        OutFastqMap::iterator it;
        it = myOutFastqs.find(rgFastqExt);
        if(it == myOutFastqs.end())
        {
            // New file.
            fileName = myOutBase.c_str();
            if(rg != "")
            {
                fileName += '.';
            }
            else
            {
                // From here on "." stands for "no read group".
                rg = ".";
            }
            fileName += rgFastqExt;
            filePtr = ifopen(fileName.c_str(), "w", myCompression);
            myOutFastqs[rgFastqExt] = filePtr;

            // Every file except a second-end file gets a line in the
            // fastq list.
            if(fileNameExt != mySecondFileNameExt)
            {
                // first end.
                const char* sm = mySamHeader.getRGTagValue("SM", rg.c_str());
                if(strcmp(sm, "") == 0){sm = myOutBase.c_str();}

                rgListStr.clear();
                SamHeaderRG* rgPtr = mySamHeader.getRG(rg.c_str());
                if((rgPtr == NULL) || (!rgPtr->appendString(rgListStr)))
                {
                    // No RG info for this record.
                    rgListStr = ".\n";
                }
                // The FASTQ2 column stays "." unless this is the first-end
                // file, in which case it names the matching second-end file.
                fq2 = ".";
                if(fileNameExt == myFirstFileNameExt)
                {
                    fq2 = myOutBase.c_str();
                    if(rg != ".")
                    {
                        fq2 += '.';
                        fq2 += rg;
                    }
                    fq2 += mySecondFileNameExt;
                }
                ifprintf(myFqList, "%s\t%s\t%s\t%s",
                         sm, fileName.c_str(), fq2.c_str(),
                         rgListStr.c_str());
            }
        }
        else
        {
            filePtr = it->second;
        }
    }
    if(filePtr == NULL)
    {
        throw(std::runtime_error("Programming ERROR/EXITING: Bam2FastQ filePtr not set."));
        return;
    }

    flag = samRec.getFlag();
    const char* readName = samRec.getReadName();
    sequence = samRec.getSequence();
    if(myQField.IsEmpty())
    {
        // Read the quality from the quality field
        quality = samRec.getQuality();
    }
    else
    {
        // Read Quality from the specified tag
        const String* qTagPtr = samRec.getStringTag(myQField.c_str());
        if((qTagPtr != NULL) && (qTagPtr->Length() == (int)sequence.length()))
        {
            // Use the tag value for quality
            quality = qTagPtr->c_str();
        }
        else
        {
            // Tag was not found, so use the quality field.
            // Only the first such record is reported on stderr.
            ++myNumQualTagErrors;
            if(myNumQualTagErrors == 1)
            {
                std::cerr << "Bam2FastQ: " << myQField.c_str()
                          << " tag was not found/invalid, so using the quality field in records without the tag\n";
            }
            quality = samRec.getQuality();
        }
    }

    if(SamFlag::isReverse(flag) && myReverseComp)
    {
        // It is reverse, so reverse complement the sequence
        BaseUtilities::reverseComplement(sequence);
        // Reverse the quality.
        quality.Reverse();
    }
    else
    {
        // Ensure it is all capitalized.
        int seqLen = sequence.size();
        for (int i = 0; i < seqLen; i++)
        {
            sequence[i] = (char)toupper(sequence[i]);
        }
    }

    // With myRNPlus the read name is repeated on the '+' line.
    if(myRNPlus)
    {

        ifprintf(filePtr, "@%s%s\n%s\n+%s%s\n%s\n", readName, readNameExt,
                 sequence.c_str(), readName, readNameExt, quality.c_str());
    }
    else
    {
        ifprintf(filePtr, "@%s%s\n%s\n+\n%s\n", readName, readNameExt,
                 sequence.c_str(), quality.c_str());
    }
    // Release the record.
    myPool.releaseRecord(&samRec);
}
int Bam2FastQ::execute(int argc, char **argv) { // Extract command line arguments. String inFile = ""; bool readName = false; String refFile = ""; String firstOut = ""; String secondOut = ""; String unpairedOut = ""; bool interleave = false; bool noeof = false; bool gzip = false; bool params = false; myOutBase = ""; myNumMateFailures = 0; myNumPairs = 0; myNumUnpaired = 0; mySplitRG = false; myQField = ""; myNumQualTagErrors = 0; myReverseComp = true; myRNPlus = false; myFirstRNExt = DEFAULT_FIRST_EXT; mySecondRNExt = DEFAULT_SECOND_EXT; myCompression = InputFile::DEFAULT; ParameterList inputParameters; BEGIN_LONG_PARAMETERS(longParameterList) LONG_PARAMETER_GROUP("Required Parameters") LONG_STRINGPARAMETER("in", &inFile) LONG_PARAMETER_GROUP("Optional Parameters") LONG_PARAMETER("readName", &readName) LONG_PARAMETER("splitRG", &mySplitRG) LONG_STRINGPARAMETER("qualField", &myQField) LONG_PARAMETER("merge", &interleave) LONG_STRINGPARAMETER("refFile", &refFile) LONG_STRINGPARAMETER("firstRNExt", &myFirstRNExt) LONG_STRINGPARAMETER("secondRNExt", &mySecondRNExt) LONG_PARAMETER("rnPlus", &myRNPlus) LONG_PARAMETER("noReverseComp", &myReverseComp) LONG_PARAMETER("gzip", &gzip) LONG_PARAMETER("noeof", &noeof) LONG_PARAMETER("params", ¶ms) LONG_PARAMETER_GROUP("Optional OutputFile Names") LONG_STRINGPARAMETER("outBase", &myOutBase) LONG_STRINGPARAMETER("firstOut", &firstOut) LONG_STRINGPARAMETER("secondOut", &secondOut) LONG_STRINGPARAMETER("unpairedOut", &unpairedOut) LONG_PHONEHOME(VERSION) END_LONG_PARAMETERS(); inputParameters.Add(new LongParameters ("Input Parameters", longParameterList)); // parameters start at index 2 rather than 1. inputParameters.Read(argc, argv, 2); // If no eof block is required for a bgzf file, set the bgzf file type to // not look for it. if(noeof) { // Set that the eof block is not required. 
BgzfFileType::setRequireEofBlock(false); } if(gzip) { myCompression = InputFile::GZIP; } // Check to see if the in file was specified, if not, report an error. if(inFile == "") { usage(); inputParameters.Status(); // In file was not specified but it is mandatory. std::cerr << "--in is a mandatory argument, " << "but was not specified" << std::endl; return(-1); } // Cannot specify both interleaved & secondOut since secondOut would be N/A. if(interleave && !secondOut.IsEmpty()) { usage(); inputParameters.Status(); std::cerr << "ERROR: Cannot specify --merge & --secondOut.\n"; return(-1); } // Cannot specify both interleaved & secondOut since secondOut would be N/A. if(interleave && !secondOut.IsEmpty()) { usage(); inputParameters.Status(); std::cerr << "ERROR: Cannot specify --merge & --secondOut.\n"; return(-1); } // Cannot specify both splitRG & firstOut/secondOut/unpairedOut // since it needs a different file for each RG. if(mySplitRG && (!firstOut.IsEmpty() || !secondOut.IsEmpty() || !unpairedOut.IsEmpty())) { usage(); inputParameters.Status(); std::cerr << "ERROR: Cannot specify --splitRG & --firstOut/--secondOut/--unpairedOut.\n"; std::cerr << "Use --outBase instead.\n"; return(-1); } // Cannot specify splitRG & output to stdout. if(mySplitRG && (myOutBase[0] == '-')) { usage(); inputParameters.Status(); std::cerr << "ERROR: Cannot specify --splitRG & write to stdout.\n"; return(-1); } // Check to see if the out file was specified, if not, generate it from // the input filename. if(myOutBase == "") { // Just remove the extension from the input filename. int extStart = inFile.FastFindLastChar('.'); if(extStart <= 0) { myOutBase = inFile; } else { myOutBase = inFile.Left(extStart); } } if(mySplitRG) { std::string fqList = myOutBase.c_str(); fqList += ".list"; myFqList = ifopen(fqList.c_str(), "w"); ifprintf(myFqList, "MERGE_NAME\tFASTQ1\tFASTQ2\tRG\n"); } // Check to see if the first/second/single-ended were specified and // if not, set them. 
myFirstFileNameExt = "_1.fastq"; mySecondFileNameExt = "_2.fastq"; myUnpairedFileNameExt = ".fastq"; if(interleave) { myFirstFileNameExt = "_interleaved.fastq"; myFirstFileNameExt = "_interleaved.fastq"; } getFileName(firstOut, myFirstFileNameExt); getFileName(secondOut, mySecondFileNameExt); getFileName(unpairedOut, myUnpairedFileNameExt); if(params) { inputParameters.Status(); } // Open the files for reading/writing. // Open prior to opening the output files, // so if there is an error, the outputs don't get created. SamFile samIn; samIn.OpenForRead(inFile, &mySamHeader); // Skip non-primary reads. samIn.SetReadFlags(0, 0x0100); // Open the output files if not splitting RG if(!mySplitRG) { myUnpairedFile = ifopen(unpairedOut, "w", myCompression); // Only open the first file if it is different than an already opened file. if(firstOut != unpairedOut) { myFirstFile = ifopen(firstOut, "w", myCompression); } else { myFirstFile = myUnpairedFile; } // If it is interleaved or the 2nd file is not a new name, set it appropriately. if(interleave || secondOut == firstOut) { mySecondFile = myFirstFile; } else if(secondOut == unpairedOut) { mySecondFile = myUnpairedFile; } else { mySecondFile = ifopen(secondOut, "w", myCompression); } if(myUnpairedFile == NULL) { std::cerr << "Failed to open " << unpairedOut << " so can't convert bam2FastQ.\n"; return(-1); } if(myFirstFile == NULL) { std::cerr << "Failed to open " << firstOut << " so can't convert bam2FastQ.\n"; return(-1); } if(mySecondFile == NULL) { std::cerr << "Failed to open " << secondOut << " so can't convert bam2FastQ.\n"; return(-1); } } if((readName) || (strcmp(mySamHeader.getSortOrder(), "queryname") == 0)) { readName = true; } else { // defaulting to coordinate sorted. samIn.setSortedValidation(SamFile::COORDINATE); } // Setup the '=' translation if the reference was specified. 
if(!refFile.IsEmpty()) { GenomeSequence* refPtr = new GenomeSequence(refFile); samIn.SetReadSequenceTranslation(SamRecord::BASES); samIn.SetReference(refPtr); } SamRecord* recordPtr; int16_t samFlag; SamStatus::Status returnStatus = SamStatus::SUCCESS; while(returnStatus == SamStatus::SUCCESS) { recordPtr = myPool.getRecord(); if(recordPtr == NULL) { // Failed to allocate a new record. throw(std::runtime_error("Failed to allocate a new SAM/BAM record")); } if(!samIn.ReadRecord(mySamHeader, *recordPtr)) { // Failed to read a record. returnStatus = samIn.GetStatus(); continue; } // Have a record. Check to see if it is a pair or unpaired read. samFlag = recordPtr->getFlag(); if(SamFlag::isPaired(samFlag)) { if(readName) { handlePairedRN(*recordPtr); } else { handlePairedCoord(*recordPtr); } } else { ++myNumUnpaired; writeFastQ(*recordPtr, myUnpairedFile, myUnpairedFileNameExt); } } // Flush All cleanUpMateMap(0, true); if(returnStatus == SamStatus::NO_MORE_RECS) { returnStatus = SamStatus::SUCCESS; } samIn.Close(); closeFiles(); // Output the results std::cerr << "\nFound " << myNumPairs << " read pairs.\n"; std::cerr << "Found " << myNumUnpaired << " unpaired reads.\n"; if(myNumMateFailures != 0) { std::cerr << "Failed to find mates for " << myNumMateFailures << " reads, so they were written as unpaired\n" << " (not included in either of the above counts).\n"; } if(myNumQualTagErrors != 0) { std::cerr << myNumQualTagErrors << " records did not have tag " << myQField.c_str() << " or it was invalid, so the quality field was used for those records.\n"; } return(returnStatus); }
// Open a sam/bam file for reading with the specified filename. bool SamFile::OpenForRead(const char * filename, SamFileHeader* header) { // Reset for any previously operated on files. resetFile(); int lastchar = 0; while (filename[lastchar] != 0) lastchar++; // If at least one character, check for '-'. if((lastchar >= 1) && (filename[0] == '-')) { // Read from stdin - determine type of file to read. // Determine if compressed bam. if(strcmp(filename, "-.bam") == 0) { // Compressed bam - open as bgzf. // -.bam is the filename, read compressed bam from stdin filename = "-"; myFilePtr = new InputFile; // support recover mode - this switches in a reader // capable of recovering from bad BGZF compression blocks. myFilePtr->setAttemptRecovery(myAttemptRecovery); myFilePtr->openFile(filename, "rb", InputFile::BGZF); myInterfacePtr = new BamInterface; // Read the magic string. char magic[4]; ifread(myFilePtr, magic, 4); } else if(strcmp(filename, "-.ubam") == 0) { // uncompressed BAM File. // -.ubam is the filename, read uncompressed bam from stdin. // uncompressed BAM is still compressed with BGZF, but using // compression level 0, so still open as BGZF since it has a // BGZF header. filename = "-"; // Uncompressed, so do not require the eof block. #ifdef __ZLIB_AVAILABLE__ BgzfFileType::setRequireEofBlock(false); #endif myFilePtr = ifopen(filename, "rb", InputFile::BGZF); myInterfacePtr = new BamInterface; // Read the magic string. char magic[4]; ifread(myFilePtr, magic, 4); } else if((strcmp(filename, "-") == 0) || (strcmp(filename, "-.sam") == 0)) { // SAM File. // read sam from stdin filename = "-"; myFilePtr = ifopen(filename, "rb", InputFile::UNCOMPRESSED); myInterfacePtr = new SamInterface; } else { std::string errorMessage = "Invalid SAM/BAM filename, "; errorMessage += filename; errorMessage += ". 
From stdin, can only be '-', '-.sam', '-.bam', or '-.ubam'"; myStatus.setStatus(SamStatus::FAIL_IO, errorMessage.c_str()); delete myFilePtr; myFilePtr = NULL; return(false); } } else { // Not from stdin. Read the file to determine the type. myFilePtr = new InputFile; // support recovery mode - this conditionally enables a reader // capable of recovering from bad BGZF compression blocks. myFilePtr->setAttemptRecovery(myAttemptRecovery); bool rc = myFilePtr->openFile(filename, "rb", InputFile::DEFAULT); if (rc == false) { std::string errorMessage = "Failed to Open "; errorMessage += filename; errorMessage += " for reading"; myStatus.setStatus(SamStatus::FAIL_IO, errorMessage.c_str()); delete myFilePtr; myFilePtr = NULL; return(false); } char magic[4]; ifread(myFilePtr, magic, 4); if (magic[0] == 'B' && magic[1] == 'A' && magic[2] == 'M' && magic[3] == 1) { myInterfacePtr = new BamInterface; // Set that it is a bam file open for reading. This is needed to // determine if an index file can be used. myIsBamOpenForRead = true; } else { // Not a bam, so rewind to the beginning of the file so it // can be read. ifrewind(myFilePtr); myInterfacePtr = new SamInterface; } } // File is open for reading. myIsOpenForRead = true; // Read the header if one was passed in. if(header != NULL) { return(ReadHeader(*header)); } // Successfully opened the file. myStatus = SamStatus::SUCCESS; return(true); }
// Open a sam/bam file for writing with the specified filename. bool SamFile::OpenForWrite(const char * filename, SamFileHeader* header) { // Reset for any previously operated on files. resetFile(); int lastchar = 0; while (filename[lastchar] != 0) lastchar++; if (lastchar >= 4 && filename[lastchar - 4] == 'u' && filename[lastchar - 3] == 'b' && filename[lastchar - 2] == 'a' && filename[lastchar - 1] == 'm') { // BAM File. // if -.ubam is the filename, write uncompressed bam to stdout if((lastchar == 6) && (filename[0] == '-') && (filename[1] == '.')) { filename = "-"; } myFilePtr = ifopen(filename, "wb0", InputFile::BGZF); myInterfacePtr = new BamInterface; } else if (lastchar >= 3 && filename[lastchar - 3] == 'b' && filename[lastchar - 2] == 'a' && filename[lastchar - 1] == 'm') { // BAM File. // if -.bam is the filename, write compressed bam to stdout if((lastchar == 5) && (filename[0] == '-') && (filename[1] == '.')) { filename = "-"; } myFilePtr = ifopen(filename, "wb", InputFile::BGZF); myInterfacePtr = new BamInterface; } else { // SAM File // if - (followed by anything is the filename, // write uncompressed sam to stdout if((lastchar >= 1) && (filename[0] == '-')) { filename = "-"; } myFilePtr = ifopen(filename, "wb", InputFile::UNCOMPRESSED); myInterfacePtr = new SamInterface; } if (myFilePtr == NULL) { std::string errorMessage = "Failed to Open "; errorMessage += filename; errorMessage += " for writing"; myStatus.setStatus(SamStatus::FAIL_IO, errorMessage.c_str()); return(false); } myIsOpenForWrite = true; // Write the header if one was passed in. if(header != NULL) { return(WriteHeader(*header)); } // Successfully opened the file. myStatus = SamStatus::SUCCESS; return(true); }