Пример #1
0
void Bam2FastQ::closeFiles()
{
    // NULL out any duplicate file pointers
    // so files are only closed once.
    if(myFirstFile == myUnpairedFile)
    {
        myFirstFile = NULL;
    }
    if(mySecondFile == myUnpairedFile)
    {
        mySecondFile = NULL;
    }
    if(mySecondFile == myFirstFile)
    {
        mySecondFile = NULL;
    }

    if(myUnpairedFile != NULL)
    {
        ifclose(myUnpairedFile);
        myUnpairedFile = NULL;
    }
    if(myFirstFile != NULL)
    {
        ifclose(myFirstFile);
        myFirstFile = NULL;
    }
    if(mySecondFile != NULL)
    {
        ifclose(mySecondFile);
        mySecondFile = NULL;
    }
}
Пример #2
0
void Bam2FastQ::closeFiles()
{
    // NULL out any duplicate file pointers
    // so files are only closed once.
    if(myFirstFile == myUnpairedFile)
    {
        myFirstFile = NULL;
    }
    if(mySecondFile == myUnpairedFile)
    {
        mySecondFile = NULL;
    }
    if(mySecondFile == myFirstFile)
    {
        mySecondFile = NULL;
    }

    if(myUnpairedFile != NULL)
    {
        ifclose(myUnpairedFile);
        myUnpairedFile = NULL;
    }
    if(myFirstFile != NULL)
    {
        ifclose(myFirstFile);
        myFirstFile = NULL;
    }
    if(mySecondFile != NULL)
    {
        ifclose(mySecondFile);
        mySecondFile = NULL;
    }

    if(myFqList != NULL)
    {
        ifclose(myFqList);
        myFqList = NULL;
    }

    // Loop through the fastq map and close those files.
    for (OutFastqMap::iterator it=myOutFastqs.begin(); 
         it!=myOutFastqs.end(); ++it)
    {
        ifclose(it->second);
        it->second = NULL;
    }
    myOutFastqs.clear();
}
Пример #3
0
// Close a FastQFile.
FastQStatus::Status FastQFile::closeFile()
{
   int closeStatus = 0; // Success.

   // If a file has been opened, close it.
   if(myFile != NULL)
   {
      // Close the file.
      closeStatus = ifclose(myFile);
      myFile = NULL;
   }
   if(closeStatus == 0)
   {
      // Success - either there wasn't a file to close or it was closed
      // successfully.
      return(FastQStatus::FASTQ_SUCCESS);
   }
   else
   {
      std::string errorMessage = "Failed to close file: ";
      errorMessage += myFileName.c_str();
      logMessage(errorMessage.c_str());
      return(FastQStatus::FASTQ_CLOSE_ERROR);
   }
}
Пример #4
0
void testReadLine()
{
    IFILE filePtr = ifopen("testFiles/testFile.txt", "rb");
    assert(filePtr != NULL);
    
    String line = "";
    line.ReadLine(filePtr);

    assert(line == "  Hello, I am a testFile.  ");

    line.Trim();
    assert(line == "Hello, I am a testFile.");


    // Does not compile in current version, but compiles in old verison.
    // This can be added back in to ensure that it will catch the difference
    // in return value for ReadLine (now: int; used to be: string&)
    //    testMethod(line.ReadLine(filePtr));
    line.ReadLine(filePtr);
    assert(temp1 == 0);
    testMethod(line);
    assert(temp1 == 1);

    //    line.ReadLine(filePtr).Trim();
    line.ReadLine(filePtr);
    line.Trim();

    assert(line == "ThirdLine.");

    ifclose(filePtr);
}
Пример #5
0
// Reset variables for each file.
void GlfFile::resetFile()
{
    // Close the file.
    if (myFilePtr != NULL)
    {
        // If we already have an open file, close it.
        // First check to see if an end record needs to be written, which
        // is the case if the state is RECORD.
        if(myNextSection == RECORD)
        {
            if(!writeRecord(myEndMarker))
            {
                // Failed to write the end marker record.
                myStatus.setStatus(GlfStatus::FAIL_IO,
                                   "Failed to write end of chromosome/section marker.");
                throw(GlfException(myStatus));
            }
        }
        ifclose(myFilePtr);
        myFilePtr = NULL;
    }

    myIsOpenForRead = false;
    myIsOpenForWrite = false;
    myRecordCount = 0;
    myStatus = GlfStatus::SUCCESS;
    myNextSection = HEADER;
}
Пример #6
0
void StringHash::ReadLinesFromFile(const char * filename)
{
    IFILE f = ifopen(filename, "rb");
    if (f == NULL) return;
    ReadLinesFromFile(f);
    ifclose(f);
}
Пример #7
0
BedFile::~BedFile() {
  if ( iBimFile != NULL ) {
    ifclose(iBimFile);
  }
  iBimFile = NULL;

  if ( iFamFile != NULL ) {
    ifclose(iFamFile);
  }
  iFamFile = NULL;
  if ( iFile != NULL ) {
    ifclose(iFile);
  }
  if ( pBedBuffer != NULL ) {
    delete[] pBedBuffer;
  }
}
Пример #8
0
void Imputation::PrintInfoFile(HaplotypeSet &rHap,HaplotypeSet &tHap,  ImputationStatistics &stats)

{
    cout<<endl<<" Writing summary (.info) files ... "<<endl;
    IFILE info = ifopen(outFile + ".info", "wb");
    ifprintf(info, "SNP\tREF(0)\tALT(1)\tALT_Frq\tMAF\tAvgCall\tRsq\tGenotyped\tLooRsq\tEmpR\tEmpRsq\tDose0\tDose1\n");


    int i=0;
    for (int index =0; index < rHap.RefTypedTotalCount; index++)
    {

        if(rHap.RefTypedIndex[index]==-1)
        {

            if(i>=rHap.PrintStartIndex && i <= rHap.PrintEndIndex)
            {
                ifprintf(info, "%s\t%s\t%s\t%.5f\t%.5f\t%.5f\t%.5f\t",
                RsId? rHap.VariantList[i].rsid.c_str(): rHap.VariantList[i].name.c_str(),
                rHap.VariantList[i].refAlleleString.c_str(),
                rHap.VariantList[i].altAlleleString.c_str(),
                stats.AlleleFrequency(i),
                stats.AlleleFrequency(i) > 0.5 ? 1.0 - stats.AlleleFrequency(i) : stats.AlleleFrequency(i),
                stats.AverageCallScore(i),
                stats.Rsq(i));

                if (!tHap.missing[i])
                {
                    ifprintf(info, "Genotyped\t%.3f\t%.3f\t%.5f\t%.5f\t%.5f\n",
                      stats.LooRsq(i), stats.EmpiricalR(i), stats.EmpiricalRsq(i),
                      stats.LooMajorDose(i), stats.LooMinorDose(i));
                }
                else
                 ifprintf(info, "Imputed\t-\t-\t-\t-\t-\n");
            }
            i++;
        }
        else
        {
            variant ThisTypedVariant =tHap.TypedOnlyVariantList[rHap.RefTypedIndex[index]];

            ifprintf(info, "%s\t%s\t%s\t%.5f\t%.5f\t-\t-\tTyped_Only\t-\t-\t-\t-\t-\n",
            RsId? ThisTypedVariant.rsid.c_str(): ThisTypedVariant.name.c_str(),
            ThisTypedVariant.refAlleleString.c_str(),
            ThisTypedVariant.altAlleleString.c_str(),
            tHap.AlleleFreq[rHap.RefTypedIndex[index]],
            tHap.AlleleFreq[rHap.RefTypedIndex[index]] > 0.5 ?
                        1.0 - tHap.AlleleFreq[rHap.RefTypedIndex[index]] : tHap.AlleleFreq[rHap.RefTypedIndex[index]]);

        }
    }
    ifclose(info);


    cout<<endl<<" Summary information written to          : "<<outFile<<".info"<<endl;
   }
Пример #9
0
void Imputation::FlushPartialVcf(HaplotypeSet &rHap,HaplotypeSet &tHap,HaplotypeSet &PartialDosage, string &filename,int &Index)
{

    string tempFileIndex(outFile),tempFileIndex1(outFile);
    IFILE vcfdosepartial = ifopen(filename.c_str(), "wb", InputFile::BGZF);

    for(int hapId=0;hapId<(int)PartialDosage.individualName.size();hapId++)
    {
        ifprintf(vcfdosepartial,"\t%s",PartialDosage.individualName[hapId].c_str());
    }
    ifprintf(vcfdosepartial,"\n");

    int i=0;
    for (int index =0; index < rHap.RefTypedTotalCount; index++)
    {

        if(rHap.RefTypedIndex[index]==-1)
        {

            if(i>=rHap.PrintStartIndex && i <= rHap.PrintEndIndex)
            {
                bool majorIsReference=false;
                if(!rHap.major[i])
                    majorIsReference=true;

                if(!tHap.AllMaleTarget)
                    PartialDosage.PrintDosageForVcfOutputForID(vcfdosepartial,i, majorIsReference,rHap.VariantList[i].refAllele);
                else
                    PartialDosage.PrintDosageForVcfOutputForIDMaleSamples(vcfdosepartial,i, majorIsReference,rHap.VariantList[i].refAllele);

                ifprintf(vcfdosepartial,"\n");

            }
            i++;

        }
        else
        {


            if(!tHap.AllMaleTarget)
                PartialDosage.PrintDosageGWASOnlyForVcfOutputForID
                (tHap,vcfdosepartial,rHap.RefTypedIndex[index]);
            else
                PartialDosage.PrintDosageGWASOnlyForVcfOutputForIDMaleSamples
                (tHap,vcfdosepartial,rHap.RefTypedIndex[index]);
            ifprintf(vcfdosepartial,"\n");
        }

    }

    ifclose(vcfdosepartial);



}
Пример #10
0
VcfFile::~VcfFile()
{
    // Close the file.
    if (myFilePtr != NULL)
    {
        // If we already have an open file, close it.
        ifclose(myFilePtr);
        myFilePtr = NULL;
    }
}
Пример #11
0
void GCContent::LoadRegions(String & regionsFile, GenomeSequence &genome, bool invertRegion)
{
    if(regionsFile.Length()==0) return;
    if(genome.sequenceLength()==0) error("No reference genome loaded!\n");

    IFILE fhRegions;
    fhRegions = ifopen(regionsFile.c_str(),"r");
    if(fhRegions==NULL)
        error("Open regions file %s failed!\n", regionsFile.c_str());

    regionIndicator.resize(genome.sequenceLength());

    StringArray tokens;
    String buffer;
    int len;

    fprintf(stderr, "Loading region list...");

    while (!ifeof(fhRegions)){
        buffer.ReadLine(fhRegions);
        if (buffer.IsEmpty() || buffer[0] == '#') continue;

        tokens.AddTokens(buffer, WHITESPACE);
        if(tokens.Length() < 3) continue;

        genomeIndex_t startGenomeIndex = 0;
        int chromosomeIndex = tokens[1].AsInteger();

        // use chromosome name (token[0]) and position (token[1]) to query genome index.
        startGenomeIndex = genome.getGenomePosition(tokens[0].c_str(), chromosomeIndex);

        if(startGenomeIndex >= regionIndicator.size() ) {
            //fprintf(stderr, "WARNING: region list section %s position %u is not found in the reference and skipped...\n", tokens[0].c_str(), chromosomeIndex);
            continue;
        }

        len = tokens[2].AsInteger() - tokens[1].AsInteger() + 1;
        for(uint32_t i=startGenomeIndex; i<startGenomeIndex+len; i++)
            regionIndicator[i] = true;

        tokens.Clear();
        buffer.Clear();
    }

    if (invertRegion) {
        fprintf(stderr, " invert region...");
        for (uint32_t i = 0; i < regionIndicator.size(); i++) {
            regionIndicator[i] = !regionIndicator[i];
        }
    }

    ifclose(fhRegions);
    fprintf(stderr, "DONE!\n");
}
Пример #12
0
void MarkovParameters::WriteErrorRates(StringArray & markerNames, const char * filename)
   {
   IFILE output = ifopen(filename, "wb");

   if (output == NULL) return;

   ifprintf(output, "MarkerName\tErrorRate\n");
   for (int i = 0; i < markers; i++)
      ifprintf(output, "%s\t%.5g\n", (const char *) markerNames[i], E[i]);

   ifclose(output);
   }
Пример #13
0
void glfHandler::Rewind()
{
    if (isOpen())
    {
        ifrewind(handle);

        if (!ReadHeader())
            ifclose(handle);

        endOfSection = true;
    }
}
Пример #14
0
bool StringAlias::ReadFromFile(const char * filename)
{
    IFILE input = ifopen(filename, "rt");

    if (input == NULL)
        return false;

    ReadFromFile(input);

    ifclose(input);

    return true;
}
Пример #15
0
void MarkovParameters::WriteCrossoverRates(StringArray & markerNames, const char * filename)
   {
   IFILE output = ifopen(filename, "wb");

   if (output == NULL) return;

   ifprintf(output, "Interval\tSwitchRate\n");
   for (int i = 0; i < markers - 1; i++)
      ifprintf(output, "%s-%s\t%.5g\n",
               (const char *) markerNames[i],
               (const char *) markerNames[i+1], R[i]);

   ifclose(output);
   }
Пример #16
0
void VcfFile::reset()
{
    // Reset the child class.
    resetFile();

    // Close the file.
    if (myFilePtr != NULL)
    {
        // If we already have an open file, close it.
        ifclose(myFilePtr);
        myFilePtr = NULL;
    }
    myNumRecords = 0;
}
Пример #17
0
EXTERN_C void sread_atf(HDRTYPE* hdr) {

	if (VERBOSE_LEVEL>6) fprintf(stdout,"SREAD ATF [%i,%i]\n",(unsigned)hdr->NRec, (unsigned)hdr->SPR);

	if (hdr->AS.rawdata != NULL) return;
	//if (hdr->NRec * hdr->SPR > 0)
		hdr->AS.rawdata = malloc(hdr->NRec * hdr->SPR * hdr->AS.bpb);

	ifseek(hdr, hdr->HeadLen, SEEK_SET);

	size_t ll;
	char *line = NULL;

	if (VERBOSE_LEVEL>6) fprintf(stdout,"SREAD ATF\n");

	size_t ln = 0;
	while (~ifeof(hdr)) {
		if (line!=NULL) { free(line); line=NULL; }  // allocate line buffer as needed
		ssize_t nc = getline(&line, &ll, hdr->FILE.FID);
		if (nc < 0) break;

		if (VERBOSE_LEVEL>8) fprintf(stdout,"SREAD ATF 2 %i\t<%s>\n",(unsigned)ln,line );

		if ((hdr->NRec * hdr->SPR) <= (ln+1) ) {
			hdr->NRec = max(1024, ln*2);
			hdr->AS.rawdata = realloc(hdr->AS.rawdata, hdr->NRec * hdr->SPR * hdr->AS.bpb);
		}

		if (VERBOSE_LEVEL>8) fprintf(stdout,"SREAD ATF 4 %i\t<%s>\n",(unsigned)ln,line );

		char *str = strtok(line,"\t");
		typeof(hdr->NS) k;
		for (k = 0; k < hdr->NS; k++) {
			*(double*)(hdr->AS.rawdata + ln*hdr->AS.bpb + hdr->CHANNEL[k].bi) = strtod(str, &str);
			// extract next value
			//str = strtok(NULL,"\t");
		}
		if (VERBOSE_LEVEL>8) fprintf(stdout,"SREAD ATF 6 %i\t<%s>\n",(unsigned)ln,line );

		ln++;
	}
	free(line);
	ifclose(hdr);

	hdr->NRec = ln;
	hdr->AS.first  = 0;
	hdr->AS.length = hdr->NRec;

}
Пример #18
0
// Reset variables for each file.
void SamFile::resetFile()
{
    // Close the file.
    if (myFilePtr != NULL)
    {
        // If we already have an open file, close it.
        ifclose(myFilePtr);
        myFilePtr = NULL;
    }
    if(myInterfacePtr != NULL)
    {
        delete myInterfacePtr;
        myInterfacePtr = NULL;
    }

    myIsOpenForRead = false;
    myIsOpenForWrite = false;
    myHasHeader = false;
    mySortedType = UNSORTED;
    myPrevReadName.Clear();
    myPrevCoord = -1;
    myPrevRefID = 0;
    myRecordCount = 0;
    myStatus = SamStatus::SUCCESS;

    // Reset indexed bam values.
    myIsBamOpenForRead = false;
    myRefID = BamIndex::REF_ID_ALL;
    myStartPos = -1;
    myEndPos = -1;
    myNewSection = false;
    myOverlapSection = true;
    myCurrentChunkEnd = 0;
    myChunksToRead.clear();
    if(myBamIndex != NULL)
    {
        delete myBamIndex;
        myBamIndex = NULL;
    }

    // If statistics are being generated, reset them.
    if(myStatistics != NULL)
    {
        myStatistics->reset();
    }

    myRefName.clear();
}
Пример #19
0
void put_cde_slots(IFILE *file, int ifaxq)					/*;put_cde_slots*/
{
	long	dpos;

	dpos = iftell(file); /* get current position */
	putnum(file, "n-code_slots", tup_size(CODE_SLOTS));
	putnum(file, "n-data-slots", tup_size(DATA_SLOTS));
	putnum(file, "n-exception-slots", tup_size(EXCEPTION_SLOTS));
	put_slot(file, CODE_SLOTS);
	put_slot(file, DATA_SLOTS);
	put_slot(file, EXCEPTION_SLOTS);
	/* now replace word at start of  file with long giving offset to 
	 *start of information just written.
	 */
	file->fh_slots = dpos;
	ifclose(file);
}
Пример #20
0
void VcfFile::reset() {
  if ( iFile != NULL ) 
    ifclose(iFile);
  iFile = NULL;
  for(int i=0; i < (int) vpVcfInds.size(); ++i) {
    delete vpVcfInds[i];
  }
  vpVcfInds.clear();
  for(int i=0; i < (int) vpVcfMarkers.size(); ++i) {
    delete vpVcfMarkers[i];
  }
  vpVcfMarkers.clear();
  asMetaKeys.Clear();
  asMetaValues.Clear();

  nNumLines = 0;
}
Пример #21
0
bool glfHandler::Open(const String & filename)
{
    isStub = false;
    handle = ifopen(filename, "rb");

    if (handle == NULL)
    {
        isStub = true;
        return false;
    }

    if (!ReadHeader())
        ifclose(handle);

    endOfSection = true;

    return handle != NULL;
}
Пример #22
0
bool FilterStat::writeMergedVcf(const char* outFile) {
  IFILE oFile = ifopen(outFile,"wb");
  if ( oFile == NULL ) {
    Logger::gLogger->error("Cannot open output file %s",outFile);
  }

  VcfFile vcf;
  vcf.setSiteOnly(false);
  vcf.setParseValues(true);
  vcf.openForRead(sAnchorVcf.c_str(),1);  

  vcf.printVCFHeader(oFile);

  VcfMarker* pMarker;
  String STC, STR;
  for( int i=0; vcf.iterateMarker(); ++i ) {
     pMarker = vcf.getLastMarker();

     int c[FILTER_STAT_COUNTS];
     for(int j=0; j < FILTER_STAT_COUNTS; ++j) {
       c[j] = vCounts[FILTER_STAT_COUNTS*i+j];
     }
     STC.printf("%d,%d,%d,%d,%d,%d",c[0],c[1],c[2],c[3],c[4],c[5]);

     if ( ( c[0]+c[1] > 4 ) && ( c[1]+c[3] > 4 ) && ( c[0]+c[2] > 4 ) && ( c[1]+c[3] > 4 ) ) { 
       STR.printf("%.2lf",((c[0]+.5)*(c[3]+.5)-(c[1]+.5)*(c[2]+.5))/sqrt((c[0]+c[1]+1.)*(c[2]+c[3]+1.)*(c[0]+c[2]+1.)*(c[1]+c[3]+1.)));
     }
     else {
       STR = "0";
     }
     pMarker->asInfoKeys.Add("STC");
     pMarker->asInfoKeys.Add("STR");
     pMarker->asInfoValues.Add(STC);
     pMarker->asInfoValues.Add(STR);

     pMarker->printVCFMarker(oFile,false);
  }
  ifclose(oFile);
  return true;
}
Пример #23
0
// Read & parse the specified index file.
SamStatus::Status BamIndex::readIndex(const char* filename)
{
    // Reset the index from anything that may previously be set.
    resetIndex();

    IFILE indexFile = ifopen(filename, "rb");

    // Failed to open the index file.
    if(indexFile == NULL)
    {
        return(SamStatus::FAIL_IO);
    }

    // generate the bam index structure.

    // Read the magic string.
    char magic[4];
    if(ifread(indexFile, magic, 4) != 4)
    {
        // Failed to read the magic
        ifclose(indexFile);
        return(SamStatus::FAIL_IO);
    }

    // If this is not an index file, set num references to 0. 
    if (magic[0] != 'B' || magic[1] != 'A' || magic[2] != 'I' || magic[3] != 1)
    {
        // Not a BAM Index file.
        ifclose(indexFile);
        return(SamStatus::FAIL_PARSE);
    }

    // It is a bam index file.
    // Read the number of reference sequences.
    if(ifread(indexFile, &n_ref, 4) != 4)
    {
        // Failed to read.
        ifclose(indexFile);
        return(SamStatus::FAIL_IO);
    }

    // Size the references.
    myRefs.resize(n_ref);

    for(int refIndex = 0; refIndex < n_ref; refIndex++)
    {
        // Read each reference.
        Reference* ref = &(myRefs[refIndex]);
        
        // Read the number of bins.
        if(ifread(indexFile, &(ref->n_bin), 4) != 4)
        {
            // Failed to read the number of bins.
            // Return failure.
            ifclose(indexFile);
            return(SamStatus::FAIL_PARSE);
        }

        // If there are no bins, then there are no
        // mapped/unmapped reads.
        if(ref->n_bin == 0)
        {
            ref->n_mapped = 0;
            ref->n_unmapped = 0;
        }

        // Resize the bins so they can be indexed by bin number.
        ref->bins.resize(ref->n_bin + 1);
        
        // Read each bin.
        for(int binIndex = 0; binIndex < ref->n_bin; binIndex++)
        {
            uint32_t binNumber;

            // Read in the bin number.
            if(ifread(indexFile, &(binNumber), 4) != 4)
            {
                // Failed to read the bin number.
                // Return failure.
                ifclose(indexFile);
                return(SamStatus::FAIL_IO);
            }

            // Add the bin to the reference and get the
            // pointer back so the values can be set in it.
            Bin* binPtr = &(ref->bins[binIndex]);
            binPtr->bin = binNumber;
         
            // Read in the number of chunks.
            if(ifread(indexFile, &(binPtr->n_chunk), 4) != 4)
            {
                // Failed to read number of chunks.
                // Return failure.
                ifclose(indexFile);
                return(SamStatus::FAIL_IO);
            }

            // Read in the chunks.
            // Allocate space for the chunks.
            uint32_t sizeOfChunkList = binPtr->n_chunk * sizeof(Chunk);
            binPtr->chunks = (Chunk*)malloc(sizeOfChunkList);
            if(ifread(indexFile, binPtr->chunks, sizeOfChunkList) != sizeOfChunkList)
            {
                // Failed to read the chunks.
                // Return failure.
                ifclose(indexFile);
                return(SamStatus::FAIL_IO);
            }

            // Determine the min/max for this bin if it is not the max bin.
            if(binPtr->bin != MAX_NUM_BINS)
            {
                for(int i = 0; i < binPtr->n_chunk; i++)
                {
                    if(binPtr->chunks[i].chunk_beg < ref->minChunkOffset)
                    {
                        ref->minChunkOffset = binPtr->chunks[i].chunk_beg;
                    }
                    if(binPtr->chunks[i].chunk_end > ref->maxChunkOffset)
                    {
                        ref->maxChunkOffset = binPtr->chunks[i].chunk_end;
                    }
                    if(binPtr->chunks[i].chunk_end > maxOverallOffset)
                    {
                        maxOverallOffset = binPtr->chunks[i].chunk_end;
                    }
                }
            }
            else
            {
                // Mapped/unmapped are the last chunk of the
                // MAX BIN
                ref->n_mapped = binPtr->chunks[binPtr->n_chunk - 1].chunk_beg;
                ref->n_unmapped = binPtr->chunks[binPtr->n_chunk - 1].chunk_end;
            }
        }

        // Read the number of intervals.
        if(ifread(indexFile, &(ref->n_intv), 4) != 4)
        {
            // Failed to read, set to 0.
            ref->n_intv = 0;
            // Return failure.
            ifclose(indexFile);
            return(SamStatus::FAIL_IO);
        }

        // Allocate space for the intervals and read them.
        uint32_t linearIndexSize = ref->n_intv * sizeof(uint64_t);
        ref->ioffsets = (uint64_t*)malloc(linearIndexSize);
        if(ifread(indexFile, ref->ioffsets, linearIndexSize) != linearIndexSize)
        {
            // Failed to read the linear index.
            // Return failure.
            ifclose(indexFile);
            return(SamStatus::FAIL_IO);
        }
    }

    int32_t numUnmapped = 0;
    if(ifread(indexFile, &numUnmapped, sizeof(int32_t)) == sizeof(int32_t))
    {
        myUnMappedNumReads = numUnmapped;
    }

    // Successfully read the bam index file.
    ifclose(indexFile);
    return(SamStatus::SUCCESS);
}
Пример #24
0
int main(int argc, char ** argv)
   {
   setbuf(stdout, NULL);

   time_t start = time(NULL);

   printf("MiniMac - Imputation into phased haplotypes\n"
          "(c) 2011 Goncalo Abecasis\n");
#ifdef __VERSION__
   printf("VERSION 5.0\n");
#else
   printf("UNDOCUMENTED RELEASE\n");
#endif

   int rounds = 5, states = 200, cpus = 0;
   bool em = false, gzip = false, phased = false;

   String referenceHaplotypes, referenceSnps;
   String haplotypes, snps;
   String prefix("minimac");
   String firstMarker, lastMarker;

   String recombinationRates, errorRates;

   BEGIN_LONG_PARAMETERS(longParameters)
      LONG_PARAMETER_GROUP("Reference Haplotypes")
         LONG_STRINGPARAMETER("refHaps", &referenceHaplotypes)
         LONG_STRINGPARAMETER("refSnps", &referenceSnps)
      LONG_PARAMETER_GROUP("Target Haplotypes")
         LONG_STRINGPARAMETER("haps", &haplotypes)
         LONG_STRINGPARAMETER("snps", &snps)
      LONG_PARAMETER_GROUP("Starting Parameters")
         LONG_STRINGPARAMETER("rec", &recombinationRates)
         LONG_STRINGPARAMETER("erate", &errorRates)
      LONG_PARAMETER_GROUP("Parameter Fitting")
         LONG_INTPARAMETER("rounds", &rounds)
         LONG_INTPARAMETER("states", &states)
         LONG_PARAMETER("em", &em)
      LONG_PARAMETER_GROUP("Output Files")
         LONG_STRINGPARAMETER("prefix", &prefix)
         LONG_PARAMETER("phased", &phased)
         LONG_PARAMETER("gzip", &gzip)
//    LONG_PARAMETER_GROUP("Clipping Window")
//      LONG_STRINGPARAMETER("start", &firstMarker)
//      LONG_STRINGPARAMETER("stop", &lastMarker)
#ifdef _OPENMP
      LONG_PARAMETER_GROUP("Multi-Threading")
         LONG_INTPARAMETER("cpus", &cpus)
#endif
   END_LONG_PARAMETERS();

   ParameterList pl;

   pl.Add(new LongParameters("Command Line Options", longParameters));
   pl.Read(argc, argv);
   pl.Status();

#ifdef _OPENMP
   if (cpus > 0)
      omp_set_num_threads(cpus);
#endif

   // Read marker list
   printf("Reading Reference Marker List ...\n");

   StringArray refMarkerList;
   refMarkerList.Read(referenceSnps);

   // Index markers
   StringIntHash referenceHash;
   for (int i = 0; i < refMarkerList.Length(); i++)
      referenceHash.Add(refMarkerList[i].Trim(), i);

   printf("  %d Markers in Reference Haplotypes...\n\n", refMarkerList.Length());

   // Load reference haplotypes
   printf("Loading reference haplotypes ...\n");
   HaplotypeSet reference;

   reference.markerCount = refMarkerList.Length();
   reference.LoadHaplotypes(referenceHaplotypes);

   printf("  %d Reference Haplotypes Loaded ...\n\n", reference.count);

   // Read framework marker list
   printf("Reading Framework Marker List ...\n");
   StringArray markerList;
   markerList.Read(snps);

   ClipReference(reference, refMarkerList, referenceHash, markerList,
                 firstMarker, lastMarker);

   // Crossref Marker Names to Reference Panel Positions
   IntArray markerIndex;
   markerIndex.Dimension(markerList.Length());

   int matches = 0;

   for (int i = 0; i < markerList.Length(); i++)
      {
      markerIndex[i] = referenceHash.Integer(markerList[i].Trim());

      if (markerIndex[i] >= 0) matches++;
      }

   printf("  %d Markers in Framework Haplotypes Overlap Reference ...\n", matches);

   if (matches == 0)
      error("No markers overlap between target and reference\n"
            "Please check correct reference is being used and markers are named consistently");

   printf("  %d Other Markers in Framework Haplotypes Discarded ...\n\n", markerList.Length() - matches);

   // Check for flips in reference vs. target haplotypes
   int flips = 0;
   int previous = -1;
   for (int i = 0; i < markerIndex.Length(); i++)
      if (markerIndex[i] >= 0)
         if (markerIndex[i] < previous)
            {
            if (flips++ < 10)
               printf("  -> Marker %s precedes %s in reference, but follows it in target\n",
                     (const char *) refMarkerList[previous],
                     (const char *) markerList[i]);
            previous = markerIndex[i];
            }
   if (flips > 10)
      printf("  -> %d Additional Marker Order Changes Not Listed\n", flips - 10);
   if (flips)
      printf("  %d Marker Pairs Change Order in Target vs Framework Haplotypes\n", flips);

   // Load target haplotypes
   printf("Loading target haplotypes ...\n");
   HaplotypeSet target;

   target.markerCount = markerList.Length();
   target.LoadHaplotypes(haplotypes, true);

   reference.CalculateFrequencies();
   target.CalculateFrequencies();
   target.CompareFrequencies(reference, markerIndex, markerList);

   printf("  %d Target Haplotypes Loaded ...\n\n", target.count);

   int startIndex = firstMarker.IsEmpty() ? 0 : referenceHash.Integer(firstMarker);
   int stopIndex = lastMarker.IsEmpty() ? reference.markerCount - 1 : referenceHash.Integer(lastMarker);

   if (startIndex < 0 || stopIndex < 0)
      error("Clipping requested, but no position available for one of the endpoints");

   printf("Setting up Markov Model...\n\n");

   // Setup Markov Model
   MarkovParameters mp;

   mp.Allocate(reference.markerCount);

   if (rounds > 0)
      printf("Initializing Model Parameters (using %s and up to %d haplotypes)\n",
             em ? "E-M" : "MCMC", states);

   // Simple initial estimates of error and recombination rate
   for (int i = 0; i < reference.markerCount; i++)
      mp.E[i] = 0.01;

   for (int i = 0; i < reference.markerCount - 1; i++)
      mp.R[i] = 0.001;

   if (mp.ReadErrorRates(errorRates))
      printf("  Updated error rates using data in %s ...\n", (const char *) errorRates);

   if (mp.ReadCrossoverRates(recombinationRates))
      printf("  Updated recombination rates using %s ...\n", (const char *) recombinationRates);

   // Parameter estimation loop
   for (int round = 0; round < rounds; round++)
      {
      printf("  Round %d of Parameter Refinement ...\n", round + 1);

      int iterations = states < reference.count ? states : reference.count;

      MarkovModel original;
      original.CopyParameters(mp);

      #pragma omp parallel for
      for (int i = 0; i < iterations; i++)
         {
         MarkovModel mm;

         mm.Allocate(reference.markerCount, reference.count - 1);
         mm.CopyParameters(original);

         // Reference leave one out (loo) panel
         char ** reference_loo = new char * [reference.count - 1];
         for (int in = 0, out = 0; in < reference.count; in++)
            if (in != i)
               reference_loo[out++] = reference.haplotypes[in];

         mm.WalkLeft(reference.haplotypes[i], reference_loo, reference.freq);

         if (em)
            mm.CountExpected(reference.haplotypes[i], reference_loo, reference.freq);
         else
            {
            #pragma omp critical
            { mm.ProfileModel(reference.haplotypes[i], reference_loo, reference.freq); }
            }

         delete [] reference_loo;

         #pragma omp critical
         mp += mm;
         }

      if (round >= rounds / 2)
         {
         int iterations = states < target.count ? states : target.count;

         #pragma omp parallel for
         for (int i = 0; i < iterations; i++)
            {
            MarkovModel mm;

            mm.Allocate(reference.markerCount, reference.count);
            mm.CopyParameters(original);

            // Padded version of target haplotype, including missing sites
            char * padded = new char [reference.markerCount];
            for (int k = 0; k < reference.markerCount; k++)
               padded[k] = 0;

            // Copy current haplotype into padded vector
            for (int j = 0; j < target.markerCount; j++)
               if (markerIndex[j] >= 0)
                  padded[markerIndex[j]] = target.haplotypes[i][j];

            mm.WalkLeft(padded, reference.haplotypes, reference.freq);

            if (em)
               mm.CountExpected(padded, reference.haplotypes, reference.freq);
            else
               {
               #pragma omp critical
               { mm.ProfileModel(padded, reference.haplotypes, reference.freq); }
               }

            delete [] padded;

            #pragma omp critical
            mp += mm;
            }
         }

      mp.UpdateModel();

      double crossovers = 0;
      for (int i = 0; i < reference.markerCount - 1; i++)
         crossovers += mp.R[i];

      double errors = 0;
      for (int i = 0; i < reference.markerCount; i++)
         {
         double heterozygosity = 1.0 - square(reference.freq[1][i])
                                     - square(reference.freq[2][i])
                                     - square(reference.freq[3][i])
                                     - square(reference.freq[4][i]);

         errors += mp.E[i] * heterozygosity;
         }
      errors /= reference.markerCount + 1e-30;

      printf("      %.0f mosaic crossovers expected per haplotype\n", crossovers);
      printf("      %.1f%% of crossovers are due to reference flips\n", mp.empiricalFlipRate * 100.);
      printf("      %.3g errors in mosaic expected per marker\n", errors);
      }

   if (rounds > 0)
      {
      printf("  Saving estimated parameters for future use ...\n");
      mp.WriteParameters(refMarkerList, prefix, gzip);
      }

   printf("\n");

   // List the major allele at each location
   reference.ListMajorAlleles();

   printf("Generating Draft .info File ...\n\n");

   // Output some basic information
   IFILE info = ifopen(prefix + ".info.draft", "wt");

   ifprintf(info, "SNP\tAl1\tAl2\tFreq1\tGenotyped\n");

   for (int i = 0, j = 0; i <= stopIndex; i++)
      if (i >= startIndex)
         ifprintf(info, "%s\t%s\t%s\t%.4f\t%s\n",
            (const char *) refMarkerList[i],
            reference.MajorAlleleLabel(i), reference.MinorAlleleLabel(i),
            reference.freq[reference.major[i]][i],
            j < markerIndex.Length() && i == markerIndex[j] ? (j++, "Genotyped") : "-");
      else
         if (j < markerIndex.Length() && i == markerIndex[j])
            j++;

   ifclose(info);

   printf("Imputing Genotypes ...\n");

   IFILE dosages = ifopen(prefix + ".dose" + (gzip ? ".gz" : ""), "wt");
   IFILE hapdose, haps;

   if (phased)
      {
      hapdose = ifopen(prefix + ".hapDose" + (gzip ? ".gz" : ""), "wt");
      haps = ifopen(prefix + ".haps" + (gzip ? ".gz" : ""), "wt");
      }

   ImputationStatistics stats(reference.markerCount);

   // Impute each haplotype
   #pragma omp parallel for
   for (int i = 0; i < target.count; i++)
      {
      if (i != 0 && target.labels[i] == target.labels[i-1])
         continue;

      MarkovModel mm;

      mm.Allocate(reference.markerCount, reference.count);
      mm.ClearImputedDose();
      mm.CopyParameters(mp);

      // Padded version of target haplotype, including missing sites
      char * padded = new char [reference.markerCount];
      for (int j = 0; j < reference.markerCount; j++)
         padded[j] = 0;

      int k = i;

      do {
         printf("  Processing Haplotype %d of %d ...\n", k + 1, target.count);

         // Copy current haplotype into padded vector
         for (int j = 0; j < target.markerCount; j++)
            if (markerIndex[j] >= 0)
               padded[markerIndex[j]] = target.haplotypes[k][j];

         mm.WalkLeft(padded, reference.haplotypes, reference.freq);
         mm.Impute(reference.major, padded, reference.haplotypes, reference.freq);

         #pragma omp critical
         { stats.Update(mm.imputedHap, mm.leaveOneOut, padded, reference.major); }

         #pragma omp critical
         if (phased)
            {
            ifprintf(hapdose, "%s\tHAPLO%d", (const char *) target.labels[i], k - i + 1);
            ifprintf(haps, "%s\tHAPLO%d", (const char *) target.labels[i], k - i + 1);
            for (int j = startIndex; j <= stopIndex; j++)
               {
               ifprintf(hapdose, "\t%.3f", mm.imputedHap[j]);
               ifprintf(haps, "%s%c", j % 8 == 0 ? " " : "", mm.imputedAlleles[j]);
               }
            ifprintf(hapdose, "\n");
            ifprintf(haps, "\n");
            }

         k++;
      } while (k < target.count && target.labels[k] == target.labels[i]);

      printf("    Outputting Individual %s ...\n", (const char *) target.labels[i]);

      #pragma omp critical
         {
         ifprintf(dosages, "%s\tDOSE", (const char *) target.labels[i]);
         for (int j = startIndex; j <= stopIndex; j++)
            ifprintf(dosages, "\t%.3f", mm.imputedDose[j]);
         ifprintf(dosages, "\n");
         }

      delete [] padded;
      }

   ifclose(dosages);

   if (phased)
      {
      ifclose(hapdose);
      ifclose(haps);
      }

   // Output some basic information
   info = ifopen(prefix + ".info" + (gzip ? ".gz" : ""), "wt");

   ifprintf(info, "SNP\tAl1\tAl2\tFreq1\tMAF\tAvgCall\tRsq\tGenotyped\tLooRsq\tEmpR\tEmpRsq\tDose1\tDose2\n");

   // Padded version of target haplotype, including missing sites
   char * padded = new char [reference.markerCount];
   for (int k = 0; k < reference.markerCount; k++)
      padded[k] = 0;

   // Mark genotyped SNPs in padded vector
   for (int j = 0; j < target.markerCount; j++)
      if (markerIndex[j] >= 0)
          padded[markerIndex[j]] = 1;

   for (int i = startIndex; i <= stopIndex; i++)
      {
      ifprintf(info, "%s\t%s\t%s\t%.5f\t%.5f\t%.5f\t%.5f\t",
            (const char *) refMarkerList[i],
            reference.MajorAlleleLabel(i),
            reference.MinorAlleleLabel(i),
            stats.AlleleFrequency(i),
            stats.AlleleFrequency(i) > 0.5 ? 1.0 - stats.AlleleFrequency(i) : stats.AlleleFrequency(i),
            stats.AverageCallScore(i),
            stats.Rsq(i));

      if (padded[i])
         ifprintf(info, "Genotyped\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f\n",
                  stats.LooRsq(i), stats.EmpiricalR(i), stats.EmpiricalRsq(i),
                  stats.LooMajorDose(i), stats.LooMinorDose(i));
      else
         ifprintf(info, "-\t-\t-\t-\t-\t-\n");
      }

   ifclose(info);

   delete [] padded;

   time_t stop = time(NULL);
   int seconds = stop - start;

   printf("\nRun completed in %d hours, %d mins, %d seconds on %s\n\n",
          seconds / 3600, (seconds % 3600) / 60, seconds % 60,
          ctime(&stop));
   }
Пример #25
0
void Imputation::MergeFinalVcfAllVariants(HaplotypeSet &rHap,HaplotypeSet &tHap,ImputationStatistics &stats,int MaxIndex)
{
    cout<<" ------------------------------------------------------------------------------"<<endl;
    cout<<"                                FINAL VCF MERGE                                "<<endl;
    cout<<" ------------------------------------------------------------------------------"<<endl;

    printf("\n Merging partial VCF files to final output VCF File :  %s ",(outFile + ".dose.vcf" + (gzip ? ".gz" : "")).c_str() );
    cout<<endl<<endl;

    IFILE vcfdosepartial = ifopen(outFile + ".dose.vcf" + (gzip ? ".gz" : ""),  "a", gzip ?InputFile::BGZF:InputFile::UNCOMPRESSED);

    vector<IFILE> vcfdosepartialList(MaxIndex);






    for(int i=1;i<=MaxIndex;i++)
    {
        string tempFileIndex(outFile);
        stringstream strs;
        strs<<(i);
        tempFileIndex+=(".dose.vcf.part." +
                         (string)(strs.str())+(gzip ? ".gz" : ""));
        vcfdosepartialList[i-1] = ifopen(tempFileIndex.c_str(), "r");
    }
    string line;
    for(int i=1;i<=MaxIndex;i++)
    {
        line.clear();
        vcfdosepartialList[i-1]->readLine(line);
        ifprintf(vcfdosepartial,"%s",line.c_str());
    }

    int i=0;
    for (int index =0; index < rHap.RefTypedTotalCount; index++)
    {

//abort();
        if(index%10000==0)
        {
            printf("    Merging marker %d of %d [%.1f%%] to VCF File ...", index + 1, rHap.RefTypedTotalCount,100*(double)(index + 1)/(int)rHap.RefTypedTotalCount);
            cout<<endl;
        }


        if(rHap.RefTypedIndex[index]==-1)
        {

            if(i>=rHap.PrintStartIndex && i <= rHap.PrintEndIndex)
            {

                ifprintf(vcfdosepartial,"\n%s\t%d\t%s\t%s\t%s\t.\tPASS\tMAF=%.5f;R2=%.5f",
                rHap.VariantList[i].chr.c_str(),rHap.VariantList[i].bp,
                RsId?rHap.VariantList[i].rsid.c_str():rHap.VariantList[i].name.c_str(),rHap.VariantList[i].refAlleleString.c_str(),
                rHap.VariantList[i].altAlleleString.c_str(),stats.AlleleFrequency(i) > 0.5 ? 1.0 - stats.AlleleFrequency(i) : stats.AlleleFrequency(i),stats.Rsq(i));


                if(!tHap.missing[i])
                    ifprintf(vcfdosepartial,";ER2=%.5f",stats.EmpiricalRsq(i));

                ifprintf(vcfdosepartial,"\t%s",GT?(DS?(GP?"GT:DS:GP":"GT:DS"):(GP?"GT:GP":"GT")):(DS?(GP?"DS:GP":"DS"):(GP?"GP":"")));

                for(int j=1;j<=MaxIndex;j++)
                {
                    string tempFileIndex(outFile);
                    stringstream strs;
                    strs<<(j);
                    tempFileIndex+=(".dose.vcf.part."
                                    + (string)(strs.str())
                                    +(gzip ? ".gz" : ""));
                    line.clear();
                    vcfdosepartialList[j-1]->readLine(line);
                    ifprintf(vcfdosepartial,"%s",line.c_str());
                }

            }



            i++;
        }
        else
        {


            variant ThisTypedVariant =tHap.TypedOnlyVariantList[rHap.RefTypedIndex[index]];
            ifprintf(vcfdosepartial,"\n%s\t%d\t%s\t%s\t%s\t.\tPASS\t",
                     ThisTypedVariant.chr.c_str(),
                     ThisTypedVariant.bp,
                     RsId? ThisTypedVariant.rsid.c_str():ThisTypedVariant.name.c_str(),
                     ThisTypedVariant.refAlleleString.c_str(),
                     ThisTypedVariant.altAlleleString.c_str());


            ifprintf(vcfdosepartial,"GENOTYPED_ONLY;AN=%d;MAF=%.5f",
                     tHap.TotalSample[rHap.RefTypedIndex[index]],
                     tHap.AlleleFreq[rHap.RefTypedIndex[index]]);

//cout<<rHap.RefTypedIndex[index]<<" " <<tHap.TotalSample[rHap.RefTypedIndex[index]]<<" " << tHap.AlleleFreq[rHap.RefTypedIndex[index]]/(double)tHap.TotalSample[rHap.RefTypedIndex[index]]<< endl;

            ifprintf(vcfdosepartial,"\t%s",GT?(DS?(GP?"GT:DS:GP":"GT:DS"):(GP?"GT:GP":"GT")):(DS?(GP?"DS:GP":"DS"):(GP?"GP":"")));

            for(int j=1;j<=MaxIndex;j++)
            {
                string tempFileIndex(outFile);
                stringstream strs;
                strs<<(j);
                tempFileIndex+=(".dose.vcf.part."
                                + (string)(strs.str())
                                +(gzip ? ".gz" : ""));
                line.clear();
                vcfdosepartialList[j-1]->readLine(line);
                ifprintf(vcfdosepartial,"%s",line.c_str());

            }

//            ifprintf(vcfdosepartial,"\n");



        }

    }


    for(int i=1;i<=MaxIndex;i++)
    {
        ifclose(vcfdosepartialList[i-1]);
        string tempFileIndex(outFile);
        stringstream strs;
        strs<<(i);
        tempFileIndex+=(".dose.vcf.part." +
                        (string)(strs.str())+
                        (gzip ? ".gz" : ""));
        remove(tempFileIndex.c_str());
    }

    ifclose(vcfdosepartial);

    printf("\n Merging Finished ..." );
    cout<<endl <<endl;
}
Пример #26
0
int Stats::execute(int argc, char **argv)
{
    // Extract command line arguments.
    String inFile = "";
    String indexFile = "";
    bool basic = false;
    bool noeof = false;
    bool params = false;
    bool qual = false;
    bool phred = false;
    int maxNumReads = -1;
    bool unmapped = false;
    String pBaseQC = "";
    String cBaseQC = "";
    String regionList = "";
    int excludeFlags = 0;
    int requiredFlags = 0;
    bool withinRegion = false;
    int minMapQual = 0;
    String dbsnp = "";
    PosList *dbsnpListPtr = NULL;
    bool baseSum = false;
    int bufferSize = PileupHelper::DEFAULT_WINDOW_SIZE;

    ParameterList inputParameters;
    BEGIN_LONG_PARAMETERS(longParameterList)
        LONG_PARAMETER_GROUP("Required Parameters")
        LONG_STRINGPARAMETER("in", &inFile)
        LONG_PARAMETER_GROUP("Types of Statistics")
        LONG_PARAMETER("basic", &basic)
        LONG_PARAMETER("qual", &qual)
        LONG_PARAMETER("phred", &phred)
        LONG_STRINGPARAMETER("pBaseQC", &pBaseQC)
        LONG_STRINGPARAMETER("cBaseQC", &cBaseQC)
        LONG_PARAMETER_GROUP("Optional Parameters")
        LONG_INTPARAMETER("maxNumReads", &maxNumReads)
        LONG_PARAMETER("unmapped", &unmapped)
        LONG_STRINGPARAMETER("bamIndex", &indexFile)
        LONG_STRINGPARAMETER("regionList", &regionList)
        LONG_INTPARAMETER("excludeFlags", &excludeFlags)
        LONG_INTPARAMETER("requiredFlags", &requiredFlags)
        LONG_PARAMETER("noeof", &noeof)
        LONG_PARAMETER("params", &params)
        LONG_PARAMETER_GROUP("Optional phred/qual Only Parameters")
        LONG_PARAMETER("withinRegion", &withinRegion)
        LONG_PARAMETER_GROUP("Optional BaseQC Only Parameters")
        LONG_PARAMETER("baseSum", &baseSum)
        LONG_INTPARAMETER("bufferSize", &bufferSize)
        LONG_INTPARAMETER("minMapQual", &minMapQual)
        LONG_STRINGPARAMETER("dbsnp", &dbsnp)
        END_LONG_PARAMETERS();
   
    inputParameters.Add(new LongParameters ("Input Parameters", 
                                            longParameterList));

    inputParameters.Read(argc-1, &(argv[1]));

    // If no eof block is required for a bgzf file, set the bgzf file type to 
    // not look for it.
    if(noeof)
    {
        // Set that the eof block is not required.
        BgzfFileType::setRequireEofBlock(false);
    }

    // Check to see if the in file was specified, if not, report an error.
    if(inFile == "")
    {
        usage();
        inputParameters.Status();
        // In file was not specified but it is mandatory.
        std::cerr << "--in is a mandatory argument for stats, "
                  << "but was not specified" << std::endl;
        return(-1);
    }

    // Use the index file if unmapped or regionList is not empty.
    bool useIndex = (unmapped|| (!regionList.IsEmpty()));

    // IndexFile is required, so check to see if it has been set.
    if(useIndex && (indexFile == ""))
    {
        // In file was not specified, so set it to the in file
        // + ".bai"
        indexFile = inFile + ".bai";
    }
    ////////////////////////////////////////
    // Setup in case pileup is used.
    Pileup<PileupElementBaseQCStats> pileup(bufferSize);
    // Initialize start/end positions.
    myStartPos = 0;
    myEndPos = -1;
    
    // Open the output qc file if applicable.
    IFILE baseQCPtr = NULL;
    if(!pBaseQC.IsEmpty() && !cBaseQC.IsEmpty())
    {
        usage();
        inputParameters.Status();
        // Cannot specify both types of baseQC.
        std::cerr << "Cannot specify both --pBaseQC & --cBaseQC." << std::endl;
        return(-1);
    }
    else if(!pBaseQC.IsEmpty())
    {
        baseQCPtr = ifopen(pBaseQC, "w");
        PileupElementBaseQCStats::setPercentStats(true);
    }
    else if(!cBaseQC.IsEmpty())
    {
        baseQCPtr = ifopen(cBaseQC, "w");
        PileupElementBaseQCStats::setPercentStats(false);
    }

    if(baseQCPtr != NULL)
    {
        PileupElementBaseQCStats::setOutputFile(baseQCPtr);
        PileupElementBaseQCStats::printHeader();
    }
    if((baseQCPtr != NULL) || baseSum)
    {
        PileupElementBaseQCStats::setMapQualFilter(minMapQual);
        PileupElementBaseQCStats::setBaseSum(baseSum);
    }

    if(params)
    {
        inputParameters.Status();
    }

    // Open the file for reading.
    SamFile samIn;
    if(!samIn.OpenForRead(inFile))
    {
        fprintf(stderr, "%s\n", samIn.GetStatusMessage());
        return(samIn.GetStatus());
    }

    samIn.SetReadFlags(requiredFlags, excludeFlags);

    // Set whether or not basic statistics should be generated.
    samIn.GenerateStatistics(basic);

    // Read the sam header.
    SamFileHeader samHeader;
    if(!samIn.ReadHeader(samHeader))
    {
        fprintf(stderr, "%s\n", samIn.GetStatusMessage());
        return(samIn.GetStatus());
    }

    // Open the bam index file for reading if we are
    // doing unmapped reads (also set the read section).
    if(useIndex)
    {
        samIn.ReadBamIndex(indexFile);

        if(unmapped)
        {
            samIn.SetReadSection(-1);
        }

        if(!regionList.IsEmpty())
        {
            myRegionList = ifopen(regionList, "r");
        }
    }

    //////////////////////////
    // Read dbsnp if specified and doing baseQC
    if(((baseQCPtr != NULL) || baseSum) && (!dbsnp.IsEmpty()))
    {
        // Read the dbsnp file.
        IFILE fdbSnp;
        fdbSnp = ifopen(dbsnp,"r");
        // Determine how many entries.
        const SamReferenceInfo& refInfo = samHeader.getReferenceInfo();
        int maxRefLen = 0;
        for(int i = 0; i < refInfo.getNumEntries(); i++)
        {
            int refLen = refInfo.getReferenceLength(i);
            if(refLen >= maxRefLen)
            {
                maxRefLen = refLen + 1;
            }
        }
        
        dbsnpListPtr = new PosList(refInfo.getNumEntries(),maxRefLen);

        if(fdbSnp==NULL)
        {
            std::cerr << "Open dbSNP file " << dbsnp.c_str() << " failed!\n";
        }
        else if(dbsnpListPtr == NULL)
        {
            std::cerr << "Failed to init the memory allocation for the dbsnpList.\n";
        }
        else
        {
            // Read the dbsnp file.
            StringArray tokens;
            String buffer;
            int position = 0;
            int refID = 0;

            // Loop til the end of the file.
            while (!ifeof(fdbSnp))
            {
                // Read the next line.
                buffer.ReadLine(fdbSnp);
                // If it does not have at least 2 columns, 
                // continue to the next line.
                if (buffer.IsEmpty() || buffer[0] == '#') continue;
                tokens.AddTokens(buffer);
                if(tokens.Length() < 2) continue;

                if(!tokens[1].AsInteger(position))
                {
                    std::cerr << "Improperly formatted region line, start position "
                              << "(2nd column) is not an integer: "
                              << tokens[1]
                              << "; Skipping to the next line.\n";         
                    continue;
                }

                // Look up the reference name.
                refID = samHeader.getReferenceID(tokens[0]);
                if(refID != SamReferenceInfo::NO_REF_ID)
                {
                    // Reference id was found, so add it to the dbsnp
                    dbsnpListPtr->addPosition(refID, position);
                }
        
                tokens.Clear();
                buffer.Clear();
            }
        }
        ifclose(fdbSnp);
    }

    // Read the sam records.
    SamRecord samRecord;

    int numReads = 0;

    //////////////////////
    // Setup in case doing a quality count.
    // Quality histogram.
    const int MAX_QUAL = 126;
    const int START_QUAL = 33;
    uint64_t qualCount[MAX_QUAL+1];
    for(int i = 0; i <= MAX_QUAL; i++)
    {
        qualCount[i] = 0;
    }
    
    const int START_PHRED = 0;
    const int PHRED_DIFF = START_QUAL - START_PHRED;
    const int MAX_PHRED = MAX_QUAL - PHRED_DIFF;
    uint64_t phredCount[MAX_PHRED+1];
    for(int i = 0; i <= MAX_PHRED; i++)
    {
        phredCount[i] = 0;
    }
    
    int refPos = 0;
    Cigar* cigarPtr = NULL;
    char cigarChar = '?';
    // Exclude clips from the qual/phred counts if unmapped reads are excluded.
    bool qualExcludeClips = excludeFlags & SamFlag::UNMAPPED;

    //////////////////////////////////
    // When not reading by sections, getNextSection returns true
    // the first time, then false the next time.
    while(getNextSection(samIn))
    {
        // Keep reading records from the file until SamFile::ReadRecord
        // indicates to stop (returns false).
        while(((maxNumReads < 0) || (numReads < maxNumReads)) && samIn.ReadRecord(samHeader, samRecord))
        {
            // Another record was read, so increment the number of reads.
            ++numReads;
            // See if the quality histogram should be genereated.
            if(qual || phred)
            {
                // Get the quality.
                const char* qual = samRecord.getQuality();
                // Check for no quality ('*').
                if((qual[0] == '*') && (qual[1] == 0))
                {
                    // This record does not have a quality string, so no 
                    // quality processing is necessary.
                }
                else
                {
                    int index = 0;
                    cigarPtr = samRecord.getCigarInfo();
                    cigarChar = '?';
                    refPos = samRecord.get0BasedPosition();
                    if(!qualExcludeClips && (cigarPtr != NULL))
                    {
                        // Offset the reference position by any soft clips
                        // by subtracting the queryIndex of this start position.
                        // refPos is now the start position of the clips.
                        refPos -= cigarPtr->getQueryIndex(0);
                    }

                    while(qual[index] != 0)
                    {
                        // Skip this quality if it is clipped and we are skipping clips.
                        if(cigarPtr != NULL)
                        {
                            cigarChar = cigarPtr->getCigarCharOpFromQueryIndex(index);
                        }
                        if(qualExcludeClips && Cigar::isClip(cigarChar))
                        {
                            // Skip a clipped quality.
                            ++index;
                            // Increment the position.
                            continue;
                        }

                        if(withinRegion && (myEndPos != -1) && (refPos >= myEndPos))
                        {
                            // We have hit the end of the region, stop processing this
                            // quality string.
                            break;
                        }

                        if(withinRegion && (refPos < myStartPos))
                        {
                            // This position is not in the target.
                            ++index;
                            // Update the position if this is found in the reference or a clip.
                            if(Cigar::foundInReference(cigarChar) || Cigar::isClip(cigarChar))
                            {
                                ++refPos;
                            }
                            continue;
                        }

                        // Check for valid quality.
                        if((qual[index] < START_QUAL) || (qual[index] > MAX_QUAL))
                        {
                            if(qual)
                            {
                                std::cerr << "Invalid Quality found: " << qual[index] 
                                          << ".  Must be between "
                                          << START_QUAL << " and " << MAX_QUAL << ".\n";
                            }
                            if(phred)
                            {
                                std::cerr << "Invalid Phred Quality found: " << qual[index] - PHRED_DIFF
                                          << ".  Must be between "
                                          << START_QUAL << " and " << MAX_QUAL << ".\n";
                            }
                            // Skip an invalid quality.
                            ++index;
                            // Update the position if this is found in the reference or a clip.
                            if(Cigar::foundInReference(cigarChar) || Cigar::isClip(cigarChar))
                            {
                                ++refPos;
                            }
                            continue;
                        }
                        
                        // Increment the count for this quality.
                        ++(qualCount[(int)(qual[index])]);
                        ++(phredCount[(int)(qual[index]) - PHRED_DIFF]);
                        // Update the position if this is found in the reference or a clip.
                        if(Cigar::foundInReference(cigarChar) || Cigar::isClip(cigarChar))
                        {
                            ++refPos;
                        }
                        ++index;
                    }
                }
            }

            // Check the next thing to do for the read.
            if((baseQCPtr != NULL) || baseSum)
            {
                // Pileup the bases for this read.
                pileup.processAlignmentRegion(samRecord, myStartPos, myEndPos, dbsnpListPtr);
            }
        }

        // Done with a section, move on to the next one.

        // New section, so flush the pileup.
        pileup.flushPileup();
    }

    // Flush the rest of the pileup.
    if((baseQCPtr != NULL) || baseSum)
    {
        // Pileup the bases.
        pileup.processAlignmentRegion(samRecord, myStartPos, myEndPos, dbsnpListPtr);
        PileupElementBaseQCStats::printSummary();
        ifclose(baseQCPtr);
    }

    std::cerr << "Number of records read = " << 
        samIn.GetCurrentRecordCount() << std::endl;

    if(basic)
    {
        std::cerr << std::endl;
        samIn.PrintStatistics();
    }

    // Print the quality stats.
    if(qual)
    {
        std::cerr << std::endl;
        std::cerr << "Quality\tCount\n";
        for(int i = START_QUAL; i <= MAX_QUAL; i++)
        {
            std::cerr << i << "\t" << qualCount[i] << std::endl;
        }
    }
    // Print the phred quality stats.
    if(phred)
    {
        std::cerr << std::endl;
        std::cerr << "Phred\tCount\n";
        for(int i = START_PHRED; i <= MAX_PHRED; i++)
        {
            std::cerr << i << "\t" << phredCount[i] << std::endl;
        }
    }

    SamStatus::Status status = samIn.GetStatus();
    if(status == SamStatus::NO_MORE_RECS)
    {
        // A status of NO_MORE_RECS means that all reads were successful.
        status = SamStatus::SUCCESS;
    }

    return(status);
}
Пример #27
0
void Imputation::performImputation(HaplotypeSet &tHap,HaplotypeSet &rHap, String Golden)
{

    vector<int> optStructure=rHap.optEndPoints;

    int time_prev = time(0),time_load,vcfSampleIndex=0;;
    includeGwas=true;
    MarkovParameters* MP=createEstimates(rHap,tHap,rHap.optEndPoints,1-includeGwas);

    cout<<" ------------------------------------------------------------------------------"<<endl;
    cout<<"                              MAIN IMPUTATION                                  "<<endl;
    cout<<" ------------------------------------------------------------------------------"<<endl;


    ImputationStatistics stats(rHap.numMarkers );
    IFILE dosages=NULL, hapdose=NULL, haps=NULL,vcfdosepartial=NULL;
    HaplotypeSet DosageForVcfPartial;
    DosageForVcfPartial.unphasedOutput=unphasedOutput;
    DosageForVcfPartial.TypedOnly=tHap.TypedOnly;
    DosageForVcfPartial.GWASOnlycounter=tHap.GWASOnlycounter;

    if(tHap.TypedOnly)
    {
        printf("\n Calculating Allele Frequency for Typed-Only variants ... ");
        cout<<endl;
        tHap.CalculateGWASOnlyFreq();

    }

    cout << "\n Starting Imputation ...";
    printf("\n\n Setting up Markov Model for Imputation ...");
    cout<<endl<<endl;


    if (phased && !unphasedOutput)
    {

        hapdose = ifopen(outFile + ".hapDose" + (gzip ? ".gz" : ""), "wb", gzip ?InputFile::BGZF:InputFile::UNCOMPRESSED);
        haps = ifopen(outFile + ".hapLabel" + (gzip ? ".gz" : ""), "wb", gzip ?InputFile::BGZF:InputFile::UNCOMPRESSED);

    }

    int maxVcfSample=200,NumVcfWritten=0,NumVcfCreated=0,NovcfParts=1;

    if((maxVcfSample)>=tHap.numSamples)
        maxVcfSample=tHap.numSamples;

    if(vcfOutput)
    {


        vcfdosepartial = ifopen(outFile + ".dose.vcf" + (gzip ? ".gz" : ""), "wb", gzip ?InputFile::BGZF:InputFile::UNCOMPRESSED);
        ifprintf(vcfdosepartial,"##fileformat=VCFv4.1\n");
        time_t t = time(0);
        struct tm * now = localtime( & t );
        ifprintf(vcfdosepartial,"##filedate=%d.%d.%d\n",(now->tm_year + 1900),(now->tm_mon + 1) ,now->tm_mday);
        ifprintf(vcfdosepartial,"##source=Minimac3\n");
        if(GT)
                ifprintf(vcfdosepartial,"##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n");
        if(tHap.AllMaleTarget)
        {
            if(DS)
                ifprintf(vcfdosepartial,"##FORMAT=<ID=DS,Number=1,Type=Float,Description=\"Estimated Alternate Allele Dosage (For Male Chr: X) : [P(Alt Allele)]\">\n");
            if(GP)
                ifprintf(vcfdosepartial,"##FORMAT=<ID=GP,Number=2,Type=Float,Description=\"Estimated Posterior Probabilities for Genotypes 0 and 1 (For Male Chr: X) \">\n");
        }
        else
        {
            if(DS)
                ifprintf(vcfdosepartial,"##FORMAT=<ID=DS,Number=1,Type=Float,Description=\"Estimated Alternate Allele Dosage : [P(0/1)+2*P(1/1)]\">\n");
            if(GP)
                ifprintf(vcfdosepartial,"##FORMAT=<ID=GP,Number=3,Type=Float,Description=\"Estimated Posterior Probabilities for Genotypes 0/0, 0/1 and 1/1 \">\n");
        }


        ifprintf(vcfdosepartial,"##INFO=<ID=MAF,Number=1,Type=Float,Description=\"Estimated Alternate Allele Frequency\">\n");
        ifprintf(vcfdosepartial,"##INFO=<ID=R2,Number=1,Type=Float,Description=\"Estimated Imputation Accuracy\">\n");
        ifprintf(vcfdosepartial,"##INFO=<ID=ER2,Number=1,Type=Float,Description=\"Empirical (Leave-One-Out) R-square (available only for genotyped variants)\">\n");
        ifprintf(vcfdosepartial,"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT");
        ifclose(vcfdosepartial);

        if(!tHap.AllMaleTarget)
            DosageForVcfPartial.InitializePartialDosageForVcfOutput((2*maxVcfSample),rHap.numMarkers,format);
        else
            DosageForVcfPartial.InitializePartialDosageForVcfOutputMaleSamples(maxVcfSample<MaxSample?maxVcfSample:MaxSample,rHap.numMarkers,format);
    }

    if(doseOutput)
        dosages = ifopen(outFile + ".dose" + (gzip ? ".gz" : ""), "wb",(gzip ? InputFile::BGZF:InputFile::UNCOMPRESSED) );


    #pragma omp parallel for
    for(int hapId=0;hapId<MaxSample;hapId++)
    {


        if (hapId %2==1)
        {
            if(rHap.finChromosome!="X")
                continue;
            else if(!tHap.AllMaleTarget)
                continue;
        }

        vector<float> foldedProb,recomProb,noRecomProb, rightProb,probAlleleNoStandardize(8,0.0),tempDoseHap1;
        vector<bool> tempHap(rHap.numMarkers),tempMissHap(rHap.numMarkers);
        vector<bool> tempDoseAlleleHap1;

        MarkovModel MM(tHap,rHap,tHap.missing,rHap.major);

        MM.CopyParameters(MP);

        int hapIdIndiv=hapId;

        do{

            MM.initializeMatrices(tHap,rHap,optStructure,rHap.ReducedStructureInfo);
            printf("  Processing Haplotype %d of %d ...", hapIdIndiv + 1, MaxSample);
            cout<<endl;


            MM.ThisHapId=hapIdIndiv;


            for(int group=1;group<(int)optStructure.size();group++)
            {

                MM.foldProbabilities(foldedProb,group-1,rHap.ReducedStructureInfo[group-1],0,refCount);
                MM.leftNoRecoProb[group-1][0]=foldedProb;


                if(group==1 && !tHap.missing[0])
                        if(!tHap.getMissingScaffoldedHaplotype(hapIdIndiv,0))
                            {

                                Condition(rHap,0,foldedProb,MM.leftNoRecoProb[group-1][0],MM.Error[0],
                                tHap.getScaffoldedHaplotype(hapIdIndiv,0)? rHap.AlleleFreq[0] : 1-rHap.AlleleFreq[0],
                                tHap.getScaffoldedHaplotype(hapIdIndiv,0),MM.backgroundError,
                                      foldedProb.size(),rHap.ReducedStructureInfo[0]);
                            }



                MM.WalkLeft(tHap,hapIdIndiv,MM.leftProb[group-1],MM.leftNoRecoProb[group-1],
                            foldedProb,optStructure[group-1],optStructure[group],
                            rHap.ReducedStructureInfo[group-1],rHap.AlleleFreq);

                splitFoldedProb(recomProb,MM.leftProb[group-1][optStructure[group]-optStructure[group-1]],MM.leftNoRecoProb[group-1][optStructure[group]-optStructure[group-1]]);

                MM.unfoldProbabilities(group-1,recomProb,MM.leftNoRecoProb[group-1][optStructure[group]-optStructure[group-1]],foldedProb,0,rHap.ReducedStructureInfo,refCount);

            }



            for(int group=optStructure.size()-1;group>0;group--)
            {

                MM.foldProbabilities(foldedProb,group-1,rHap.ReducedStructureInfo[group-1],1,refCount);
                rightProb=foldedProb;
                noRecomProb=foldedProb;

                MM.Impute(tHap,foldedProb,hapIdIndiv,MM.leftProb[group-1],MM.leftNoRecoProb[group-1],rightProb,noRecomProb,MM.junctionLeftProb[group-1],
                          MM.junctionRightProb[group],optStructure[group-1], optStructure[group],rHap.ReducedStructureInfo[group-1],1,rHap.AlleleFreq);

                splitFoldedProb(recomProb,rightProb,noRecomProb);
                MM.unfoldProbabilities(group-1,recomProb,noRecomProb,foldedProb,1,rHap.ReducedStructureInfo,refCount);
            }

            for(int jjj=0;jjj<rHap.numMarkers;jjj++)
                {
                    tempHap[jjj]=tHap.getScaffoldedHaplotype(hapIdIndiv,jjj);
                    tempMissHap[jjj]=tHap.getMissingScaffoldedHaplotype(hapIdIndiv,jjj);

                }

            if(vcfOutput)
            {
                if(hapIdIndiv%2==0)
                {
                   tempDoseHap1= MM.imputedHap;
                   tempDoseAlleleHap1= MM.imputedAlleleNumber;
                }
            }
            #pragma omp critical
            {
                stats.Update(MM.imputedHap, MM.leaveOneOut,tempHap,tempMissHap,rHap.major);
            }

            #pragma omp critical
            if (phased && !unphasedOutput)
            {

                PrintHaplotypeData(rHap, tHap, hapdose, haps,
                                    MM.imputedHap, MM.imputedAlleleNumber,
                                    hapIdIndiv, tHap.AllMaleTarget?hapId:hapId/2);
            }


            if(tHap.AllMaleTarget)
                break;
            hapIdIndiv++;
        }while(hapIdIndiv<MaxSample && hapIdIndiv%2==1);

        #pragma omp critical
        if(doseOutput)
        {
            PrintDosageData(rHap, tHap, dosages, MM.imputedDose, tHap.AllMaleTarget?hapId:hapId/2);
        }
         #pragma omp critical
        if(vcfOutput)
        {

            printf("    Saving Individual %s for VCF File...\n",  tHap.individualName[tHap.AllMaleTarget?hapId:hapId/2].c_str());
            if(!tHap.AllMaleTarget)
                DosageForVcfPartial.SaveDosageForVcfOutputSampleWise(NumVcfCreated-NumVcfWritten,
                                                                 tHap.individualName[tHap.AllMaleTarget?hapId:hapId/2],
                                                                 tempDoseHap1,MM.imputedHap,
                                                                 tempDoseAlleleHap1,MM.imputedAlleleNumber);
            else
                DosageForVcfPartial.SaveDosageForVcfOutputSampleWiseChrX(NumVcfCreated-NumVcfWritten,
                                                                 tHap.individualName[tHap.AllMaleTarget?hapId:hapId/2],
                                                                  MM.imputedHap,
                                                                 MM.imputedAlleleNumber);

            if(DosageForVcfPartial.TypedOnly)
            {

                DosageForVcfPartial.SaveIndexForGWASOnlyForVcfOutput(NumVcfCreated-NumVcfWritten,
                                                                     tHap.AllMaleTarget?hapId:hapId/2);
            }



            NumVcfCreated++;
            vcfSampleIndex++;

            if(NumVcfCreated%maxVcfSample==0 || NumVcfCreated==(tHap.AllMaleTarget?MaxSample:MaxSample/2))
            {

                string PartialVcfFileName(outFile),tempFileIndex1(outFile);
                stringstream strs;
                strs<<(NovcfParts);


                PartialVcfFileName+=(".dose.vcf.part." +
                                      (string)(strs.str())
                                     +(gzip ? ".gz" : ""));
                if(!tHap.AllMaleTarget)
                    printf("\n    --->>> Saving samples %d-%d in VCF file : %s ...\n\n",
                       (NumVcfWritten)+1,(MaxSample/2<(NumVcfWritten+maxVcfSample)?MaxSample/2:(NumVcfWritten+maxVcfSample)),
                       PartialVcfFileName.c_str());
                else
                    printf("\n    --->>> Saving samples %d-%d in VCF file : %s ...\n\n",
                       (NumVcfWritten)+1,(MaxSample<(NumVcfWritten+maxVcfSample)?MaxSample:(NumVcfWritten+maxVcfSample)),
                       PartialVcfFileName.c_str());

//if(NovcfParts==2)
//    abort();


                FlushPartialVcf(rHap,tHap,DosageForVcfPartial,PartialVcfFileName,NovcfParts);
                if(NumVcfCreated<(tHap.AllMaleTarget?MaxSample:MaxSample/2))
                {
                    NovcfParts++;
                    NumVcfWritten+=maxVcfSample;



//int gg=maxVcfSample<(((tHap.AllMaleTarget?MaxSample:MaxSample/2))-NumVcfWritten)?
//2*maxVcfSample:2*(((tHap.AllMaleTarget?MaxSample:MaxSample/2))-NumVcfWritten);
//
//
//abort();

                    if(!tHap.AllMaleTarget)
                        DosageForVcfPartial.InitializePartialDosageForVcfOutput(maxVcfSample<(MaxSample/2-NumVcfWritten)?2*maxVcfSample:2*(MaxSample/2-NumVcfWritten),rHap.numMarkers,format);
                    else
                        DosageForVcfPartial.InitializePartialDosageForVcfOutputMaleSamples(maxVcfSample<(MaxSample-NumVcfWritten)?maxVcfSample:(MaxSample-NumVcfWritten),rHap.numMarkers,format);


                }
            }

        }
    }

    cout<<endl<<" Imputation Finished ... "<<endl;


    if (phased && !unphasedOutput)
    {
        ifclose(hapdose);
        ifclose(haps);

        cout<<endl<<" Haplotype Dosage information written to : "<<
            outFile + ".hapDose" + (gzip ? ".gz" : "")<<endl;
        cout<<endl<<" Haplotype Allele information written to : "<<
        outFile + ".hapLabel" + (gzip ? ".gz" : "")<<endl;
    }



    if(doseOutput)
    {
        ifclose(dosages);
        cout<<endl<<" Dosage information written to           : "<<
        outFile + ".dose" + (gzip ? ".gz" : "")<<endl;
    }

    PrintInfoFile(rHap,tHap,stats);

    time_load = time(0) - time_prev;
    cout << "\n Time taken for imputation = " << time_load << " seconds."<<endl<<endl;


    if(vcfOutput)
        MergeFinalVcfAllVariants(rHap,tHap,stats,NovcfParts);

}
Пример #28
0
void glfHandler::Close()
{
    if (isOpen())
        ifclose(handle);
}
Пример #29
0
// main function of verifyBamID
int execute(int argc, char** argv) {
  printf("verifyBamID %s -- verify identity and purity of sequence data\n"
	 "(c) 2010-2014 Hyun Min Kang, Goo Jun, and Goncalo Abecasis\n\n", VERSION);

  VerifyBamIDArgs args;
  ParameterList pl;

  BEGIN_LONG_PARAMETERS(longParameters)
    LONG_PARAMETER_GROUP("Input Files")
    LONG_STRINGPARAMETER("vcf",&args.sVcfFile)
    LONG_STRINGPARAMETER("bam",&args.sBamFile)
    LONG_STRINGPARAMETER("subset",&args.sSubsetInds)
    LONG_STRINGPARAMETER("smID",&args.sSMID)

    LONG_PARAMETER_GROUP("VCF analysis options")
    LONG_DOUBLEPARAMETER("genoError",&args.genoError)
    LONG_DOUBLEPARAMETER("minAF",&args.minAF)
    LONG_DOUBLEPARAMETER("minCallRate",&args.minCallRate)

    LONG_PARAMETER_GROUP("Individuals to compare with chip data")
    EXCLUSIVE_PARAMETER("site",&args.bSiteOnly)
    EXCLUSIVE_PARAMETER("self",&args.bSelfOnly)
    EXCLUSIVE_PARAMETER("best",&args.bFindBest)

    LONG_PARAMETER_GROUP("Chip-free optimization options")
    EXCLUSIVE_PARAMETER("free-none",&args.bFreeNone)
    EXCLUSIVE_PARAMETER("free-mix",&args.bFreeMixOnly)
    EXCLUSIVE_PARAMETER("free-refBias",&args.bFreeRefBiasOnly)
    EXCLUSIVE_PARAMETER("free-full",&args.bFreeFull)

    LONG_PARAMETER_GROUP("With-chip optimization options")
    EXCLUSIVE_PARAMETER("chip-none",&args.bChipNone)
    EXCLUSIVE_PARAMETER("chip-mix",&args.bChipMixOnly)
    EXCLUSIVE_PARAMETER("chip-refBias",&args.bChipRefBiasOnly)
    EXCLUSIVE_PARAMETER("chip-full",&args.bChipFull)

    LONG_PARAMETER_GROUP("BAM analysis options")
    LONG_PARAMETER("ignoreRG",&args.bIgnoreRG)
    LONG_PARAMETER("ignoreOverlapPair",&args.bIgnoreOverlapPair)
    LONG_PARAMETER("noEOF",&args.bNoEOF)
    LONG_PARAMETER("precise",&args.bPrecise)
    LONG_INTPARAMETER("minMapQ",&args.minMapQ)
    LONG_INTPARAMETER("maxDepth",&args.maxDepth)
    LONG_INTPARAMETER("minQ",&args.minQ)
    LONG_INTPARAMETER("maxQ",&args.maxQ)
    LONG_DOUBLEPARAMETER("grid",&args.grid)

    LONG_PARAMETER_GROUP("Modeling Reference Bias")
    LONG_DOUBLEPARAMETER("refRef",&args.pRefRef)
    LONG_DOUBLEPARAMETER("refHet",&args.pRefHet)
    LONG_DOUBLEPARAMETER("refAlt",&args.pRefAlt)

    LONG_PARAMETER_GROUP("Output options")
    LONG_STRINGPARAMETER("out",&args.sOutFile)
    LONG_PARAMETER("verbose",&args.bVerbose)
    LONG_PHONEHOME(VERSION)
  END_LONG_PARAMETERS();

  pl.Add(new LongParameters("Available Options",longParameters));
  pl.Read(argc, argv);
  pl.Status();

  // check the validity of input files
  if ( args.sVcfFile.IsEmpty() ) {
    error("--vcf [vcf file] required");
  }

  if ( args.sBamFile.IsEmpty() ) {
    error("--bam [bam file] is required");
  }

  if ( args.sOutFile.IsEmpty() ) {
    error("--out [output prefix] is required");
  }
  Logger::gLogger = new Logger((args.sOutFile + ".log").c_str(), args.bVerbose);

  if ( ! ( args.bSiteOnly || args.bSelfOnly || args.bFindBest ) ) {
    warning("--self option was autotomatically turned on by default. Specify --best option if you wanted to check across all possible samples in the VCF");
    args.bSelfOnly = true;
  }

  if ( ( args.maxDepth > 20 ) && ( !args.bPrecise ) ) {
    warning("--precise option is not turned on at --maxDepth %d : may be prone to precision errors",args.maxDepth);
  }

  if ( ( args.bChipRefBiasOnly ) && ( !args.bSelfOnly ) ) {
    error("--self must be set for --chip-refBias to work. Skipping..");
  }

  // check timestamp
  time_t t;
  time(&t);
  Logger::gLogger->writeLog("Analysis started on %s",ctime(&t));

  // load arguments
  VerifyBamID vbid(&args);

  // load input VCF and BAM files
  Logger::gLogger->writeLog("Opening Input Files");
  vbid.loadFiles(args.sBamFile.c_str(), args.sVcfFile.c_str());

  // Check which genotype-free method is used
  if ( args.bFreeNone ) {  // if no genotype-free mode is tested. skip it
    // do nothing for genotype-free estimation
    Logger::gLogger->writeLog("Skipping chip-free estimation of sample mixture");
  }
  else if ( args.bFreeMixOnly ) { // only mixture is estimated.
    // genotype-free method
    Logger::gLogger->writeLog("Performing chip-free estimation of sample mixture at fixed reference bias parameters (%lf, %lf, %lf)",args.pRefRef,args.pRefHet,args.pRefAlt);

    // scan across multiple readgroups
    for(int rg=-1; rg < vbid.nRGs - (int)args.bIgnoreRG; ++rg) {
      VerifyBamID::mixLLK mix(&vbid);
      mix.OptimizeLLK(rg);
      Logger::gLogger->writeLog("Optimal per-sample fMix = %lf, LLK0 = %lf, LLK1 = %lf\n",mix.fMix,mix.llk0,mix.llk1);
      vbid.mixOut.llk0s[rg+1] = mix.llk0;
      vbid.mixOut.llk1s[rg+1] = mix.llk1;
      vbid.mixOut.fMixs[rg+1] = mix.fMix;
    }

    //vbid.mixRefHet = 0.5;
    //vbid.mixRefAlt = 0.00;
  }
  else if ( args.bFreeRefBiasOnly ) {
    Logger::gLogger->writeLog("Performing chip-free estimation of reference-bias without sample mixture");
    for(int rg=-1; rg < vbid.nRGs - (int)args.bIgnoreRG; ++rg) {
      VerifyBamID::refBiasMixLLKFunc myFunc(&vbid, rg);
      AmoebaMinimizer myMinimizer;
      Vector startingPoint(2);
      startingPoint[0] = 0;      // pRefHet = 0.5
      startingPoint[1] = -4.595; // pRefAlt = 0.01
      myMinimizer.func = &myFunc;
      myMinimizer.Reset(2);
      myMinimizer.point = startingPoint;
      myMinimizer.Minimize(1e-6);
      double pRefHet = VerifyBamID::invLogit(myMinimizer.point[0]);
      double pRefAlt = VerifyBamID::invLogit(myMinimizer.point[1]);
      Logger::gLogger->writeLog("Reference Bias Estimated as ( Pr[refBase|HET] = %lf, Pr[refBase|ALT] = %lf) with LLK = %lf at readGroup %d",pRefHet,pRefAlt,myMinimizer.fmin,rg);
      //vbid.setRefBiasParams(1.0, pRefHet, pRefAlt);

      vbid.mixOut.llk0s[rg+1] = myFunc.llk0;
      vbid.mixOut.llk1s[rg+1] = myFunc.llk1;
      vbid.mixOut.refHets[rg+1] = myFunc.pRefHet;
      vbid.mixOut.refAlts[rg+1] = myFunc.pRefAlt;
    }
  }
  else if ( args.bFreeFull ) {
    Logger::gLogger->writeLog("Performing chip-free estimation of reference-bias and sample mixture together");
    for(int rg = -1; rg < vbid.nRGs - args.bIgnoreRG; ++rg) {
      VerifyBamID::fullMixLLKFunc myFunc(&vbid, rg);
      AmoebaMinimizer myMinimizer;
      Vector startingPoint(3);
      startingPoint[0] = -3.91;  // start with fMix = 0.01
      startingPoint[1] = 0;      // pRefHet = 0.5
      startingPoint[2] = -4.595; // pRefAlt = 0.01
      myMinimizer.func = &myFunc;
      myMinimizer.Reset(3);
      myMinimizer.point = startingPoint;
      myMinimizer.Minimize(1e-6);
      double fMix = VerifyBamID::invLogit(myMinimizer.point[0]);
      if ( fMix > 0.5 ) 
	fMix = 1.-fMix;
      double pRefHet = VerifyBamID::invLogit(myMinimizer.point[1]);
      double pRefAlt = VerifyBamID::invLogit(myMinimizer.point[2]);
      Logger::gLogger->writeLog("Optimal per-sample fMix = %lf\n",fMix);
      Logger::gLogger->writeLog("Reference Bias Estimated as ( Pr[refBase|HET] = %lf, Pr[refBase|ALT] = %lf) with LLK = %lf",pRefHet,pRefAlt,myMinimizer.fmin);
      //vbid.setRefBiasParams(1.0, pRefHet, pRefAlt);

      vbid.mixOut.llk0s[rg+1] = myFunc.llk0;
      vbid.mixOut.llk1s[rg+1] = myFunc.llk1;
      vbid.mixOut.fMixs[rg+1] = myFunc.fMix;
      vbid.mixOut.refHets[rg+1] = myFunc.pRefHet;
      vbid.mixOut.refAlts[rg+1] = myFunc.pRefAlt;
    }
  }
  Logger::gLogger->writeLog("calculating depth distribution");  
  vbid.calculateDepthDistribution(args.maxDepth, vbid.mixOut);

  Logger::gLogger->writeLog("finished calculating depth distribution");  

  std::vector<int> bestInds(vbid.nRGs+1,-1);
  std::vector<int> selfInds(vbid.nRGs+1,-1);

  if ( args.bChipNone ) {
    // do nothing
    Logger::gLogger->writeLog("Skipping with-chip estimation of sample mixture");
  }
  else if ( args.bChipMixOnly ) {
    Logger::gLogger->writeLog("Performing with-chip estimation of sample mixture at fixed reference bias parameter (%lf, %lf, %lf)",args.pRefRef,args.pRefHet,args.pRefAlt);
    
    for(int rg=-1; rg < (vbid.nRGs - (int)args.bIgnoreRG); ++rg) {
      double maxIBD = -1;
      VerifyBamID::ibdLLK ibd(&vbid);
      for(int i=0; i < (int)vbid.pGenotypes->indids.size(); ++i) {
	double fIBD = ibd.OptimizeLLK(i, rg);
	Logger::gLogger->writeLog("Comparing with individual %s.. Optimal fIBD = %lf, LLK0 = %lf, LLK1 = %lf for readgroup %d",vbid.pGenotypes->indids[i].c_str(),fIBD, ibd.llk0, ibd.llk1, rg);
	if ( maxIBD < fIBD ) {
	  bestInds[rg+1] = i;
	  vbid.bestOut.llk0s[rg+1] = ibd.llk0;
	  vbid.bestOut.llk1s[rg+1] = ibd.llk1;
	  vbid.bestOut.fMixs[rg+1] = 1-ibd.fIBD;
	  maxIBD = ibd.fIBD;
	}

	if ( ( (rg < 0) && (vbid.pPile->sBamSMID == vbid.pGenotypes->indids[i] ) ) || ( ( rg >= 0 ) && ( vbid.pPile->vsSMIDs[rg] == vbid.pGenotypes->indids[i]) ) ) {
	  selfInds[rg+1] = i;
	  vbid.selfOut.llk0s[rg+1] = ibd.llk0;
	  vbid.selfOut.llk1s[rg+1] = ibd.llk1;
	  vbid.selfOut.fMixs[rg+1] = 1-ibd.fIBD;
	}
      }

      if ( bestInds[rg+1] >= 0 ) {
	Logger::gLogger->writeLog("Best Matching Individual is %s with IBD = %lf",vbid.pGenotypes->indids[bestInds[rg+1]].c_str(),maxIBD);
	vbid.calculateDepthByGenotype(bestInds[rg+1],rg,vbid.bestOut);
      }

      if ( selfInds[rg+1] >= 0 ) {
	Logger::gLogger->writeLog("Self Individual is %s with IBD = %lf",vbid.pGenotypes->indids[selfInds[rg+1]].c_str(),vbid.selfOut.fMixs[rg+1]);
	vbid.calculateDepthByGenotype(selfInds[rg+1],rg,vbid.selfOut);
      }
    }
  }
  else if ( args.bChipRefBiasOnly ) {
    Logger::gLogger->writeLog("Performing with-chip estimation of reference-bias without sample mixture");
    if ( args.bSelfOnly ) {
      for(int rg=-1; rg < (vbid.nRGs - (int)args.bIgnoreRG); ++rg) {
	VerifyBamID::refBiasIbdLLKFunc myFunc(&vbid, rg);
	AmoebaMinimizer myMinimizer;
	Vector startingPoint(2);
	startingPoint[0] = 0;      // pRefHet = 0.5
	startingPoint[1] = -4.595; // pRefAlt = 0.01
	myMinimizer.func = &myFunc;
	myMinimizer.Reset(2);
	myMinimizer.point = startingPoint;
	myMinimizer.Minimize(1e-6);
	double pRefHet = VerifyBamID::invLogit(myMinimizer.point[0]);
	double pRefAlt = VerifyBamID::invLogit(myMinimizer.point[1]);
	Logger::gLogger->writeLog("Reference Bias Estimated as ( Pr[refBase|HET] = %lf, Pr[refBase|ALT] = %lf) with LLK = %lf",pRefHet,pRefAlt,myMinimizer.fmin);
	//vbid.setRefBiasParams(1.0, pRefHet, pRefAlt);

	vbid.selfOut.llk0s[rg+1] = myFunc.llk0;
	vbid.selfOut.llk1s[rg+1] = myFunc.llk1;
	vbid.selfOut.refHets[rg+1] = myFunc.pRefHet;
	vbid.selfOut.refAlts[rg+1] = myFunc.pRefAlt;
	vbid.calculateDepthByGenotype(0,rg,vbid.selfOut);
      }
    }
    else {
      Logger::gLogger->warning("--self must be set for --chip-refBias to work. Skipping..");
    }
  }
  else if ( args.bChipFull ) {
    Logger::gLogger->writeLog("Performing with-chip estimation of reference-bias and sample mixture together");
    for(int rg=-1; rg < (vbid.nRGs - (int)args.bIgnoreRG); ++rg) {
      double maxIBD = -1;

      for(int i=0; i < (int)vbid.pGenotypes->indids.size(); ++i) {
	VerifyBamID::fullIbdLLKFunc myFunc(&vbid,i,rg);
	AmoebaMinimizer myMinimizer;
	Vector startingPoint(3);
	startingPoint[0] = 3.91;  // start with fIBD = 0.99
	startingPoint[1] = 0;      // pRefHet = 0.5
	startingPoint[2] = -4.595; // pRefAlt = 0.01
	myMinimizer.func = &myFunc;

	myFunc.indIdx = i;
	myMinimizer.Reset(3);
	myMinimizer.point = startingPoint;
	myMinimizer.Minimize(1e-6);
	double fIBD = VerifyBamID::invLogit(myMinimizer.point[0]);
	double pRefHet = VerifyBamID::invLogit(myMinimizer.point[1]);
	double pRefAlt = VerifyBamID::invLogit(myMinimizer.point[2]);

	Logger::gLogger->writeLog("Comparing with individual %s.. Optimal fIBD = %lf, LLK0 = %lf, LLK1 = %lf for readgroup %d",vbid.pGenotypes->indids[i].c_str(), fIBD, myFunc.llk0, myFunc.llk1, rg);
	//Logger::gLogger->writeLog("Optimal per-sample fIBD = %lf, ",fIBD);
	Logger::gLogger->writeLog("Reference Bias Estimated as ( Pr[refBase|HET] = %lf, Pr[refBase|ALT] = %lf ) with LLK = %lf",pRefHet,pRefAlt,myMinimizer.fmin);
	if ( maxIBD < fIBD ) {
	  bestInds[rg+1] = i;
	  maxIBD = fIBD;
	  vbid.bestOut.llk0s[rg+1] = myFunc.llk0;
	  vbid.bestOut.llk1s[rg+1] = myFunc.llk1;
	  vbid.bestOut.fMixs[rg+1] = 1.-myFunc.fIBD;
	  vbid.bestOut.refHets[rg+1] = myFunc.pRefHet;
	  vbid.bestOut.refAlts[rg+1] = myFunc.pRefAlt;
	}

	if ( ( (rg < 0) && (vbid.pPile->sBamSMID == vbid.pGenotypes->indids[i] ) ) || ( ( rg >= 0 ) && ( vbid.pPile->vsSMIDs[rg] == vbid.pGenotypes->indids[i]) ) ) {
	  selfInds[rg+1] = i;
	  vbid.selfOut.llk0s[rg+1] = myFunc.llk0;
	  vbid.selfOut.llk1s[rg+1] = myFunc.llk1;
	  vbid.selfOut.fMixs[rg+1] = 1.-myFunc.fIBD;
	  vbid.selfOut.refHets[rg+1] = myFunc.pRefHet;
	  vbid.selfOut.refAlts[rg+1] = myFunc.pRefAlt;
	  vbid.calculateDepthByGenotype(i, rg, vbid.selfOut);
	}
      }
      //vbid.setRefBiasParams(1.0, pRefHet, pRefAlt);
      if ( bestInds[rg+1] >= 0 ) {
	Logger::gLogger->writeLog("Best Matching Individual is %s with IBD = %lf",vbid.pGenotypes->indids[bestInds[rg+1]].c_str(),maxIBD);
	vbid.calculateDepthByGenotype(bestInds[rg+1], rg, vbid.bestOut);
      }

      if ( selfInds[rg+1] >= 0 ) {
	Logger::gLogger->writeLog("Self Individual is %s with IBD = %lf",vbid.pGenotypes->indids[selfInds[rg+1]].c_str(),vbid.selfOut.fMixs[rg+1]);
	vbid.calculateDepthByGenotype(selfInds[rg+1],rg,vbid.selfOut);
      }
    }
  }

  // PRINT OUTPUT FILE - ".selfSM"
  // [SEQ_ID]  : SAMPLE ID in the sequence file
  // [CHIP_ID] : SAMPLE ID in the chip file (NA if not available)
  // [#SNPS] : Number of markers evaluated
  // [#READS]   : Number of reads evaluated
  // [AVG_DP]   : Mean depth
  // [FREEMIX]  : Chip-free estimated alpha (% MIX in 0-1 scale), NA if unavailable
  // [FREELK1]  : Chip-free log-likelihood at estimated alpha
  // [FREELK0]  : Chip-free log-likelihood at 0% contamination
  // [CHIPIBD]  : With-chip estimated alpha (% MIX in 0-1 scale)
  // [CHIPLK1]  : With-chip log-likelihood at estimated alpha
  // [CHIPLK0]  : With-chip log-likelihood at 0% contamination
  // [DPREF]    : Depth at reference site in the chip
  // [RDPHET]   : Relative depth at HET site in the chip
  // [RDPALT]   : Relative depth at HOMALT site in the chip
  // [FREE_RF]  : Pr(Ref|Ref) site estimated without chip data
  // [FREE_RH]  : Pr(Ref|Het) site estimated without chip data
  // [FREE_RA]  : Pr(Ref|Alt) site estimated without chip data
  // [CHIP_RF]  : Pr(Ref|Ref) site estimated with chip data
  // [CHIP_RH]  : Pr(Ref|Het) site estimated with chip data
  // [CHIP_RA]  : Pr(Ref|Alt) site estimated with chip data
  // [DPREF]    : Depth at reference alleles
  // [RDPHET]   : Relative depth at heterozygous alleles
  // [RDPALT]   : Relative depth at hom-alt alleles

  String selfSMFN = args.sOutFile + ".selfSM";
  String bestSMFN = args.sOutFile + ".bestSM";
  String selfRGFN = args.sOutFile + ".selfRG";
  String bestRGFN = args.sOutFile + ".bestRG";
  String dpSMFN = args.sOutFile + ".depthSM";
  String dpRGFN = args.sOutFile + ".depthRG";

  IFILE selfSMF = ifopen(selfSMFN,"wb");
  IFILE bestSMF = (args.bFindBest ? ifopen(bestSMFN,"wb") : NULL);
  IFILE selfRGF = (args.bIgnoreRG ? NULL : ifopen(selfRGFN,"wb"));
  IFILE bestRGF = (args.bFindBest && !args.bIgnoreRG) ? ifopen(bestRGFN,"wb") : NULL;

  IFILE dpSMF = ifopen(dpSMFN,"wb");
  IFILE dpRGF = (args.bIgnoreRG ? NULL : ifopen(dpRGFN,"wb"));
  if ( selfSMF == NULL ) {
    Logger::gLogger->error("Cannot write to %s",selfSMF);
  }
  if ( args.bFindBest && ( bestSMF == NULL ) ) {
    Logger::gLogger->error("Cannot write to %s",bestSMF);
  }
  if ( dpSMF == NULL ) {
    Logger::gLogger->error("Cannot write to %s",dpSMF);
  }

  ifprintf(dpSMF,"#RG\tDEPTH\t#SNPs\t%%SNPs\t%%CUMUL\n");
  int nCumMarkers = 0;
  for(int i=args.maxDepth; i >= 0; --i) {
    nCumMarkers += vbid.mixOut.depths[i];
    ifprintf(dpSMF,"ALL\t%d\t%d\t%.5lf\t%.5lf\n",i, vbid.mixOut.depths[i],(double) vbid.mixOut.depths[i]/(double)vbid.nMarkers,(double)nCumMarkers/(double)vbid.nMarkers);
  }
  ifclose(dpSMF);


  if ( dpRGF != NULL ) {
    ifprintf(dpRGF,"#RG\tDEPTH\t#SNPs\t%%SNPs\t%%CUMUL\n");
    for(int rg=0; rg < (vbid.nRGs - (int)args.bIgnoreRG); ++rg) {
      const char* rgID = vbid.pPile->vsRGIDs[rg].c_str();

      int nMarkers = 0;
      for(int i=args.maxDepth; i >= 0; --i) {
	nMarkers += vbid.mixOut.depths[(rg+1)*(args.maxDepth+1) + i];
      }

      nCumMarkers = 0;
      for(int i=args.maxDepth; i >= 0; --i) {
	int d = vbid.mixOut.depths[(rg+1)*(args.maxDepth+1) + i];
	nCumMarkers += d;
	ifprintf(dpRGF,"%s\t%d\t%d\t%.5lf\t%.5lf\n",rgID,i,d,(double)d/(double)vbid.nMarkers,(double)nCumMarkers/(double)nMarkers);
      }
    }
    ifclose(dpRGF);
  }

  const char* headers[] = {"#SEQ_ID","RG","CHIP_ID","#SNPS","#READS","AVG_DP","FREEMIX","FREELK1","FREELK0","FREE_RH","FREE_RA","CHIPMIX","CHIPLK1","CHIPLK0","CHIP_RH","CHIP_RA","DPREF","RDPHET","RDPALT"};
  int nheaders = sizeof(headers)/sizeof(headers[0]);

  for(int i=0; i < nheaders; ++i) { ifprintf(selfSMF,"%s%s",i>0 ? "\t" : "",headers[i]); }
  ifprintf(selfSMF,"\n");
  ifprintf(selfSMF,"%s\tALL",vbid.pPile->sBamSMID.c_str());
  ifprintf(selfSMF,"\t%s",selfInds[0] >= 0 ? vbid.pGenotypes->indids[selfInds[0]].c_str() : "NA");
  ifprintf(selfSMF,"\t%d\t%d\t%.2lf",vbid.nMarkers,vbid.mixOut.numReads[0],(double)vbid.mixOut.numReads[0]/(double)vbid.nMarkers);
  if ( args.bFreeNone ) { ifprintf(selfSMF,"\tNA\tNA\tNA\tNA\tNA"); }
  else if ( args.bFreeMixOnly ) { ifprintf(selfSMF,"\t%.5lf\t%.2lf\t%.2lf\tNA\tNA",vbid.mixOut.fMixs[0],vbid.mixOut.llk1s[0],vbid.mixOut.llk0s[0]); }
  else if ( args.bFreeRefBiasOnly ) { ifprintf(selfSMF,"\tNA\t%.2lf\t%.2lf\t%.5lf\t%.5lf",vbid.mixOut.llk1s[0],vbid.mixOut.llk0s[0],vbid.mixOut.refHets[0],vbid.mixOut.refAlts[0]); }
  else if ( args.bFreeFull ) { ifprintf(selfSMF,"\t%.5lf\t%.2lf\t%.2lf\t%.5lf\t%.5lf",vbid.mixOut.fMixs[0],vbid.mixOut.llk1s[0],vbid.mixOut.llk0s[0],vbid.mixOut.refHets[0],vbid.mixOut.refAlts[0]); }
  else { error("Invalid option in handling bFree"); }

  if ( args.bChipNone || bestInds[0] < 0 ) { ifprintf(selfSMF,"\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA"); }
  else if ( args.bChipMixOnly ) { ifprintf(selfSMF,"\t%.5lf\t%.2lf\t%.2lf\tNA\tNA\t%.3lf\t%.4lf\t%.4lf",vbid.selfOut.fMixs[0],vbid.selfOut.llk1s[0],vbid.selfOut.llk0s[0],(double)vbid.selfOut.numReads[1]/vbid.selfOut.numGenos[1], (double)vbid.selfOut.numReads[2]*vbid.selfOut.numGenos[1]/vbid.selfOut.numReads[1]/vbid.selfOut.numGenos[2], (double)vbid.selfOut.numReads[3]*vbid.selfOut.numGenos[1]/vbid.selfOut.numReads[1]/vbid.selfOut.numGenos[3]); }
  else if ( args.bChipMixOnly ) { ifprintf(selfSMF,"\tNA\t%.2lf\t%.2lf\t%.5lf\t%.5lf\t%.3lf\t%.4lf\t%.4lf",vbid.selfOut.llk1s[0], vbid.selfOut.llk0s[0], vbid.selfOut.refHets[0], vbid.selfOut.refAlts[0], (double)vbid.selfOut.numReads[1]/vbid.selfOut.numGenos[1], (double)vbid.selfOut.numReads[2]*vbid.selfOut.numGenos[1]/vbid.selfOut.numReads[1]/vbid.selfOut.numGenos[2], (double)vbid.selfOut.numReads[3]*vbid.selfOut.numGenos[1]/vbid.selfOut.numReads[1]/vbid.selfOut.numGenos[3]); }
  else if ( args.bChipFull ) { ifprintf(selfSMF,"\t%.5lf\t%.2lf\t%.2lf\t%.5lf\t%.5lf\t%.3lf\t%.4lf\t%.4lf", vbid.selfOut.fMixs[0], vbid.selfOut.llk1s[0], vbid.selfOut.llk0s[0], vbid.selfOut.refHets[0], vbid.selfOut.refAlts[0], (double)vbid.selfOut.numReads[1]/vbid.selfOut.numGenos[1], (double)vbid.selfOut.numReads[2]*vbid.selfOut.numGenos[1]/vbid.selfOut.numReads[1]/vbid.selfOut.numGenos[2], (double)vbid.selfOut.numReads[3]*vbid.selfOut.numGenos[1]/vbid.selfOut.numReads[1]/vbid.selfOut.numGenos[3]); }
  else { error("Invalid option in handling bChip"); }
  ifprintf(selfSMF,"\n");
  ifclose(selfSMF);

  if ( bestSMF != NULL ) {
    for(int i=0; i < nheaders; ++i) { ifprintf(bestSMF,"%s%s",i>0 ? "\t" : "",headers[i]); }
    ifprintf(bestSMF,"\n");
    ifprintf(bestSMF,"%s\tALL",vbid.pPile->sBamSMID.c_str());
    ifprintf(bestSMF,"\t%s",bestInds[0] >= 0 ? vbid.pGenotypes->indids[bestInds[0]].c_str() : "NA");
    ifprintf(bestSMF,"\t%d\t%d\t%.2lf",vbid.nMarkers,vbid.mixOut.numReads[0],(double)vbid.mixOut.numReads[0]/(double)vbid.nMarkers);
    if ( args.bFreeNone ) { ifprintf(bestSMF,"\tNA\tNA\tNA\tNA\tNA"); }
    else if ( args.bFreeMixOnly ) { ifprintf(bestSMF,"\t%.5lf\t%.2lf\t%.2lf\tNA\tNA",vbid.mixOut.fMixs[0],vbid.mixOut.llk1s[0],vbid.mixOut.llk0s[0]); }
    else if ( args.bFreeRefBiasOnly ) { ifprintf(bestSMF,"\tNA\t%.2lf\t%.2lf\t%.5lf\t%.5lf",vbid.mixOut.llk1s[0],vbid.mixOut.llk0s[0],vbid.mixOut.refHets[0],vbid.mixOut.refAlts[0]); }
    else if ( args.bFreeFull ) { ifprintf(bestSMF,"\t%.5lf\t%.2lf\t%.2lf\t%.5lf\t%.5lf",vbid.mixOut.fMixs[0],vbid.mixOut.llk1s[0],vbid.mixOut.llk0s[0],vbid.mixOut.refHets[0],vbid.mixOut.refAlts[0]); }
    else { error("Invalid option in handling bFree"); }
    
    if ( args.bChipNone || bestInds[0] < 0 ) { ifprintf(bestSMF,"\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA"); }
    else if ( args.bChipMixOnly ) { ifprintf(bestSMF,"\t%.5lf\t%.2lf\t%.2lf\tNA\tNA\t%.3lf\t%.4lf\t%.4lf",vbid.bestOut.fMixs[0],vbid.bestOut.llk1s[0],vbid.bestOut.llk0s[0],(double)vbid.bestOut.numReads[1]/vbid.bestOut.numGenos[1], (double)vbid.bestOut.numReads[2]*vbid.bestOut.numGenos[1]/vbid.bestOut.numReads[1]/vbid.bestOut.numGenos[2], (double)vbid.bestOut.numReads[3]*vbid.bestOut.numGenos[1]/vbid.bestOut.numReads[1]/vbid.bestOut.numGenos[3]); }
    else if ( args.bChipMixOnly ) { ifprintf(bestSMF,"\tNA\t%.2lf\t%.2lf\t%.5lf\t%.5lf\t%.3lf\t%.4lf\t%.4lf",vbid.bestOut.llk1s[0], vbid.bestOut.llk0s[0], vbid.bestOut.refHets[0], vbid.bestOut.refAlts[0], (double)vbid.bestOut.numReads[1]/vbid.bestOut.numGenos[1], (double)vbid.bestOut.numReads[2]*vbid.bestOut.numGenos[1]/vbid.bestOut.numReads[1]/vbid.bestOut.numGenos[2], (double)vbid.bestOut.numReads[3]*vbid.bestOut.numGenos[1]/vbid.bestOut.numReads[1]/vbid.bestOut.numGenos[3]); }
    else if ( args.bChipFull ) { ifprintf(bestSMF,"\t%.5lf\t%.2lf\t%.2lf\t%.5lf\t%.5lf\t%.3lf\t%.4lf\t%.4lf", vbid.bestOut.fMixs[0], vbid.bestOut.llk1s[0], vbid.bestOut.llk0s[0], vbid.bestOut.refHets[0], vbid.bestOut.refAlts[0], (double)vbid.bestOut.numReads[1]/vbid.bestOut.numGenos[1], (double)vbid.bestOut.numReads[2]*vbid.bestOut.numGenos[1]/vbid.bestOut.numReads[1]/vbid.bestOut.numGenos[2], (double)vbid.bestOut.numReads[3]*vbid.bestOut.numGenos[1]/vbid.bestOut.numReads[1]/vbid.bestOut.numGenos[3]); }
    else { error("Invalid option in handling bChip"); }
    ifprintf(bestSMF,"\n");
    ifclose(bestSMF);
  }

  if ( selfRGF != NULL ) {
    for(int i=0; i < nheaders; ++i) { ifprintf(selfRGF,"%s%s",i>0 ? "\t" : "",headers[i]); }
    ifprintf(selfRGF,"\n");
    for(int rg=0; rg < vbid.nRGs; ++rg) {
      ifprintf(selfRGF,"%s\t%s",vbid.pPile->sBamSMID.c_str(),vbid.pPile->vsRGIDs[rg].c_str());
      ifprintf(selfRGF,"\t%s",bestInds[rg] >= 0 ? vbid.pGenotypes->indids[bestInds[rg]].c_str() : "NA");
      ifprintf(selfRGF,"\t%d\t%d\t%.2lf",vbid.nMarkers,vbid.mixOut.numReads[(rg+1)*4],(double)vbid.mixOut.numReads[(rg+1)*4]/(double)vbid.mixOut.numGenos[(rg+1)*4]);
      if ( args.bFreeNone ) { ifprintf(selfRGF,"\tNA\tNA\tNA\tNA\tNA"); }
      else if ( args.bFreeMixOnly ) { ifprintf(selfRGF,"\t%.5lf\t%.2lf\t%.2lf\tNA\tNA",vbid.mixOut.fMixs[rg+1],vbid.mixOut.llk1s[rg+1],vbid.mixOut.llk0s[rg+1]); }
      else if ( args.bFreeRefBiasOnly ) { ifprintf(selfRGF,"\tNA\t%.2lf\t%.2lf\t%.5lf\t%.5lf",vbid.mixOut.llk1s[rg+1],vbid.mixOut.llk0s[rg+1],vbid.mixOut.refHets[rg+1],vbid.mixOut.refAlts[rg+1]); }
      else if ( args.bFreeFull ) { ifprintf(selfRGF,"\t%.5lf\t%.2lf\t%.2lf\t%.5lf\t%.5lf",vbid.mixOut.fMixs[rg+1],vbid.mixOut.llk1s[rg+1],vbid.mixOut.llk0s[rg+1],vbid.mixOut.refHets[rg+1],vbid.mixOut.refAlts[rg+1]); }
      else { error("Invalid option in handling bFree"); }
      
      if ( args.bChipNone || bestInds[0] < 0 ) { ifprintf(selfRGF,"\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA"); }
      else if ( args.bChipMixOnly ) { ifprintf(selfRGF,"\t%.5lf\t%.2lf\t%.2lf\tNA\tNA\t%.3lf\t%.4lf\t%.4lf",vbid.selfOut.fMixs[rg+1], vbid.selfOut.llk1s[rg+1], vbid.selfOut.llk0s[rg+1], (double)vbid.selfOut.numReads[(rg+1)*4+1]/vbid.selfOut.numGenos[(rg+1)*4+1], (double)vbid.selfOut.numReads[(rg+1)*4+2]*vbid.selfOut.numGenos[(rg+1)*4+1]/vbid.selfOut.numReads[(rg+1)*4+1]/vbid.selfOut.numGenos[(rg+1)*4+2], (double)vbid.selfOut.numReads[(rg+1)*4+3]*vbid.selfOut.numGenos[(rg+1)*4+1]/vbid.selfOut.numReads[(rg+1)*4+1]/vbid.selfOut.numGenos[(rg+1)*4+3]); }
      else if ( args.bChipMixOnly ) { ifprintf(selfRGF,"\tNA\t%.2lf\t%.2lf\t%.5lf\t%.5lf\t%.3lf\t%.4lf\t%.4lf",vbid.selfOut.llk1s[rg+1], vbid.selfOut.llk0s[rg+1], vbid.selfOut.refHets[rg+1], vbid.selfOut.refAlts[rg+1], (double)vbid.selfOut.numReads[(rg+1)*4+1]/vbid.selfOut.numGenos[(rg+1)*4+1], (double)vbid.selfOut.numReads[(rg+1)*4+2]*vbid.selfOut.numGenos[(rg+1)*4+1]/vbid.selfOut.numReads[(rg+1)*4]/vbid.selfOut.numGenos[(rg+1)*4+2], (double)vbid.selfOut.numReads[(rg+1)*4+3]*vbid.selfOut.numGenos[(rg+1)*4+1]/vbid.selfOut.numReads[(rg+1)*4+1]/vbid.selfOut.numGenos[(rg+1)*4+3]); }
      else if ( args.bChipFull ) { ifprintf(selfRGF,"\t%.5lf\t%.2lf\t%.2lf\t%.5lf\t%.5lf\t%.3lf\t%.4lf\t%.4lf", vbid.selfOut.fMixs[rg+1], vbid.selfOut.llk1s[rg+1], vbid.selfOut.llk0s[rg+1], vbid.selfOut.refHets[rg+1], vbid.selfOut.refAlts[rg+1], (double)vbid.selfOut.numReads[(rg+1)*4+1]/vbid.selfOut.numGenos[(rg+1)*4+1], (double)vbid.selfOut.numReads[(rg+1)*4+2]*vbid.selfOut.numGenos[(rg+1)*4+1]/vbid.selfOut.numReads[(rg+1)*4+1]/vbid.selfOut.numGenos[(rg+1)*4+2], (double)vbid.selfOut.numReads[(rg+1)*4+3]*vbid.selfOut.numGenos[(rg+1)*4+1]/vbid.selfOut.numReads[(rg+1)*4+1]/vbid.selfOut.numGenos[(rg+1)*4+3]); }
      else { error("Invalid option in handling bChip"); }
      ifprintf(selfRGF,"\n");
    }
    ifclose(selfRGF);
  }

  if ( bestRGF != NULL ) {
    for(int i=0; i < nheaders; ++i) { ifprintf(bestRGF,"%s%s",i>0 ? "\t" : "",headers[i]); }
    ifprintf(bestRGF,"\n");
    for(int rg=0; rg < vbid.nRGs; ++rg) {
      ifprintf(bestRGF,"%s\t%s",vbid.pPile->sBamSMID.c_str(),vbid.pPile->vsRGIDs[rg].c_str());
      ifprintf(bestRGF,"\t%s",bestInds[rg] >= 0 ? vbid.pGenotypes->indids[bestInds[rg]].c_str() : "NA");
      ifprintf(bestRGF,"\t%d\t%d\t%.2lf",vbid.nMarkers,vbid.mixOut.numReads[(rg+1)*4],(double)vbid.mixOut.numReads[(rg+1)*4]/(double)vbid.mixOut.numGenos[(rg+1)*4]);
      if ( args.bFreeNone ) { ifprintf(bestRGF,"\tNA\tNA\tNA\tNA\tNA"); }
      else if ( args.bFreeMixOnly ) { ifprintf(bestRGF,"\t%.5lf\t%.2lf\t%.2lf\tNA\tNA",vbid.mixOut.fMixs[rg+1],vbid.mixOut.llk1s[rg+1],vbid.mixOut.llk0s[rg+1]); }
      else if ( args.bFreeRefBiasOnly ) { ifprintf(bestRGF,"\tNA\t%.2lf\t%.2lf\t%.5lf\t%.5lf",vbid.mixOut.llk1s[rg+1],vbid.mixOut.llk0s[rg+1],vbid.mixOut.refHets[rg+1],vbid.mixOut.refAlts[rg+1]); }
      else if ( args.bFreeFull ) { ifprintf(bestRGF,"\t%.5lf\t%.2lf\t%.2lf\t%.5lf\t%.5lf",vbid.mixOut.fMixs[rg+1],vbid.mixOut.llk1s[rg+1],vbid.mixOut.llk0s[rg+1],vbid.mixOut.refHets[rg+1],vbid.mixOut.refAlts[rg+1]); }
      else { error("Invalid option in handling bFree"); }
      
      if ( args.bChipNone || bestInds[0] < 0 ) { ifprintf(bestRGF,"\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA"); }
      else if ( args.bChipMixOnly ) { ifprintf(bestRGF,"\t%.5lf\t%.2lf\t%.2lf\tNA\tNA\t%.3lf\t%.4lf\t%.4lf",vbid.bestOut.fMixs[rg+1], vbid.bestOut.llk1s[rg+1], vbid.bestOut.llk0s[rg+1], (double)vbid.bestOut.numReads[(rg+1)*4+1]/vbid.bestOut.numGenos[(rg+1)*4+1], (double)vbid.bestOut.numReads[(rg+1)*4+2]*vbid.bestOut.numGenos[(rg+1)*4+1]/vbid.bestOut.numReads[(rg+1)*4+1]/vbid.bestOut.numGenos[(rg+1)*4+2], (double)vbid.bestOut.numReads[(rg+1)*4+3]*vbid.bestOut.numGenos[(rg+1)*4+1]/vbid.bestOut.numReads[(rg+1)*4+1]/vbid.bestOut.numGenos[(rg+1)*4+3]); }
      else if ( args.bChipMixOnly ) { ifprintf(bestRGF,"\tNA\t%.2lf\t%.2lf\t%.5lf\t%.5lf\t%.3lf\t%.4lf\t%.4lf",vbid.bestOut.llk1s[rg+1], vbid.bestOut.llk0s[rg+1], vbid.bestOut.refHets[rg+1], vbid.bestOut.refAlts[rg+1], (double)vbid.bestOut.numReads[(rg+1)*4+1]/vbid.bestOut.numGenos[(rg+1)*4+1], (double)vbid.bestOut.numReads[(rg+1)*4+2]*vbid.bestOut.numGenos[(rg+1)*4+1]/vbid.bestOut.numReads[(rg+1)*4]/vbid.bestOut.numGenos[(rg+1)*4+2], (double)vbid.bestOut.numReads[(rg+1)*4+3]*vbid.bestOut.numGenos[(rg+1)*4+1]/vbid.bestOut.numReads[(rg+1)*4+1]/vbid.bestOut.numGenos[(rg+1)*4+3]); }
      else if ( args.bChipFull ) { ifprintf(bestRGF,"\t%.5lf\t%.2lf\t%.2lf\t%.5lf\t%.5lf\t%.3lf\t%.4lf\t%.4lf", vbid.bestOut.fMixs[rg+1], vbid.bestOut.llk1s[rg+1], vbid.bestOut.llk0s[rg+1], vbid.bestOut.refHets[rg+1], vbid.bestOut.refAlts[rg+1], (double)vbid.bestOut.numReads[(rg+1)*4+1]/vbid.bestOut.numGenos[(rg+1)*4+1], (double)vbid.bestOut.numReads[(rg+1)*4+2]*vbid.bestOut.numGenos[(rg+1)*4+1]/vbid.bestOut.numReads[(rg+1)*4+1]/vbid.bestOut.numGenos[(rg+1)*4+2], (double)vbid.bestOut.numReads[(rg+1)*4+3]*vbid.bestOut.numGenos[(rg+1)*4+1]/vbid.bestOut.numReads[(rg+1)*4+1]/vbid.bestOut.numGenos[(rg+1)*4+3]); }
      else { error("Invalid option in handling bChip"); }
      ifprintf(bestRGF,"\n");
    }
    ifclose(bestRGF);
  }
  
  time(&t);
  Logger::gLogger->writeLog("Analysis finished on %s",ctime(&t));

  return 0;
}
Пример #30
0
void main(int argc, char **argv)
{
        int  c;
        int  lib_opt = FALSE;
        int  errflg = 0;

        extern int  optind;
        extern char *optarg;
	char *libname,*fname;
	char *t_name;

#ifdef IBM_PC
	fprintf(stderr, "NYU Ada/Ed Librarian Version 1.11.2\n");
	fprintf(stderr, "Copyright (C) 1985-1992 by New York University.\n");
#endif

#ifndef IBM_PC
        while((c = getopt(argc,argv,"l:"))!=EOF) {
#else
        while((c = getopt(argc,argv,"L:l:"))!=EOF) {
                if (isupper(c)) c = tolower(c);
#endif
                switch(c) {
                case 'l': /* specify library name */
                        lib_opt = TRUE;
                        libname = strjoin(optarg,"");
                        break;
                case '?':
                        errflg++;
                }        
        }

	fname = (char *)0;
	if (optind < argc) fname = argv[optind];
        if (!lib_opt && fname == (char *)0) {
		fname = getenv("ADALIB");
		if (fname!= (char *)0) {
#ifdef IBM_PC
			fprintf(stderr, "L");
#else
			fprintf(stderr, "l");
#endif
			fprintf(stderr,"ibrary defined by ADALIB: %s\n", fname);
		}
	}
	if ((!lib_opt && fname == (char *)0) || errflg) {
	    fprintf(stderr, "Usage: adalib [-l library]\n");
	    exit(1);
	}
        if (!lib_opt) {
           libname = emalloc(strlen(fname) + 1);
           strcpy(libname, fname);
        }

	t_name = libset(libname);
	LIBFILE = ifopen(LIBFILENAME, "", "r", 0);

	load_library();
	exit(0);
}

static void load_library()								/*;load_library*/
{
	/*
	 * retrieve information from LIBFILE
	 * Called only if lib_option and not newlib.
	 */

	int		i, j, n, m, unumber, nodes, symbols;
	int		comp_status, unit_count, cur_level;
	char	*comp_date, *status_str, *uname, *aisname, *tmp_str;
	char	*main_string;
	int		is_main, empty_unit_slots, parent;
	int		ignore;


	unit_count = getnum(LIBFILE, "lib-unit-count");
	n = getnum(LIBFILE, "lib-n");
	empty_unit_slots = getnum(LIBFILE, "lib-empty-slots");
	tmp_str = getstr(LIBFILE, "lib-tmp-str");
	for (i = 1; i <= unit_count; i++) {
		uname = getstr(LIBFILE, "lib-unit-name");
		unumber = getnum(LIBFILE, "lib-unit-number");
		aisname = getstr(LIBFILE, "lib-ais-name");
		comp_date = getstr(LIBFILE, "unit-date");
		symbols = getnum(LIBFILE, "lib-symbols");
		nodes = getnum(LIBFILE, "lib-nodes");
		is_main = getnum(LIBFILE, "lib-is-main");
		if (is_main) {
			if (streq(unit_name_type(uname), "ma"))
				main_string = "(Interface)";
			else
				main_string = "  (Main)   ";
		}
		else {
			main_string = "";
		}
		comp_status = getnum(LIBFILE, "lib-status");
		status_str = (comp_status) ? "active  " : "obsolete";
		printf("%8s %11s %-15s %s\n",
		    status_str, main_string, convert_date(comp_date),
		    formatted_name(uname));
	}
	printf("\n");
	n = getnum(LIBFILE, "lib-n");
	if (n) {
		printf("stubs \n\n");
		for (i = 1; i <= n; i++) {
			uname = getstr(LIBFILE, "lib-unit-name");
			aisname = getstr(LIBFILE, "lib-ais-name");
			parent = getnum(LIBFILE, "lib-parent");
			cur_level = getnum(LIBFILE, "lib-cur-level");
			m = getnum(LIBFILE, "stub-file-size");
			for (j = 1; j <= m; j++)
				ignore = getnum(LIBFILE, "stub-file");
			printf("%s\n", formatted_stub(uname));
		}
		printf("\n");
	}
	ifclose(LIBFILE);
	return;
#ifdef TBSL
	n = getnum(LIBFILE, "precedes-map-size");
	printf("precedes map\n");
	for (i = 1; i <= n; i += 2) {
		dom = getnum(LIBFILE, "precedes-map-dom");
		m = getnum(LIBFILE, "precedes-map-nelt");
		printf("  %4d:", dom);
		for (j = 1; j <= m; j++) {
			range = getnum(LIBFILE, "precedes-map-ent");
			printf(" %4d", range);
		}
		printf("\n");
	}
	n = getnum(LIBFILE, "compilation_table_size");
	if (n) {
		printf("\ncompilation table\n");
		for (i = 1; i <= n; i++) {
			unum = (int) getnum(LIBFILE, "compilation-table-ent");
			printf("  %d\n", unum);
		}
		printf("\n");
	}
	/* late_instances */
	n = getnum(LIBFILE, "late-instances-size");
	if (n) {
		printf("late instances\n");
		for (i = 1; i <= n; i++) {
			str = (char *) getstr(LIBFILE, "late-instances-str");
			printf("  %s\n", str);
		}
	}
	/* current code segment */
	n = getnum(LIBFILE, "unit-size");
	printf("\ncurrent code segments\n");
	printf("  unit cs\n");
	for (i = 1; i <= n; i++) {
		cs = getnum(LIBFILE, "current-code-segment");
		if (cs) printf("   %d: %d\n", i, cs);
	}
	/* local reference maps */
	n = getnum(LIBFILE, "unit-size");
	get_local_ref_maps(LIBFILE, n);
	cde_pos = get_cde_slots(LIBFILE, axq);


	/* could free axq_data_slots, etc., but keep for now */
	/* read out LIB_STUB map (always empty for now) */
	ifclose(LIBFILE);
	return;
#endif
}