コード例 #1
0
ファイル: GlfFile.cpp プロジェクト: aminzia/statgen
// Opens the named GLF file for writing: BGZF-compressed when 'compressed'
// is true, uncompressed otherwise.
// Returns true on success.  When the file cannot be opened, myStatus is set
// to FAIL_IO with a descriptive message and a GlfException is thrown.
bool GlfFile::openForWrite(const char * filename, bool compressed)
{
    // Drop any state left over from a previously used file.
    resetFile();

    myFilePtr = compressed
        ? ifopen(filename, "wb", InputFile::BGZF)
        : ifopen(filename, "wb", InputFile::UNCOMPRESSED);

    if (myFilePtr != NULL)
    {
        // The file is open and ready to be written.
        myIsOpenForWrite = true;
        myStatus = GlfStatus::SUCCESS;
        return(true);
    }

    // Could not open: record the reason and report it via exception.
    std::string errorMessage("Failed to Open ");
    errorMessage.append(filename).append(" for writing");
    myStatus.setStatus(GlfStatus::FAIL_IO, errorMessage.c_str());
    throw(GlfException(myStatus));
}
コード例 #2
0
ファイル: StringTest.cpp プロジェクト: amarawi/gotcloud
// Exercises String::ReadLine and String::Trim against the fixture
// testFiles/testFile.txt, reading three lines in order.  The assertions
// expect the first line to carry two leading and two trailing spaces and
// the third line to read "ThirdLine." once trimmed.
void testReadLine()
{
    IFILE filePtr = ifopen("testFiles/testFile.txt", "rb");
    assert(filePtr != NULL);
    
    String line = "";
    line.ReadLine(filePtr);

    // First line is returned verbatim, surrounding whitespace included.
    assert(line == "  Hello, I am a testFile.  ");

    // Trim() is expected to strip the surrounding whitespace in place.
    line.Trim();
    assert(line == "Hello, I am a testFile.");


    // Does not compile in current version, but compiles in old version.
    // This can be added back in to ensure that it will catch the difference
    // in return value for ReadLine (now: int; used to be: string&)
    //    testMethod(line.ReadLine(filePtr));
    line.ReadLine(filePtr);
    // temp1 and testMethod are defined outside this function; the asserts
    // require temp1 to go from 0 to 1 across the testMethod(line) call.
    assert(temp1 == 0);
    testMethod(line);
    assert(temp1 == 1);

    // Old chained form, kept for reference; now done as two statements.
    //    line.ReadLine(filePtr).Trim();
    line.ReadLine(filePtr);
    line.Trim();

    assert(line == "ThirdLine.");

    ifclose(filePtr);
}
コード例 #3
0
ファイル: dhcpmgmt.c プロジェクト: 1stMaster/syslinux
/*
 * Configure a network device via DHCP.
 *
 * Opens the interface, waits for link-up, prints the device name and
 * hardware address, then starts DHCP on the monojob interface.
 *
 * Returns 0 on success (including when start_dhcp() reports a positive
 * value, which is announced as " using cached"), otherwise the non-zero
 * status from the step that failed.
 */
int dhcp ( struct net_device *netdev ) {
	uint8_t *addr_byte;
	uint8_t remaining;
	uint16_t flags;
	int rc;

	/* The interface must be openable before anything else */
	rc = ifopen ( netdev );
	if ( rc != 0 )
		return rc;

	/* Block until the link comes up (or the wait fails) */
	rc = iflinkwait ( netdev, LINK_WAIT_MS );
	if ( rc != 0 )
		return rc;

	/* Announce the device and its colon-separated hardware address;
	 * the final byte is followed by ')' instead of ':'.
	 */
	addr_byte = dhcp_chaddr ( netdev, &remaining, &flags );
	printf ( "DHCP (%s ", netdev->name );
	for ( ; remaining-- ; addr_byte++ )
		printf ( "%02x%c", *addr_byte, ( remaining ? ':' : ')' ) );

	/* Kick off DHCP and wait for it when it was actually started */
	rc = start_dhcp ( &monojob, netdev );
	if ( rc == 0 )
		return monojob_wait ( "" );

	if ( rc > 0 ) {
		printf ( " using cached\n" );
		return 0;
	}

	return rc;
}
コード例 #4
0
ファイル: StringHash.cpp プロジェクト: Griffan/FASTQuick
void StringHash::ReadLinesFromFile(const char * filename)
{
    IFILE f = ifopen(filename, "rb");
    if (f == NULL) return;
    ReadLinesFromFile(f);
    ifclose(f);
}
コード例 #5
0
ファイル: VcfFile.cpp プロジェクト: amarawi/gotcloud
// Opens a VCF file for reading and parses its meta lines and header.
//   filename - path of the file to open.
//   nbuf     - number of marker buffers to pre-allocate; 0 leaves the marker
//              list unsized (unbounded buffering).
// Throws VcfFileException when the file cannot be opened.
void VcfFile::openForRead(const char* filename, int nbuf) {
  reset();

  iFile = ifopen(filename,"rb");
  if ( iFile == NULL ) {
    throw VcfFileException("Failed opening file %s - %s",filename, strerror(errno));
  }

  nBuffers = nbuf;
  nNumMarkers = 0;
  nHead = 0;

  // A non-zero buffer count means a fixed pool of markers is created now.
  if ( nBuffers != 0 ) {
    vpVcfMarkers.resize( nBuffers );
    for(int slot = 0; slot < nBuffers; ++slot) {
      vpVcfMarkers[slot] = new VcfMarker;
    }
  }

  parseMeta();
  parseHeader();

  if ( bUpgrade ) {
    upgradeMetaLines();
  }
}
コード例 #6
0
ファイル: Imputation.cpp プロジェクト: luyi0629/Minimac3
// Writes the per-variant summary table to "<outFile>.info".
//
//   rHap  - reference panel; supplies the variant list, the print window
//           (PrintStartIndex..PrintEndIndex) and RefTypedIndex, which maps
//           each output row either to a reference variant (-1) or to an
//           index into the target's typed-only variants.
//   tHap  - target panel; supplies the missing flags and the typed-only
//           variant list and allele frequencies.
//   stats - accumulated imputation statistics, queried per reference variant.
//
// If the output file cannot be opened, an error is reported on stderr and
// nothing is written (previously success was announced regardless).
void Imputation::PrintInfoFile(HaplotypeSet &rHap,HaplotypeSet &tHap,  ImputationStatistics &stats)

{
    cout<<endl<<" Writing summary (.info) files ... "<<endl;
    IFILE info = ifopen(outFile + ".info", "wb");
    if(info == NULL)
    {
        cerr<<endl<<" ERROR: Could not open summary file for writing : "<<outFile<<".info"<<endl;
        return;
    }

    ifprintf(info, "SNP\tREF(0)\tALT(1)\tALT_Frq\tMAF\tAvgCall\tRsq\tGenotyped\tLooRsq\tEmpR\tEmpRsq\tDose0\tDose1\n");

    // 'i' walks the reference variants; it only advances on rows that
    // belong to the reference panel (RefTypedIndex == -1).
    int i=0;
    for (int index =0; index < rHap.RefTypedTotalCount; index++)
    {
        const int typedIndex = rHap.RefTypedIndex[index];

        if(typedIndex==-1)
        {
            // Only reference variants inside the print window are reported.
            if(i>=rHap.PrintStartIndex && i <= rHap.PrintEndIndex)
            {
                const double altFreq = stats.AlleleFrequency(i);

                ifprintf(info, "%s\t%s\t%s\t%.5f\t%.5f\t%.5f\t%.5f\t",
                RsId? rHap.VariantList[i].rsid.c_str(): rHap.VariantList[i].name.c_str(),
                rHap.VariantList[i].refAlleleString.c_str(),
                rHap.VariantList[i].altAlleleString.c_str(),
                altFreq,
                altFreq > 0.5 ? 1.0 - altFreq : altFreq,
                stats.AverageCallScore(i),
                stats.Rsq(i));

                if (!tHap.missing[i])
                {
                    // Genotyped in the target: leave-one-out statistics exist.
                    ifprintf(info, "Genotyped\t%.3f\t%.3f\t%.5f\t%.5f\t%.5f\n",
                      stats.LooRsq(i), stats.EmpiricalR(i), stats.EmpiricalRsq(i),
                      stats.LooMajorDose(i), stats.LooMinorDose(i));
                }
                else
                {
                    ifprintf(info, "Imputed\t-\t-\t-\t-\t-\n");
                }
            }
            i++;
        }
        else
        {
            // Variant present only in the target: no imputation statistics.
            const variant &typedVariant = tHap.TypedOnlyVariantList[typedIndex];
            const double typedFreq = tHap.AlleleFreq[typedIndex];

            ifprintf(info, "%s\t%s\t%s\t%.5f\t%.5f\t-\t-\tTyped_Only\t-\t-\t-\t-\t-\n",
            RsId? typedVariant.rsid.c_str(): typedVariant.name.c_str(),
            typedVariant.refAlleleString.c_str(),
            typedVariant.altAlleleString.c_str(),
            typedFreq,
            typedFreq > 0.5 ? 1.0 - typedFreq : typedFreq);
        }
    }
    ifclose(info);


    cout<<endl<<" Summary information written to          : "<<outFile<<".info"<<endl;
}
コード例 #7
0
ファイル: Imputation.cpp プロジェクト: luyi0629/Minimac3
// Writes one partial VCF body file holding the dosages currently buffered in
// PartialDosage.
//
//   rHap          - reference panel; supplies RefTypedIndex, the print
//                   window, the major-allele flags and the variant list.
//   tHap          - target panel; AllMaleTarget selects the male-sample
//                   printers, and it is passed through for typed-only rows.
//   PartialDosage - buffered per-sample dosages to be written.
//   filename      - path of the partial file to create.
//   Index         - not used by this function.
//
// The first line holds the tab-prefixed sample names; each following line is
// one variant.  If the file cannot be opened, an error is reported on stderr
// and nothing is written.
// NOTE(review): the file is always opened as BGZF, regardless of the name
// passed in - confirm this is intended when gzip output is disabled.
void Imputation::FlushPartialVcf(HaplotypeSet &rHap,HaplotypeSet &tHap,HaplotypeSet &PartialDosage, string &filename,int &Index)
{
    IFILE vcfdosepartial = ifopen(filename.c_str(), "wb", InputFile::BGZF);
    if(vcfdosepartial == NULL)
    {
        cerr<<endl<<" ERROR: Could not open partial VCF file for writing : "<<filename<<endl;
        return;
    }

    // Sample-name row; it continues the "#CHROM ..." header line.
    for(int hapId=0;hapId<(int)PartialDosage.individualName.size();hapId++)
    {
        ifprintf(vcfdosepartial,"\t%s",PartialDosage.individualName[hapId].c_str());
    }
    ifprintf(vcfdosepartial,"\n");

    // 'i' walks the reference variants; it only advances on rows that
    // belong to the reference panel (RefTypedIndex == -1).
    int i=0;
    for (int index =0; index < rHap.RefTypedTotalCount; index++)
    {
        const int typedIndex = rHap.RefTypedIndex[index];

        if(typedIndex==-1)
        {
            if(i>=rHap.PrintStartIndex && i <= rHap.PrintEndIndex)
            {
                const bool majorIsReference = !rHap.major[i];

                if(!tHap.AllMaleTarget)
                    PartialDosage.PrintDosageForVcfOutputForID(vcfdosepartial,i, majorIsReference,rHap.VariantList[i].refAllele);
                else
                    PartialDosage.PrintDosageForVcfOutputForIDMaleSamples(vcfdosepartial,i, majorIsReference,rHap.VariantList[i].refAllele);

                ifprintf(vcfdosepartial,"\n");
            }
            i++;
        }
        else
        {
            // Variant typed only in the target panel.
            if(!tHap.AllMaleTarget)
                PartialDosage.PrintDosageGWASOnlyForVcfOutputForID
                (tHap,vcfdosepartial,typedIndex);
            else
                PartialDosage.PrintDosageGWASOnlyForVcfOutputForIDMaleSamples
                (tHap,vcfdosepartial,typedIndex);
            ifprintf(vcfdosepartial,"\n");
        }
    }

    ifclose(vcfdosepartial);
}
コード例 #8
0
ファイル: lib.c プロジェクト: daveshields/AdaEd
int init_predef()				/*;init_predef*/
{
	/* Open the "predef" library file while PREDEFNAME is the current
	 * library, switch back to the previous library, then read the opened
	 * file.  Returns the value of read_lib() (number of units read).
	 */
	extern char *PREDEFNAME;
	char *previous_lib;
	int units_read;

	/* libset() returns the library that was current before the switch */
	previous_lib = libset(PREDEFNAME);
	LIBFILE = ifopen("predef", "lib", "r", 0);

	/* restore the prior library; the name returned here is not needed */
	(void) libset(previous_lib);

	units_read = read_lib();
	return(units_read);
}
コード例 #9
0
ファイル: GCContent.cpp プロジェクト: BioInfoTools/qplot
// Loads a region list and marks every covered genome position in
// regionIndicator.
//
//   regionsFile  - path of a whitespace-separated file whose first three
//                  columns are: chromosome name, start position, end
//                  position.  Empty lines, lines starting with '#' and lines
//                  with fewer than three columns are ignored.  An empty path
//                  makes the call a no-op.
//   genome       - loaded reference, used to translate (chromosome, position)
//                  into a genome index; error() is called if it is empty.
//   invertRegion - when true, the indicator is flipped after loading so that
//                  it marks everything outside the listed regions.
//
// Regions whose start is not found in the reference are skipped, as are
// regions whose end precedes their start.  Regions that would run past the
// end of the reference are truncated.
void GCContent::LoadRegions(String & regionsFile, GenomeSequence &genome, bool invertRegion)
{
    if(regionsFile.Length()==0) return;
    if(genome.sequenceLength()==0) error("No reference genome loaded!\n");

    IFILE fhRegions = ifopen(regionsFile.c_str(),"r");
    if(fhRegions==NULL)
        error("Open regions file %s failed!\n", regionsFile.c_str());

    regionIndicator.resize(genome.sequenceLength());

    StringArray tokens;
    String buffer;

    fprintf(stderr, "Loading region list...");

    while (!ifeof(fhRegions)){
        // Start every line from a clean slate.  AddTokens appends, so tokens
        // left behind by a skipped line would otherwise be re-used for all
        // the lines that follow.
        tokens.Clear();
        buffer.Clear();

        buffer.ReadLine(fhRegions);
        if (buffer.IsEmpty() || buffer[0] == '#') continue;

        tokens.AddTokens(buffer, WHITESPACE);
        if(tokens.Length() < 3) continue;

        const int startPosition = tokens[1].AsInteger();
        const int endPosition = tokens[2].AsInteger();

        // use chromosome name (token[0]) and start position (token[1]) to query genome index.
        const genomeIndex_t startGenomeIndex =
            genome.getGenomePosition(tokens[0].c_str(), startPosition);

        if(startGenomeIndex >= regionIndicator.size() ) {
            //fprintf(stderr, "WARNING: region list section %s position %u is not found in the reference and skipped...\n", tokens[0].c_str(), startPosition);
            continue;
        }

        // A reversed interval has no positions to mark; without this check
        // the length would go negative and wrap in the unsigned loop below.
        if(endPosition < startPosition) continue;

        // Inclusive interval length, computed in 64 bits so it cannot wrap.
        const uint64_t regionLength =
            static_cast<uint64_t>(static_cast<int64_t>(endPosition) - startPosition + 1);

        // Never write past the end of the indicator.
        uint64_t endGenomeIndex = static_cast<uint64_t>(startGenomeIndex) + regionLength;
        if(endGenomeIndex > regionIndicator.size())
            endGenomeIndex = regionIndicator.size();

        for(uint64_t i=startGenomeIndex; i<endGenomeIndex; i++)
            regionIndicator[i] = true;
    }

    if (invertRegion) {
        fprintf(stderr, " invert region...");
        for (uint32_t i = 0; i < regionIndicator.size(); i++) {
            regionIndicator[i] = !regionIndicator[i];
        }
    }

    ifclose(fhRegions);
    fprintf(stderr, "DONE!\n");
}
コード例 #10
0
// Writes a two-column, tab-separated table (marker name, error rate) with a
// header row to 'filename'.  One row is written for each of the 'markers'
// entries of E.  Returns silently when the file cannot be opened.
void MarkovParameters::WriteErrorRates(StringArray & markerNames, const char * filename)
   {
   IFILE out = ifopen(filename, "wb");

   if (out != NULL)
      {
      ifprintf(out, "MarkerName\tErrorRate\n");

      int m = 0;
      while (m < markers)
         {
         ifprintf(out, "%s\t%.5g\n", (const char *) markerNames[m], E[m]);
         ++m;
         }

      ifclose(out);
      }
   }
コード例 #11
0
ファイル: StringAlias.cpp プロジェクト: aminzia/statgen
bool StringAlias::ReadFromFile(const char * filename)
{
    IFILE input = ifopen(filename, "rt");

    if (input == NULL)
        return false;

    ReadFromFile(input);

    ifclose(input);

    return true;
}
コード例 #12
0
// Writes a two-column, tab-separated table (interval, switch rate) with a
// header row to 'filename'.  Each interval is labelled "<marker>-<next
// marker>", so markers - 1 rows are written from R.  Returns silently when
// the file cannot be opened.
void MarkovParameters::WriteCrossoverRates(StringArray & markerNames, const char * filename)
   {
   IFILE out = ifopen(filename, "wb");

   if (out != NULL)
      {
      ifprintf(out, "Interval\tSwitchRate\n");

      int left = 0;
      while (left < markers - 1)
         {
         const int right = left + 1;
         ifprintf(out, "%s-%s\t%.5g\n",
                  (const char *) markerNames[left],
                  (const char *) markerNames[right], R[left]);
         left = right;
         }

      ifclose(out);
      }
   }
コード例 #13
0
ファイル: glfHandler.cpp プロジェクト: mkanai/ChunkChromosome
// Creates 'filename' for writing and emits the header via WriteHeader().
// Returns false, with isStub set, when the file cannot be opened; otherwise
// returns whether the handle is still non-NULL after the header was written.
bool glfHandler::Create(const String & filename)
   {
   isStub = false;
   handle = ifopen(filename, "wb");

   if (handle != NULL)
      {
      WriteHeader();
      return handle != NULL;
      }

   // Nothing could be opened: mark this handler as a stub.
   isStub = true;
   return false;
   }
コード例 #14
0
ファイル: glfHandler.cpp プロジェクト: statgen/statgen
// Creates 'filename' as a BGZF file for writing and emits the header via
// WriteHeader().  Returns false, with isStub set, when the file cannot be
// opened; otherwise returns whether the handle is still non-NULL after the
// header was written.
bool glfHandler::Create(const String & filename)
{
    isStub = false;

    // glf is in BGZF format.
    handle = ifopen(filename, "wb", InputFile::BGZF);

    if (handle != NULL)
    {
        WriteHeader();
        return handle != NULL;
    }

    // Nothing could be opened: mark this handler as a stub.
    isStub = true;
    return false;
}
コード例 #15
0
ファイル: glfHandler.cpp プロジェクト: statgen/statgen
// Opens 'filename' for reading and reads the header via ReadHeader().
// Returns false, with isStub set, when the file cannot be opened.  When the
// header cannot be read the handle is closed; the return value is whether
// the handle is non-NULL at the end.
// NOTE(review): the false-on-bad-header result relies on ifclose() resetting
// the handle to NULL - confirm against the ifclose signature in use.
bool glfHandler::Open(const String & filename)
{
    isStub = false;
    handle = ifopen(filename, "rb");

    if (handle == NULL)
    {
        isStub = true;
        return false;
    }

    const bool headerOk = ReadHeader();
    if (!headerOk)
    {
        ifclose(handle);
    }

    endOfSection = true;

    return handle != NULL;
}
コード例 #16
0
ファイル: VerifyBamID.cpp プロジェクト: statgen/verifyBamID
void VerifyBamID::loadSubsetInds(const char* subsetFile) {
  if ( ( pPile == NULL ) && ( pGenotypes == NULL ) ) {
    if ( subsetInds.size() > 0 ) {
      Logger::gLogger->error("VerifyBamID::loadSubsetInds() called multiple times");
    }

    IFILE f = ifopen(subsetFile,"rb");
    String line;
    StringArray tok;
    while( line.ReadLine(f) > 0 ) {
      tok.ReplaceTokens(line,"\t \n\r");
      subsetInds.push_back(tok[0].c_str());
    }
  }
  else {
    Logger::gLogger->error("VerifyBamID::loadSubsetInds() called after VerifyBamID::loadFiles()");
  }
}
コード例 #17
0
ファイル: dhcpmgmt.c プロジェクト: elitak/ipxe
/*
 * Configure a network device via DHCP.
 *
 * Opens the interface, waits for link-up, prints the device name and
 * link-layer address, then starts DHCP on the monojob interface and waits
 * for it to finish.
 *
 * Returns 0 on success, otherwise the non-zero status from the step that
 * failed.
 */
int dhcp ( struct net_device *netdev ) {
	int rc;

	/* The interface must be openable before anything else */
	rc = ifopen ( netdev );
	if ( rc != 0 )
		return rc;

	/* Block until the link comes up (or the wait fails) */
	rc = iflinkwait ( netdev, LINK_WAIT_MS );
	if ( rc != 0 )
		return rc;

	/* Announce the device and its link-layer address */
	printf ( "DHCP (%s %s)", netdev->name,
		 netdev->ll_protocol->ntoa ( netdev->ll_addr ) );

	/* Start DHCP; only wait for the job when it actually started */
	rc = start_dhcp ( &monojob, netdev );
	if ( rc != 0 )
		return rc;

	return monojob_wait ( "" );
}
コード例 #18
0
ファイル: read1.cpp プロジェクト: zhanxw/base
int main() 
{
    String fn = "/home/zhanxw/compareMapSoft/index/mapreads/chr1.fa";
    IFILE file = ifopen(fn.c_str(), "r");

    int totalChar = 0;
    String line;
    int freq[256] = {0};
    while (!ifeof(file)){
        line.ReadLine(file);
        totalChar += line.Length();
        for (int i = 0; i < line.Length(); i++)
            freq[(unsigned int) line[i]]++;
    }
    printf("A frequency: %d (%f)\n", freq[(int)'A'], (float)freq[(int)'A']/totalChar);
    printf("T frequency: %d (%f)\n", freq[(int)'T'], (float)freq[(int)'T']/totalChar);
    printf("G frequency: %d (%f)\n", freq[(int)'G'], (float)freq[(int)'G']/totalChar);
    printf("C frequency: %d (%f)\n", freq[(int)'C'], (float)freq[(int)'C']/totalChar);
}
コード例 #19
0
ファイル: VcfFile.cpp プロジェクト: luyi0629/Minimac3
// Opens 'filename' with the given ifopen mode string and compression mode,
// after resetting any state from a previously used file.
// Returns true on success.  On failure, myStatus is set to FAIL_IO with a
// message naming the file and the mode, and false is returned.
bool VcfFile::open(const char* filename, const char* mode,
                   InputFile::ifileCompression compressionMode)
{
    reset();

    myFilePtr = ifopen(filename, mode, compressionMode);
    if(myFilePtr != NULL)
    {
        return(true);
    }

    // Could not open: record why.
    std::string errorMessage("Failed to Open ");
    errorMessage.append(filename).append(" for ").append(mode);
    myStatus.setStatus(StatGenStatus::FAIL_IO, errorMessage.c_str());
    return(false);
}
コード例 #20
0
ファイル: FilterStat.cpp プロジェクト: aminzia/statgen
// Writes the anchor VCF (sAnchorVcf) to 'outFile' with two extra INFO
// entries per marker:
//   STC - the FILTER_STAT_COUNTS counts stored for the marker in vCounts,
//         printed as six comma-separated integers;
//   STR - a correlation-style statistic computed from the 2x2 table formed
//         by the first four counts, or "0" when any marginal of that table
//         is 4 or less.
// Returns true when the output has been written; an unopenable output file
// is reported through the logger and yields false.
bool FilterStat::writeMergedVcf(const char* outFile) {
  IFILE oFile = ifopen(outFile,"wb");
  if ( oFile == NULL ) {
    Logger::gLogger->error("Cannot open output file %s",outFile);
    return false;
  }

  VcfFile vcf;
  vcf.setSiteOnly(false);
  vcf.setParseValues(true);
  vcf.openForRead(sAnchorVcf.c_str(),1);  

  vcf.printVCFHeader(oFile);

  VcfMarker* pMarker;
  String STC, STR;
  for( int i=0; vcf.iterateMarker(); ++i ) {
     pMarker = vcf.getLastMarker();

     // Counts for marker i are stored contiguously in vCounts.
     int c[FILTER_STAT_COUNTS];
     for(int j=0; j < FILTER_STAT_COUNTS; ++j) {
       c[j] = vCounts[FILTER_STAT_COUNTS*i+j];
     }
     STC.printf("%d,%d,%d,%d,%d,%d",c[0],c[1],c[2],c[3],c[4],c[5]);

     // The four marginals of the 2x2 table {c[0],c[1];c[2],c[3]}.
     const int row0 = c[0]+c[1];
     const int row1 = c[2]+c[3];
     const int col0 = c[0]+c[2];
     const int col1 = c[1]+c[3];

     // Require every marginal to exceed 4.  The second-row marginal used to
     // go unchecked because the second-column test was written twice.
     if ( ( row0 > 4 ) && ( row1 > 4 ) && ( col0 > 4 ) && ( col1 > 4 ) ) { 
       STR.printf("%.2lf",((c[0]+.5)*(c[3]+.5)-(c[1]+.5)*(c[2]+.5))/sqrt((row0+1.)*(row1+1.)*(col0+1.)*(col1+1.)));
     }
     else {
       STR = "0";
     }
     pMarker->asInfoKeys.Add("STC");
     pMarker->asInfoKeys.Add("STR");
     pMarker->asInfoValues.Add(STC);
     pMarker->asInfoValues.Add(STR);

     pMarker->printVCFMarker(oFile,false);
  }
  ifclose(oFile);
  return true;
}
コード例 #21
0
ファイル: GlfFile.cpp プロジェクト: aminzia/statgen
// Opens the named GLF file for reading.
// Returns true on success.  When the file cannot be opened, myStatus is set
// to FAIL_IO with a descriptive message and a GlfException is thrown.
bool GlfFile::openForRead(const char * filename)
{
    // Drop any state left over from a previously used file.
    resetFile();

    myFilePtr = ifopen(filename, "rb");

    if (myFilePtr != NULL)
    {
        // The file is open and ready to be read.
        myIsOpenForRead = true;
        myStatus = GlfStatus::SUCCESS;
        return(true);
    }

    // Could not open: record the reason and report it via exception.
    std::string errorMessage("Failed to Open ");
    errorMessage.append(filename).append(" for reading");
    myStatus.setStatus(GlfStatus::FAIL_IO, errorMessage.c_str());
    throw(GlfException(myStatus));
}
コード例 #22
0
ファイル: FastQFile.cpp プロジェクト: statgen/libStatGen
// Opens a FastQ file for reading in text mode.
//   fileName  - path of the file to open; also stored in myFileName.
//   spaceType - base map type applied to the base-composition tracker.
// Any file that is already open is closed first.  Returns FASTQ_SUCCESS on
// success, the status from closeFile() when closing the previous file
// fails, or FASTQ_OPEN_ERROR when the new file cannot be opened; every
// non-success result is also logged.
FastQStatus::Status FastQFile::openFile(const char* fileName,
                                        BaseAsciiMap::SPACE_TYPE spaceType)
{
   // Start from clean member data and per-cycle statistics.
   reset();

   myBaseComposition.resetBaseMapType();
   myBaseComposition.setBaseMapType(spaceType);
   myQualPerCycle.clear();
   myCountPerCycle.clear();

   // closeFile() itself checks whether a file is currently open.
   FastQStatus::Status status = closeFile();

   if(status == FastQStatus::FASTQ_SUCCESS)
   {
      myFile = ifopen(fileName, "rt");
      myFileName = fileName;

      if(myFile == NULL)
      {
         status = FastQStatus::FASTQ_OPEN_ERROR;
      }
   }

   if(status == FastQStatus::FASTQ_SUCCESS)
   {
      return(status);
   }

   // Either the close or the open failed.
   std::string errorMessage("ERROR: Failed to open file: ");
   errorMessage.append(fileName);
   logMessage(errorMessage.c_str());
   return(status);
}
コード例 #23
0
ファイル: streamReader.cpp プロジェクト: somaen/twin-e
// Opens 'fileName' for binary reading and primes the reader's buffer.
// The open routine is chosen at compile time: gdFsOpen on DREAMCAST,
// otherwise ifopen when USE_IFOPEN is defined, otherwise fopen.
// Returns 1 on success and 0 on failure; when 'fatal' is non-zero a failure
// prints a message and terminates the program instead of returning.
char streamReader_open(streamReader *pThis, const char *fileName, int fatal) {
#if defined(DREAMCAST)
	pThis->fileHandle = gdFsOpen(fileName, NULL);
#elif defined(USE_IFOPEN)
	pThis->fileHandle = ifopen(fileName, "rb");
#else
	pThis->fileHandle = fopen(fileName, "rb");
#endif

	if (!pThis->fileHandle) {
		if (fatal) {
			printf("FATAL: Can't find %s\n", fileName);
			exit(-1);
		}
		return 0;
	}

	// Start at the first sector and load the initial buffer.
	pThis->currentSector = 0;
	streamReader_feedBuffer(pThis);
	return 1;
}
コード例 #24
0
ファイル: VerifyBamID.cpp プロジェクト: statgen/verifyBamID
void VerifyBamID::printPerMarkerInfo(const char* filename, int indIdx) {
  IFILE oFile = ifopen(filename,"wb");
  int nMarkers = (int)(pGenotypes->chroms.size());
  char base, a1, a2;

  ifprintf(oFile,"#CHROM\tPOS\tA1\tA2\tAF\tGENO\t#REF\t#ALT\t#OTHERS\tBASES\tQUALS\tMAPQS\n");
  for(int i=0; i < nMarkers; ++i) {
    int counts[3] = {0,0,0};
    std::vector<char> bases;
    std::vector<char> quals;
    std::vector<char> mqs;

    ifprintf(oFile,"%s\t%d\t%c\t%c\t%.4lf\t",pGenotypes->chroms[i].c_str(),pGenotypes->positions[i],pGenotypes->refBases[i],pGenotypes->altBases[i],pGenotypes->alleleFrequencies[i]);
    int geno = pGenotypes->getGenotype(indIdx,i);
    switch(geno) {
    case 0: // MISSING
      ifprintf(oFile,"./.");
      break;
    case 1: // HOMREF;
      ifprintf(oFile,"0/0");
      break;
    case 2: // HET;
      ifprintf(oFile,"0/1");
      break;
    case 3: // HOMALT;
      ifprintf(oFile,"1/1");
      break;
    default:
      Logger::gLogger->error("Unrecognized genotype %d at ind %d, marker %d",indIdx,i);
    }

    a1 = pGenotypes->refBases[i];
    a2 = pGenotypes->altBases[i];

    for(int j=(int)pPile->nBegins[i]; j < (int)pPile->nEnds[i]; ++j) {
      // obtain b (base), (error), and readgroup info
      base = pPile->cBases[j];
      if ( base == a1 ) {
	++counts[0];
      }
      else if ( base == a2 ) {
	++counts[1];
      }
      else {
	++counts[2];
      }

      bases.push_back(base);
      quals.push_back(pPile->cQuals[j]);
      mqs.push_back(((uint8_t)(pPile->cMapQs[j]) > 90) ? '~' : static_cast<char>(pPile->cMapQs[j]+33));
    }
    ifprintf(oFile,"\t%d\t%d\t%d\t%.3lf\t",counts[0],counts[1],counts[2],(counts[0]+counts[1] == 0) ? 0.5 : (double)counts[0]/(double)(counts[0]+counts[1]));

    ifprintf(oFile,"\t");
    for(int j=0; j < (int)bases.size(); ++j)
      ifprintf(oFile,"%c",bases[j]);

    ifprintf(oFile,"\t");
    for(int j=0; j < (int)quals.size(); ++j)
      ifprintf(oFile,"%c",quals[j]);

    ifprintf(oFile,"\t");
    for(int j=0; j < (int)mqs.size(); ++j)
      ifprintf(oFile,"%c",mqs[j]);

    ifprintf(oFile,"\n");
  }
}
コード例 #25
0
ファイル: Tabix.cpp プロジェクト: genome-vendor/gotcloud
// Read & parse the specified index file.
//
// Layout consumed, in order: the 4-byte magic "TBI\1", the number of
// references, the format configuration, the length of the chromosome-name
// block, the NUL-separated chromosome names, and then for each reference
// its bins (bin number, chunk count, chunks) followed by its linear index
// (interval count, offsets).
//
// Returns SUCCESS when the whole index was read, FAIL_IO when the file
// cannot be opened or a read comes up short, and FAIL_PARSE when the magic
// is wrong, a count or bin number is out of range, or the per-reference bin
// count cannot be read.  The file is closed on every return path.
StatGenStatus::Status Tabix::readIndex(const char* filename)
{
    // Reset the index from anything that may previously be set.
    resetIndex();

    IFILE indexFile = ifopen(filename, "rb");

    // Failed to open the index file.
    if(indexFile == NULL)
    {
        return(StatGenStatus::FAIL_IO);
    }

    // Close the index file whichever way this function is left; it was
    // previously leaked on success as well as on every error return.
    struct IndexFileCloser
    {
        explicit IndexFileCloser(IFILE& file) : myFile(file) {}
        ~IndexFileCloser() { ifclose(myFile); }
        IFILE& myFile;
    } indexFileCloser(indexFile);

    // read the tabix index structure.

    // Read the magic string.
    char magic[4];
    if(ifread(indexFile, magic, 4) != 4)
    {
        // Failed to read the magic
        return(StatGenStatus::FAIL_IO);
    }

    // Reject anything that does not start with the tabix magic.
    if (magic[0] != 'T' || magic[1] != 'B' || magic[2] != 'I' || magic[3] != 1)
    {
        // Not a Tabix Index file.
        return(StatGenStatus::FAIL_PARSE);
    }

    // It is a tabix index file.
    // Read the number of reference sequences.
    if(ifread(indexFile, &n_ref, 4) != 4)
    {
        // Failed to read.
        return(StatGenStatus::FAIL_IO);
    }

    // A negative count can only come from a corrupt file; resizing with it
    // would request an enormous allocation.
    if(n_ref < 0)
    {
        return(StatGenStatus::FAIL_PARSE);
    }

    // Size the references.
    myRefs.resize(n_ref);

    // Read the Format configuration.
    if(ifread(indexFile, &myFormat, sizeof(myFormat)) != sizeof(myFormat))
    {
        // Failed to read.
        return(StatGenStatus::FAIL_IO);
    }

    // Read the length of the chromosome names.
    uint32_t l_nm;

    if(ifread(indexFile, &l_nm, sizeof(l_nm)) != sizeof(l_nm))
    {
        // Failed to read.
        return(StatGenStatus::FAIL_IO);
    }

    // Read the chromosome names.
    myChromNamesBuffer = new char[l_nm];
    if(ifread(indexFile, myChromNamesBuffer, l_nm) != l_nm)
    {
        return(StatGenStatus::FAIL_IO);
    }
    myChromNamesVector.resize(n_ref);

    // Parse out the chromosome names: each name starts at the beginning of
    // the buffer or right after a NUL terminator.
    bool prevNull = true;
    int chromIndex = 0;
    for(uint32_t i = 0; i < l_nm; i++)
    {
        if(chromIndex >= n_ref)
        {
            // already set the pointer for the last chromosome name, 
            // so stop looping.
            break;
        }
        if(prevNull == true)
        {
            myChromNamesVector[chromIndex++] = myChromNamesBuffer + i;
            prevNull = false;
        }
        if(myChromNamesBuffer[i] == '\0')
        {
            prevNull = true;
        }
    }

    for(int refIndex = 0; refIndex < n_ref; refIndex++)
    {
        // Read each reference.
        Reference* ref = &(myRefs[refIndex]);
        
        // Resize the bins so they can be indexed by bin number.
        ref->bins.resize(MAX_NUM_BINS + 1);
        
        // Read the number of bins.
        if(ifread(indexFile, &(ref->n_bin), 4) != 4)
        {
            // Failed to read the number of bins.
            // Return failure.
            return(StatGenStatus::FAIL_PARSE);
        }

        // Read each bin.
        for(int binIndex = 0; binIndex < ref->n_bin; binIndex++)
        {
            uint32_t binNumber;

            // Read in the bin number.
            if(ifread(indexFile, &(binNumber), 4) != 4)
            {
                // Failed to read the bin number.
                // Return failure.
                return(StatGenStatus::FAIL_IO);
            }

            // The bin number comes straight from the file and is used as an
            // index below, so it must fit the table sized above.
            if(binNumber > MAX_NUM_BINS)
            {
                return(StatGenStatus::FAIL_PARSE);
            }

            // Add the bin to the reference and get the
            // pointer back so the values can be set in it.
            Bin* binPtr = &(ref->bins[binNumber]);
            binPtr->bin = binNumber;
         
            // Read in the number of chunks.
            if(ifread(indexFile, &(binPtr->n_chunk), 4) != 4)
            {
                // Failed to read number of chunks.
                // Return failure.
                return(StatGenStatus::FAIL_IO);
            }

            // Read in the chunks.
            // Allocate space for the chunks.
            uint32_t sizeOfChunkList = binPtr->n_chunk * sizeof(Chunk);
            binPtr->chunks = (Chunk*)malloc(sizeOfChunkList);
            if(ifread(indexFile, binPtr->chunks, sizeOfChunkList) != sizeOfChunkList)
            {
                // Failed to read the chunks.
                // Return failure.
                return(StatGenStatus::FAIL_IO);
            }
        }

        // Read the number of intervals.
        if(ifread(indexFile, &(ref->n_intv), 4) != 4)
        {
            // Failed to read, set to 0.
            ref->n_intv = 0;
            // Return failure.
            return(StatGenStatus::FAIL_IO);
        }

        // Allocate space for the intervals and read them.
        uint32_t linearIndexSize = ref->n_intv * sizeof(uint64_t);
        ref->ioffsets = (uint64_t*)malloc(linearIndexSize);
        if(ifread(indexFile, ref->ioffsets, linearIndexSize) != linearIndexSize)
        {
            // Failed to read the linear index.
            // Return failure.
            return(StatGenStatus::FAIL_IO);
        }
    }

    // Successfully read the tabix index file.
    return(StatGenStatus::SUCCESS);
}
コード例 #26
0
ファイル: Imputation.cpp プロジェクト: luyi0629/Minimac3
// Runs the main imputation pass over all target haplotypes.
//
//   tHap   - target panel (haplotypes to impute, sample names, flags).
//   rHap   - reference panel (markers, compressed structure, allele data).
//   Golden - not referenced anywhere in this function.
//
// Steps, in order: build the Markov parameters with createEstimates(); open
// the output files selected by phased/unphasedOutput, vcfOutput and
// doseOutput; impute every haplotype inside an OpenMP parallel loop, writing
// per-sample results from critical sections and flushing the buffered VCF
// dosages to numbered part files every maxVcfSample samples; close the
// output files; write the .info summary; and, for VCF output, merge the
// part files with MergeFinalVcfAllVariants().
//
// NOTE(review): none of the ifopen() results are checked before use.
// NOTE(review): MP comes from createEstimates() and is never released here;
// confirm who owns it.
void Imputation::performImputation(HaplotypeSet &tHap,HaplotypeSet &rHap, String Golden)
{

    // Local copy of the reference panel's group boundaries.
    vector<int> optStructure=rHap.optEndPoints;

    int time_prev = time(0),time_load,vcfSampleIndex=0;;
    includeGwas=true;
    // includeGwas was just set to true, so the last argument evaluates to 0.
    MarkovParameters* MP=createEstimates(rHap,tHap,rHap.optEndPoints,1-includeGwas);

    cout<<" ------------------------------------------------------------------------------"<<endl;
    cout<<"                              MAIN IMPUTATION                                  "<<endl;
    cout<<" ------------------------------------------------------------------------------"<<endl;


    ImputationStatistics stats(rHap.numMarkers );
    IFILE dosages=NULL, hapdose=NULL, haps=NULL,vcfdosepartial=NULL;
    // Buffer holding the dosages of the current batch of samples until they
    // are flushed to a partial VCF file.
    HaplotypeSet DosageForVcfPartial;
    DosageForVcfPartial.unphasedOutput=unphasedOutput;
    DosageForVcfPartial.TypedOnly=tHap.TypedOnly;
    DosageForVcfPartial.GWASOnlycounter=tHap.GWASOnlycounter;

    if(tHap.TypedOnly)
    {
        printf("\n Calculating Allele Frequency for Typed-Only variants ... ");
        cout<<endl;
        tHap.CalculateGWASOnlyFreq();

    }

    cout << "\n Starting Imputation ...";
    printf("\n\n Setting up Markov Model for Imputation ...");
    cout<<endl<<endl;


    // Per-haplotype dosage and label files, only for phased output.
    if (phased && !unphasedOutput)
    {

        hapdose = ifopen(outFile + ".hapDose" + (gzip ? ".gz" : ""), "wb", gzip ?InputFile::BGZF:InputFile::UNCOMPRESSED);
        haps = ifopen(outFile + ".hapLabel" + (gzip ? ".gz" : ""), "wb", gzip ?InputFile::BGZF:InputFile::UNCOMPRESSED);

    }

    // maxVcfSample is the number of samples buffered per partial VCF file.
    int maxVcfSample=200,NumVcfWritten=0,NumVcfCreated=0,NovcfParts=1;

    // Never buffer more samples than the target panel contains.
    if((maxVcfSample)>=tHap.numSamples)
        maxVcfSample=tHap.numSamples;

    if(vcfOutput)
    {


        // Write the VCF meta lines and the fixed part of the column header,
        // then close the file again.  The "#CHROM" line is written without a
        // trailing newline.
        vcfdosepartial = ifopen(outFile + ".dose.vcf" + (gzip ? ".gz" : ""), "wb", gzip ?InputFile::BGZF:InputFile::UNCOMPRESSED);
        ifprintf(vcfdosepartial,"##fileformat=VCFv4.1\n");
        time_t t = time(0);
        struct tm * now = localtime( & t );
        ifprintf(vcfdosepartial,"##filedate=%d.%d.%d\n",(now->tm_year + 1900),(now->tm_mon + 1) ,now->tm_mday);
        ifprintf(vcfdosepartial,"##source=Minimac3\n");
        if(GT)
                ifprintf(vcfdosepartial,"##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n");
        // The DS/GP descriptions differ for an all-male target.
        if(tHap.AllMaleTarget)
        {
            if(DS)
                ifprintf(vcfdosepartial,"##FORMAT=<ID=DS,Number=1,Type=Float,Description=\"Estimated Alternate Allele Dosage (For Male Chr: X) : [P(Alt Allele)]\">\n");
            if(GP)
                ifprintf(vcfdosepartial,"##FORMAT=<ID=GP,Number=2,Type=Float,Description=\"Estimated Posterior Probabilities for Genotypes 0 and 1 (For Male Chr: X) \">\n");
        }
        else
        {
            if(DS)
                ifprintf(vcfdosepartial,"##FORMAT=<ID=DS,Number=1,Type=Float,Description=\"Estimated Alternate Allele Dosage : [P(0/1)+2*P(1/1)]\">\n");
            if(GP)
                ifprintf(vcfdosepartial,"##FORMAT=<ID=GP,Number=3,Type=Float,Description=\"Estimated Posterior Probabilities for Genotypes 0/0, 0/1 and 1/1 \">\n");
        }


        ifprintf(vcfdosepartial,"##INFO=<ID=MAF,Number=1,Type=Float,Description=\"Estimated Alternate Allele Frequency\">\n");
        ifprintf(vcfdosepartial,"##INFO=<ID=R2,Number=1,Type=Float,Description=\"Estimated Imputation Accuracy\">\n");
        ifprintf(vcfdosepartial,"##INFO=<ID=ER2,Number=1,Type=Float,Description=\"Empirical (Leave-One-Out) R-square (available only for genotyped variants)\">\n");
        ifprintf(vcfdosepartial,"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT");
        ifclose(vcfdosepartial);

        // Size the dosage buffer for the first batch: two haplotypes per
        // sample unless every target sample is male.
        if(!tHap.AllMaleTarget)
            DosageForVcfPartial.InitializePartialDosageForVcfOutput((2*maxVcfSample),rHap.numMarkers,format);
        else
            DosageForVcfPartial.InitializePartialDosageForVcfOutputMaleSamples(maxVcfSample<MaxSample?maxVcfSample:MaxSample,rHap.numMarkers,format);
    }

    if(doseOutput)
        dosages = ifopen(outFile + ".dose" + (gzip ? ".gz" : ""), "wb",(gzip ? InputFile::BGZF:InputFile::UNCOMPRESSED) );


    // Each iteration handles one sample: an even hapId plus, inside the
    // do/while below, the odd hapId that follows it.  Output and statistics
    // updates are wrapped in critical sections.
    #pragma omp parallel for
    for(int hapId=0;hapId<MaxSample;hapId++)
    {


        // Odd haplotype ids are skipped here unless the data is chromosome X
        // with an all-male target, where every id is processed on its own.
        if (hapId %2==1)
        {
            if(rHap.finChromosome!="X")
                continue;
            else if(!tHap.AllMaleTarget)
                continue;
        }

        vector<float> foldedProb,recomProb,noRecomProb, rightProb,probAlleleNoStandardize(8,0.0),tempDoseHap1;
        vector<bool> tempHap(rHap.numMarkers),tempMissHap(rHap.numMarkers);
        vector<bool> tempDoseAlleleHap1;

        // Model object local to this iteration, seeded with the shared
        // parameters.
        MarkovModel MM(tHap,rHap,tHap.missing,rHap.major);

        MM.CopyParameters(MP);

        int hapIdIndiv=hapId;

        do{

            MM.initializeMatrices(tHap,rHap,optStructure,rHap.ReducedStructureInfo);
            printf("  Processing Haplotype %d of %d ...", hapIdIndiv + 1, MaxSample);
            cout<<endl;


            MM.ThisHapId=hapIdIndiv;


            // Left-to-right pass over the groups delimited by optStructure.
            for(int group=1;group<(int)optStructure.size();group++)
            {

                MM.foldProbabilities(foldedProb,group-1,rHap.ReducedStructureInfo[group-1],0,refCount);
                MM.leftNoRecoProb[group-1][0]=foldedProb;


                // In the first group, condition on the observed allele at
                // marker 0 when that marker is present and not missing for
                // this haplotype.
                if(group==1 && !tHap.missing[0])
                        if(!tHap.getMissingScaffoldedHaplotype(hapIdIndiv,0))
                            {

                                Condition(rHap,0,foldedProb,MM.leftNoRecoProb[group-1][0],MM.Error[0],
                                tHap.getScaffoldedHaplotype(hapIdIndiv,0)? rHap.AlleleFreq[0] : 1-rHap.AlleleFreq[0],
                                tHap.getScaffoldedHaplotype(hapIdIndiv,0),MM.backgroundError,
                                      foldedProb.size(),rHap.ReducedStructureInfo[0]);
                            }



                MM.WalkLeft(tHap,hapIdIndiv,MM.leftProb[group-1],MM.leftNoRecoProb[group-1],
                            foldedProb,optStructure[group-1],optStructure[group],
                            rHap.ReducedStructureInfo[group-1],rHap.AlleleFreq);

                splitFoldedProb(recomProb,MM.leftProb[group-1][optStructure[group]-optStructure[group-1]],MM.leftNoRecoProb[group-1][optStructure[group]-optStructure[group-1]]);

                MM.unfoldProbabilities(group-1,recomProb,MM.leftNoRecoProb[group-1][optStructure[group]-optStructure[group-1]],foldedProb,0,rHap.ReducedStructureInfo,refCount);

            }



            // Right-to-left pass: impute each group using the stored left
            // probabilities.
            for(int group=optStructure.size()-1;group>0;group--)
            {

                MM.foldProbabilities(foldedProb,group-1,rHap.ReducedStructureInfo[group-1],1,refCount);
                rightProb=foldedProb;
                noRecomProb=foldedProb;

                MM.Impute(tHap,foldedProb,hapIdIndiv,MM.leftProb[group-1],MM.leftNoRecoProb[group-1],rightProb,noRecomProb,MM.junctionLeftProb[group-1],
                          MM.junctionRightProb[group],optStructure[group-1], optStructure[group],rHap.ReducedStructureInfo[group-1],1,rHap.AlleleFreq);

                splitFoldedProb(recomProb,rightProb,noRecomProb);
                MM.unfoldProbabilities(group-1,recomProb,noRecomProb,foldedProb,1,rHap.ReducedStructureInfo,refCount);
            }

            // Copy this haplotype's observed alleles and missing flags for
            // the statistics update below.
            for(int jjj=0;jjj<rHap.numMarkers;jjj++)
                {
                    tempHap[jjj]=tHap.getScaffoldedHaplotype(hapIdIndiv,jjj);
                    tempMissHap[jjj]=tHap.getMissingScaffoldedHaplotype(hapIdIndiv,jjj);

                }

            // Keep the even haplotype's result: MM is overwritten when the
            // odd haplotype is imputed on the next pass of this do/while.
            if(vcfOutput)
            {
                if(hapIdIndiv%2==0)
                {
                   tempDoseHap1= MM.imputedHap;
                   tempDoseAlleleHap1= MM.imputedAlleleNumber;
                }
            }
            #pragma omp critical
            {
                stats.Update(MM.imputedHap, MM.leaveOneOut,tempHap,tempMissHap,rHap.major);
            }

            #pragma omp critical
            if (phased && !unphasedOutput)
            {

                PrintHaplotypeData(rHap, tHap, hapdose, haps,
                                    MM.imputedHap, MM.imputedAlleleNumber,
                                    hapIdIndiv, tHap.AllMaleTarget?hapId:hapId/2);
            }


            // An all-male target has a single haplotype per sample.
            if(tHap.AllMaleTarget)
                break;
            hapIdIndiv++;
        // Continue with the odd partner haplotype, then stop.
        }while(hapIdIndiv<MaxSample && hapIdIndiv%2==1);

        #pragma omp critical
        if(doseOutput)
        {
            PrintDosageData(rHap, tHap, dosages, MM.imputedDose, tHap.AllMaleTarget?hapId:hapId/2);
        }
         #pragma omp critical
        if(vcfOutput)
        {

            printf("    Saving Individual %s for VCF File...\n",  tHap.individualName[tHap.AllMaleTarget?hapId:hapId/2].c_str());
            // Store this sample in the next free slot of the current batch
            // (NumVcfCreated-NumVcfWritten).
            if(!tHap.AllMaleTarget)
                DosageForVcfPartial.SaveDosageForVcfOutputSampleWise(NumVcfCreated-NumVcfWritten,
                                                                 tHap.individualName[tHap.AllMaleTarget?hapId:hapId/2],
                                                                 tempDoseHap1,MM.imputedHap,
                                                                 tempDoseAlleleHap1,MM.imputedAlleleNumber);
            else
                DosageForVcfPartial.SaveDosageForVcfOutputSampleWiseChrX(NumVcfCreated-NumVcfWritten,
                                                                 tHap.individualName[tHap.AllMaleTarget?hapId:hapId/2],
                                                                  MM.imputedHap,
                                                                 MM.imputedAlleleNumber);

            if(DosageForVcfPartial.TypedOnly)
            {

                DosageForVcfPartial.SaveIndexForGWASOnlyForVcfOutput(NumVcfCreated-NumVcfWritten,
                                                                     tHap.AllMaleTarget?hapId:hapId/2);
            }



            NumVcfCreated++;
            vcfSampleIndex++;

            // Flush when the batch is full or the last sample has been
            // stored.
            if(NumVcfCreated%maxVcfSample==0 || NumVcfCreated==(tHap.AllMaleTarget?MaxSample:MaxSample/2))
            {

                string PartialVcfFileName(outFile),tempFileIndex1(outFile);
                stringstream strs;
                strs<<(NovcfParts);


                // Part files are named <outFile>.dose.vcf.part.<N>[.gz].
                PartialVcfFileName+=(".dose.vcf.part." +
                                      (string)(strs.str())
                                     +(gzip ? ".gz" : ""));
                if(!tHap.AllMaleTarget)
                    printf("\n    --->>> Saving samples %d-%d in VCF file : %s ...\n\n",
                       (NumVcfWritten)+1,(MaxSample/2<(NumVcfWritten+maxVcfSample)?MaxSample/2:(NumVcfWritten+maxVcfSample)),
                       PartialVcfFileName.c_str());
                else
                    printf("\n    --->>> Saving samples %d-%d in VCF file : %s ...\n\n",
                       (NumVcfWritten)+1,(MaxSample<(NumVcfWritten+maxVcfSample)?MaxSample:(NumVcfWritten+maxVcfSample)),
                       PartialVcfFileName.c_str());

//if(NovcfParts==2)
//    abort();


                FlushPartialVcf(rHap,tHap,DosageForVcfPartial,PartialVcfFileName,NovcfParts);
                // More samples remain: advance to the next part and re-size
                // the buffer for the next batch, which may be smaller.
                if(NumVcfCreated<(tHap.AllMaleTarget?MaxSample:MaxSample/2))
                {
                    NovcfParts++;
                    NumVcfWritten+=maxVcfSample;



//int gg=maxVcfSample<(((tHap.AllMaleTarget?MaxSample:MaxSample/2))-NumVcfWritten)?
//2*maxVcfSample:2*(((tHap.AllMaleTarget?MaxSample:MaxSample/2))-NumVcfWritten);
//
//
//abort();

                    if(!tHap.AllMaleTarget)
                        DosageForVcfPartial.InitializePartialDosageForVcfOutput(maxVcfSample<(MaxSample/2-NumVcfWritten)?2*maxVcfSample:2*(MaxSample/2-NumVcfWritten),rHap.numMarkers,format);
                    else
                        DosageForVcfPartial.InitializePartialDosageForVcfOutputMaleSamples(maxVcfSample<(MaxSample-NumVcfWritten)?maxVcfSample:(MaxSample-NumVcfWritten),rHap.numMarkers,format);


                }
            }

        }
    }

    cout<<endl<<" Imputation Finished ... "<<endl;


    // Close the per-haplotype outputs and report where they were written.
    if (phased && !unphasedOutput)
    {
        ifclose(hapdose);
        ifclose(haps);

        cout<<endl<<" Haplotype Dosage information written to : "<<
            outFile + ".hapDose" + (gzip ? ".gz" : "")<<endl;
        cout<<endl<<" Haplotype Allele information written to : "<<
        outFile + ".hapLabel" + (gzip ? ".gz" : "")<<endl;
    }



    if(doseOutput)
    {
        ifclose(dosages);
        cout<<endl<<" Dosage information written to           : "<<
        outFile + ".dose" + (gzip ? ".gz" : "")<<endl;
    }

    // Per-variant summary statistics (.info file).
    PrintInfoFile(rHap,tHap,stats);

    time_load = time(0) - time_prev;
    cout << "\n Time taken for imputation = " << time_load << " seconds."<<endl<<endl;


    // Merge the header file and the numbered part files into the final VCF.
    if(vcfOutput)
        MergeFinalVcfAllVariants(rHap,tHap,stats,NovcfParts);

}
コード例 #27
0
ファイル: Bam2FastQ.cpp プロジェクト: zorankiki/gotcloud
// Write a single record as a FASTQ entry and release it back to the pool.
//
// When splitting by read group (mySplitRG), the passed in filePtr is replaced
// by the file registered in myOutFastqs under the key "<RG><fileNameExt>".
// If no file is registered yet, it is opened and, unless fileNameExt is the
// second-end extension, a line describing it is written to myFqList.
//
// samRec       record to write; released to myPool before returning normally.
// filePtr      output file used when not splitting by read group.
// fileNameExt  extension identifying which output this record belongs to.
// readNameExt  suffix appended to the read name in the FASTQ entry.
//
// Throws std::runtime_error if a per read group file cannot be opened or if
// no output file is available for the record.
void Bam2FastQ::writeFastQ(SamRecord& samRec, IFILE filePtr,
                           const std::string& fileNameExt, const char* readNameExt)
{
    // Function-level statics: the storage is kept and reused between calls.
    static int16_t flag;
    static std::string sequence;
    static String quality;
    static std::string rg;
    static std::string rgFastqExt;
    static std::string rgListStr;
    static std::string fileName;
    static std::string fq2;
    if(mySplitRG)
    {
        rg = samRec.getString("RG").c_str();
        rgFastqExt = rg + fileNameExt;

        OutFastqMap::iterator it;
        it = myOutFastqs.find(rgFastqExt);
        if(it == myOutFastqs.end())
        {
            // New file: <outBase>[.<RG>]<fileNameExt>
            fileName = myOutBase.c_str();
            if(rg != "")
            {
                fileName += '.';
            }
            else
            {
                // No read group on this record; "." is used as the
                // placeholder for the header lookups below.
                rg = ".";
            }
            fileName += rgFastqExt;
            filePtr = ifopen(fileName.c_str(), "w", myCompression);
            if(filePtr == NULL)
            {
                // Report the real cause instead of registering a NULL file
                // and failing later with a misleading "filePtr not set".
                throw(std::runtime_error("Bam2FastQ: Failed to open " +
                                         fileName + " for writing."));
            }
            myOutFastqs[rgFastqExt] = filePtr;

            if(fileNameExt != mySecondFileNameExt)
            {
                // first end (or unpaired): add an entry to the fastq list.
                const char* sm = mySamHeader.getRGTagValue("SM", rg.c_str());
                // Fall back to the output base name when there is no sample.
                if(strcmp(sm, "") == 0){sm = myOutBase.c_str();}

                rgListStr.clear();
                SamHeaderRG* rgPtr = mySamHeader.getRG(rg.c_str());
                if((rgPtr == NULL) || (!rgPtr->appendString(rgListStr)))
                {
                    // No RG info for this record.
                    rgListStr = ".\n";
                }
                // The second fastq column is "." unless this is the first
                // file of a pair, in which case it names the matching
                // second-end file.
                fq2 = ".";
                if(fileNameExt == myFirstFileNameExt)
                {
                    fq2 = myOutBase.c_str();
                    if(rg != ".")
                    {
                        fq2 += '.';
                        fq2 += rg;
                    }
                    fq2 += mySecondFileNameExt;
                }
                // NOTE(review): no trailing newline here; rgListStr
                // presumably supplies it (the fallback above does) - confirm
                // against SamHeaderRG::appendString.
                ifprintf(myFqList, "%s\t%s\t%s\t%s",
                         sm, fileName.c_str(), fq2.c_str(),
                         rgListStr.c_str());
            }
        }
        else
        {
            // Already have a file for this read group/extension.
            filePtr = it->second;
        }
    }
    if(filePtr == NULL)
    {
        throw(std::runtime_error("Programming ERROR/EXITING: Bam2FastQ filePtr not set."));
    }

    flag = samRec.getFlag();
    const char* readName = samRec.getReadName();
    sequence = samRec.getSequence();
    if(myQField.IsEmpty())
    {
        // Read the quality from the quality field
        quality = samRec.getQuality();
    }
    else
    {
        // Read Quality from the specified tag
        const String* qTagPtr = samRec.getStringTag(myQField.c_str());
        if((qTagPtr != NULL) && (qTagPtr->Length() == (int)sequence.length()))
        {
            // Use the tag value for quality
            quality = qTagPtr->c_str();
        }
        else
        {
            // Tag was not found (or its length does not match the sequence),
            // so use the quality field.  Only warn the first time.
            ++myNumQualTagErrors;
            if(myNumQualTagErrors == 1)
            {
                std::cerr << "Bam2FastQ: " << myQField.c_str() 
                          << " tag was not found/invalid, so using the quality field in records without the tag\n";
            }
            quality = samRec.getQuality();
        }
    }
    
    if(SamFlag::isReverse(flag) && myReverseComp)
    {
        // It is reverse, so reverse complement the sequence
        BaseUtilities::reverseComplement(sequence);
        // Reverse the quality.
        quality.Reverse();
    }
    else
    {
        // Ensure it is all capitalized.
        // toupper requires a value representable as unsigned char.
        const std::string::size_type seqLen = sequence.size();
        for (std::string::size_type i = 0; i < seqLen; i++)
        {
            sequence[i] = (char)toupper((unsigned char)sequence[i]);
        }
    }
    
    if(myRNPlus)
    {
        // Repeat the read name on the '+' line.
        ifprintf(filePtr, "@%s%s\n%s\n+%s%s\n%s\n", readName, readNameExt,
                 sequence.c_str(), readName, readNameExt, quality.c_str());
    }
    else
    {
        ifprintf(filePtr, "@%s%s\n%s\n+\n%s\n", readName, readNameExt,
                 sequence.c_str(), quality.c_str());
    }
    // Release the record.
    myPool.releaseRecord(&samRec);
}
コード例 #28
0
ファイル: Bam2FastQ.cpp プロジェクト: zorankiki/gotcloud
int Bam2FastQ::execute(int argc, char **argv)
{
    // Extract command line arguments.
    String inFile = "";
    bool readName = false;
    String refFile = "";
    String firstOut = "";
    String secondOut = "";
    String unpairedOut = "";

    bool interleave = false;
    bool noeof = false;
    bool gzip = false;
    bool params = false;

    myOutBase = "";
    myNumMateFailures = 0;
    myNumPairs = 0;
    myNumUnpaired = 0;
    mySplitRG = false;
    myQField = "";
    myNumQualTagErrors = 0;
    myReverseComp = true;
    myRNPlus = false;
    myFirstRNExt = DEFAULT_FIRST_EXT;
    mySecondRNExt = DEFAULT_SECOND_EXT;
    myCompression = InputFile::DEFAULT;

    ParameterList inputParameters;
    BEGIN_LONG_PARAMETERS(longParameterList)
        LONG_PARAMETER_GROUP("Required Parameters")
        LONG_STRINGPARAMETER("in", &inFile)
        LONG_PARAMETER_GROUP("Optional Parameters")
        LONG_PARAMETER("readName", &readName)
        LONG_PARAMETER("splitRG", &mySplitRG)
        LONG_STRINGPARAMETER("qualField", &myQField)
        LONG_PARAMETER("merge", &interleave)
        LONG_STRINGPARAMETER("refFile", &refFile)
        LONG_STRINGPARAMETER("firstRNExt", &myFirstRNExt)
        LONG_STRINGPARAMETER("secondRNExt", &mySecondRNExt)
        LONG_PARAMETER("rnPlus", &myRNPlus)
        LONG_PARAMETER("noReverseComp", &myReverseComp)
        LONG_PARAMETER("gzip", &gzip)
        LONG_PARAMETER("noeof", &noeof)
        LONG_PARAMETER("params", &params)
        LONG_PARAMETER_GROUP("Optional OutputFile Names")
        LONG_STRINGPARAMETER("outBase", &myOutBase)
        LONG_STRINGPARAMETER("firstOut", &firstOut)
        LONG_STRINGPARAMETER("secondOut", &secondOut)
        LONG_STRINGPARAMETER("unpairedOut", &unpairedOut)
        LONG_PHONEHOME(VERSION)
        END_LONG_PARAMETERS();
   
    inputParameters.Add(new LongParameters ("Input Parameters", 
                                            longParameterList));

    // parameters start at index 2 rather than 1.
    inputParameters.Read(argc, argv, 2);

    // If no eof block is required for a bgzf file, set the bgzf file type to 
    // not look for it.
    if(noeof)
    {
        // Set that the eof block is not required.
        BgzfFileType::setRequireEofBlock(false);
    }

    if(gzip)
    {
        myCompression = InputFile::GZIP;
    }

    // Check to see if the in file was specified, if not, report an error.
    if(inFile == "")
    {
        usage();
        inputParameters.Status();
        // In file was not specified but it is mandatory.
        std::cerr << "--in is a mandatory argument, "
                  << "but was not specified" << std::endl;
        return(-1);
    }

    // Cannot specify both interleaved & secondOut since secondOut would be N/A.
    if(interleave && !secondOut.IsEmpty())
    {
        usage();
        inputParameters.Status();
        std::cerr << "ERROR: Cannot specify --merge & --secondOut.\n";
        return(-1);
    }

    // Cannot specify both interleaved & secondOut since secondOut would be N/A.
    if(interleave && !secondOut.IsEmpty())
    {
        usage();
        inputParameters.Status();
        std::cerr << "ERROR: Cannot specify --merge & --secondOut.\n";
        return(-1);
    }

    // Cannot specify both splitRG & firstOut/secondOut/unpairedOut
    // since it needs a different file for each RG.
    if(mySplitRG && (!firstOut.IsEmpty() || 
                   !secondOut.IsEmpty() || !unpairedOut.IsEmpty()))
    {
        usage();
        inputParameters.Status();
        std::cerr << "ERROR: Cannot specify --splitRG & --firstOut/--secondOut/--unpairedOut.\n";
        std::cerr << "Use --outBase instead.\n";
        return(-1);
    }
    // Cannot specify splitRG & output to stdout.
    if(mySplitRG && (myOutBase[0] == '-'))
    {
        usage();
        inputParameters.Status();
        std::cerr << "ERROR: Cannot specify --splitRG & write to stdout.\n";
        return(-1);
    }

    // Check to see if the out file was specified, if not, generate it from
    // the input filename.
    if(myOutBase == "")
    {
        // Just remove the extension from the input filename.
        int extStart = inFile.FastFindLastChar('.');
        if(extStart <= 0)
        {
            myOutBase = inFile;
        }
        else
        {
            myOutBase = inFile.Left(extStart);
        }
    }

    if(mySplitRG)
    {
        std::string fqList = myOutBase.c_str();
        fqList += ".list";
        myFqList = ifopen(fqList.c_str(), "w");
        ifprintf(myFqList, "MERGE_NAME\tFASTQ1\tFASTQ2\tRG\n");
    }

    // Check to see if the first/second/single-ended were specified and
    // if not, set them.
    myFirstFileNameExt = "_1.fastq";
    mySecondFileNameExt = "_2.fastq";
    myUnpairedFileNameExt = ".fastq";
    if(interleave)
    {
        myFirstFileNameExt = "_interleaved.fastq";
        myFirstFileNameExt = "_interleaved.fastq";
    }
    getFileName(firstOut, myFirstFileNameExt);
    getFileName(secondOut, mySecondFileNameExt);
    getFileName(unpairedOut, myUnpairedFileNameExt);

    if(params)
    {
        inputParameters.Status();
    }

    // Open the files for reading/writing.
    // Open prior to opening the output files,
    // so if there is an error, the outputs don't get created.
    SamFile samIn;
    samIn.OpenForRead(inFile, &mySamHeader);
    // Skip non-primary reads.
    samIn.SetReadFlags(0, 0x0100);

    // Open the output files if not splitting RG
    if(!mySplitRG)
    {
        myUnpairedFile = ifopen(unpairedOut, "w", myCompression);

        // Only open the first file if it is different than an already opened file.
        if(firstOut != unpairedOut)
        {
            myFirstFile = ifopen(firstOut, "w", myCompression);
        }
        else
        {
            myFirstFile = myUnpairedFile;
        }

        // If it is interleaved or the 2nd file is not a new name, set it appropriately.
        if(interleave || secondOut == firstOut)
        {
            mySecondFile = myFirstFile;
        }
        else if(secondOut == unpairedOut)
        {
            mySecondFile = myUnpairedFile;
        }
        else
        {
            mySecondFile = ifopen(secondOut, "w", myCompression);
        }
    
        if(myUnpairedFile == NULL)
        {
            std::cerr << "Failed to open " << unpairedOut
                      << " so can't convert bam2FastQ.\n";
            return(-1);
        }
        if(myFirstFile == NULL)
        {
            std::cerr << "Failed to open " << firstOut
                      << " so can't convert bam2FastQ.\n";
            return(-1);
        }
        if(mySecondFile == NULL)
        {
            std::cerr << "Failed to open " << secondOut
                      << " so can't convert bam2FastQ.\n";
            return(-1);
        }
    }

    if((readName) || (strcmp(mySamHeader.getSortOrder(), "queryname") == 0))
    {
        readName = true;
    }
    else
    {
        // defaulting to coordinate sorted.
        samIn.setSortedValidation(SamFile::COORDINATE);
    }

    // Setup the '=' translation if the reference was specified.
    if(!refFile.IsEmpty())
    {
        GenomeSequence* refPtr = new GenomeSequence(refFile);
        samIn.SetReadSequenceTranslation(SamRecord::BASES);
        samIn.SetReference(refPtr);
    }

    SamRecord* recordPtr;
    int16_t samFlag;

    SamStatus::Status returnStatus = SamStatus::SUCCESS;
    while(returnStatus == SamStatus::SUCCESS)
    {
        recordPtr = myPool.getRecord();
        if(recordPtr == NULL)
        {
            // Failed to allocate a new record.
            throw(std::runtime_error("Failed to allocate a new SAM/BAM record"));
        }
        if(!samIn.ReadRecord(mySamHeader, *recordPtr))
        {
            // Failed to read a record.
            returnStatus = samIn.GetStatus();
            continue;
        }

        // Have a record.  Check to see if it is a pair or unpaired read.
        samFlag = recordPtr->getFlag();
        if(SamFlag::isPaired(samFlag))
        {
            if(readName)
            {
                handlePairedRN(*recordPtr);
            }
            else
            {
                handlePairedCoord(*recordPtr);
            }
        }
        else
        {
            ++myNumUnpaired;
            writeFastQ(*recordPtr, myUnpairedFile,
                       myUnpairedFileNameExt);
        }
    }

    // Flush All
    cleanUpMateMap(0, true);

    if(returnStatus == SamStatus::NO_MORE_RECS)
    {
        returnStatus = SamStatus::SUCCESS;
    }

    samIn.Close();
    closeFiles();
    
    // Output the results
    std::cerr << "\nFound " << myNumPairs << " read pairs.\n";
    std::cerr << "Found " << myNumUnpaired << " unpaired reads.\n";
    if(myNumMateFailures != 0)
    {
        std::cerr << "Failed to find mates for " << myNumMateFailures
                  << " reads, so they were written as unpaired\n"
                  << "  (not included in either of the above counts).\n";
    }
    if(myNumQualTagErrors != 0)
    {
        std::cerr << myNumQualTagErrors << " records did not have tag "
                  << myQField.c_str() << " or it was invalid, so the quality field was used for those records.\n";
    }

    return(returnStatus);
}
コード例 #29
0
ファイル: SamFile.cpp プロジェクト: rtchen/gotcloud
// Open a sam/bam file for reading with the specified filename.
//
// A filename starting with '-' reads from stdin and must be one of:
//   "-.bam"          BGZF compressed BAM
//   "-.ubam"         uncompressed BAM (still opened as BGZF)
//   "-" or "-.sam"   SAM text
// Any other filename is opened and its first 4 bytes are inspected to tell
// BAM ("BAM\1") from SAM.
//
// filename  file to open (or one of the stdin names above).
// header    if not NULL, the header is read into it and the result of
//           ReadHeader is returned.
//
// Returns true on success; on failure sets myStatus to FAIL_IO and returns
// false.
bool SamFile::OpenForRead(const char * filename, SamFileHeader* header)
{
    // Reset for any previously operated on files.
    resetFile();

    if(filename[0] == '-')
    {
        // Read from stdin - determine type of file to read.
        const bool stdinBam = (strcmp(filename, "-.bam") == 0);
        const bool stdinUbam = (strcmp(filename, "-.ubam") == 0);
        const bool stdinSam = (strcmp(filename, "-") == 0) ||
            (strcmp(filename, "-.sam") == 0);

        if(!stdinBam && !stdinUbam && !stdinSam)
        {
            std::string errorMessage = "Invalid SAM/BAM filename, ";
            errorMessage += filename;
            errorMessage += ".  From stdin, can only be '-', '-.sam', '-.bam', or '-.ubam'";
            myStatus.setStatus(SamStatus::FAIL_IO, errorMessage.c_str());
            delete myFilePtr;
            myFilePtr = NULL;
            return(false);          
        }

        // In every case the underlying file is "-" (stdin).
        bool opened = false;
        if(stdinBam)
        {
            // Compressed bam - open as bgzf.
            myFilePtr = new InputFile;
            // support recover mode - this switches in a reader
            // capable of recovering from bad BGZF compression blocks.
            myFilePtr->setAttemptRecovery(myAttemptRecovery);
            opened = myFilePtr->openFile("-", "rb", InputFile::BGZF);
        }
        else if(stdinUbam)
        {
            // uncompressed BAM is still compressed with BGZF, but using
            // compression level 0, so still open as BGZF since it has a
            // BGZF header.

            // Uncompressed, so do not require the eof block.
#ifdef __ZLIB_AVAILABLE__
            BgzfFileType::setRequireEofBlock(false);
#endif
            myFilePtr = ifopen("-", "rb", InputFile::BGZF);
            opened = (myFilePtr != NULL);
        }
        else
        {
            // SAM File - read sam from stdin.
            myFilePtr = ifopen("-", "rb", InputFile::UNCOMPRESSED);
            opened = (myFilePtr != NULL);
        }

        if(!opened)
        {
            // Do not report the file as open when stdin could not be opened.
            std::string errorMessage = "Failed to Open ";
            errorMessage += filename;
            errorMessage += " for reading";
            myStatus.setStatus(SamStatus::FAIL_IO, errorMessage.c_str());
            delete myFilePtr;
            myFilePtr = NULL;
            return(false);
        }

        if(stdinSam)
        {
            myInterfacePtr = new SamInterface;
        }
        else
        {
            myInterfacePtr = new BamInterface;

            // Read (and skip over) the magic string; stdin cannot be rewound.
            char magic[4];
            ifread(myFilePtr, magic, 4);
        }
    }
    else
    {
        // Not from stdin.  Read the file to determine the type.

        myFilePtr = new InputFile;

        // support recovery mode - this conditionally enables a reader
        // capable of recovering from bad BGZF compression blocks.
        myFilePtr->setAttemptRecovery(myAttemptRecovery);
        bool rc = myFilePtr->openFile(filename, "rb", InputFile::DEFAULT);

        if (rc == false)
        {
            std::string errorMessage = "Failed to Open ";
            errorMessage += filename;
            errorMessage += " for reading";
            myStatus.setStatus(SamStatus::FAIL_IO, errorMessage.c_str());
            delete myFilePtr;
            myFilePtr = NULL;
            return(false);
        }
        
        // Zero-initialized so a file shorter than 4 bytes is never
        // classified from indeterminate buffer contents.
        char magic[4] = {0, 0, 0, 0};
        ifread(myFilePtr, magic, 4);
        
        if (magic[0] == 'B' && magic[1] == 'A' && magic[2] == 'M' &&
            magic[3] == 1)
        {
            myInterfacePtr = new BamInterface;
            // Set that it is a bam file open for reading.  This is needed to
            // determine if an index file can be used.
            myIsBamOpenForRead = true;
        }
        else
        {
            // Not a bam, so rewind to the beginning of the file so it
            // can be read.
            ifrewind(myFilePtr);
            myInterfacePtr = new SamInterface;
        }
    }

    // File is open for reading.
    myIsOpenForRead = true;

    // Read the header if one was passed in.
    if(header != NULL)
    {
        return(ReadHeader(*header));
    }

    // Successfully opened the file.
    myStatus = SamStatus::SUCCESS;
    return(true);
}
コード例 #30
0
ファイル: SamFile.cpp プロジェクト: rtchen/gotcloud
// Open a sam/bam file for writing with the specified filename.
//
// The output format is chosen from the end of the filename:
//   ...ubam   BAM written through BGZF with mode "wb0"
//   ...bam    BAM written through BGZF with mode "wb"
//   other     SAM written uncompressed
// "-.ubam", "-.bam" and, for SAM, any name starting with '-' are written
// to "-" instead.
//
// If header is not NULL it is written immediately and the result of
// WriteHeader is returned.  Returns false with myStatus set to FAIL_IO
// when the file cannot be opened.
bool SamFile::OpenForWrite(const char * filename, SamFileHeader* header)
{
    // Start from a clean state in case a file was previously open.
    resetFile();
    
    // Count the characters in the filename.
    int nameLen = 0;
    for(const char* cursor = filename; *cursor != 0; ++cursor)
    {
        ++nameLen;
    }

    // Classify the filename by its trailing characters.
    const char* nameEnd = filename + nameLen;
    const bool bamSuffix = (nameLen >= 3) &&
        (nameEnd[-3] == 'b') && (nameEnd[-2] == 'a') && (nameEnd[-1] == 'm');
    const bool ubamSuffix = bamSuffix && (nameLen >= 4) &&
        (nameEnd[-4] == 'u');

    if(ubamSuffix)
    {
        // Uncompressed BAM; exactly "-.ubam" means write to stdout.
        if((nameLen == 6) && (filename[0] == '-') && (filename[1] == '.'))
        {
            filename = "-";
        }
        myFilePtr = ifopen(filename, "wb0", InputFile::BGZF);
        myInterfacePtr = new BamInterface;
    }
    else if(bamSuffix)
    {
        // Compressed BAM; exactly "-.bam" means write to stdout.
        if((nameLen == 5) && (filename[0] == '-') && (filename[1] == '.'))
        {
            filename = "-";
        }
        myFilePtr = ifopen(filename, "wb", InputFile::BGZF);
        myInterfacePtr = new BamInterface;
    }
    else
    {
        // SAM; any name beginning with '-' means write to stdout.
        if((nameLen >= 1) && (filename[0] == '-'))
        {
            filename = "-";
        }
        myFilePtr = ifopen(filename, "wb", InputFile::UNCOMPRESSED);
        myInterfacePtr = new SamInterface;
    }

    // Bail out if the open failed.
    if(myFilePtr == NULL)
    {
        std::string errorMessage = "Failed to Open ";
        errorMessage += filename;
        errorMessage += " for writing";
        myStatus.setStatus(SamStatus::FAIL_IO, errorMessage.c_str());
        return(false);
    }
   
    myIsOpenForWrite = true;

    // With a header supplied, the outcome of writing it is the result.
    if(header != NULL)
    {
        return(WriteHeader(*header));
    }

    // Opened without writing a header.
    myStatus = SamStatus::SUCCESS;
    return(true);
}