Example #1
0
// Rewrites the stored meta lines from VCF 3.3 conventions to VCF 4.0.
//
//  - A "fileformat" entry whose value is exactly "VCFv3.3" becomes "VCFv4.0".
//  - Every "FILTER" entry is removed (key and value).
//  - Every "FORMAT" entry is split on ',' and re-emitted as
//      <ID=f0,Number=f2,Type=f1,Description=f3>
//    i.e. the second and third input fields swap places.
//
// A FORMAT entry that does not split into at least four fields is left
// unchanged instead of indexing past the end of the token array.
void VcfFile::upgradeMetaLines() {
  std::vector<int> toDelete;

  for(int i=0; i < asMetaKeys.Length(); ++i) {
    if ( asMetaKeys[i].Compare("fileformat") == 0 ) {
      if ( asMetaValues[i].Compare("VCFv3.3") == 0 ) {
        asMetaValues[i] = "VCFv4.0";
      }
    }
    else if ( asMetaKeys[i].Compare("FILTER") == 0 ) {
      // Only remember the index here; deleting while scanning would shift
      // the entries that have not been visited yet.
      toDelete.push_back(i);
    }
    else if ( asMetaKeys[i].Compare("FORMAT") == 0 ) {
      StringArray tok;
      tok.ReplaceColumns(asMetaValues[i],',');
      // Need ID, Type, Number and Description; skip malformed lines.
      if ( tok.Length() >= 4 ) {
        asMetaValues[i].printf("<ID=%s,Number=%s,Type=%s,Description=%s>",tok[0].c_str(),tok[2].c_str(),tok[1].c_str(),tok[3].c_str());
      }
    }
  }

  // Indices were collected in ascending order; remove them from the highest
  // down so that the remaining recorded indices stay valid.
  for(std::vector<int>::reverse_iterator it = toDelete.rbegin(); it != toDelete.rend(); ++it) {
    asMetaValues.Delete(*it);
    asMetaKeys.Delete(*it);
  }

  // add INFO and FILTER?
}
Example #2
0
// Accumulates per-site allele/strand counts from one VCF file into vCounts.
//
// The file is read marker by marker. Each marker is matched against the anchor
// positions in vPos by walking both forward (this assumes both are sorted by
// position). The marker's BASE and STRAND format fields are split on ',' and,
// for every observation, the counter at
//     FILTER_STAT_COUNTS*site + allele*2 + strand
// is incremented, where
//     allele: 0 = equals vAl1[site], 1 = equals vAl2[site], 2 = anything else
//     strand: 0 = "F",               1 = anything else
//
// file: path of the VCF to read.
// Returns true once the whole file has been consumed; returns false if a
// marker lies beyond the last anchor position (reported via the logger first).
//
// NOTE: the count update is not synchronized; the original author flagged it
// as needing a lock if this is ever called concurrently.
bool FilterStat::appendStatVcf(const char* file) {
  VcfFile vcf;
  vcf.setSiteOnly(false);
  vcf.setParseValues(true);
  vcf.setParseGenotypes(false);
  vcf.setParseDosages(false);
  vcf.openForRead(file,1);

  VcfMarker* pMarker;
  StringArray tok;
  // Assumes vPos is a std::vector-like container exposing size().
  const int nAnchors = (int) vPos.size();

  // j indexes the anchor site; it advances by one per marker and is pushed
  // further forward below when the marker is ahead of the current anchor.
  for( int j=0; vcf.iterateMarker(); ++j ) {
    pMarker = vcf.getLastMarker();
    if ( sChrom.Compare(pMarker->sChrom) != 0 ) {
      Logger::gLogger->error("Chromosome name does not match - %s vs %s",sChrom.c_str(),pMarker->sChrom.c_str());
    }

    // Skip anchor sites that precede this marker, never stepping past the end.
    while ( j < nAnchors && vPos[j] < pMarker->nPos ) { ++j; }

    if ( j >= nAnchors ) {
      // The marker is beyond the last anchor site: there is nothing to index.
      Logger::gLogger->error("Position %s:%d is not observed in the anchor VCF",sChrom.c_str(),pMarker->nPos);
      // Bail out in case the logger's error() does not terminate the program.
      return false;
    }

    if ( vPos[j] > pMarker->nPos ) {
      Logger::gLogger->error("Position %s:%d is not observed in the anchor VCF",sChrom.c_str(),pMarker->nPos);
    }

    std::vector<int> vAlleles;
    std::vector<int> vStrands;

    for(int k=0; k < pMarker->asFormatKeys.Length(); ++k) {
      if ( pMarker->asFormatKeys[k].Compare("BASE") == 0 ) {
        // Classify every observed base against the two anchor alleles.
        tok.ReplaceColumns(pMarker->asSampleValues[k],',');
        for(int l=0; l < tok.Length(); ++l) {
          if ( tok[l].Compare(vAl1[j].c_str()) == 0 ) {
            vAlleles.push_back(0);
          }
          else if ( tok[l].Compare(vAl2[j].c_str()) == 0 ) {
            vAlleles.push_back(1);
          }
          else {
            vAlleles.push_back(2);
          }
        }
      }
      else if ( pMarker->asFormatKeys[k].Compare("STRAND") == 0 ) {
        // "F" maps to 0, every other strand code maps to 1.
        tok.ReplaceColumns(pMarker->asSampleValues[k],',');
        for(int l=0; l < tok.Length(); ++l) {
          if ( tok[l].Compare("F") == 0 ) {
            vStrands.push_back(0);
          }
          else {
            vStrands.push_back(1);
          }
        }
      }
    }

    if ( vAlleles.size() != vStrands.size() ) {
      Logger::gLogger->error("Alleles and Strands do not match in size at %s:%d, in %s",pMarker->sChrom.c_str(), pMarker->nPos, file);
    }

    // updates the counts - needs synchronization
    {
      //boost::mutex::scoped_lock lock(mutex);
      for(int k=0; k < (int) vAlleles.size(); ++k) {
        ++(vCounts[FILTER_STAT_COUNTS*j + vAlleles[k]*2 + vStrands[k]]);
      }
    }
  }
  return true;
}
Example #3
0
int VcfMac::execute(int argc, char **argv)
{
    String inputVcf = "";
    int minAC = -1;
    String sampleSubset = "";
    String filterList = "";
    bool params = false;

    IntervalTree<int> regions;
    std::vector<int> intersection;
    
    // Read in the parameters.    
    ParameterList inputParameters;
    BEGIN_LONG_PARAMETERS(longParameterList)
        LONG_PARAMETER_GROUP("Required Parameters")
        LONG_STRINGPARAMETER("in", &inputVcf)
        LONG_PARAMETER_GROUP("Optional Parameters")
        LONG_STRINGPARAMETER("sampleSubset", &sampleSubset)
        LONG_INTPARAMETER("minAC", &minAC)
        LONG_STRINGPARAMETER("filterList", &filterList)
        LONG_PARAMETER("params", &params)
        LONG_PHONEHOME(VERSION)
        END_LONG_PARAMETERS();
   
    inputParameters.Add(new LongParameters ("Input Parameters", 
                                            longParameterList));
    
    inputParameters.Read(argc-1, &(argv[1]));
    
    // Check that all files were specified.
    if(inputVcf == "")
    {
        usage();
        inputParameters.Status();
        std::cerr << "Missing \"--in\", a required parameter.\n\n";
        return(-1);
    }

    if(params)
    {
        inputParameters.Status();
    }

    // Open the two input files.
    VcfFileReader inFile;
    VcfHeader header;
    VcfRecord record;

    // Open the file
    if(sampleSubset.IsEmpty())
    {
        inFile.open(inputVcf, header);        
    }
    else
    {
        inFile.open(inputVcf, header, sampleSubset, NULL, NULL);
    }
    
    // Add the discard rule for minor allele count.
    if(minAC >= 0)
    {
        inFile.addDiscardMinMinorAlleleCount(minAC, NULL);
    }
    
    if(!filterList.IsEmpty())
    {
        // Open the filter list.
        IFILE regionFile = ifopen(filterList, "r");
        String regionLine;
        StringArray regionColumn;
        int start;
        int end;
        int intervalVal = 1;
        if(regionFile == NULL)
        {
            std::cerr << "Failed to open " << filterList 
                      << ", so keeping all positions\n";
            filterList.Clear();
        }
        else
        {
            while( regionFile->isOpen() && !regionFile->ifeof())
            {
                // Read the next interval
                regionLine.Clear();
                regionLine.ReadLine(regionFile);
                if(regionLine.IsEmpty())
                {
                    // Nothing on this line, continue to the next.
                    continue;
                }
                regionColumn.ReplaceColumns(regionLine, ' ');
                if(regionColumn.Length() != 2)
                {
                    std::cerr << "Improperly formatted region line: " 
                              << regionLine << "; skipping to the next line.\n";
                    continue;
                }
                // Convert the columns to integers.
                if(!regionColumn[0].AsInteger(start))
                {
                    // The start position (1st column) is not an integer.
                    std::cerr << "Improperly formatted region line, start position "
                              << "(1st column) is not an integer: "
                              << regionColumn[0]
                              << "; Skipping to the next line.\n";
                    continue;
                }
                if(!regionColumn[1].AsInteger(end))
                {
                    // The start position (1st column) is not an integer.
                    std::cerr << "Improperly formatted region line, end position "
                              << "(2nd column) is not an integer: "
                              << regionColumn[1]
                              << "; Skipping to the next line.\n";
                    continue;
                }
                // Add 1-based inclusive intervals.
                regions.add(start,end, intervalVal);
            }
        }
    }


    int numReadRecords = 0;

    while( inFile.readRecord(record))
    {
        if(!filterList.IsEmpty())
        {
            // Check if the region should be kept.
            intersection.clear();
            regions.get_intersecting_intervals(record.get1BasedPosition(), intersection);
            
            if(intersection.empty())
            {
                // not in the interval, so continue to the next record.
                continue;
            }
        }

        ++numReadRecords;

        // Loop through the number of possible alternates.
        unsigned int numAlts = record.getNumAlts();
        int minAlleleCount = -1;
        int curAlleleCount = 0;
        int totalAlleleCount = 0;
        for(unsigned int i = 0; i <= numAlts; i++)
        {
            curAlleleCount = record.getAlleleCount(i);
            if((minAlleleCount == -1) ||
               (curAlleleCount < minAlleleCount))
            {
                minAlleleCount = curAlleleCount;
            }
            totalAlleleCount += curAlleleCount;
        }
        if(totalAlleleCount != 0)
        {
            double maf = (double)minAlleleCount/totalAlleleCount;
            std::cout << record.getIDStr()
                      << "\t" << minAlleleCount
                      << "\t" << maf << "\n";
        }
    }
    
    inFile.close();

    //    std::cerr << "\n\t# Records: " << numReadRecords << "\n";

    // return success.
    return(0);
}