Example #1
0
// Opens the file named by `filename` for output (default ofstream mode) and
// forwards `defline`, the sequence's C string and the stream to addToFasta().
// The stream is not checked for having opened successfully.
void BOOM::FastaWriter::writeFasta(const BOOM::String &defline,
				 const BOOM::String &sequence,
				 const BOOM::String &filename)
{
  const char *path=filename.c_str();
  ofstream out(path);
  const char *residues=sequence.c_str();
  addToFasta(defline,residues,out);
}
/*
  Writes the model file named by `filename`:
    1. the literal header line "SignalPeptide",
    2. a line-by-line copy of the file named by `startCodonModelFile`,
    3. the value of numFields, and then for every field its fieldLength,
       the number of codon entries that follow, and one "<codon> <logP>"
       line per codon, where logP=log(acidP*codonP).

  Throws a BOOM::String if the start codon model file cannot be opened.
  The output stream itself is not checked.
 */
void Application::generateModel(const BOOM::String &filename,
				const BOOM::String &startCodonModelFile)
{
  // Open the input first: a stream that failed to open never reaches EOF,
  // so the copy loop below would otherwise never terminate.  Checking
  // before the output is created also avoids leaving a header-only file.
  ifstream is(startCodonModelFile.c_str());
  if(!is.good())
    throw BOOM::String("Error opening file ")+startCodonModelFile+
      " in Application::generateModel()";

  // Create output file
  ofstream os(filename.c_str());
  os<<"SignalPeptide"<<endl;

  // Copy the start codon model into the file
  BOOM::String line;
  while(!is.eof())
    {
      line.getline(is);
      if(is.eof()) break;
      os<<line<<endl;
    }

  // Write out each field separately
  os<<numFields<<endl;
  for(int fieldNum=0 ; fieldNum<numFields ; ++fieldNum)
    {
      Field &field=*fields[fieldNum];

      // Count the number of codons (you just never know...)
      BOOM::Map<char,float>::iterator aCur=field.aminoAcidFreqs.begin(),
	aEnd=field.aminoAcidFreqs.end();
      numCodons=0;
      for(; aCur!=aEnd ; ++aCur)
	numCodons+=codonFreqs[(*aCur).first].size();

      os<<field.fieldLength<<endl;
      os<<numCodons<<endl;
      aCur=field.aminoAcidFreqs.begin();  aEnd=field.aminoAcidFreqs.end();
      for(; aCur!=aEnd ; ++aCur)
	{
	  const char acid=(*aCur).first;
	  const float acidP=(*aCur).second;

	  // Bind by reference: copying the whole per-acid map on every
	  // iteration is unnecessary
	  BOOM::Map<BOOM::String,float> &codons=codonFreqs[acid];
	  BOOM::Map<BOOM::String,float>::iterator cur=codons.begin(),
	    end=codons.end();
	  for(; cur!=end ; ++cur)
	    {
	      const BOOM::String &codon=(*cur).first;
	      const float codonP=(*cur).second;
	      const float logP=log(acidP*codonP);
	      os<<codon<<" "<<logP<<endl;
	    }
	}
    }
}
Example #3
0
/*
  Computes the entropy of the single-character distribution observed in
  `str`: each character is counted, its relative frequency p is formed,
  and -sum(p*lg(p)) is returned.

  maxEntropy (output) is set to -lg(1/k), where k is the number of
  distinct characters seen; the returned value is clamped to it.

  An empty string has no distribution; in that case both the return
  value and maxEntropy are 0 (previously this divided by zero).
 */
double BOOM::SequenceEntropy::entropy(const BOOM::String &str,
				    double &maxEntropy)
{
  const int len=str.length();
  if(len==0)
    {
      maxEntropy=0;
      return 0;
    }

  // Tally every character; keys are the 1-character windows of str
  BOOM::StringMap<int> counts(hashTableSize(0));
  const char *p=str.c_str();
  for(int i=0 ; i<len ; ++i, ++p)
    {
      if(counts.isDefined(p,1)) 
	++counts.lookup(p,1);
      else 
	counts.lookup(p,1)=1;
    }

  // Accumulate -p*lg(p) over the distinct characters
  double entropy=0;
  StringMapIterator<int> cur=counts.begin(), end=counts.end();
  for(; cur!=end ; ++cur)
    {
      const int count=(*cur).second;
      const double prob=count/double(len);
      entropy-=prob*lg(prob);
    }

  maxEntropy=-lg(1.0/counts.size());
  if(entropy>maxEntropy) entropy=maxEntropy;
  return entropy;
}
Example #4
0
File: IMM.C Project: bmajoros/EGGS
// Opens `filename` for output and delegates to the stream overload of
// save(), returning its result.  Throws a BOOM::String if the stream is
// not in a good state after opening.
bool IMM::save(const BOOM::String &filename)
{
  ofstream os(filename.c_str());
  // Leading space added so the filename and "in" are not run together
  if(!os.good()) throw BOOM::String("Error creating file ")+filename+
		   " in IMM::save()";
  return save(os);
}
Example #5
0
// Opens `filename` for output and delegates to the stream overload of
// save(), returning its result.  Throws a BOOM::String if the stream is
// not in a good state after opening.
bool ThreePeriodicMarkovChain::save(const BOOM::String &filename)
{
  ofstream os(filename.c_str());
  // Leading space added so the filename and "in" are not run together
  if(!os.good()) throw BOOM::String("Error creating file ")+filename+
		   " in ThreePeriodicMarkovChain::save()";
  return save(os);
}
Example #6
0
/*
  Computes the entropy of the distribution of (order+1)-character windows
  observed in `str`: every window is counted, its relative frequency p is
  formed, and -sum(p*lg(p)) is returned.

  maxEntropy (output) is set to -lg(1/k), where k is the number of
  distinct windows seen; the returned value is clamped to it.

  Throws a BOOM::String when order+1 is not smaller than the length of str.
 */
double BOOM::SequenceEntropy::jointEntropy(const BOOM::String &str,
					   int order,
					   double &maxEntropy)
{
  const int seqLen=str.length();
  const int wordLen=order+1;
  if(wordLen>=seqLen)
    throw BOOM::String("Order ")+order+
      " is too large for sequence of length "+seqLen;

  // Slide a window of wordLen characters across the sequence and tally it
  const int windowCount=seqLen-wordLen+1;
  BOOM::StringMap<int> tallies(hashTableSize(order));
  int observed=0;
  for(const char *window=str.c_str() ; observed<windowCount ; ++window)
    {
      if(!tallies.isDefined(window,wordLen))
	tallies.lookup(window,wordLen)=1;
      else
	++tallies.lookup(window,wordLen);
      ++observed;
    }

  // Accumulate -p*lg(p) over the distinct windows
  double H=0;
  StringMapIterator<int> it=tallies.begin(), stop=tallies.end();
  while(it!=stop)
    {
      const int tally=(*it).second;
      const double prob=tally/double(observed);
      H-=prob*lg(prob);
      ++it;
    }

  maxEntropy=-lg(1.0/tallies.size());
  return H>maxEntropy ? maxEntropy : H;
}
// Writes every element of `scores`, one per line and in container order,
// to the file named by `filename`.  The stream is not checked for having
// opened successfully.
void Application::writeHistogramFile(BOOM::Vector<double> &scores,
				     const BOOM::String &filename)
{
  ofstream out(filename.c_str());
  BOOM::Vector<double>::iterator it=scores.begin();
  BOOM::Vector<double>::iterator stop=scores.end();
  while(it!=stop)
    {
      out<<*it<<endl;
      ++it;
    }
}
Example #8
0
// Opens `filename` for output and delegates to the stream overload of
// save(), returning its result.  Throws a BOOM::String if the stream is
// not in a good state after opening.
bool BranchAcceptor::save(const BOOM::String &filename)
{
  ofstream os(filename.c_str());
  // The message previously named BranchPoint::save() and ran the filename
  // straight into the word "in"
  if(!os.good())
    throw BOOM::String("Error creating file ")+filename+
      " in BranchAcceptor::save()";
  return save(os);
}
Example #9
0
// Registers `s` in the `consensuses` table (stored with the value char(1))
// and records its length in consensusLength.  Throws a BOOM::String when a
// positive consensusLength is already set and differs from the length of s.
void SignalSensor::addConsensus(const BOOM::String &s)
{
  const int newLen=s.length();
  const bool lengthAlreadySet=consensusLength>0;
  if(lengthAlreadySet && newLen!=consensusLength)
    throw BOOM::String(
       "Consensus lengths differ in SignalSensor::addConsensus");

  consensusLength=newLen;
  const char *key=s.c_str();
  consensuses.lookup(key,newLen)=char(1);
}
Example #10
0
File: IMM.C Project: bmajoros/EGGS
// Constructs an IMM from the file named by `filename`.  The first
// whitespace-delimited token of the file must be "IMM"; the remainder of
// the stream is handed to load().  Throws a BOOM::String if the file cannot
// be opened or if the leading token is anything else.
IMM::IMM(const BOOM::String &filename)
  : revComp(NULL), models(new BOOM::Vector<BOOM::StringMap<double>*>)
{
  ifstream in(filename.c_str());
  const bool opened=in.good();
  if(!opened)
    throw BOOM::String("Error opening file ")+filename
      +" in IMM::IMM()";

  // Verify the type tag before parsing the body
  BOOM::String tag;
  in >> tag;
  if(tag!="IMM")
    throw BOOM::String("Attempt to load an object of type ")+tag+
      " into an IMM";

  load(in);
}
Example #11
0
// Constructs a ThreePeriodicMarkovChain from the file named by `filename`.
// The first whitespace-delimited token of the file must be "3P"; the
// remainder of the stream is handed to load().  Throws a BOOM::String if
// the file cannot be opened or if the leading token is anything else.
ThreePeriodicMarkovChain::ThreePeriodicMarkovChain(const BOOM::String &
						   filename)
{
  ifstream in(filename.c_str());
  const bool opened=in.good();
  if(!opened)
    throw BOOM::String("Error opening file ")+filename
      +" in ThreePeriodicMarkovChain()";

  // Verify the type tag before parsing the body
  BOOM::String tag;
  in >> tag;
  if(tag!="3P")
    throw BOOM::String("Attempt to load an object of type ")+tag+
      " into a ThreePeriodicMarkovChain (3P)";

  load(in);
}
// Walks `transcript` in consecutive, non-overlapping 3-character steps,
// maps each triple to a key with BOOM::ProteinTrans::mapCodon(), and
// increments that triple's count in codonFreqs[key].  Trailing characters
// that do not form a complete triple are ignored.
void Application::updateCodonFreqs(const BOOM::String &transcript)
{
  const char *p=transcript.c_str();
  const int len=transcript.length();

  // Require a full triple (i+3<=len); the previous bound (i<len) read past
  // the end of the buffer when len was not a multiple of 3
  for(int i=0 ; i+3<=len ; i+=3, p+=3)
    {
      BOOM::String codon(p,3);
      char acid=BOOM::ProteinTrans::mapCodon(codon.c_str());
      BOOM::Map<BOOM::String,float> &counts=codonFreqs[acid];
      if(!counts.isDefined(codon)) counts[codon]=1;
      else ++counts[codon];
    }
}
Example #13
0
/*
  Reads whitespace-separated (unsigned x, double y) pairs from the file
  named by `filename`, appending one EmpiricalDistributionElement per pair
  to v, and then sets binSize to the difference between the first members
  of v[1] and v[0].

  Reading stops at end of file or at the first token that fails to parse;
  an incomplete trailing pair is discarded.

  Throws a BOOM::String if the file cannot be opened or if v holds fewer
  than two elements afterwards (binSize cannot be computed).
 */
void EmpiricalDistribution::load(const BOOM::String &filename)
{
  ifstream is(filename.c_str());
  if(!is.good()) throw BOOM::String("Error opening file ")+filename+
		   " in EmpiricalDistribution::load()";

  // Testing the extraction itself (rather than eof()) also terminates on
  // malformed input, which sets failbit without ever reaching EOF
  unsigned x;
  double y;
  while(is >> x >> y)
    v.push_back(new EmpiricalDistributionElement(x,y));

  if(v.size()<2)
    throw BOOM::String("Fewer than two elements read from file ")+filename+
      " in EmpiricalDistribution::load()";
  binSize=v[1]->first-v[0]->first;
}
Example #14
0
// Constructs a BranchAcceptor from the file named by `filename`.  The first
// whitespace-delimited token of the file must be "BranchAcceptor"; the
// remainder of the stream is handed to load().  Throws a BOOM::String if
// the file cannot be opened or if the leading token is anything else.
BranchAcceptor::BranchAcceptor(GarbageCollector &gc,BOOM::String &filename)
  : SignalSensor(gc),
    branchPoint(NULL),
    acceptor(NULL)
{
  ifstream is(filename.c_str());
  // Leading space added so the filename and "in" are not run together
  if(!is.good()) throw BOOM::String("Error opening file ")+filename+
		   " in BranchAcceptor::BranchAcceptor()";

  // Verify the type tag before parsing the body
  BOOM::String modelType;
  is >> modelType;
  // Message previously read "<type>in into a BranchAcceptor"
  if(modelType!="BranchAcceptor") 
    throw BOOM::String("Attempt to load an object of type ")+modelType+
      " into a BranchAcceptor";
  load(is);
}
Example #15
0
File: IMM.C Project: bmajoros/EGGS
/*
  Returns the value stored for the longest context around position `index`
  of `str` that is defined in one of the per-order tables in `models`.

  Orders are tried from the largest permitted one down to 0, and the first
  table that has the key defined supplies the result.  The largest
  permitted order is capped by N and by the number of characters available
  on the relevant side of `index` (left on the plus strand, right on the
  minus strand).

  The parameters seq, s and c are not used by this body.

  Throws a BOOM::String (plus strand) or a BOOM::Stacktrace (minus strand)
  when no order matches, and a BOOM::String holding the file name and line
  number when getStrand() returns neither strand constant.
 */
double IMM::scoreSingleBase(const Sequence &seq,const BOOM::String &str,
				    int index,Symbol s,char c)
{
  const char *p=str.c_str();
  switch(getStrand())
    {
    case PLUS_STRAND:
      {
	// Cannot use more context than there are characters before index
	int maxOrder=(index>N ? N : index);
	for(int order=maxOrder ; order>=0 ; --order)
	  {
	    BOOM::StringMap<double> &model=*(*models)[order];
	    // Key arguments are (p, index-order, order+1) -- presumably the
	    // order+1 characters ending at index; confirm against StringMap
	    if(model.isDefined(p,index-order,order+1))
	      return model.lookup(p,index-order,order+1);
	  }
	throw BOOM::String("IMM::scoreSingleBase('+',")+
	  index+",strlen="+strlen(p)+",str="+
	  str.substring(index,maxOrder)+")";
      }

    case MINUS_STRAND:
      {
	/*
	  On the minus strand we have to take our contexts from the
	  right (but only because we trained the model that way)
	 */
	int seqLen=str.length();
	// Cannot use more context than there are characters after index
	int maxOrder=seqLen-index-1;
	if(maxOrder>N) maxOrder=N;
	for(int order=maxOrder ; order>=0 ; --order)
	  {
	    BOOM::StringMap<double> &model=*(*models)[order];
	    // Key arguments are (p, index, order+1) -- presumably the
	    // order+1 characters starting at index; confirm against StringMap
	    if(model.isDefined(p,index,order+1)) 
	      return model.lookup(p,index,order+1);
	  }
	throw BOOM::Stacktrace(
          BOOM::String("IMM::scoreSingleBase('-',")+
	    index+",strlen="+strlen(p)+",str="+
	  str.substring(index,maxOrder)+")");
      }

    // Neither strand constant: report the source location
    default: throw BOOM::String(__FILE__)+__LINE__;
    }
}
Example #16
0
/*
  Computes -sum( p(w) * lg( count(w)/count(prefix(w)) ) ) over the distinct
  (order+1)-character windows w observed in `str`, where p(w) is the
  relative frequency of w among all windows and prefix(w) is its first
  `order` characters.  Prefix counts are tallied over the same set of
  window start positions.

  Throws a C string literal when order<1, and a BOOM::String when order+1
  is not smaller than the length of str.
 */
double BOOM::SequenceEntropy::conditionalEntropy(const BOOM::String &str,
						 int order)
{
  if(order<1)
    throw "BOOM::SequenceEntropy::conditionalEntropy() : order<1";

  const int seqLen=str.length();
  const int wordLen=order+1;
  if(wordLen>=seqLen)
    throw BOOM::String("Order ")+order+
      " is too large for sequence of length "+seqLen;

  const int contextLen=wordLen-1;
  const int windowCount=seqLen-wordLen+1;
  BOOM::StringMap<int> wordTallies(hashTableSize(order));
  BOOM::StringMap<int> contextTallies(hashTableSize(order-1));

  // Tally every window and, at the same position, its prefix
  int observed=0;
  for(const char *window=str.c_str() ; observed<windowCount ;
      ++window, ++observed)
    {
      if(!wordTallies.isDefined(window,wordLen))
	wordTallies.lookup(window,wordLen)=1;
      else
	++wordTallies.lookup(window,wordLen);

      if(!contextTallies.isDefined(window,contextLen))
	contextTallies.lookup(window,contextLen)=1;
      else
	++contextTallies.lookup(window,contextLen);
    }

  // Weight the log of each conditional probability by the joint probability
  double H=0;
  StringMapIterator<int> it=wordTallies.begin(), stop=wordTallies.end();
  while(it!=stop)
    {
      const int tally=(*it).second;
      const char *word=(*it).first;
      const double jointP=tally/double(observed);
      const double condP=
	tally/double(contextTallies.lookup(word,contextLen));
      H-=jointP*lg(condP);
      ++it;
    }
  return H;
}
Example #17
0
/*
  Returns a copy of this string in which each occurrence of `from`, found
  by a left-to-right scan that resumes immediately after each match, is
  replaced by `to`.

  An empty `from` matches nothing and yields an unmodified copy; without
  this guard the scan position would not advance on a zero-length match.
 */
BOOM::String BOOM::String::substitute(const BOOM::String &from,
				  const BOOM::String &to) const
{
  const int patternLen=from.length();
  if(patternLen==0) return *this;

  BOOM::String rval;
  const char *pattern=from.c_str();
  const char *ptr=c_str();
  const int textLen=length();

  // Only scan when the pattern can fit; this also avoids forming a pointer
  // that lies before the start of the buffer
  if(patternLen<=textLen) {
    const char *last=ptr+textLen-patternLen;
    while(ptr<=last) {
      if(localMatch(ptr,pattern,patternLen)) {
	ptr+=patternLen;
	rval+=to;
      }
      else {
	rval+=*ptr;
	ptr++;
      }
    }
  }

  // Copy the tail, which is too short to contain another match
  for(; *ptr ; ++ptr) rval+=*ptr;
  return rval;
}
Example #18
0
/*
  Writes the composite model file named by `outfile`, in this order:
    - the header line "TataCapModel" and the min/max separation,
    - the signal sensor loaded from `tataFile`,
    - two "MC / INTERGENIC" blocks built from intergenicModel: the first
      lists A,C,G,N,T with their own entries, the second lists the same
      labels with the entries of T,G,C,N,A respectively,
    - capModel and capIntergenicRatioModel.
  The output stream uses a precision of 8 and is not checked for having
  opened successfully.
 */
void Application::writeOutput(const BOOM::String &tataFile,
			      const BOOM::String &outfile)
{
  // Labels in output order, and the symbol whose entry is written under
  // each label in the second block
  static const char labels[]={'A','C','G','N','T'};
  static const char swapped[]={'T','G','C','N','A'};
  const int numLabels=sizeof(labels)/sizeof(labels[0]);

  // Load TATA model
  SignalSensor *tataSensor=SignalSensor::load(tataFile,GC);

  // Create output file and write header
  ofstream out(outfile.c_str());
  out.precision(8);
  out<<"TataCapModel"<<endl;
  out<<minSeparation<<"\t"<<maxSeparation<<endl;

  // Write out the TATA model
  tataSensor->save(out);

  // Write out the intergenic model: first block uses each label's own entry
  Alphabet &alphabet=DnaAlphabet::global();
  out<<"MC\nINTERGENIC\n0\t0\t1\n5"<<endl;
  for(int i=0 ; i<numLabels ; ++i)
    out<<labels[i]<<"\n"<<intergenicModel[alphabet.lookup(labels[i])]<<endl;

  // Second block: same labels, entries taken from the swapped symbols
  out<<"MC\nINTERGENIC\n0\t0\t1\n5"<<endl;
  for(int i=0 ; i<numLabels ; ++i)
    out<<labels[i]<<"\n"<<intergenicModel[alphabet.lookup(swapped[i])]<<endl;

  // Write out the CAP model
  capModel->save(out);

  // Write out the CAP/intergenic ratio model
  capIntergenicRatioModel->save(out);
}
Example #19
0
// Concatenation with another BOOM::String: applies operator+ to *this and
// the C string of `s`, and returns the result as a new BOOM::String.
BOOM::String BOOM::String::operator+(const BOOM::String &s) const
{
  const char *rhs=s.c_str();
  return BOOM::String(*this+rhs);
}
Example #20
0
// Compares this string with `str` using strcasecmp().  Because the integer
// result is converted to bool, the return value is true when the strings
// DIFFER and false when strcasecmp() reports them equal; the ordering
// (sign) information is lost.
bool BOOM::String::stricmp(const BOOM::String &str) const
{
  const int cmp=strcasecmp(c_str(),str.c_str());
  return cmp!=0;
}
Example #21
0
// Reports whether `substring` matches this string starting at offset `pos`,
// as decided by localMatch().  Returns false, without touching the buffer,
// when pos is negative or when the substring would extend past the end of
// this string.
bool BOOM::String::occursAt(const BOOM::String &substring,int pos) const
{
  const int subLen=substring.size();
  const int ownLen=length();

  // Reject positions where a full match cannot fit; previously this was an
  // out-of-range read
  if(pos<0 || subLen>ownLen-pos) return false;

  return localMatch(substring.c_str(),c_str()+pos,subLen);
}
Example #22
0
// Reports whether find() locates the C string of `s` in this string, i.e.
// whether find() returns something other than npos.
bool BOOM::String::contains(const BOOM::String &s) const
{
  const char *needle=s.c_str();
  if(find(needle)==npos) return false;
  return true;
}
Example #23
0
// Opens the file named by `filename` for output and delegates to the stream
// overload of save().  The stream is not checked for having opened
// successfully.
void NmerRateMatrix::save(const BOOM::String &filename)
{
  const char *path=filename.c_str();
  ofstream out(path);
  save(out);
}
Example #24
0
// Opens the file named by `filename` in append mode and forwards `defline`,
// `sequence` and the stream to addToFasta().  The stream is not checked for
// having opened successfully.
void FastaWriter::appendToFasta(const BOOM::String &defline,const 
				BOOM::String &sequence,const BOOM::String &filename)
{
  const char *path=filename.c_str();
  ofstream out(path,std::ios::app);
  addToFasta(defline,sequence,out);
}
Example #25
0
// Convenience overload taking the sequence as a BOOM::String: forwards to
// the addToFasta() overload that accepts the sequence as a C string.
void BOOM::FastaWriter::addToFasta(const BOOM::String &defline,
				 const BOOM::String &sequence,
				 ostream &os)
{
  const char *residues=sequence.c_str();
  addToFasta(defline,residues,os);
}
Example #26
0
// Reports whether the `consensuses` table has an entry defined for the key
// (C string of str, index, consensusLength) -- presumably the
// consensusLength characters of str beginning at index; confirm against
// the StringMap interface.
bool SignalSensor::consensusOccursAt(const BOOM::String &str,int index)
{
  const char *text=str.c_str();
  const bool found=consensuses.isDefined(text,index,consensusLength);
  return found;
}