Example #1
0
// Case-insensitive comparison of this sequence with s.
// Returns true when both sequences have the same length and, at every
// position, either both characters are gaps (as decided by IsGap) or the
// two characters are equal after upper-casing. Returns false otherwise.
bool Seq::EqIgnoreCase(const Seq &s) const
	{
	const unsigned n = Length();
	if (n != s.Length())
		return false;

	for (unsigned i = 0; i < n; ++i)
		{
		const char c1 = at(i);
		const char c2 = s.at(i);
		if (IsGap(c1))
			{
			// Any gap symbol matches any other gap symbol.
			if (!IsGap(c2))
				return false;
			continue;
			}
		// toupper() is undefined for negative arguments other than EOF, which a
		// plain (signed) char can produce, so convert through unsigned char first.
		const int u1 = toupper(static_cast<unsigned char>(c1));
		const int u2 = toupper(static_cast<unsigned char>(c2));
		if (u1 != u2)
			return false;
		}
	return true;
	}
// Runs the model over every test instance read from `is` and writes one line
// of segmented output per instance to `os`.
//   is : stream handed to read_test_data() to obtain the raw sentences, their
//        index sequences and their feature sequences.
//   os : receives the predicted words of each instance joined by
//        OutputDelimiter and terminated by "\n"; an instance with an empty raw
//        sentence produces just "\n".
// Timing and tag counts are collected in a BasicStat and logged at the end.
void CWSInput1WithFeatureModelHandler<RNNDerived, I1Model>::predict(std::istream &is, std::ostream &os)
{
    std::vector<Seq> raw_instances;
    std::vector<IndexSeq> sents;
    std::vector<CWSFeatureDataSeq> cws_feature_seqs;
    read_test_data(is, raw_instances, sents, cws_feature_seqs);
    BOOST_LOG_TRIVIAL(info) << "do prediction on " << raw_instances.size() << " instances .";

    BasicStat stat(true);
    stat.start_time_stat();
    for (unsigned int i = 0; i < raw_instances.size(); ++i)
    {
        Seq &raw_sent = raw_instances.at(i);
        if (0 == raw_sent.size())
        {
            // Keep output lines aligned with input lines.
            os << "\n";
            continue;
        }
        IndexSeq &sent = sents.at(i);
        CWSFeatureDataSeq &cws_feature_seq = cws_feature_seqs.at(i);
        IndexSeq pred_tag_seq;
        // A fresh computation graph is built for every instance.
        dynet::ComputationGraph cg;
        i1m->predict(cg, sent, cws_feature_seq, pred_tag_seq);

        Seq words;
        CWSTaggingSystem::static_parse_chars_indextag2word_seq(raw_sent, pred_tag_seq, words);
        // Emit the words separated by OutputDelimiter. Looping from 0 (instead
        // of unconditionally reading words[0]) keeps an empty result from
        // indexing out of bounds; the distinct index name avoids shadowing `i`.
        for (size_t word_idx = 0; word_idx < words.size(); ++word_idx)
        {
            if (word_idx != 0)
            {
                os << OutputDelimiter;
            }
            os << words[word_idx];
        }
        os << "\n";
        stat.total_tags += pred_tag_seq.size();
    }
    stat.end_time_stat();
    BOOST_LOG_TRIVIAL(info) << stat.get_stat_str("predict done.");
}
 // Conversion to a Seq<U,CA,A>: a new sequence is created, the contents of
 // values_ are handed over to it through its transfer() member, and this
 // object is asserted to be empty afterwards.
 operator Seq<U,CA,A>() const 
 {
     Seq<U,CA,A> target;
     target.transfer( target.end(), values_ );
     BOOST_ASSERT( empty() );
     return target;
 }
Example #4
0
std::size_t size(const Seq& seq)
{
    std::size_t d = 0;
    for (typename Seq::const_iterator it=seq.begin(); it!=seq.end(); ++it)
        d += boost::asio::buffer_size(*it);
    return d;
}
Example #5
0
// Prints the elements of s to stdout in rows of at most `step` items.
// Each row starts with two tabs, items within a row are separated by ", ",
// and consecutive rows are separated by ",\n". No newline is written after
// the final row. Every element is printed by calling p(element).
void emit_sep(
	Seq const &s, std::size_t step, Printer &&p
)
{
	auto pos(s.begin());

	// Prints one row covering indices [first, last); requires first < last.
	auto const print_row = [&](std::size_t first, std::size_t last) {
		printf("\t\t");
		p(*pos++);
		while (++first < last) {
			printf(", ");
			p(*pos++);
		}
	};

	std::size_t done(0);
	auto row_end(std::min(s.size(), done + step));

	// First row: no leading separator.
	if (row_end)
		print_row(done, row_end);
	done += row_end;

	// Remaining rows, each introduced by the row separator.
	while (done < s.size()) {
		printf(",\n");
		row_end = std::min(s.size(), done + step);

		if (!row_end)
			break;

		print_row(done, row_end);
		done += step;
	}
}
Example #6
0
// Applies the edit string es to sIn and stores the result as the single row
// of alignment a.
//   es  : zero-terminated array of run lengths; a positive value n copies the
//         next n characters of sIn, a negative value -n writes n '-' columns.
//   sIn : source sequence; its name and id are copied to row 0 of a.
//   a   : cleared and resized to one row with (symbols + indels) columns,
//         where both counts come from EstringCounts().
// Returns the number of columns written.
unsigned EstringOp(const short es[], const Seq &sIn, MSA &a)
	{
	unsigned uSymbols;
	unsigned uIndels;
	EstringCounts(es, &uSymbols, &uIndels);
	assert(sIn.Length() == uSymbols);

	const unsigned uColCount = uSymbols + uIndels;

	a.Clear();
	a.SetSize(1, uColCount);
	a.SetSeqName(0, sIn.GetName());
	a.SetSeqId(0, sIn.GetId());

	unsigned uSeqPos = 0;	// next unread position in sIn
	unsigned uCol = 0;		// next column to fill in a
	for (const short *ptrRun = es; 0 != *ptrRun; ++ptrRun)
		{
		const int iRun = *ptrRun;
		if (iRun > 0)
			{
			for (int k = 0; k < iRun; ++k)
				{
				const char c = sIn[uSeqPos++];
				a.SetChar(0, uCol++, c);
				}
			}
		else
			{
			for (int k = 0; k < -iRun; ++k)
				a.SetChar(0, uCol++, '-');
			}
		}
	assert(uCol == uColCount);
	return uColCount;
	}
Example #7
0
// Treats the k-th element of sq as the weight of the value k and returns
// sum(w_k * k * k) - (sum(w_k * k))^2. Results below 1e-7 are reported as 0.
// NOTE(review): the weights are not normalised here, so this is a variance
// only if the elements of sq sum to 1 -- confirm against callers.
double variance_pf(Seq &sq) {

	double first_moment = 0;
	double second_moment = 0;
	int idx = 0;

	for (typename Seq::iterator cur = sq.begin(); cur != sq.end(); ++cur, ++idx) {
		first_moment += *(cur) * idx;
		second_moment += (*(cur)) * idx * idx;
	}

	second_moment -= first_moment * first_moment;

	// Clamp tiny (or negative, from rounding) results to zero.
	return (second_moment < 1e-7) ? 0 : second_moment;
}
Example #8
0
// Replaces the contents of this vector with the sequences read from File.
// Records are fetched one at a time with GetFastaSeq() until it returns a
// null pointer. For each record a new Seq is allocated, filled with the
// record's characters, named after the record's label and appended.
// The data and label buffers returned by GetFastaSeq() are released with
// delete[] once they have been copied.
void SeqVect::FromFASTAFile(TextFile &File)
	{
	Clear();

	FILE *ptrStdio = File.GetStdioFile();
	char *ptrLabel;
	unsigned uSeqLength;
	while (char *ptrData = GetFastaSeq(ptrStdio, &uSeqLength, &ptrLabel))
		{
		Seq *ptrNewSeq = new Seq;
		for (unsigned uPos = 0; uPos < uSeqLength; ++uPos)
			ptrNewSeq->push_back(ptrData[uPos]);

		ptrNewSeq->SetName(ptrLabel);
		push_back(ptrNewSeq);

		delete[] ptrData;
		delete[] ptrLabel;
		}
	}
Example #9
0
    /**
     * Fills `value` with the items of the JSON array held by `v`.
     *
     * `value` is cleared first. Every array item must satisfy
     * json_traits<type_t&>::match_token_type(); each one is extracted into a
     * type_t and inserted at the end of `value`.
     *
     * @param v        JSON value whose `_arr` member points at the array.
     * @param json_ptr forwarded unchanged to the per-item extract() call.
     * @param value    destination container.
     * @throws type_error if an item does not match the element type; items
     *         extracted before the mismatch remain in `value`.
     */
    static void extract(const jsonpack::value &v, char* json_ptr, Seq &value)
    {
        // Bind by reference: the array is only read, so copying it on every
        // call is wasted work.
        const array_t &arr = *v._arr;
        value.clear();

        for(const auto &it : arr)
        {
            if( !json_traits<type_t&>::match_token_type(it) )
            {
                throw type_error( "Array item type mismatch" );
            }

#ifndef _MSC_VER
            // Initialize before use
            type_t val = {};
#else
            type_t val;
#endif
            json_traits<type_t&>::extract(it, json_ptr, val);
            value.insert(value.end(), val); //faster way in each container
        }

    }
Example #10
0
// Returns the population variance of the elements of sq, computed as
// mean(x^2) - mean(x)^2. An empty sequence yields 0, and results below 1e-7
// are reported as 0.
double variance_func(Seq &sq) {

	if (sq.empty())
		return 0;

	double sum = 0;
	double sum_sq = 0;

	for (typename Seq::iterator cur = sq.begin(); cur != sq.end(); ++cur) {
		sum += *(cur);
		sum_sq += (*(cur)) * (*(cur));
	}

	sum = sum / sq.size();
	sum_sq = sum_sq / sq.size();
	sum_sq -= sum * sum;

	// Clamp tiny (or negative, from rounding) results to zero.
	return (sum_sq < 1e-7) ? 0 : sum_sq;
}
Example #11
0
// Deletes every pointer stored in c and overwrites each slot with a null
// pointer. The container keeps its size; only the pointees are destroyed.
template<class Seq> void purge(Seq& c) 
{
	typename Seq::iterator cur = c.begin();
	while (cur != c.end()) {
		delete *cur;
		*cur = 0;
		++cur;
	}
}
 // Builds a Seq by appending, in order, every element of the initializer
 // list con with push_back().
 inline Seq
 strings (std::initializer_list<X> const& con)
 {
   Seq collected;
   // Iterate by const reference: taking each element by value would copy it
   // once here and a second time inside push_back().
   for (auto const& elm : con)
     collected.push_back(elm);
   return collected;
 }
Example #13
0
// Writes every sequence in this vector to File, in index order, by calling
// each sequence's own ToFASTAFile().
void SeqVect::ToFASTAFile(TextFile &File) const
	{
	const unsigned uCount = Length();
	unsigned uIndex = 0;
	while (uIndex < uCount)
		{
		at(uIndex)->ToFASTAFile(File);
		++uIndex;
		}
	}
Example #14
0
// Calls StripGapsAndWhitespace() on every sequence in this vector, in index
// order.
void SeqVect::StripGapsAndWhitespace()
	{
	const unsigned uCount = Length();
	unsigned uIndex = 0;
	while (uIndex < uCount)
		{
		at(uIndex)->StripGapsAndWhitespace();
		++uIndex;
		}
	}
Example #15
0
// Calls ToUpper() on every sequence in this vector, in index order.
void SeqVect::ToUpper()
	{
	const unsigned uCount = Length();
	unsigned uIndex = 0;
	while (uIndex < uCount)
		{
		at(uIndex)->ToUpper();
		++uIndex;
		}
	}
// Returns the substitution score for position i of seqA against position j
// of seqB: the VTML_SP entry indexed by the two letters, plus g_scoreCenter.
// Both positions are asserted to lie inside their sequences.
static SCORE Subst(const Seq &seqA, const Seq &seqB, unsigned i, unsigned j)
	{
	assert(i < seqA.Length());
	assert(j < seqB.Length());

	const unsigned uRow = seqA.GetLetter(i);
	const unsigned uCol = seqB.GetLetter(j);
	return VTML_SP[uRow][uCol] + g_scoreCenter;
	}
// Compares the sequence s with the character array p, element by element.
// At the first index where they differ the result is s[i] - p[i]. If all of
// s matches, the result is -p[s.size()], which is 0 exactly when p has a
// zero element at that index.
// p must be readable up to and including index s.size().
inline int string_compare(const Seq& s, const C* p)
{
   const std::size_t len = s.size();
   for(std::size_t k = 0; k < len; ++k)
   {
      if(p[k] != s[k])
         return s[k] - p[k];
   }
   return -p[len];
}
Example #18
0
 // Advances seq four times with next(), expecting cpred() to report
 // 1, 2, 3, 4 after the successive steps. It then expects cpred(i) to yield
 // 5-i for i = 1..4.
 void checkPredecessorInSeqOneToFive(Seq& seq)
 {
   int i = 0;
   while (i < 4) {
     seq.next();
     EXPECT_EQ(i+1, seq.cpred());
     ++i;
   }

   i = 1;
   while (i < 5) {
     EXPECT_EQ(5-i, seq.cpred(i));
     ++i;
   }
 }
Example #19
0
/** Purges an STL container of pointers. Only the pointers owned by the
    container, i.e. those whose corresponding flag in the second argument is
    true, are deleted; every slot is set to NULL whether or not it was deleted.
    If isAllocatedIn holds fewer flags than c holds elements, the elements
    without a flag are treated as not owned (they are nulled, never deleted).
    \param c an STL sequence container of pointers
    \param isAllocatedIn a std::vector<bool>; flag k says whether element k of c is owned
*/
template<class Seq> void purge(Seq& c, const std::vector<bool>& isAllocatedIn)
{
  std::vector<bool>::const_iterator owned = isAllocatedIn.begin();
  for (typename Seq::iterator i = c.begin(); i != c.end(); ++i)
  {
    // Never read past the end of the flag vector.
    if (owned != isAllocatedIn.end())
    {
      if (*owned) delete *i;
      ++owned;
    }
    *i = NULL;
  }
}
Example #20
0
// Finds runs of residues whose 7-residue local average of the
// CAntigenic::sm_Pa_table value is at or above a threshold, and appends one
// interval location per sufficiently long run to `results`.
//   seq     : residue sequence; each element is used (cast to unsigned) as an
//             index into CAntigenic::sm_Pa_table.
//   results : receives a CSeq_loc interval [begin, end] for every run found.
//   min_len : minimum run length for a run to be reported.
// The threshold is min(mean table value over the whole sequence, 1.0).
// Sequences shorter than 7 residues cannot hold a single averaging window and
// produce no results.
void x_PredictAGSites(const Seq& seq, CAntigenic::TLocVec& results,
                      int min_len)
{
    // Guard: the code below reads seq[0..6] and Pa[3], and computes
    // seq.size() - 3 on an unsigned size, all of which go out of range
    // (or wrap around) for fewer than 7 residues.
    if (seq.size() < 7) {
        return;
    }

    // First build vector giving local average of Pa (over 7 residues).
    // Along the way, calculate the average for the whole protein.
    
    vector<double> Pa(seq.size());
    double local_sum = 0, global_sum = 0;

    for (int i = 0;  i < 7;  i++) {
        local_sum += CAntigenic::sm_Pa_table[static_cast<unsigned>(seq[i])];
        global_sum += CAntigenic::sm_Pa_table[static_cast<unsigned>(seq[i])];
    }
    Pa[3] = local_sum / 7;
    
    // Slide the window: drop the residue leaving on the left, add the one
    // entering on the right.
    for (unsigned int i = 4;  i < seq.size() - 3;  i++) {
        local_sum -= CAntigenic::sm_Pa_table[static_cast<unsigned>(seq[i-4])];
        local_sum += CAntigenic::sm_Pa_table[static_cast<unsigned>(seq[i+3])];
        global_sum += CAntigenic::sm_Pa_table[static_cast<unsigned>(seq[i+3])];
        Pa[i] = local_sum / 7;
    }

    double global_mean = global_sum / seq.size();
    double thresh = min(global_mean, 1.0);

    // now look for runs of Pa >= thresh of length >= min_len

    int count = 0;
    int begin = 0;  // initialize to avoid compiler warning

    // NOTE: we go one extra residue, in the knowledge that
    // its Pa entry will be zero, so it will end any run
    for (unsigned int i = 3;  i < seq.size() - 2;  i++) {
        if (Pa[i] >= thresh) {
            if (count == 0) {
                begin = i;  // the beginning of a run
            }
            count++;
        } else {
            // the end of a (possibly empty) run
            if (count >= min_len) {
                // an antigenic site
                int end = i - 1;
                
                CRef<objects::CSeq_loc> loc(new objects::CSeq_loc());
                loc->SetInt().SetFrom(begin);
                loc->SetInt().SetTo(end);
                results.push_back(loc);
            }
            count = 0;
        }
    }
}
Example #21
0
// Calls FixAlpha() on every sequence in this vector, in index order.
// ClearInvalidLetterWarning() is called before the pass and
// ReportInvalidLetters() after it.
void SeqVect::FixAlpha()
	{
	ClearInvalidLetterWarning();

	const unsigned uCount = Length();
	unsigned uIndex = 0;
	while (uIndex < uCount)
		{
		at(uIndex)->FixAlpha();
		++uIndex;
		}

	ReportInvalidLetters();
	}
Example #22
0
void Seq::Copy(const Seq &rhs)
	{
	clear();
	const unsigned uLength = rhs.Length();
	for (unsigned uColIndex = 0; uColIndex < uLength; ++uColIndex)
		push_back(rhs.at(uColIndex));
	const char *ptrName = rhs.GetName();
	size_t n = strlen(ptrName) + 1;
	m_ptrName = new char[n];
	strcpy(m_ptrName, ptrName);
	}
Example #23
0
// Adds newseq to the collection `seq`, then simplifies the collection.
//
// Ownership: newseq is taken over by this call. It is deleted on each early
// exit below, or stored in `seq` (and possibly deleted later in this call if
// another entry implies it).
//
// Returns false if the collection was already flagged contradictory or
// becomes contradictory during this call; returns true otherwise.
bool  Sequencer::adjoin(Seq* newseq)
{
  // Already contradictory: nothing more can be added.
  if (bContradictory) {
    delete newseq;
    return false;
  }
  // An empty newseq makes the whole collection contradictory.
  if (newseq->isEmpty()) {
    delete newseq;
    seq.flush();
    bContradictory = true;
    return false;
  }
  // A trivial newseq is dropped without changing the collection.
  if (newseq->isTrivial()) {
    delete newseq;
    return true;
  }
  // `cur` indexes the entry being processed; entries before it have already
  // been processed. Entries pushed below are handled in later iterations.
  int  cur = seq.size();
  seq.push(newseq);
  AtomSet common;
LOOP:
  while (cur < seq.size()) {
    // Pass 1: compare seq[cur] with every earlier entry, highest index first.
    for (size_t i=cur; 0 < i--; ) {
      if (seq[cur]->implies(*seq[i])) {
        // The earlier entry is implied by the current one: remove it.
        // Erasing at i < cur shifts the current entry down by one slot.
        delete seq[i];
        seq.erase(seq.begin() + i);
        cur--;
      }
      else if (seq[i]->implies(*seq[cur])) {
        // The current entry is implied by an earlier one: remove it and
        // restart the outer loop; `cur` now indexes the next entry, if any.
        delete seq[cur];
        seq.erase(seq.begin() + cur);
        goto LOOP;
      }
    }
    // Pass 2: combine seq[cur] with every earlier entry.
    for (size_t j=cur; 0 < j--; ) {
      Seq res;
      // NOTE(review): `|` and `&` on these sets presumably mean union and
      // intersection -- confirm against the AtomSet definition.
      res.conj = seq[cur]->conj | seq[j]->conj;
      res.disj = seq[cur]->disj | seq[j]->disj;
      common   = res.conj & res.disj;
      // Only when exactly one atom appears in both parts: remove that atom
      // from both and keep the result as a new entry.
      if (common.size() == 1) {
        res.conj.erase(res.conj.find(common[0]));
        res.disj.erase(res.disj.find(common[0]));
        if (res.isEmpty()) {
          // An empty result means contradiction: the collection is replaced
          // by this single entry and flagged.
          seq.flush();
          seq.push(new Seq(res));
          bContradictory = true;
          return false;
        }
        seq.push(new Seq(res));
      }
    }
    cur++;
  }
  return true;
}
Example #24
0
// Calls processObject() once for each element of list, in iteration order.
// Each element is copied into a local before being passed on. An empty list
// is a no-op.
void XMLPrintVisitor::processObjects(Seq& list)
{
    if (list.empty()) return;

    typedef typename Seq::value_type element_type;

    // An explicit loop is used because the for_each form
    //   for_each<XMLPrintVisitor, Seq, typename Seq::value_type>(this, list, &XMLPrintVisitor::processObject<typename Seq::value_type>);
    // breaks the compiler on Visual2003.
    typename Seq::iterator cur = list.begin();
    while (cur != list.end())
    {
        element_type element = *cur;
        this->processObject<element_type>(element);
        ++cur;
    }
}
Example #25
0
void SeqVect::Copy(const SeqVect &rhs)
	{
	clear();
	unsigned uSeqCount = rhs.Length();
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		Seq *ptrSeq = rhs.at(uSeqIndex);
		Seq *ptrSeqCopy = new Seq;
		ptrSeqCopy->Copy(*ptrSeq);
		push_back(ptrSeqCopy);
		}
	}
// For every position i of s, stores in Row[i] a pointer to the VTML_SP row of
// that position's letter. Letters with a value of 20 or more use the AX_X row
// instead. Row must have room for at least s.Length() entries.
static void RowFromSeq(const Seq &s, SCORE *Row[])
	{
	const unsigned uCount = s.Length();
	unsigned uPos = 0;
	while (uPos < uCount)
		{
		const unsigned uLetter = CharToLetter(s.GetChar(uPos));
		Row[uPos] = (uLetter < 20) ? VTML_SP[uLetter] : VTML_SP[AX_X];
		++uPos;
		}
	}
// For every position i of s, stores that position's letter code in
// Letters[i]. Codes of 20 or more are replaced by AX_X. Letters must have
// room for at least s.Length() entries.
static void LettersFromSeq(const Seq &s, unsigned Letters[])
	{
	const unsigned uCount = s.Length();
	unsigned uPos = 0;
	while (uPos < uCount)
		{
		const unsigned uLetter = CharToLetter(s.GetChar(uPos));
		Letters[uPos] = (uLetter < 20) ? uLetter : AX_X;
		++uPos;
		}
	}
Example #28
0
// Rebuilds s from row uSeqIndex of msa: s is cleared, given that row's name
// and id, and then receives every non-gap character found in columns
// uColFrom..uColTo (both inclusive).
static void SeqFromMSACols(const MSA &msa, unsigned uSeqIndex, unsigned uColFrom,
  unsigned uColTo, Seq &s)
	{
	s.Clear();
	s.SetName(msa.GetSeqName(uSeqIndex));
	s.SetId(msa.GetSeqId(uSeqIndex));

	unsigned uCol = uColFrom;
	while (uCol <= uColTo)
		{
		const char c = msa.GetChar(uSeqIndex, uCol);
		++uCol;
		if (IsGapChar(c))
			continue;
		s.AppendChar(c);
		}
	}
Example #29
0
// Compares two sequences by drawing one item at a time from each with next().
// Returns false at the first pair of items that differ, and true once both
// draws test false at the same step (taken here to mean both are exhausted).
// Both arguments are taken by value, so next() is called on the local copies.
bool operator==(Seq<T> lhs, Seq<T> rhs) {
  for (;;) {
    Maybe<T> left(lhs.next());
    Maybe<T> right(rhs.next());

    if (left != right)
      return false;

    if (!left && !right)
      return true;
  }
}
Example #30
0
bool Seq::Eq(const Seq &s) const
	{
	const unsigned n = Length();
	if (n != s.Length())
		return false;
	for (unsigned i = 0; i < n; ++i)
		{
		const char c1 = at(i);
		const char c2 = s.at(i);
		if (c1 != c2)
			return false;
		}
	return true;
	}