// Compares this sequence with s position by position, ignoring letter case.
// Returns false when the lengths differ. At each position: if this
// sequence's character is a gap (per IsGap), s must also have a gap there;
// otherwise the upper-cased characters must be equal.
bool Seq::EqIgnoreCase(const Seq &s) const
{
	const unsigned n = Length();
	if (n != s.Length())
		return false;

	for (unsigned i = 0; i < n; ++i)
	{
		const char c1 = at(i);
		const char c2 = s.at(i);
		if (IsGap(c1))
		{
			if (!IsGap(c2))
				return false;
		}
		else
		{
			// toupper() requires a value representable as unsigned char (or EOF);
			// passing a negative plain char is undefined behaviour, so convert first.
			const int u1 = toupper(static_cast<unsigned char>(c1));
			const int u2 = toupper(static_cast<unsigned char>(c2));
			if (u1 != u2)
				return false;
		}
	}
	return true;
}
// Reads test instances from `is`, runs the model on each one and writes one
// output line per instance to `os`.
//   is : stream handed to read_test_data() to obtain the raw sentences, their
//        index sequences and their feature sequences.
//   os : receives, per instance, the predicted words joined by OutputDelimiter
//        followed by "\n". An instance whose raw sentence has size 0 yields
//        just "\n" and is not passed to the model.
// Timing and the number of predicted tags are accumulated in a BasicStat and
// logged when done.
void CWSInput1WithFeatureModelHandler<RNNDerived, I1Model>::predict(std::istream &is, std::ostream &os)
{
    std::vector<Seq> raw_instances;
    std::vector<IndexSeq> sents;
    std::vector<CWSFeatureDataSeq> cws_feature_seqs;
    read_test_data(is, raw_instances, sents, cws_feature_seqs);
    BOOST_LOG_TRIVIAL(info) << "do prediction on " << raw_instances.size() << " instances .";

    BasicStat stat(true);
    stat.start_time_stat();
    for (unsigned int i = 0; i < raw_instances.size(); ++i)
    {
        Seq &raw_sent = raw_instances.at(i);
        if (0 == raw_sent.size())
        {
            // Keep output lines aligned with input instances.
            os << "\n";
            continue;
        }
        IndexSeq &sent = sents.at(i);
        CWSFeatureDataSeq &cws_feature_seq = cws_feature_seqs.at(i);

        IndexSeq pred_tag_seq;
        dynet::ComputationGraph cg; // a fresh graph for every instance
        i1m->predict(cg, sent, cws_feature_seq, pred_tag_seq);

        Seq words;
        CWSTaggingSystem::static_parse_chars_indextag2word_seq(raw_sent, pred_tag_seq, words);

        // Guard against an empty result: indexing words[0] unconditionally
        // would read past the end of an empty container.
        if (words.size() > 0)
        {
            os << words[0];
            // Distinct index name so the outer instance index is not shadowed.
            for (size_t w = 1; w < words.size(); ++w)
                os << OutputDelimiter << words[w];
        }
        os << "\n";
        stat.total_tags += pred_tag_seq.size();
    }
    stat.end_time_stat();
    BOOST_LOG_TRIVIAL(info) << stat.get_stat_str("predict done.");
}
// Conversion to a Seq<U,CA,A>: a fresh container is created and the contents
// of values_ are handed over to it with transfer(). Afterwards this object is
// asserted to be empty.
// NOTE(review): this is a const member that passes values_ to transfer();
// presumably values_ is declared mutable -- confirm in the enclosing class.
operator Seq<U,CA,A>() const
{
    Seq<U,CA,A> converted;
    converted.transfer( converted.end(), values_ );
    BOOST_ASSERT( empty() );
    return converted;
}
std::size_t size(const Seq& seq) { std::size_t d = 0; for (typename Seq::const_iterator it=seq.begin(); it!=seq.end(); ++it) d += boost::asio::buffer_size(*it); return d; }
// Prints the elements of s via printf, `step` elements per row.
// Each row starts with two tabs, elements within a row are separated by ", ",
// and consecutive rows are separated by ",\n". Every element is rendered by
// calling p(element).
void emit_sep( Seq const &s, std::size_t step, Printer &&p )
{
    auto cursor(s.begin());

    // Emits one row for positions [from, upto). The first element of a row is
    // always printed; the remaining ones are preceded by ", ".
    auto print_row = [&](std::size_t from, std::size_t upto)
    {
        printf("\t\t");
        p(*cursor++);
        for (std::size_t k = from + 1; k < upto; ++k)
        {
            printf(", ");
            p(*cursor++);
        }
    };

    // First row: no leading row separator.
    std::size_t done(0);
    auto row_end(std::min(s.size(), done + step));
    if (row_end)
        print_row(done, row_end);
    done += row_end;

    // Remaining rows: each one is introduced by the ",\n" row separator.
    while (done < s.size())
    {
        printf(",\n");
        row_end = std::min(s.size(), done + step);
        if (!row_end)
            break;
        print_row(done, row_end);
        done += step;
    }
}
unsigned EstringOp(const short es[], const Seq &sIn, MSA &a) { unsigned uSymbols; unsigned uIndels; EstringCounts(es, &uSymbols, &uIndels); assert(sIn.Length() == uSymbols); unsigned uColCount = uSymbols + uIndels; a.Clear(); a.SetSize(1, uColCount); a.SetSeqName(0, sIn.GetName()); a.SetSeqId(0, sIn.GetId()); unsigned p = 0; unsigned uColIndex = 0; for (;;) { int n = *es++; if (0 == n) break; if (n > 0) for (int i = 0; i < n; ++i) { char c = sIn[p++]; a.SetChar(0, uColIndex++, c); } else for (int i = 0; i < -n; ++i) a.SetChar(0, uColIndex++, '-'); } assert(uColIndex == uColCount); return uColCount; }
// Treats sq[h] as a weight attached to the index h (h = 0, 1, 2, ...) and
// returns  sum(sq[h]*h*h) - (sum(sq[h]*h))^2.
// Results below 1e-7 are reported as 0.
// NOTE(review): the sums are not normalised, so this is a variance only if
// the elements already sum to 1 -- confirm with callers.
double variance_pf(Seq &sq)
{
    double firstMoment = 0;
    double secondMoment = 0;
    int h = 0;
    for (typename Seq::iterator cur = sq.begin(); cur != sq.end(); ++cur, ++h)
    {
        firstMoment += *(cur) * h;
        secondMoment += (*(cur)) * h * h;
    }

    secondMoment -= firstMoment * firstMoment;
    return (secondMoment < 1e-7) ? 0 : secondMoment;
}
// Replaces the contents of this vector with the sequences read from File.
// GetFastaSeq() is called repeatedly on the file's stdio handle until it
// returns 0. Each record becomes a heap-allocated Seq holding the record's
// characters and label, and is appended to this vector. The data and label
// buffers returned by GetFastaSeq() are released with delete[] after copying.
void SeqVect::FromFASTAFile(TextFile &File)
{
	Clear();

	FILE *f = File.GetStdioFile();
	char *Label;
	unsigned uLength;
	char *SeqData;
	while (0 != (SeqData = GetFastaSeq(f, &uLength, &Label)))
	{
		Seq *ptrSeq = new Seq;
		for (const char *pc = SeqData; pc != SeqData + uLength; ++pc)
			ptrSeq->push_back(*pc);

		ptrSeq->SetName(Label);
		push_back(ptrSeq);

		delete[] SeqData;
		delete[] Label;
	}
}
// Fills `value` with the items of the JSON array held by v.
//   v        : JSON value; its _arr pointer is dereferenced unconditionally.
//   json_ptr : forwarded unchanged to the element-level extract().
//   value    : destination container; cleared first, then each converted
//              item is inserted at its end.
// Throws type_error("Array item type mismatch") as soon as an item does not
// match type_t; items converted before that point remain in `value`.
static void extract(const jsonpack::value &v, char* json_ptr, Seq &value)
{
    // Bind by reference: the array is only read here, so copying it (as the
    // previous by-value local did) was wasted work.
    const array_t &arr = *v._arr;
    value.clear();

    for (const auto &it : arr)
    {
#ifndef _MSC_VER
        // Initialize before use
        type_t val = {};
#else
        type_t val;
#endif
        if (json_traits<type_t&>::match_token_type(it))
        {
            json_traits<type_t&>::extract(it, json_ptr, val);
            value.insert(value.end(), val); //faster way in each container
        }
        else
        {
            throw type_error( "Array item type mismatch" );
        }
    }
}
// Returns the population variance of the elements of sq, computed as
// mean(x*x) - mean(x)^2. An empty sequence yields 0, and results below 1e-7
// are reported as 0.
double variance_func(Seq &sq)
{
    if (sq.empty())
        return 0;

    double sum = 0;
    double sumOfSquares = 0;
    for (typename Seq::iterator cur = sq.begin(); cur != sq.end(); ++cur)
    {
        sum += *(cur);
        sumOfSquares += (*(cur))*(*(cur));
    }

    const double mean = sum / sq.size();
    double spread = sumOfSquares / sq.size();
    spread -= mean * mean;

    return (spread < 1e-7) ? 0 : spread;
}
// Deletes every pointer stored in the container c and resets each slot to a
// null pointer. The container keeps its size.
template<class Seq>
void purge(Seq& c)
{
    typename Seq::iterator cur = c.begin();
    while (cur != c.end())
    {
        delete *cur;
        *cur = 0;
        ++cur;
    }
}
// Builds a Seq by appending, in order, every element of the initializer
// list con with push_back(). An empty list yields a default-constructed Seq.
inline Seq strings (std::initializer_list<X> const& con)
{
    Seq collected;
    // Iterate by const reference: taking each element by value made an extra
    // copy before push_back() copied it again.
    for (auto const& elm : con)
        collected.push_back(elm);
    return collected;
}
// Writes every sequence in this vector to File, in index order, by
// delegating to Seq::ToFASTAFile().
void SeqVect::ToFASTAFile(TextFile &File) const
{
	const unsigned uTotal = Length();
	unsigned uNext = 0;
	while (uNext < uTotal)
	{
		Seq *ptrCurrent = at(uNext);
		ptrCurrent->ToFASTAFile(File);
		++uNext;
	}
}
void SeqVect::StripGapsAndWhitespace() { unsigned uSeqCount = Length(); for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) { Seq *ptrSeq = at(uSeqIndex); ptrSeq->StripGapsAndWhitespace(); } }
void SeqVect::ToUpper() { unsigned uSeqCount = Length(); for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) { Seq *ptrSeq = at(uSeqIndex); ptrSeq->ToUpper(); } }
// Returns the substitution score for position i of seqA against position j
// of seqB: the VTML_SP table entry for the two letters plus g_scoreCenter.
// Both indexes are asserted to lie inside their sequences.
static SCORE Subst(const Seq &seqA, const Seq &seqB, unsigned i, unsigned j)
{
	assert(i < seqA.Length());
	assert(j < seqB.Length());

	const unsigned uRow = seqA.GetLetter(i);
	const unsigned uCol = seqB.GetLetter(j);
	const SCORE *ptrRow = VTML_SP[uRow];
	return ptrRow[uCol] + g_scoreCenter;
}
// Three-way comparison of the sequence s against the character array p.
// Scans the common prefix; at the first position where they differ, returns
// s[pos] - p[pos]. If all of s matches, returns -p[size], which is 0 exactly
// when p holds a zero element at that position.
// p must be readable up to index s.size() (e.g. a terminated string at least
// as long as the matched prefix).
inline int string_compare(const Seq& s, const C* p)
{
    std::size_t pos = 0;
    for (; pos < s.size(); ++pos)
    {
        if (!(p[pos] == s[pos]))
            break;
    }
    return (pos == s.size()) ? -p[pos] : s[pos] - p[pos];
}
// Test helper. Advances seq four times with next(), expecting cpred() to
// report 1, 2, 3, 4 after the respective steps. Then expects cpred(i) for
// i = 1..4 to report 4, 3, 2, 1.
void checkPredecessorInSeqOneToFive(Seq& seq)
{
    // Step forward and check the value reported by cpred() after each step.
    {
        int i = 0;
        while (i < 4)
        {
            seq.next();
            EXPECT_EQ(i+1, seq.cpred());
            ++i;
        }
    }

    // Query cpred with arguments 1..4 without advancing further.
    {
        int i = 1;
        while (i < 5)
        {
            EXPECT_EQ(5-i, seq.cpred(i));
            ++i;
        }
    }
}
/** Purges an STL container of pointers. Only the pointers owned by the
    container, i.e. those whose corresponding flag in isAllocatedIn is true,
    are deleted; every slot is reset to NULL regardless.
    If isAllocatedIn holds fewer flags than c holds elements, the elements
    without a flag are treated as not owned (they are not deleted).
    \param c an STL sequence container of pointers
    \param isAllocatedIn one ownership flag per element of c, in the same order
*/
template<class Seq>
void purge(Seq& c, const std::vector<bool>& isAllocatedIn)
{
    typename Seq::iterator i;
    std::vector<bool>::const_iterator it = isAllocatedIn.begin();
    for (i = c.begin(); i != c.end(); ++i)
    {
        // Never step past the end of the flag vector: a missing flag counts
        // as "not owned", so nothing is deleted on a guess.
        const bool owned = (it != isAllocatedIn.end()) && *it++;
        if (owned)
            delete *i;
        *i = NULL;
    }
}
void x_PredictAGSites(const Seq& seq, CAntigenic::TLocVec& results, int min_len) { // First build vector giving local average of Pa (over 7 residues). // Along the way, calculate the average for the whole protein. vector<double> Pa(seq.size()); double local_sum = 0, global_sum = 0; for (int i = 0; i < 7; i++) { local_sum += CAntigenic::sm_Pa_table[static_cast<unsigned>(seq[i])]; global_sum += CAntigenic::sm_Pa_table[static_cast<unsigned>(seq[i])]; } Pa[3] = local_sum / 7; for (unsigned int i = 4; i < seq.size() - 3; i++) { local_sum -= CAntigenic::sm_Pa_table[static_cast<unsigned>(seq[i-4])]; local_sum += CAntigenic::sm_Pa_table[static_cast<unsigned>(seq[i+3])]; global_sum += CAntigenic::sm_Pa_table[static_cast<unsigned>(seq[i+3])]; Pa[i] = local_sum / 7; } double global_mean = global_sum / seq.size(); double thresh = min(global_mean, 1.0); // now look for runs of Pa >= thresh of length >= min_len int count = 0; int begin = 0; // initialize to avoid compiler warning // NOTE: we go one extra residue, in the knowledge that // its Pa entry will be zero, so it will end any run for (unsigned int i = 3; i < seq.size() - 2; i++) { if (Pa[i] >= thresh) { if (count == 0) { begin = i; // the beginning of a run } count++; } else { // the end of a (possibly empty) run if (count >= min_len) { // an antigenic site int end = i - 1; CRef<objects::CSeq_loc> loc(new objects::CSeq_loc()); loc->SetInt().SetFrom(begin); loc->SetInt().SetTo(end); results.push_back(loc); } count = 0; } } }
void SeqVect::FixAlpha() { ClearInvalidLetterWarning(); unsigned uSeqCount = Length(); for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) { Seq *ptrSeq = at(uSeqIndex); ptrSeq->FixAlpha(); } ReportInvalidLetters(); }
void Seq::Copy(const Seq &rhs) { clear(); const unsigned uLength = rhs.Length(); for (unsigned uColIndex = 0; uColIndex < uLength; ++uColIndex) push_back(rhs.at(uColIndex)); const char *ptrName = rhs.GetName(); size_t n = strlen(ptrName) + 1; m_ptrName = new char[n]; strcpy(m_ptrName, ptrName); }
// Adds newseq to the collection `seq` and then processes every entry from
// the new one onwards against the entries stored before it.
// newseq is consumed: on every path it is either deleted or pushed onto seq.
// Returns false when the collection is (or becomes) contradictory, true
// otherwise.
// NOTE(review): seq.flush() presumably deletes and removes all stored
// entries, and `|` / `&` on the conj/disj members presumably denote set
// union / intersection -- confirm against their definitions.
bool Sequencer::adjoin(Seq* newseq) {
    // Already contradictory: nothing more can be added.
    if (bContradictory) {
        delete newseq;
        return false;
    }
    // An empty entry makes the whole collection contradictory.
    if (newseq->isEmpty()) {
        delete newseq;
        seq.flush();
        bContradictory = true;
        return false;
    }
    // A trivial entry is dropped without changing anything.
    if (newseq->isTrivial()) {
        delete newseq;
        return true;
    }
    // cur indexes the entry currently being processed; it starts at the
    // position newseq is about to occupy.
    int cur = seq.size();
    seq.push(newseq);
    AtomSet common;
LOOP:
    // Entries pushed inside the loop body land at indexes above cur and are
    // therefore processed by later iterations.
    while (cur < seq.size()) {
        // Pass 1: compare seq[cur] with every earlier entry, highest index first.
        for (size_t i=cur; 0 < i--; ) {
            if (seq[cur]->implies(*seq[i])) {
                // The earlier entry is removed; everything above i shifts
                // down by one, so cur is decremented to keep tracking the
                // same entry.
                delete seq[i];
                seq.erase(seq.begin() + i);
                cur--;
            } else if (seq[i]->implies(*seq[cur])) {
                // The current entry itself is removed. Restart the outer
                // loop: index cur now holds the next entry, or is past the end.
                delete seq[cur];
                seq.erase(seq.begin() + cur);
                goto LOOP;
            }
        }
        // Pass 2: combine seq[cur] with every earlier entry.
        for (size_t j=cur; 0 < j--; ) {
            Seq res;
            res.conj = seq[cur]->conj | seq[j]->conj;
            res.disj = seq[cur]->disj | seq[j]->disj;
            common = res.conj & res.disj;
            // Only when exactly one atom is shared between the combined conj
            // and disj sets is a new entry derived, with that atom removed
            // from both sets.
            if (common.size() == 1) {
                res.conj.erase(res.conj.find(common[0]));
                res.disj.erase(res.disj.find(common[0]));
                if (res.isEmpty()) {
                    // Derived the empty entry: keep only it and mark the
                    // collection contradictory.
                    seq.flush();
                    seq.push(new Seq(res));
                    bContradictory = true;
                    return false;
                }
                seq.push(new Seq(res));
            }
        }
        cur++;
    }
    return true;
}
// Calls processObject() on a copy of every element of list, in iteration
// order. An empty list is a no-op.
void XMLPrintVisitor::processObjects(Seq& list)
{
    if (list.empty())
        return;

    // the following line breaks the compilator on Visual2003
    //for_each<XMLPrintVisitor, Seq, typename Seq::value_type>(this, list, &XMLPrintVisitor::processObject<typename Seq::value_type>);
    typename Seq::iterator cur = list.begin();
    while (cur != list.end())
    {
        typename Seq::value_type element = *cur;
        this->processObject<typename Seq::value_type>(element);
        ++cur;
    }
}
void SeqVect::Copy(const SeqVect &rhs) { clear(); unsigned uSeqCount = rhs.Length(); for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) { Seq *ptrSeq = rhs.at(uSeqIndex); Seq *ptrSeqCopy = new Seq; ptrSeqCopy->Copy(*ptrSeq); push_back(ptrSeqCopy); } }
// For every position i of s, stores in Row[i] the VTML_SP table row for the
// letter at that position. Letter codes of 20 or above are mapped to the
// AX_X row. Row must have room for s.Length() entries.
static void RowFromSeq(const Seq &s, SCORE *Row[])
{
	const unsigned uCount = s.Length();
	unsigned uPos = 0;
	while (uPos < uCount)
	{
		const unsigned uCode = CharToLetter(s.GetChar(uPos));
		if (uCode >= 20)
			Row[uPos] = VTML_SP[AX_X];
		else
			Row[uPos] = VTML_SP[uCode];
		++uPos;
	}
}
// For every position i of s, stores in Letters[i] the letter code of the
// character at that position. Codes of 20 or above are replaced by AX_X.
// Letters must have room for s.Length() entries.
static void LettersFromSeq(const Seq &s, unsigned Letters[])
{
	const unsigned uCount = s.Length();
	unsigned uPos = 0;
	while (uPos < uCount)
	{
		const unsigned uCode = CharToLetter(s.GetChar(uPos));
		if (uCode >= 20)
			Letters[uPos] = AX_X;
		else
			Letters[uPos] = uCode;
		++uPos;
	}
}
// Rebuilds s from row uSeqIndex of msa, restricted to the columns
// uColFrom..uColTo (both inclusive). s is cleared, takes the name and id of
// that row, and receives every character in the range that is not a gap
// character (per IsGapChar).
static void SeqFromMSACols(const MSA &msa, unsigned uSeqIndex, unsigned uColFrom, unsigned uColTo, Seq &s)
{
	s.Clear();
	s.SetName(msa.GetSeqName(uSeqIndex));
	s.SetId(msa.GetSeqId(uSeqIndex));

	for (unsigned uCol = uColFrom; uCol <= uColTo; ++uCol)
	{
		const char c = msa.GetChar(uSeqIndex, uCol);
		if (IsGapChar(c))
			continue;  // gaps are dropped
		s.AppendChar(c);
	}
}
// Element-wise equality of two sequences. Both arguments are taken by value
// and consumed in lock step with next(). The result is false at the first
// pair of results that compare unequal, and true once both next() results
// test false together.
// NOTE(review): presumably a Maybe<T> that tests false marks the end of a
// sequence -- confirm against Maybe's definition.
bool operator==(Seq<T> lhs, Seq<T> rhs)
{
    for (;;)
    {
        Maybe<T> left(lhs.next());
        Maybe<T> right(rhs.next());

        if (left != right)
        {
            return false;
        }

        const bool bothFalse = !left && !right;
        if (bothFalse)
        {
            return true;
        }
    }
}
// Exact, case-sensitive comparison with s: true when both sequences have the
// same length and the same character at every position.
bool Seq::Eq(const Seq &s) const
{
	const unsigned uCount = Length();
	if (uCount != s.Length())
		return false;

	unsigned uPos = 0;
	while (uPos < uCount)
	{
		const char cMine = at(uPos);
		const char cTheirs = s.at(uPos);
		if (cMine != cTheirs)
			return false;
		++uPos;
	}
	return true;
}