// Permutes `seq` in place so that, for every node index n, seq[n] is the
// sequence whose name equals seqName(n).
//
// Preconditions (each enforced with Assert):
//   - seq.size() equals nodes();
//   - sequence names are unique;
//   - every tree node's seqName() occurs among the sequence names;
//   - no two tree nodes share a seqName(). Without this check, two nodes would
//     claim the same row, new2old would not be a permutation, and the swap
//     loop below would silently scramble `seq`.
void Tree::reorderSeqs (vguard<FastSeq>& seq) const {
  Assert (seq.size() == nodes(), "Number of sequences doesn't match number of nodes in tree");

  // Index the sequences by name, rejecting duplicates.
  map<string,size_t> name2seq;
  for (size_t n = 0; n < seq.size(); ++n) {
    Assert (name2seq.find (seq[n].name) == name2seq.end(), "Duplicate sequence name: %s", seq[n].name.c_str());
    name2seq[seq[n].name] = n;
  }

  // Sentinel stored in name2seq once a row has been claimed by a node.
  // seq.size() can never be a valid row index, so it is unambiguous.
  const size_t claimed = seq.size();

  // new2old[n] = current row of the sequence destined for row n;
  // old2new[o] = destination row of the sequence currently at row o.
  vguard<size_t> new2old (seq.size()), old2new (seq.size());
  for (TreeNodeIndex n = 0; n < nodes(); ++n) {
    // seqName() recurses for unnamed nodes, so evaluate it only once per node.
    const string name = seqName(n);
    const auto found = name2seq.find (name);
    Assert (found != name2seq.end(), "Tree node %s is absent from sequence dataset", name.c_str());
    const size_t old_n = found->second;
    Assert (old_n != claimed, "Duplicate tree node name: %s", name.c_str());
    found->second = claimed;
    new2old[n] = old_n;
    old2new[old_n] = n;
  }

  // Apply the permutation with one swap per row, keeping both index maps
  // consistent with the current layout of `seq` after every swap.
  for (size_t n = 0; n < new2old.size(); ++n) {
    const size_t o = new2old[n], m = old2new[n];
    swap (seq[n], seq[o]);          // row n now holds its final sequence; row o holds the one destined for m
    swap (old2new[n], old2new[o]);  // old2new[n] becomes n, old2new[o] becomes m
    swap (new2old[n], new2old[m]);  // new2old[n] becomes n, new2old[m] becomes o
  }
}
// Returns the name used to match node n against a sequence.
// A node with a non-empty nodeName() uses that name directly. Otherwise the
// name is synthesized as "(" + comma-separated "<child seqName>:<child branch
// length>" entries + ")", recursing into each child of the node.
string Tree::seqName (TreeNodeIndex n) const {
  const string explicitName = nodeName(n);
  if (explicitName.size() != 0)
    return explicitName;

  // Unnamed node: describe it by its children.
  vguard<string> childLabels;
  for (auto child : node[n].child) {
    ostringstream label;
    // defaultfloat selects the stream's default floating-point notation for the length.
    label << seqName(child) << ':' << defaultfloat << branchLength(child);
    childLabels.push_back (label.str());
  }
  return "(" + join(childLabels,",") + ")";
}
bool Tree::seqNamesBijective (const vguard<FastSeq>& seq) const { if (!allNodesNamed()) return false; if (seq.size() != nodes()) return false; map<string,size_t> name2seq; for (size_t n = 0; n < seq.size(); ++n) { if (name2seq.find (seq[n].name) != name2seq.end()) return false; name2seq[seq[n].name] = n; } for (TreeNodeIndex n = 0; n < nodes(); ++n) if (name2seq.find (seqName(n)) == name2seq.end()) return false; return true; }
// Reorders `seq` to match the tree's node order, assigns internal node names
// via the single-argument overload, then copies every node's seqName() back
// into the corresponding sequence record.
//   seq    - one sequence per tree node; reordered and renamed in place.
//   prefix - forwarded unchanged to assignInternalNodeNames(prefix)
//            (presumably the prefix for generated names; that overload is
//            not visible here).
void Tree::assignInternalNodeNames (vguard<FastSeq>& seq, const char* prefix) {
  // Make sure that nodes match rows before any names change: reorderSeqs
  // matches rows to nodes by their current seqName().
  reorderSeqs (seq);
  assignInternalNodeNames (prefix);
  for (TreeNodeIndex n = 0; n < nodes(); ++n)
    seq[n].name = seqName(n);
}
// Checks the branch length of every node except the last entry of `node`
// (presumably the root, which has no parent branch - confirm against the
// tree's node ordering). Each checked node must have a non-negative branch
// length that is also at least minBranchLength; violations are reported
// through Require with the node's seqName().
void Tree::validateBranchLengths() const {
  const size_t total = node.size();
  size_t n = 0;
  while (n + 1 < total) {
    const auto len = branchLength(n);
    // A negative length is reported as a missing branch length.
    Require (len >= 0, "Node in tree is missing branch length: %s", seqName(n).c_str());
    Require (len >= minBranchLength, "Node in tree has a lower-than-minimal branch length: %s", seqName(n).c_str());
    ++n;
  }
}
static void load(IOAdapter* io, const U2DbiRef& dbiRef, QList<GObject*>& objects, const QVariantMap& fs, U2OpStatus& os) { DbiOperationsBlock opBlock(dbiRef, os); CHECK_OP(os, ); Q_UNUSED(opBlock); U2SequenceImporter seqImporter(fs, true); const QString folder = fs.value(DocumentFormat::DBI_FOLDER_HINT, U2ObjectDbi::ROOT_FOLDER).toString(); QByteArray readBuffer(DocumentFormat::READ_BUFF_SIZE, '\0'); char* buff = readBuffer.data(); QBitArray ALPHAS = TextUtils::ALPHA_NUMS; ALPHAS['-'] = true; QByteArray seq; QString seqName(io->getURL().baseFileName()); //reading sequence QBuffer writer(&seq); writer.open(QIODevice::WriteOnly); TmpDbiObjects dbiObjects(dbiRef, os); bool ok = true; int len = 0; bool isStarted = false; int sequenceCounter = 0; bool terminatorFound = false; bool isSplit = fs.value((DocumentReadingMode_SequenceAsSeparateHint), false).toBool(); while (ok && !io->isEof()) { len = io->readLine(buff, DocumentFormat::READ_BUFF_SIZE, &terminatorFound); if (len <= 0){ continue; } seq.clear(); bool isSeek = writer.seek(0); assert(isSeek); Q_UNUSED(isSeek); if (os.isCoR()) { break; } for (int i=0; i<len && ok; i++) { char c = buff[i]; if (ALPHAS[(uchar)c]) { ok = writer.putChar(c); } } if(seq.size()>0 && isStarted == false ){ QString name = sequenceCounter == 0 ? seqName : seqName + QString("_%1").arg(sequenceCounter); isStarted = true; seqImporter.startSequence(dbiRef, folder, name, false, os); } if(isStarted){ seqImporter.addBlock(seq.data(),seq.size(),os); } if (seq.size()>0 && isStarted && terminatorFound && isSplit){ finishSequence(objects, io, os, dbiRef, fs, dbiObjects, seqImporter); sequenceCounter++; isStarted = false; } if (os.isCoR()) { break; } os.setProgress(io->getProgress()); } writer.close(); CHECK_OP(os, ); if (sequenceCounter == 0){ CHECK_EXT(isStarted == true, os.setError(RawDNASequenceFormat::tr("Sequence is empty")), ); }