Exemple #1
0
// Permutes seq in place so that, on return, seq[n] is the sequence whose name
// equals seqName(n) for every tree node n.
//
// Assertion failures are raised (via Assert) when:
//  - the number of sequences differs from nodes();
//  - two sequences share a name;
//  - a tree node's seqName() is not the name of any sequence;
//  - two tree nodes share a name (the node<->sequence mapping would then not be
//    a permutation, and the swap loop below would silently scramble seq).
void Tree::reorderSeqs (vguard<FastSeq>& seq) const {
  Assert (seq.size() == nodes(), "Number of sequences doesn't match number of nodes in tree");

  // Index the sequences by name, rejecting repeated names.
  map<string,size_t> name2seq;
  for (size_t n = 0; n < seq.size(); ++n) {
    Assert (name2seq.find (seq[n].name) == name2seq.end(), "Duplicate sequence name: %s", seq[n].name.c_str());
    name2seq[seq[n].name] = n;
  }

  // new2old[n] = current index of the sequence that belongs at position n;
  // old2new[o] = position where the sequence currently at index o belongs.
  vguard<size_t> new2old (seq.size()), old2new (seq.size());
  for (TreeNodeIndex n = 0; n < nodes(); ++n) {
    const string name = seqName(n);  // computed once: seqName() may recurse and build a string
    const auto found = name2seq.find (name);
    Assert (found != name2seq.end(), "Tree node %s is absent from sequence dataset", name.c_str());
    const size_t old_n = found->second;
    new2old[n] = old_n;
    old2new[old_n] = n;
  }

  // The two tables must be mutual inverses. If two nodes resolved to the same
  // sequence, the earlier node's entry in old2new was overwritten by the later one.
  for (size_t n = 0; n < new2old.size(); ++n)
    Assert (old2new[new2old[n]] == n, "Duplicate node name in tree: %s", seqName(n).c_str());

  // Apply the permutation with one swap per position. After step n, position n
  // holds its final sequence; the displaced sequence now sits at index o and
  // still belongs at m, so both tables are patched to reflect that.
  for (size_t n = 0; n < new2old.size(); ++n) {
    const size_t o = new2old[n], m = old2new[n];
    swap (seq[n], seq[o]);
    swap (old2new[n], old2new[o]);
    swap (new2old[n], new2old[m]);
  }
}
Exemple #2
0
// Returns the name used to match node n against a sequence.
// A node with a non-empty nodeName() is identified by that name. Otherwise the
// name is synthesised recursively from its children as
// "(<child name>:<child branch length>,...)".
string Tree::seqName (TreeNodeIndex n) const {
  const string ownName = nodeName(n);
  if (ownName.size() != 0)
    return ownName;

  // Unnamed node: one "name:length" label per child, in child order.
  vguard<string> childLabels;
  for (auto child : node[n].child) {
    ostringstream label;
    label << seqName(child) << ':' << defaultfloat << branchLength(child);
    childLabels.push_back (label.str());
  }
  return "(" + join(childLabels,",") + ")";
}
Exemple #3
0
// Reports whether sequence names and tree node names are in one-to-one correspondence.
// Returns false when any of the following holds:
//  - allNodesNamed() is false;
//  - the number of sequences differs from nodes();
//  - two sequences share a name;
//  - some node's seqName() is not the name of any sequence;
//  - two nodes share a name (previously this case was accepted, although the
//    mapping is then not a bijection).
bool Tree::seqNamesBijective (const vguard<FastSeq>& seq) const {
  if (!allNodesNamed())
    return false;
  if (seq.size() != nodes())
    return false;

  // Collect the sequence names; emplace() reports whether the name was new.
  map<string,size_t> name2seq;
  for (size_t n = 0; n < seq.size(); ++n) {
    if (!name2seq.emplace (seq[n].name, n).second)
      return false;
  }

  // Consume one sequence name per node. Erasing the matched entry makes a
  // second node with the same name fail the lookup.
  for (TreeNodeIndex n = 0; n < nodes(); ++n) {
    const auto found = name2seq.find (seqName(n));
    if (found == name2seq.end())
      return false;
    name2seq.erase (found);
  }
  return true;
}
Exemple #4
0
// Names the tree's nodes via the single-argument overload and copies the
// resulting names onto the sequences, so that seq[n].name == seqName(n) for
// every node n on return.
void Tree::assignInternalNodeNames (vguard<FastSeq>& seq, const char* prefix) {
  // Rows must line up with nodes before any name changes, because the
  // reordering matches rows to nodes by their current names.
  reorderSeqs (seq);

  assignInternalNodeNames (prefix);

  // Propagate the (possibly new) node names to the matching rows.
  const size_t nodeCount = nodes();
  for (size_t row = 0; row < nodeCount; ++row) {
    FastSeq& target = seq[row];
    target.name = seqName(row);
  }
}
Exemple #5
0
// Checks the branch length of every node except the last entry of `node`
// (presumably the root, which has no parent branch -- confirm against the
// tree's node ordering). Each checked length must be non-negative and at least
// minBranchLength; violations are reported through Require.
void Tree::validateBranchLengths() const {
  const size_t total = node.size();
  // `next` runs one ahead of the node being checked, so the final node is
  // never visited and an empty tree performs no checks.
  for (size_t next = 1; next < total; ++next) {
    const size_t n = next - 1;
    const auto len = branchLength(n);
    Require (len >= 0, "Node in tree is missing branch length: %s", seqName(n).c_str());
    Require (len >= minBranchLength, "Node in tree has a lower-than-minimal branch length: %s", seqName(n).c_str());
  }
}
static void load(IOAdapter* io, const U2DbiRef& dbiRef,  QList<GObject*>& objects, const QVariantMap& fs, U2OpStatus& os) {
    DbiOperationsBlock opBlock(dbiRef, os);
    CHECK_OP(os, );
    Q_UNUSED(opBlock);

    U2SequenceImporter seqImporter(fs, true);
    const QString folder = fs.value(DocumentFormat::DBI_FOLDER_HINT, U2ObjectDbi::ROOT_FOLDER).toString();

    QByteArray readBuffer(DocumentFormat::READ_BUFF_SIZE, '\0');
    char* buff  = readBuffer.data();

    QBitArray ALPHAS = TextUtils::ALPHA_NUMS;
    ALPHAS['-'] = true;

    QByteArray seq;
    QString seqName(io->getURL().baseFileName());
    //reading sequence
    QBuffer writer(&seq);
    writer.open(QIODevice::WriteOnly);
    TmpDbiObjects dbiObjects(dbiRef, os);
    bool ok = true;
    int len = 0;
    bool isStarted = false;
    int sequenceCounter = 0;
    bool terminatorFound = false;
    bool isSplit = fs.value((DocumentReadingMode_SequenceAsSeparateHint), false).toBool();


    while (ok && !io->isEof()) {
        len = io->readLine(buff, DocumentFormat::READ_BUFF_SIZE, &terminatorFound);
        if (len <= 0){
            continue;
        }

        seq.clear();
        bool isSeek = writer.seek(0);
                assert(isSeek); Q_UNUSED(isSeek);
        if (os.isCoR()) {
            break;
        }

        for (int i=0; i<len && ok; i++) {
            char c = buff[i];
            if (ALPHAS[(uchar)c]) {
                ok = writer.putChar(c);
            }
        }
        if(seq.size()>0 && isStarted == false ){
            QString name = sequenceCounter == 0 ? seqName : seqName + QString("_%1").arg(sequenceCounter);
            isStarted = true;
            seqImporter.startSequence(dbiRef, folder, name, false, os);
        }
        if(isStarted){
            seqImporter.addBlock(seq.data(),seq.size(),os);
        }
        if (seq.size()>0 && isStarted && terminatorFound && isSplit){
            finishSequence(objects, io, os, dbiRef, fs, dbiObjects, seqImporter);
            sequenceCounter++;
            isStarted = false;
        }
        if (os.isCoR()) {
            break;
        }
        os.setProgress(io->getProgress());
    }
    writer.close();

    CHECK_OP(os, );
    if (sequenceCounter == 0){
        CHECK_EXT(isStarted == true, os.setError(RawDNASequenceFormat::tr("Sequence is empty")), );
    }