Exemplo n.º 1
0
QVariantMap MSAEditorState::saveState(MSAEditor* v) {
    MSAEditorState ss;

    ss.stateData[VIEW_ID]=MsaEditorFactory::ID;

    MultipleSequenceAlignmentObject* msaObj = v->getMaObject();
    if (msaObj) {
        ss.setMSAObjectRef(GObjectReference(msaObj));
    }

    ss.setFont(v->getFont());
    ss.setFirstPos(v->getFirstVisibleBase());

    return ss.stateData;
}
Exemplo n.º 2
0
QVariantMap TreeViewerState::saveState(TreeViewer* v) {
    TreeViewerState ss;

    ss.stateData[VIEW_ID] = TreeViewerFactory::ID;

    PhyTreeObject* phyObj = v->getPhyObject();
    if (phyObj) {
        ss.setPhyObject(GObjectReference(phyObj));
    }

    ss.setHorizontalZoom(v->getHorizontalZoom());
    ss.setVerticalZoom(v->getVerticalZoom());
    ss.setTransform(v->getTransform());

    ss.stateData.unite(v->getSettingsState());

    return ss.stateData;
}
Exemplo n.º 3
0
Task* ClustalOWorker::tick() {
    if (input->hasMessage()) {
        Message inputMessage = getMessageAndSetupScriptValues(input);
        if (inputMessage.isEmpty()) {
            output->transit();
            return NULL;
        }
        cfg.numIterations=actor->getParameter(NUM_ITERATIONS)->getAttributeValue<int>(context);
        cfg.maxGuidetreeIterations=actor->getParameter(MAX_GT_ITERATIONS)->getAttributeValue<int>(context);
        cfg.maxHMMIterations=actor->getParameter(MAX_HMM_ITERATIONS)->getAttributeValue<int>(context);
        cfg.setAutoOptions=actor->getParameter(SET_AUTO)->getAttributeValue<bool>(context);
        cfg.numberOfProcessors=AppContext::getAppSettings()->getAppResourcePool()->getIdealThreadCount();

        QString path=actor->getParameter(EXT_TOOL_PATH)->getAttributeValue<QString>(context);
        if(QString::compare(path, "default", Qt::CaseInsensitive) != 0){
            AppContext::getExternalToolRegistry()->getByName(ET_CLUSTALO)->setPath(path);
        }
        path=actor->getParameter(TMP_DIR_PATH)->getAttributeValue<QString>(context);
        if(QString::compare(path, "default", Qt::CaseInsensitive) != 0){
            AppContext::getAppSettings()->getUserAppsSettings()->setUserTemporaryDirPath(path);
        }
        QVariantMap qm = inputMessage.getData().toMap();
        SharedDbiDataHandler msaId = qm.value(BaseSlots::MULTIPLE_ALIGNMENT_SLOT().getId()).value<SharedDbiDataHandler>();
        QScopedPointer<MultipleSequenceAlignmentObject> msaObj(StorageUtils::getMsaObject(context->getDataStorage(), msaId));
        SAFE_POINT(!msaObj.isNull(), "NULL MSA Object!", NULL);
        const MultipleSequenceAlignment msa = msaObj->getMultipleAlignment();

        if (msa->isEmpty()) {
            algoLog.error(tr("An empty MSA '%1' has been supplied to ClustalO.").arg(msa->getName()));
            return NULL;
        }
        ClustalOSupportTask* supportTask = new ClustalOSupportTask(msa, GObjectReference(), cfg);
        supportTask->addListeners(createLogListeners());
        Task *t = new NoFailTaskWrapper(supportTask);
        connect(t, SIGNAL(si_stateChanged()), SLOT(sl_taskFinished()));
        return t;
    } else if (input->isEnded()) {
        setDone();
        output->setEnded();
    }
    return NULL;
}
Exemplo n.º 4
0
SendSelectionDialog::SendSelectionDialog(const U2SequenceObject* dnaso, bool _isAminoSeq, QWidget *p)
    : QDialog(p), translateToAmino(false), isAminoSeq(_isAminoSeq), extImported(false)
{
    CreateAnnotationModel ca_m;
    ca_m.hideAnnotationType = true;
    ca_m.hideAnnotationName = true;
    ca_m.hideLocation = true;
    ca_m.sequenceObjectRef = GObjectReference(dnaso);
    ca_m.sequenceLen = dnaso->getSequenceLength();
    ca_c = new CreateAnnotationWidgetController(ca_m, this);
    setupUi(this);
    new HelpButton(this, buttonBox, "17467726");
    buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Search"));
    buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));

    optionsTab->setCurrentIndex(0);
    int idx = 2;
    QWidget *wdgt;
    wdgt = ca_c->getWidget();
    layoutAnnotations->insertWidget(idx, wdgt);

    matrixComboBox->addItems(ParametersLists::blastp_matrix);

    setupDataBaseList();
    setUpSettings();
    megablastCheckBox->setEnabled(false);
    alignComboBoxes();

    connect( dataBase, SIGNAL(currentIndexChanged(int)), SLOT(sl_scriptSelected(int)) );
    QPushButton* okButton = buttonBox->button(QDialogButtonBox::Ok);
    QPushButton* cancelButton = buttonBox->button(QDialogButtonBox::Cancel);

    connect( okButton, SIGNAL(clicked()), SLOT(sl_OK()) );
    connect( cancelButton, SIGNAL(clicked()), SLOT(sl_Cancel()) );
    connect( megablastCheckBox, SIGNAL(stateChanged(int)),SLOT(sl_megablastChecked(int)) );
    connect( serviceComboBox,SIGNAL(currentIndexChanged(int)),SLOT(sl_serviceChanged(int)) );
    sl_scriptSelected( 0 );
}
Exemplo n.º 5
0
GObjectReference MSAEditorState::getMSAObjectRef() const {
    return stateData.contains(MSA_OBJ) ? stateData[MSA_OBJ].value<GObjectReference>() : GObjectReference();
}
Exemplo n.º 6
0
Task* ClustalWWorker::tick() {
    if (input->hasMessage()) {
        Message inputMessage = getMessageAndSetupScriptValues(input);
        if (inputMessage.isEmpty()) {
            output->transit();
            return NULL;
        }
        cfg.gapOpenPenalty=actor->getParameter(GAP_OPEN_PENALTY)->getAttributeValue<float>(context);
        cfg.gapExtenstionPenalty=actor->getParameter(GAP_EXT_PENALTY)->getAttributeValue<float>(context);
        cfg.gapDist=actor->getParameter(GAP_DIST)->getAttributeValue<float>(context);
        cfg.endGaps=actor->getParameter(END_GAPS)->getAttributeValue<bool>(context);
        cfg.noHGaps=actor->getParameter(NO_HGAPS)->getAttributeValue<bool>(context);
        cfg.noPGaps=actor->getParameter(NO_PGAPS)->getAttributeValue<bool>(context);
        if(actor->getParameter(ITERATION)->getAttributeValue<int>(context) != 0){
            if(actor->getParameter(ITERATION)->getAttributeValue<int>(context) == 1){
                cfg.iterationType="TREE";
            }else if(actor->getParameter(ITERATION)->getAttributeValue<int>(context) == 2){
                cfg.iterationType="ALIGNMENT";
            }
            if(actor->getParameter(NUM_ITERATIONS)->getAttributeValue<int>(context) != 3){
                cfg.numIterations=actor->getParameter(NUM_ITERATIONS)->getAttributeValue<int>(context);
            }
        }
        if(actor->getParameter(MATRIX)->getAttributeValue<int>(context) == -1){
            if(actor->getParameter(MATRIX)->getAttributeValue<int>(context) == 0){
                cfg.matrix="IUB";
            }else if(actor->getParameter(MATRIX)->getAttributeValue<int>(context) == 1){
                cfg.matrix="CLUSTALW";
            }else if(actor->getParameter(MATRIX)->getAttributeValue<int>(context) == 2){
                cfg.matrix="BLOSUM";
            }else if(actor->getParameter(MATRIX)->getAttributeValue<int>(context) == 3){
                cfg.matrix="PAM";
            }else if(actor->getParameter(MATRIX)->getAttributeValue<int>(context) == 4){
                cfg.matrix="GONNET";
            }else if(actor->getParameter(MATRIX)->getAttributeValue<int>(context) == 5){
                cfg.matrix="ID";
            }
        }

        QString path=actor->getParameter(EXT_TOOL_PATH)->getAttributeValue<QString>(context);
        if(QString::compare(path, "default", Qt::CaseInsensitive) != 0){
            AppContext::getExternalToolRegistry()->getByName(ET_CLUSTAL)->setPath(path);
        }
        path=actor->getParameter(TMP_DIR_PATH)->getAttributeValue<QString>(context);
        if(QString::compare(path, "default", Qt::CaseInsensitive) != 0){
            AppContext::getAppSettings()->getUserAppsSettings()->setUserTemporaryDirPath(path);
        }

        QVariantMap qm = inputMessage.getData().toMap();
        SharedDbiDataHandler msaId = qm.value(BaseSlots::MULTIPLE_ALIGNMENT_SLOT().getId()).value<SharedDbiDataHandler>();
        QScopedPointer<MultipleSequenceAlignmentObject> msaObj(StorageUtils::getMsaObject(context->getDataStorage(), msaId));
        SAFE_POINT(!msaObj.isNull(), "NULL MSA Object!", NULL);
        const MultipleSequenceAlignment msa = msaObj->getMultipleAlignment();

        if (msa->isEmpty()) {
            algoLog.error(tr("An empty MSA '%1' has been supplied to ClustalW.").arg(msa->getName()));
            return NULL;
        }
        ClustalWSupportTask* supportTask = new ClustalWSupportTask(msa, GObjectReference(), cfg);
        supportTask->addListeners(createLogListeners());
        Task *t = new NoFailTaskWrapper(supportTask);
        connect(t, SIGNAL(si_stateChanged()), SLOT(sl_taskFinished()));
        return t;
    } else if (input->isEnded()) {
        setDone();
        output->setEnded();
    }
    return NULL;
}
Exemplo n.º 7
0
/**
 * FASTQ format specification: http://maq.sourceforge.net/fastq.shtml
 */
static void load(IOAdapter* io, const U2DbiRef& dbiRef, const QVariantMap& hints, const GUrl& docUrl, QList<GObject*>& objects, U2OpStatus& os,
                 int gapSize, int predictedSize, QString& writeLockReason) {
    DbiOperationsBlock opBlock(dbiRef, os);
    CHECK_OP(os, );
    Q_UNUSED(opBlock);
    writeLockReason.clear();

    bool merge = gapSize!=-1;
    QByteArray sequence;
    QByteArray qualityScores;
    QStringList headers;
    QSet<QString> uniqueNames;

    QVector<U2Region> mergedMapping;
    QByteArray gapSequence((merge ? gapSize : 0), 0);
    sequence.reserve(predictedSize);
    qualityScores.reserve(predictedSize);

    // for lower case annotations
    GObjectReference sequenceRef;
    qint64 sequenceStart = 0;

    U2SequenceImporter seqImporter(hints, true);
    const QString folder = hints.value(DocumentFormat::DBI_FOLDER_HINT, U2ObjectDbi::ROOT_FOLDER).toString();
    int seqNumber = 0;
    int progressUpNum = 0;

    const int objectsCountLimit = hints.contains(DocumentReadingMode_MaxObjectsInDoc) ? hints[DocumentReadingMode_MaxObjectsInDoc].toInt() : -1;
    const bool settingsMakeUniqueName = !hints.value(DocumentReadingMode_DontMakeUniqueNames, false).toBool();
    while (!os.isCoR()) {
        //read header
        QString sequenceName = readSequenceName(os, io, '@');
        // check for eof while trying to read another FASTQ block
        if (io->isEof()) {
            break;
        }

        CHECK_OP_BREAK(os);

        if(sequenceName.isEmpty()){
            sequenceName = "Sequence";
        }

        if ((merge == false) || (seqNumber == 0)) {
            QString objName = sequenceName;
            if (settingsMakeUniqueName) {
                objName = (merge) ? "Sequence" : TextUtils::variate(sequenceName, "_", uniqueNames);
                objName.squeeze();
                uniqueNames.insert(objName);
            }
            seqImporter.startSequence(dbiRef, folder, objName, false, os);
            CHECK_OP_BREAK(os);
        }

        //read sequence
        if (merge && sequence.length() > 0) {
            seqImporter.addDefaultSymbolsBlock(gapSize,os);
            sequenceStart += sequence.length();
            sequenceStart+=gapSize;
            CHECK_OP_BREAK(os);
        }

        sequence.clear();
        readSequence(os, io, sequence);
        MemoryLocker lSequence(os, qCeil(sequence.size()/(1000*1000)));
        CHECK_OP_BREAK(os);
        Q_UNUSED(lSequence);

        seqImporter.addBlock(sequence.data(),sequence.length(),os);
        CHECK_OP_BREAK(os);

        QString qualSequenceName = readSequenceName(os, io, '+');
        if (!qualSequenceName.isEmpty()) {
            static const QString err = U2::FastqFormat::tr("Not a valid FASTQ file: %1, sequence name differs from quality scores name: %2 and %3");
            CHECK_EXT_BREAK(sequenceName == qualSequenceName,
                os.setError(err.arg(docUrl.getURLString()).arg(sequenceName).arg(qualSequenceName)));
        }

        // read qualities
        qualityScores.clear();
        readQuality(os, io, qualityScores, sequence.size());
        CHECK_OP_BREAK(os);

        static const QString err = U2::FastqFormat::tr("Not a valid FASTQ file: %1. Bad quality scores: inconsistent size.").arg(docUrl.getURLString());
        CHECK_EXT_BREAK(sequence.length() == qualityScores.length(), os.setError(err));

        seqNumber++;
        progressUpNum++;
        if (merge) {
            headers.append(sequenceName);
            mergedMapping.append(U2Region(sequenceStart, sequence.length() ));
        }
        else {
            if (objectsCountLimit > 0 && objects.size() >= objectsCountLimit) {
                os.setError(FastqFormat::tr("File \"%1\" contains too many sequences to be displayed. "
                    "However, you can process these data using instruments from the menu <i>Tools -> NGS data analysis</i> "
                    "or pipelines built with Workflow Designer.")
                    .arg(io->getURL().getURLString()));
                break;
            }

            U2Sequence u2seq = seqImporter.finalizeSequenceAndValidate(os);
            CHECK_OP_BREAK(os);
            sequenceRef = GObjectReference(io->getURL().getURLString(), u2seq.visualName, GObjectTypes::SEQUENCE, U2EntityRef(dbiRef, u2seq.id));

            U2SequenceObject* seqObj = new U2SequenceObject(u2seq.visualName, U2EntityRef(dbiRef, u2seq.id));
            CHECK_EXT_BREAK(seqObj != NULL, os.setError("U2SequenceObject is NULL"));
            seqObj->setQuality(DNAQuality(qualityScores));
            objects << seqObj;

            U1AnnotationUtils::addAnnotations(objects, seqImporter.getCaseAnnotations(), sequenceRef, NULL, hints);
        }
        if (PROGRESS_UPDATE_STEP == progressUpNum) {
            progressUpNum = 0;
            os.setProgress(io->getProgress());
        }
    }

    CHECK_OP_EXT(os, qDeleteAll(objects); objects.clear(), );
    bool emptyObjects = objects.isEmpty();
    CHECK_EXT(!emptyObjects || merge, os.setError(Document::tr("Document is empty.")), );
    SAFE_POINT(headers.size() == mergedMapping.size(), "headers <-> regions mapping failed!", );

    if (!merge) {
        return;
    }
    U2Sequence u2seq = seqImporter.finalizeSequenceAndValidate(os);
    CHECK_OP(os,);

    sequenceRef = GObjectReference(io->getURL().getURLString(), u2seq.visualName, GObjectTypes::SEQUENCE, U2EntityRef(dbiRef, u2seq.id));

    U1AnnotationUtils::addAnnotations(objects, seqImporter.getCaseAnnotations(), sequenceRef, NULL, hints);
    objects << new U2SequenceObject(u2seq.visualName, U2EntityRef(dbiRef, u2seq.id));
    objects << DocumentFormatUtils::addAnnotationsForMergedU2Sequence(sequenceRef, dbiRef, headers, mergedMapping, hints);
    if (headers.size() > 1) {
        writeLockReason = DocumentFormat::MERGED_SEQ_LOCK;
    }
}
void EMBLGenbankAbstractDocument::load(const U2DbiRef& dbiRef, IOAdapter* io, QList<GObject*>& objects, QVariantMap& fs, U2OpStatus& os, QString& writeLockReason) {
    DbiOperationsBlock opBlock(dbiRef, os);
    CHECK_OP(os, );
    Q_UNUSED(opBlock);
    writeLockReason.clear();

    //get settings
    int gapSize = qBound(-1, DocumentFormatUtils::getMergeGap(fs), 1000*1000);
    bool merge = gapSize!=-1;

    QScopedPointer<AnnotationTableObject> mergedAnnotations(NULL);
    QStringList contigs;
    QVector<U2Region> mergedMapping;

    // Sequence loading is 'lazy', so, if there is no sequence, it won't be created and there is no need to remove it.
    U2SequenceImporter seqImporter(fs, true);
    const QString folder = fs.value(DBI_FOLDER_HINT, U2ObjectDbi::ROOT_FOLDER).toString();

    QSet<QString> usedNames;

    GObjectReference sequenceRef(GObjectReference(io->getURL().getURLString(), "", GObjectTypes::SEQUENCE));

    QByteArray readBuffer(ParserState::LOCAL_READ_BUFFER_SIZE, '\0');
    ParserState st(isNcbiLikeFormat() ? 12 : 5, io, NULL, os);
    st.buff = readBuffer.data();

    TmpDbiObjects dbiObjects(dbiRef, os);
    int num_sequence = 0;

    qint64 sequenceStart = 0;
    int sequenceSize = 0;
    int fullSequenceSize = 0;
    const int objectsCountLimit = fs.contains(DocumentReadingMode_MaxObjectsInDoc) ? fs[DocumentReadingMode_MaxObjectsInDoc].toInt() : -1;

    for (int i=0; !os.isCoR(); i++, ++num_sequence) {
        if (objectsCountLimit > 0 && objects.size() >= objectsCountLimit) {
            os.setError(EMBLGenbankAbstractDocument::tr("File \"%1\" contains too many sequences to be displayed. "
                "However, you can process these data using instruments from the menu <i>Tools -> NGS data analysis</i> "
                "or pipelines built with Workflow Designer.")
                .arg(io->getURL().getURLString()));
            break;
        }

        //TODO: reference to a local variable??? Such a pointer will become invalid
        EMBLGenbankDataEntry data;
        st.entry = &data;

        if (num_sequence == 0 || merge == false){
            seqImporter.startSequence(dbiRef, folder, "default sequence name", false, os); //change name and circularity after finalize method
            CHECK_OP(os, );
        }

        sequenceSize = 0;
        os.setDescription(tr("Reading entry header"));
        int offset = 0;
        if (merge && num_sequence > 0) {
            offset = gapSize;
        }
        if (!readEntry(&st,seqImporter,sequenceSize,fullSequenceSize,merge,offset, os)) {
            break;
        }

        if (merge && sequenceSize > 0 && num_sequence > 0) {
                sequenceStart = fullSequenceSize - sequenceSize;
                sequenceStart += gapSize;
                fullSequenceSize += gapSize;
        }

        // tolerate blank lines between records
        char ch;
        bool b;
        while ((b = st.io->getChar(&ch)) && (ch == '\n' || ch == '\r')){}
        if (b) {
            st.io->skip(-1);
        }

        AnnotationTableObject *annotationsObject = NULL;

        if (data.hasAnnotationObjectFlag) {
            QString annotationName = genObjectName(usedNames, data.name, data.tags, i+1, GObjectTypes::ANNOTATION_TABLE);

            QVariantMap hints;
            hints.insert(DBI_FOLDER_HINT, fs.value(DBI_FOLDER_HINT, U2ObjectDbi::ROOT_FOLDER));
            if (Q_UNLIKELY(merge && NULL == mergedAnnotations)) {
                mergedAnnotations.reset(new AnnotationTableObject(annotationName, dbiRef, hints));
            }
            annotationsObject = merge ? mergedAnnotations.data() : new AnnotationTableObject(annotationName, dbiRef, hints);

            QStringList groupNames;
            QMap<QString, QList<SharedAnnotationData> > groupName2Annotations;
            for (int i = 0, n = data.features.size(); i < n; ++i) {
                SharedAnnotationData &d = data.features[i];
                if (!d->location->regions.isEmpty()) {
                    for (int i = 0, n = d->location->regions.size(); i < n; ++i) {
                        // for some reason larger numbers cannot be stored within rtree SQLite tables
                        if (d->location->regions[i].endPos() > 9223371036854775807LL) {
                            d->location->regions[i].length = 9223371036854775807LL - d->location->regions[i].startPos;
                        }
                    }
                }
                groupNames.clear();
                d->removeAllQualifiers(GBFeatureUtils::QUALIFIER_GROUP, groupNames);
                if (groupNames.isEmpty()) {
                    groupName2Annotations[""].append(d);
                } else {
                    foreach(const QString &gName, groupNames) {
                        groupName2Annotations[gName].append(d);
                    }
                }
                CHECK_OP(os, );
            }
Exemplo n.º 9
0
GObjectReference TreeViewerState::getPhyObject() const {
    return stateData.contains(PHY_OBJ) ? stateData[PHY_OBJ].value<GObjectReference>() : GObjectReference();
}
Exemplo n.º 10
0
Task* RepeatWorker::tick() {
    if (input->hasMessage()) {
        Message inputMessage = getMessageAndSetupScriptValues(input);
        if (inputMessage.isEmpty()) {
            output->transit();
            return NULL;
        }
        cfg.algo = RFAlgorithm(actor->getParameter(ALGO_ATTR)->getAttributeValue<int>(context));
        cfg.minLen = actor->getParameter(LEN_ATTR)->getAttributeValue<int>(context);
        if(actor->getParameter(USE_MIN_DISTANCE_ATTR)->getAttributeValue<bool>(context)){
            cfg.minDist = actor->getParameter(MIN_DIST_ATTR)->getAttributeValue<int>(context);
        }else{
            cfg.minDist = 0;
        }
        if(actor->getParameter(USE_MAX_DISTANCE_ATTR)->getAttributeValue<bool>(context)){
            cfg.maxDist = actor->getParameter(MAX_DIST_ATTR)->getAttributeValue<int>(context);
        }else{
            cfg.maxDist = INT_MAX;
        }
        int identity = actor->getParameter(IDENTITY_ATTR)->getAttributeValue<int>(context);
        cfg.setIdentity(identity);
        cfg.nThreads = actor->getParameter(THREADS_ATTR)->getAttributeValue<int>(context);
        cfg.inverted = actor->getParameter(INVERT_ATTR)->getAttributeValue<bool>(context);
        cfg.filter = RepeatsFilterAlgorithm(actor->getParameter(NESTED_ATTR)->getAttributeValue<int>(context));
        cfg.excludeTandems = actor->getParameter(TANMEDS_ATTR)->getAttributeValue<bool>(context);
        resultName = actor->getParameter(NAME_ATTR)->getAttributeValue<QString>(context);
        if(resultName.isEmpty()){
            resultName = "repeat_unit";
            algoLog.error(tr("result name is empty, default name used"));
        }
        if(identity > 100 || identity < 0){
            algoLog.error(tr("Incorrect value: identity value must be between 0 and 100"));
            return new FailTask(tr("Incorrect value: identity value must be between 0 and 100"));
        }
        QVariantMap map = inputMessage.getData().toMap();
        SharedDbiDataHandler seqId = map.value(BaseSlots::DNA_SEQUENCE_SLOT().getId()).value<SharedDbiDataHandler>();
        QScopedPointer<U2SequenceObject> seqObj(StorageUtils::getSequenceObject(context->getDataStorage(), seqId));
        if (seqObj.isNull()) {
            return NULL;
        }
        U2OpStatusImpl os;
        DNASequence seq = seqObj->getWholeSequence(os);
        CHECK_OP(os, new FailTask(os.getError()));

        if(cfg.minDist < 0){
            algoLog.error(tr("Incorrect value: minimal distance must be greater then zero"));
            return new FailTask(tr("Incorrect value: minimal distance must be greater then zero"));
        }

        if (!seq.alphabet->isNucleic()) {
            QString err = tr("Sequence alphabet is not nucleic!");
            return new FailTask(err);
        }
        Task* t = new FindRepeatsToAnnotationsTask(cfg, seq, resultName, QString(), "", GObjectReference());
        connect(t, SIGNAL(si_stateChanged()), SLOT(sl_taskFinished()));
        return t;
    } else if (input->isEnded()) {
        setDone();
        output->setEnded();
    }
    return NULL;
}
Exemplo n.º 11
0
static void load(IOAdapter* io, const U2DbiRef& dbiRef, const QVariantMap& fs, QList<GObject*>& objects,
                 int gapSize, QString& writeLockReason, U2OpStatus& os)
{
    DbiOperationsBlock opBlock(dbiRef, os);
    CHECK_OP(os, );
    Q_UNUSED(opBlock);

    static char fastaCommentStartChar = FastaFormat::FASTA_COMMENT_START_SYMBOL;

    MemoryLocker memoryLocker(os, 1);
    CHECK_OP(os, );

    writeLockReason.clear();
    QByteArray readBuff(DocumentFormat::READ_BUFF_SIZE + 1, 0);
    char* buff = readBuff.data();
    qint64 len = 0;

    bool merge = gapSize != -1;
    QStringList headers;
    QSet<QString> uniqueNames;
    QVector<U2Region> mergedMapping;

    // for lower case annotations
    GObjectReference sequenceRef;

    //skip leading whites if present
    bool lineOk = true;
    static QBitArray nonWhites = ~TextUtils::WHITES;
    io->readUntil(buff, DocumentFormat::READ_BUFF_SIZE, nonWhites, IOAdapter::Term_Exclude, &lineOk);
    CHECK_EXT(!io->hasError(), os.setError(io->errorString()), );

    U2SequenceImporter seqImporter(fs, true);
    const QString folder = fs.value(DocumentFormat::DBI_FOLDER_HINT, U2ObjectDbi::ROOT_FOLDER).toString();

    qint64 sequenceStart = 0;
    int sequenceNumber = 0;
    DbiConnection con(dbiRef, os);
    bool headerReaded = false;
    QStringList emptySeqNames;

    const int objectsCountLimit = fs.contains(DocumentReadingMode_MaxObjectsInDoc) ? fs[DocumentReadingMode_MaxObjectsInDoc].toInt() : -1;
    const bool settingsMakeUniqueName = !fs.value(DocumentReadingMode_DontMakeUniqueNames, false).toBool();
    while (!os.isCoR()) {
        //skip start comments and read header
        if(!headerReaded){
            do{
                len = io->readLine(buff, DocumentFormat::READ_BUFF_SIZE);
                CHECK_EXT(!io->hasError(), os.setError(io->errorString()), );
            }while(buff[0] == fastaCommentStartChar && len > 0);
        }

        if (len == 0 && io->isEof()) { //end if stream
            break;
        }
        CHECK_EXT(!io->hasError(), os.setError(io->errorString()), );
        CHECK_EXT_BREAK(lineOk, os.setError(FastaFormat::tr("Line is too long")));

        QString headerLine = QString(QByteArray(buff+1, len-1)).trimmed();
        CHECK_EXT_BREAK(buff[0] == FastaFormat::FASTA_HEADER_START_SYMBOL, os.setError(FastaFormat::tr("First line is not a FASTA header")));

        //read sequence
        if (sequenceNumber == 0 || !merge) {
            QString objName = headerLine;
            if(objName.isEmpty()){
                objName = "Sequence";
            }
            if (settingsMakeUniqueName) {
                objName = (merge) ? "Sequence" : TextUtils::variate(objName, "_", uniqueNames);
                objName.squeeze();
                memoryLocker.tryAcquire(2*objName.size());
                CHECK_OP_BREAK(os);
                uniqueNames.insert(objName);
            }
            seqImporter.startSequence(os, dbiRef, folder, objName, false);
            CHECK_OP_BREAK(os);

            sequenceRef = GObjectReference(io->getURL().getURLString(), objName, GObjectTypes::SEQUENCE);
        }
        if (sequenceNumber >= 1 && merge) {
            seqImporter.addDefaultSymbolsBlock(gapSize, os);
            sequenceStart += gapSize;
            CHECK_OP_BREAK(os);
        }
        int sequenceLen = 0;
        while (!os.isCoR()) {
            do {
                len = io->readLine(buff, DocumentFormat::READ_BUFF_SIZE);
                CHECK_EXT(!io->hasError(), os.setError(io->errorString()), );
            } while (len <= 0 && !io->isEof());
            CHECK_EXT(!io->hasError(), os.setError(io->errorString()), );

            if (len <= 0 && io->isEof()) {
                break;
            }
            CHECK_EXT(!io->hasError(), os.setError(io->errorString()), );

            buff[len] = 0;

            if(buff[0] != fastaCommentStartChar && buff[0] != FastaFormat::FASTA_HEADER_START_SYMBOL){
                len = TextUtils::remove(buff, len, TextUtils::WHITES);
                if(len > 0){
                    seqImporter.addBlock(buff, len, os);
                    sequenceLen += len;
                }
            }else if( buff[0] == FastaFormat::FASTA_HEADER_START_SYMBOL){
                headerReaded = true;
                break;
            }

            CHECK_OP_BREAK(os);
            os.setProgress(io->getProgress());
        }

        if (merge) {
            memoryLocker.tryAcquire(headerLine.size());
            CHECK_OP_BREAK(os);
            headers.append(headerLine);
            mergedMapping.append(U2Region(sequenceStart, sequenceLen));
        } else {
            if (objectsCountLimit > 0 && objects.size() >= objectsCountLimit) {
                os.setError(FastaFormat::tr("File \"%1\" contains too many sequences to be displayed. "
                    "However, you can process these data using instruments from the menu <i>Tools -> NGS data analysis</i> "
                    "or pipelines built with Workflow Designer.")
                    .arg(io->getURL().getURLString()));
                break;
            }
            memoryLocker.tryAcquire(800);
            CHECK_OP_BREAK(os);
            U2Sequence seq = seqImporter.finalizeSequenceAndValidate(os);
            if (os.hasError() && os.getError() == U2SequenceImporter::EMPTY_SEQUENCE_ERROR) {
                os.setError("");
                emptySeqNames << headerLine;
                continue;
            }
            sequenceRef.entityRef = U2EntityRef(dbiRef, seq.id);

            //TODO parse header
            U2StringAttribute attr(seq.id, DNAInfo::FASTA_HDR, headerLine);
            con.dbi->getAttributeDbi()->createStringAttribute(attr, os);
            CHECK_OP_BREAK(os);

            objects << new U2SequenceObject(seq.visualName, U2EntityRef(dbiRef, seq.id));
            CHECK_OP_BREAK(os);

            U1AnnotationUtils::addAnnotations(objects, seqImporter.getCaseAnnotations(), sequenceRef, NULL, fs);
        }
        sequenceStart += sequenceLen;
        sequenceNumber++;
        ioLog.trace(QString("Sequence #%1 is processed").arg(sequenceNumber));
    }

    CHECK_OP_EXT(os, qDeleteAll(objects); objects.clear(), );
    CHECK_EXT(!objects.isEmpty() || merge, os.setError(Document::tr("Document is empty.")), );
    SAFE_POINT(headers.size() == mergedMapping.size(), "headers <-> regions mapping failed!", );
    ioLog.trace("All sequences are processed");

    if (!emptySeqNames.isEmpty()) {
        QString warningMessage;
        warningMessage.append(FastaFormat::tr("Loaded sequences: %1.\n").arg(sequenceNumber));
        warningMessage.append(FastaFormat::tr("Skipped sequences: %1.\n").arg(emptySeqNames.size()));
        warningMessage.append(FastaFormat::tr("The following sequences are empty:\n%1").arg(emptySeqNames.join(",\n")));
        os.addWarning(warningMessage);
    }

    if (!merge) {
        return;
    }

    U2Sequence seq = seqImporter.finalizeSequenceAndValidate(os);
    CHECK_OP(os, );
    sequenceRef.entityRef = U2EntityRef(dbiRef, seq.id);

    U1AnnotationUtils::addAnnotations(objects, seqImporter.getCaseAnnotations(), sequenceRef, NULL, fs);
    objects << new U2SequenceObject(seq.visualName, U2EntityRef(dbiRef, seq.id));
    objects << DocumentFormatUtils::addAnnotationsForMergedU2Sequence( sequenceRef, dbiRef, headers, mergedMapping, fs );
    if (headers.size() > 1) {
        writeLockReason = QObject::tr("Document sequences were merged");
    }
}