/**
 * Builds the state map describing the given MSA editor.
 *
 * The map holds the MSA editor factory id under VIEW_ID, a reference to the
 * alignment object (only when the editor has one), the editor font and the
 * first visible base.
 *
 * @param v editor to snapshot; dereferenced without a NULL check
 * @return the filled state map
 */
QVariantMap MSAEditorState::saveState(MSAEditor* v) {
    MSAEditorState state;
    state.stateData[VIEW_ID] = MsaEditorFactory::ID;

    MultipleSequenceAlignmentObject* const alignmentObject = v->getMaObject();
    if (alignmentObject != NULL) {
        state.setMSAObjectRef(GObjectReference(alignmentObject));
    }

    state.setFont(v->getFont());
    state.setFirstPos(v->getFirstVisibleBase());

    return state.stateData;
}
/**
 * Builds the state map describing the given tree viewer.
 *
 * The map holds the tree viewer factory id under VIEW_ID, a reference to the
 * tree object (only when the viewer has one), both zoom factors, the current
 * transform, and finally everything returned by getSettingsState().
 *
 * @param v viewer to snapshot; dereferenced without a NULL check
 * @return the filled state map
 */
QVariantMap TreeViewerState::saveState(TreeViewer* v) {
    TreeViewerState state;
    state.stateData[VIEW_ID] = TreeViewerFactory::ID;

    PhyTreeObject* const treeObject = v->getPhyObject();
    if (treeObject != NULL) {
        state.setPhyObject(GObjectReference(treeObject));
    }

    state.setHorizontalZoom(v->getHorizontalZoom());
    state.setVerticalZoom(v->getVerticalZoom());
    state.setTransform(v->getTransform());

    // The viewer settings are merged in last, after the explicit entries above.
    state.stateData.unite(v->getSettingsState());

    return state.stateData;
}
/**
 * Handles one scheduling step of the ClustalO workflow element.
 *
 * When a message is waiting, the worker refreshes `cfg` from the actor
 * parameters, optionally overrides the external tool path and the temporary
 * directory, fetches the alignment from the message and returns a task that
 * wraps a ClustalOSupportTask. When the input has ended, the worker marks
 * itself done and closes the output.
 *
 * @return the task to run, or NULL when there is nothing to do, the message is
 *         empty, or the supplied alignment is empty
 */
Task* ClustalOWorker::tick() {
    if (!input->hasMessage()) {
        if (input->isEnded()) {
            setDone();
            output->setEnded();
        }
        return NULL;
    }

    Message inputMessage = getMessageAndSetupScriptValues(input);
    if (inputMessage.isEmpty()) {
        output->transit();
        return NULL;
    }

    // Algorithm settings come straight from the element parameters.
    cfg.numIterations = actor->getParameter(NUM_ITERATIONS)->getAttributeValue<int>(context);
    cfg.maxGuidetreeIterations = actor->getParameter(MAX_GT_ITERATIONS)->getAttributeValue<int>(context);
    cfg.maxHMMIterations = actor->getParameter(MAX_HMM_ITERATIONS)->getAttributeValue<int>(context);
    cfg.setAutoOptions = actor->getParameter(SET_AUTO)->getAttributeValue<bool>(context);
    cfg.numberOfProcessors = AppContext::getAppSettings()->getAppResourcePool()->getIdealThreadCount();

    // Any value other than "default" (case-insensitive) replaces the registered tool path.
    const QString toolPath = actor->getParameter(EXT_TOOL_PATH)->getAttributeValue<QString>(context);
    if (QString::compare(toolPath, "default", Qt::CaseInsensitive) != 0) {
        AppContext::getExternalToolRegistry()->getByName(ET_CLUSTALO)->setPath(toolPath);
    }

    // Same rule for the temporary directory.
    const QString tmpDirPath = actor->getParameter(TMP_DIR_PATH)->getAttributeValue<QString>(context);
    if (QString::compare(tmpDirPath, "default", Qt::CaseInsensitive) != 0) {
        AppContext::getAppSettings()->getUserAppsSettings()->setUserTemporaryDirPath(tmpDirPath);
    }

    const QVariantMap messageData = inputMessage.getData().toMap();
    SharedDbiDataHandler msaId = messageData.value(BaseSlots::MULTIPLE_ALIGNMENT_SLOT().getId()).value<SharedDbiDataHandler>();
    QScopedPointer<MultipleSequenceAlignmentObject> msaObj(StorageUtils::getMsaObject(context->getDataStorage(), msaId));
    SAFE_POINT(!msaObj.isNull(), "NULL MSA Object!", NULL);

    const MultipleSequenceAlignment msa = msaObj->getMultipleAlignment();
    if (msa->isEmpty()) {
        algoLog.error(tr("An empty MSA '%1' has been supplied to ClustalO.").arg(msa->getName()));
        return NULL;
    }

    ClustalOSupportTask* alignTask = new ClustalOSupportTask(msa, GObjectReference(), cfg);
    alignTask->addListeners(createLogListeners());

    Task* wrapper = new NoFailTaskWrapper(alignTask);
    connect(wrapper, SIGNAL(si_stateChanged()), SLOT(sl_taskFinished()));
    return wrapper;
}
/**
 * Creates the dialog for the given sequence object.
 *
 * Builds the annotation widget controller (annotation type, name and location
 * controls are hidden), sets up the generated UI, renames the OK button to
 * "Search", fills the matrix and database lists, restores the settings and
 * wires the signal handlers.
 *
 * @param dnaso       sequence object; dereferenced without a NULL check
 * @param _isAminoSeq stored in isAminoSeq
 * @param p           parent widget
 */
SendSelectionDialog::SendSelectionDialog(const U2SequenceObject* dnaso, bool _isAminoSeq, QWidget *p)
    : QDialog(p), translateToAmino(false), isAminoSeq(_isAminoSeq), extImported(false)
{
    CreateAnnotationModel annotationModel;
    annotationModel.hideAnnotationType = true;
    annotationModel.hideAnnotationName = true;
    annotationModel.hideLocation = true;
    annotationModel.sequenceObjectRef = GObjectReference(dnaso);
    annotationModel.sequenceLen = dnaso->getSequenceLength();
    ca_c = new CreateAnnotationWidgetController(annotationModel, this);

    setupUi(this);
    new HelpButton(this, buttonBox, "17467726");

    QPushButton* okButton = buttonBox->button(QDialogButtonBox::Ok);
    QPushButton* cancelButton = buttonBox->button(QDialogButtonBox::Cancel);
    okButton->setText(tr("Search"));
    cancelButton->setText(tr("Cancel"));

    optionsTab->setCurrentIndex(0);

    // The annotation controls are inserted at index 2 of the annotations layout.
    layoutAnnotations->insertWidget(2, ca_c->getWidget());

    matrixComboBox->addItems(ParametersLists::blastp_matrix);
    setupDataBaseList();
    setUpSettings();
    megablastCheckBox->setEnabled(false);
    alignComboBoxes();

    connect(dataBase, SIGNAL(currentIndexChanged(int)), SLOT(sl_scriptSelected(int)));
    connect(okButton, SIGNAL(clicked()), SLOT(sl_OK()));
    connect(cancelButton, SIGNAL(clicked()), SLOT(sl_Cancel()));
    connect(megablastCheckBox, SIGNAL(stateChanged(int)), SLOT(sl_megablastChecked(int)));
    connect(serviceComboBox, SIGNAL(currentIndexChanged(int)), SLOT(sl_serviceChanged(int)));

    // Run the handler once for index 0 so the dialog starts in a consistent state.
    sl_scriptSelected(0);
}
/**
 * Returns the alignment object reference stored under MSA_OBJ,
 * or a default-constructed reference when the state has no such entry.
 */
GObjectReference MSAEditorState::getMSAObjectRef() const {
    if (!stateData.contains(MSA_OBJ)) {
        return GObjectReference();
    }
    return stateData[MSA_OBJ].value<GObjectReference>();
}
/**
 * Handles one scheduling step of the ClustalW workflow element.
 *
 * When a message is waiting, the worker refreshes `cfg` from the actor
 * parameters, optionally overrides the external tool path and the temporary
 * directory, fetches the alignment from the message and returns a task that
 * wraps a ClustalWSupportTask. When the input has ended, the worker marks
 * itself done and closes the output.
 *
 * @return the task to run, or NULL when there is nothing to do, the message is
 *         empty, or the supplied alignment is empty
 */
Task* ClustalWWorker::tick() {
    if (input->hasMessage()) {
        Message inputMessage = getMessageAndSetupScriptValues(input);
        if (inputMessage.isEmpty()) {
            output->transit();
            return NULL;
        }

        cfg.gapOpenPenalty = actor->getParameter(GAP_OPEN_PENALTY)->getAttributeValue<float>(context);
        cfg.gapExtenstionPenalty = actor->getParameter(GAP_EXT_PENALTY)->getAttributeValue<float>(context);
        cfg.gapDist = actor->getParameter(GAP_DIST)->getAttributeValue<float>(context);
        cfg.endGaps = actor->getParameter(END_GAPS)->getAttributeValue<bool>(context);
        cfg.noHGaps = actor->getParameter(NO_HGAPS)->getAttributeValue<bool>(context);
        cfg.noPGaps = actor->getParameter(NO_PGAPS)->getAttributeValue<bool>(context);

        // Iteration mode: 0 leaves the iteration settings untouched, 1 -> "TREE", 2 -> "ALIGNMENT".
        const int iterationMode = actor->getParameter(ITERATION)->getAttributeValue<int>(context);
        if (iterationMode != 0) {
            if (iterationMode == 1) {
                cfg.iterationType = "TREE";
            } else if (iterationMode == 2) {
                cfg.iterationType = "ALIGNMENT";
            }
            // A value of 3 is not copied into cfg (presumably it is the tool's own default - verify).
            const int iterationsCount = actor->getParameter(NUM_ITERATIONS)->getAttributeValue<int>(context);
            if (iterationsCount != 3) {
                cfg.numIterations = iterationsCount;
            }
        }

        // Weight matrix: indices 0..5 select a named matrix; -1 (or any other value)
        // keeps whatever cfg.matrix already holds.
        // The previous code only entered this selection when the value was -1, which
        // made every branch unreachable, so the parameter was silently ignored.
        const int matrixIndex = actor->getParameter(MATRIX)->getAttributeValue<int>(context);
        switch (matrixIndex) {
            case 0:
                cfg.matrix = "IUB";
                break;
            case 1:
                cfg.matrix = "CLUSTALW";
                break;
            case 2:
                cfg.matrix = "BLOSUM";
                break;
            case 3:
                cfg.matrix = "PAM";
                break;
            case 4:
                cfg.matrix = "GONNET";
                break;
            case 5:
                cfg.matrix = "ID";
                break;
            default:
                break;
        }

        // Any value other than "default" (case-insensitive) replaces the registered tool path.
        QString path = actor->getParameter(EXT_TOOL_PATH)->getAttributeValue<QString>(context);
        if (QString::compare(path, "default", Qt::CaseInsensitive) != 0) {
            AppContext::getExternalToolRegistry()->getByName(ET_CLUSTAL)->setPath(path);
        }
        // Same rule for the temporary directory.
        path = actor->getParameter(TMP_DIR_PATH)->getAttributeValue<QString>(context);
        if (QString::compare(path, "default", Qt::CaseInsensitive) != 0) {
            AppContext::getAppSettings()->getUserAppsSettings()->setUserTemporaryDirPath(path);
        }

        QVariantMap qm = inputMessage.getData().toMap();
        SharedDbiDataHandler msaId = qm.value(BaseSlots::MULTIPLE_ALIGNMENT_SLOT().getId()).value<SharedDbiDataHandler>();
        QScopedPointer<MultipleSequenceAlignmentObject> msaObj(StorageUtils::getMsaObject(context->getDataStorage(), msaId));
        SAFE_POINT(!msaObj.isNull(), "NULL MSA Object!", NULL);
        const MultipleSequenceAlignment msa = msaObj->getMultipleAlignment();

        if (msa->isEmpty()) {
            algoLog.error(tr("An empty MSA '%1' has been supplied to ClustalW.").arg(msa->getName()));
            return NULL;
        }

        ClustalWSupportTask* supportTask = new ClustalWSupportTask(msa, GObjectReference(), cfg);
        supportTask->addListeners(createLogListeners());
        Task *t = new NoFailTaskWrapper(supportTask);
        connect(t, SIGNAL(si_stateChanged()), SLOT(sl_taskFinished()));
        return t;
    } else if (input->isEnded()) {
        setDone();
        output->setEnded();
    }
    return NULL;
}
/** * FASTQ format specification: http://maq.sourceforge.net/fastq.shtml */ static void load(IOAdapter* io, const U2DbiRef& dbiRef, const QVariantMap& hints, const GUrl& docUrl, QList<GObject*>& objects, U2OpStatus& os, int gapSize, int predictedSize, QString& writeLockReason) { DbiOperationsBlock opBlock(dbiRef, os); CHECK_OP(os, ); Q_UNUSED(opBlock); writeLockReason.clear(); bool merge = gapSize!=-1; QByteArray sequence; QByteArray qualityScores; QStringList headers; QSet<QString> uniqueNames; QVector<U2Region> mergedMapping; QByteArray gapSequence((merge ? gapSize : 0), 0); sequence.reserve(predictedSize); qualityScores.reserve(predictedSize); // for lower case annotations GObjectReference sequenceRef; qint64 sequenceStart = 0; U2SequenceImporter seqImporter(hints, true); const QString folder = hints.value(DocumentFormat::DBI_FOLDER_HINT, U2ObjectDbi::ROOT_FOLDER).toString(); int seqNumber = 0; int progressUpNum = 0; const int objectsCountLimit = hints.contains(DocumentReadingMode_MaxObjectsInDoc) ? hints[DocumentReadingMode_MaxObjectsInDoc].toInt() : -1; const bool settingsMakeUniqueName = !hints.value(DocumentReadingMode_DontMakeUniqueNames, false).toBool(); while (!os.isCoR()) { //read header QString sequenceName = readSequenceName(os, io, '@'); // check for eof while trying to read another FASTQ block if (io->isEof()) { break; } CHECK_OP_BREAK(os); if(sequenceName.isEmpty()){ sequenceName = "Sequence"; } if ((merge == false) || (seqNumber == 0)) { QString objName = sequenceName; if (settingsMakeUniqueName) { objName = (merge) ? 
"Sequence" : TextUtils::variate(sequenceName, "_", uniqueNames); objName.squeeze(); uniqueNames.insert(objName); } seqImporter.startSequence(dbiRef, folder, objName, false, os); CHECK_OP_BREAK(os); } //read sequence if (merge && sequence.length() > 0) { seqImporter.addDefaultSymbolsBlock(gapSize,os); sequenceStart += sequence.length(); sequenceStart+=gapSize; CHECK_OP_BREAK(os); } sequence.clear(); readSequence(os, io, sequence); MemoryLocker lSequence(os, qCeil(sequence.size()/(1000*1000))); CHECK_OP_BREAK(os); Q_UNUSED(lSequence); seqImporter.addBlock(sequence.data(),sequence.length(),os); CHECK_OP_BREAK(os); QString qualSequenceName = readSequenceName(os, io, '+'); if (!qualSequenceName.isEmpty()) { static const QString err = U2::FastqFormat::tr("Not a valid FASTQ file: %1, sequence name differs from quality scores name: %2 and %3"); CHECK_EXT_BREAK(sequenceName == qualSequenceName, os.setError(err.arg(docUrl.getURLString()).arg(sequenceName).arg(qualSequenceName))); } // read qualities qualityScores.clear(); readQuality(os, io, qualityScores, sequence.size()); CHECK_OP_BREAK(os); static const QString err = U2::FastqFormat::tr("Not a valid FASTQ file: %1. Bad quality scores: inconsistent size.").arg(docUrl.getURLString()); CHECK_EXT_BREAK(sequence.length() == qualityScores.length(), os.setError(err)); seqNumber++; progressUpNum++; if (merge) { headers.append(sequenceName); mergedMapping.append(U2Region(sequenceStart, sequence.length() )); } else { if (objectsCountLimit > 0 && objects.size() >= objectsCountLimit) { os.setError(FastqFormat::tr("File \"%1\" contains too many sequences to be displayed. 
" "However, you can process these data using instruments from the menu <i>Tools -> NGS data analysis</i> " "or pipelines built with Workflow Designer.") .arg(io->getURL().getURLString())); break; } U2Sequence u2seq = seqImporter.finalizeSequenceAndValidate(os); CHECK_OP_BREAK(os); sequenceRef = GObjectReference(io->getURL().getURLString(), u2seq.visualName, GObjectTypes::SEQUENCE, U2EntityRef(dbiRef, u2seq.id)); U2SequenceObject* seqObj = new U2SequenceObject(u2seq.visualName, U2EntityRef(dbiRef, u2seq.id)); CHECK_EXT_BREAK(seqObj != NULL, os.setError("U2SequenceObject is NULL")); seqObj->setQuality(DNAQuality(qualityScores)); objects << seqObj; U1AnnotationUtils::addAnnotations(objects, seqImporter.getCaseAnnotations(), sequenceRef, NULL, hints); } if (PROGRESS_UPDATE_STEP == progressUpNum) { progressUpNum = 0; os.setProgress(io->getProgress()); } } CHECK_OP_EXT(os, qDeleteAll(objects); objects.clear(), ); bool emptyObjects = objects.isEmpty(); CHECK_EXT(!emptyObjects || merge, os.setError(Document::tr("Document is empty.")), ); SAFE_POINT(headers.size() == mergedMapping.size(), "headers <-> regions mapping failed!", ); if (!merge) { return; } U2Sequence u2seq = seqImporter.finalizeSequenceAndValidate(os); CHECK_OP(os,); sequenceRef = GObjectReference(io->getURL().getURLString(), u2seq.visualName, GObjectTypes::SEQUENCE, U2EntityRef(dbiRef, u2seq.id)); U1AnnotationUtils::addAnnotations(objects, seqImporter.getCaseAnnotations(), sequenceRef, NULL, hints); objects << new U2SequenceObject(u2seq.visualName, U2EntityRef(dbiRef, u2seq.id)); objects << DocumentFormatUtils::addAnnotationsForMergedU2Sequence(sequenceRef, dbiRef, headers, mergedMapping, hints); if (headers.size() > 1) { writeLockReason = DocumentFormat::MERGED_SEQ_LOCK; } }
// Loads entries from `io` into `objects`, reporting problems through `os`; `writeLockReason` is cleared on entry.
// The merge gap is DocumentFormatUtils::getMergeGap(fs) clamped to [-1, 1000*1000]; any value other than -1 enables merge mode.
// In merge mode startSequence() is called for the first entry only and a single AnnotationTableObject is shared by all
// entries; otherwise a sequence is started, and an annotation table created, for every entry that carries annotations.
// The loop sets an error and stops once `objects` holds DocumentReadingMode_MaxObjectsInDoc items (when that hint is > 0),
// and stops when readEntry() returns false. Blank lines ('\n' / '\r') between records are skipped.
// Feature regions whose end exceeds 9223371036854775807LL are shortened to that bound, and features are bucketed by the
// names removed from their QUALIFIER_GROUP qualifiers ("" when a feature has none).
void EMBLGenbankAbstractDocument::load(const U2DbiRef& dbiRef, IOAdapter* io, QList<GObject*>& objects, QVariantMap& fs, U2OpStatus& os, QString& writeLockReason) { DbiOperationsBlock opBlock(dbiRef, os); CHECK_OP(os, ); Q_UNUSED(opBlock); writeLockReason.clear(); //get settings int gapSize = qBound(-1, DocumentFormatUtils::getMergeGap(fs), 1000*1000); bool merge = gapSize!=-1; QScopedPointer<AnnotationTableObject> mergedAnnotations(NULL); QStringList contigs; QVector<U2Region> mergedMapping; // Sequence loading is 'lazy', so, if there is no sequence, it won't be created and there is no need to remove it. U2SequenceImporter seqImporter(fs, true); const QString folder = fs.value(DBI_FOLDER_HINT, U2ObjectDbi::ROOT_FOLDER).toString(); QSet<QString> usedNames; GObjectReference sequenceRef(GObjectReference(io->getURL().getURLString(), "", GObjectTypes::SEQUENCE)); QByteArray readBuffer(ParserState::LOCAL_READ_BUFFER_SIZE, '\0'); ParserState st(isNcbiLikeFormat() ? 12 : 5, io, NULL, os); st.buff = readBuffer.data(); TmpDbiObjects dbiObjects(dbiRef, os); int num_sequence = 0; qint64 sequenceStart = 0; int sequenceSize = 0; int fullSequenceSize = 0; const int objectsCountLimit = fs.contains(DocumentReadingMode_MaxObjectsInDoc) ? fs[DocumentReadingMode_MaxObjectsInDoc].toInt() : -1; for (int i=0; !os.isCoR(); i++, ++num_sequence) { if (objectsCountLimit > 0 && objects.size() >= objectsCountLimit) { os.setError(EMBLGenbankAbstractDocument::tr("File \"%1\" contains too many sequences to be displayed. " "However, you can process these data using instruments from the menu <i>Tools -> NGS data analysis</i> " "or pipelines built with Workflow Designer.") .arg(io->getURL().getURLString())); break; } //TODO: reference to a local variable??? 
Such a pointer will become invalid EMBLGenbankDataEntry data; st.entry = &data; if (num_sequence == 0 || merge == false){ seqImporter.startSequence(dbiRef, folder, "default sequence name", false, os); //change name and circularity after finalize method CHECK_OP(os, ); } sequenceSize = 0; os.setDescription(tr("Reading entry header")); int offset = 0; if (merge && num_sequence > 0) { offset = gapSize; } if (!readEntry(&st,seqImporter,sequenceSize,fullSequenceSize,merge,offset, os)) { break; } if (merge && sequenceSize > 0 && num_sequence > 0) { sequenceStart = fullSequenceSize - sequenceSize; sequenceStart += gapSize; fullSequenceSize += gapSize; } // tolerate blank lines between records char ch; bool b; while ((b = st.io->getChar(&ch)) && (ch == '\n' || ch == '\r')){} if (b) { st.io->skip(-1); } AnnotationTableObject *annotationsObject = NULL; if (data.hasAnnotationObjectFlag) { QString annotationName = genObjectName(usedNames, data.name, data.tags, i+1, GObjectTypes::ANNOTATION_TABLE); QVariantMap hints; hints.insert(DBI_FOLDER_HINT, fs.value(DBI_FOLDER_HINT, U2ObjectDbi::ROOT_FOLDER)); if (Q_UNLIKELY(merge && NULL == mergedAnnotations)) { mergedAnnotations.reset(new AnnotationTableObject(annotationName, dbiRef, hints)); } annotationsObject = merge ? 
mergedAnnotations.data() : new AnnotationTableObject(annotationName, dbiRef, hints); QStringList groupNames; QMap<QString, QList<SharedAnnotationData> > groupName2Annotations; for (int i = 0, n = data.features.size(); i < n; ++i) { SharedAnnotationData &d = data.features[i]; if (!d->location->regions.isEmpty()) { for (int i = 0, n = d->location->regions.size(); i < n; ++i) { // for some reason larger numbers cannot be stored within rtree SQLite tables if (d->location->regions[i].endPos() > 9223371036854775807LL) { d->location->regions[i].length = 9223371036854775807LL - d->location->regions[i].startPos; } } } groupNames.clear(); d->removeAllQualifiers(GBFeatureUtils::QUALIFIER_GROUP, groupNames); if (groupNames.isEmpty()) { groupName2Annotations[""].append(d); } else { foreach(const QString &gName, groupNames) { groupName2Annotations[gName].append(d); } } CHECK_OP(os, ); }
/**
 * Returns the tree object reference stored under PHY_OBJ,
 * or a default-constructed reference when the state has no such entry.
 */
GObjectReference TreeViewerState::getPhyObject() const {
    if (!stateData.contains(PHY_OBJ)) {
        return GObjectReference();
    }
    return stateData[PHY_OBJ].value<GObjectReference>();
}
/**
 * Handles one scheduling step of the repeat-finder workflow element.
 *
 * When a message is waiting, the worker refreshes `cfg` and `resultName` from
 * the actor parameters, validates them, loads the whole input sequence and
 * returns a FindRepeatsToAnnotationsTask. When the input has ended, the worker
 * marks itself done and closes the output.
 *
 * @return the search task; a FailTask for an out-of-range identity, a negative
 *         minimal distance, a sequence read error or a non-nucleic alphabet;
 *         NULL when there is nothing to do, the message is empty or the
 *         sequence object could not be obtained
 */
Task* RepeatWorker::tick() {
    if (!input->hasMessage()) {
        if (input->isEnded()) {
            setDone();
            output->setEnded();
        }
        return NULL;
    }

    Message inputMessage = getMessageAndSetupScriptValues(input);
    if (inputMessage.isEmpty()) {
        output->transit();
        return NULL;
    }

    cfg.algo = RFAlgorithm(actor->getParameter(ALGO_ATTR)->getAttributeValue<int>(context));
    cfg.minLen = actor->getParameter(LEN_ATTR)->getAttributeValue<int>(context);

    // Distance bounds apply only when their "use" switch is on; otherwise the range is left fully open.
    const bool useMinDistance = actor->getParameter(USE_MIN_DISTANCE_ATTR)->getAttributeValue<bool>(context);
    if (useMinDistance) {
        cfg.minDist = actor->getParameter(MIN_DIST_ATTR)->getAttributeValue<int>(context);
    } else {
        cfg.minDist = 0;
    }
    const bool useMaxDistance = actor->getParameter(USE_MAX_DISTANCE_ATTR)->getAttributeValue<bool>(context);
    if (useMaxDistance) {
        cfg.maxDist = actor->getParameter(MAX_DIST_ATTR)->getAttributeValue<int>(context);
    } else {
        cfg.maxDist = INT_MAX;
    }

    // The identity is stored in cfg before it is range-checked below.
    const int identity = actor->getParameter(IDENTITY_ATTR)->getAttributeValue<int>(context);
    cfg.setIdentity(identity);
    cfg.nThreads = actor->getParameter(THREADS_ATTR)->getAttributeValue<int>(context);
    cfg.inverted = actor->getParameter(INVERT_ATTR)->getAttributeValue<bool>(context);
    cfg.filter = RepeatsFilterAlgorithm(actor->getParameter(NESTED_ATTR)->getAttributeValue<int>(context));
    cfg.excludeTandems = actor->getParameter(TANMEDS_ATTR)->getAttributeValue<bool>(context);

    resultName = actor->getParameter(NAME_ATTR)->getAttributeValue<QString>(context);
    if (resultName.isEmpty()) {
        resultName = "repeat_unit";
        algoLog.error(tr("result name is empty, default name used"));
    }

    const bool identityInRange = (identity >= 0) && (identity <= 100);
    if (!identityInRange) {
        algoLog.error(tr("Incorrect value: identity value must be between 0 and 100"));
        return new FailTask(tr("Incorrect value: identity value must be between 0 and 100"));
    }

    const QVariantMap messageData = inputMessage.getData().toMap();
    SharedDbiDataHandler seqId = messageData.value(BaseSlots::DNA_SEQUENCE_SLOT().getId()).value<SharedDbiDataHandler>();
    QScopedPointer<U2SequenceObject> seqObj(StorageUtils::getSequenceObject(context->getDataStorage(), seqId));
    if (seqObj.isNull()) {
        return NULL;
    }

    U2OpStatusImpl os;
    DNASequence seq = seqObj->getWholeSequence(os);
    CHECK_OP(os, new FailTask(os.getError()));

    if (cfg.minDist < 0) {
        algoLog.error(tr("Incorrect value: minimal distance must be greater then zero"));
        return new FailTask(tr("Incorrect value: minimal distance must be greater then zero"));
    }

    if (!seq.alphabet->isNucleic()) {
        const QString alphabetError = tr("Sequence alphabet is not nucleic!");
        return new FailTask(alphabetError);
    }

    Task* searchTask = new FindRepeatsToAnnotationsTask(cfg, seq, resultName, QString(), "", GObjectReference());
    connect(searchTask, SIGNAL(si_stateChanged()), SLOT(sl_taskFinished()));
    return searchTask;
}
static void load(IOAdapter* io, const U2DbiRef& dbiRef, const QVariantMap& fs, QList<GObject*>& objects, int gapSize, QString& writeLockReason, U2OpStatus& os) { DbiOperationsBlock opBlock(dbiRef, os); CHECK_OP(os, ); Q_UNUSED(opBlock); static char fastaCommentStartChar = FastaFormat::FASTA_COMMENT_START_SYMBOL; MemoryLocker memoryLocker(os, 1); CHECK_OP(os, ); writeLockReason.clear(); QByteArray readBuff(DocumentFormat::READ_BUFF_SIZE + 1, 0); char* buff = readBuff.data(); qint64 len = 0; bool merge = gapSize != -1; QStringList headers; QSet<QString> uniqueNames; QVector<U2Region> mergedMapping; // for lower case annotations GObjectReference sequenceRef; //skip leading whites if present bool lineOk = true; static QBitArray nonWhites = ~TextUtils::WHITES; io->readUntil(buff, DocumentFormat::READ_BUFF_SIZE, nonWhites, IOAdapter::Term_Exclude, &lineOk); CHECK_EXT(!io->hasError(), os.setError(io->errorString()), ); U2SequenceImporter seqImporter(fs, true); const QString folder = fs.value(DocumentFormat::DBI_FOLDER_HINT, U2ObjectDbi::ROOT_FOLDER).toString(); qint64 sequenceStart = 0; int sequenceNumber = 0; DbiConnection con(dbiRef, os); bool headerReaded = false; QStringList emptySeqNames; const int objectsCountLimit = fs.contains(DocumentReadingMode_MaxObjectsInDoc) ? 
fs[DocumentReadingMode_MaxObjectsInDoc].toInt() : -1; const bool settingsMakeUniqueName = !fs.value(DocumentReadingMode_DontMakeUniqueNames, false).toBool(); while (!os.isCoR()) { //skip start comments and read header if(!headerReaded){ do{ len = io->readLine(buff, DocumentFormat::READ_BUFF_SIZE); CHECK_EXT(!io->hasError(), os.setError(io->errorString()), ); }while(buff[0] == fastaCommentStartChar && len > 0); } if (len == 0 && io->isEof()) { //end if stream break; } CHECK_EXT(!io->hasError(), os.setError(io->errorString()), ); CHECK_EXT_BREAK(lineOk, os.setError(FastaFormat::tr("Line is too long"))); QString headerLine = QString(QByteArray(buff+1, len-1)).trimmed(); CHECK_EXT_BREAK(buff[0] == FastaFormat::FASTA_HEADER_START_SYMBOL, os.setError(FastaFormat::tr("First line is not a FASTA header"))); //read sequence if (sequenceNumber == 0 || !merge) { QString objName = headerLine; if(objName.isEmpty()){ objName = "Sequence"; } if (settingsMakeUniqueName) { objName = (merge) ? "Sequence" : TextUtils::variate(objName, "_", uniqueNames); objName.squeeze(); memoryLocker.tryAcquire(2*objName.size()); CHECK_OP_BREAK(os); uniqueNames.insert(objName); } seqImporter.startSequence(os, dbiRef, folder, objName, false); CHECK_OP_BREAK(os); sequenceRef = GObjectReference(io->getURL().getURLString(), objName, GObjectTypes::SEQUENCE); } if (sequenceNumber >= 1 && merge) { seqImporter.addDefaultSymbolsBlock(gapSize, os); sequenceStart += gapSize; CHECK_OP_BREAK(os); } int sequenceLen = 0; while (!os.isCoR()) { do { len = io->readLine(buff, DocumentFormat::READ_BUFF_SIZE); CHECK_EXT(!io->hasError(), os.setError(io->errorString()), ); } while (len <= 0 && !io->isEof()); CHECK_EXT(!io->hasError(), os.setError(io->errorString()), ); if (len <= 0 && io->isEof()) { break; } CHECK_EXT(!io->hasError(), os.setError(io->errorString()), ); buff[len] = 0; if(buff[0] != fastaCommentStartChar && buff[0] != FastaFormat::FASTA_HEADER_START_SYMBOL){ len = TextUtils::remove(buff, len, 
TextUtils::WHITES); if(len > 0){ seqImporter.addBlock(buff, len, os); sequenceLen += len; } }else if( buff[0] == FastaFormat::FASTA_HEADER_START_SYMBOL){ headerReaded = true; break; } CHECK_OP_BREAK(os); os.setProgress(io->getProgress()); } if (merge) { memoryLocker.tryAcquire(headerLine.size()); CHECK_OP_BREAK(os); headers.append(headerLine); mergedMapping.append(U2Region(sequenceStart, sequenceLen)); } else { if (objectsCountLimit > 0 && objects.size() >= objectsCountLimit) { os.setError(FastaFormat::tr("File \"%1\" contains too many sequences to be displayed. " "However, you can process these data using instruments from the menu <i>Tools -> NGS data analysis</i> " "or pipelines built with Workflow Designer.") .arg(io->getURL().getURLString())); break; } memoryLocker.tryAcquire(800); CHECK_OP_BREAK(os); U2Sequence seq = seqImporter.finalizeSequenceAndValidate(os); if (os.hasError() && os.getError() == U2SequenceImporter::EMPTY_SEQUENCE_ERROR) { os.setError(""); emptySeqNames << headerLine; continue; } sequenceRef.entityRef = U2EntityRef(dbiRef, seq.id); //TODO parse header U2StringAttribute attr(seq.id, DNAInfo::FASTA_HDR, headerLine); con.dbi->getAttributeDbi()->createStringAttribute(attr, os); CHECK_OP_BREAK(os); objects << new U2SequenceObject(seq.visualName, U2EntityRef(dbiRef, seq.id)); CHECK_OP_BREAK(os); U1AnnotationUtils::addAnnotations(objects, seqImporter.getCaseAnnotations(), sequenceRef, NULL, fs); } sequenceStart += sequenceLen; sequenceNumber++; ioLog.trace(QString("Sequence #%1 is processed").arg(sequenceNumber)); } CHECK_OP_EXT(os, qDeleteAll(objects); objects.clear(), ); CHECK_EXT(!objects.isEmpty() || merge, os.setError(Document::tr("Document is empty.")), ); SAFE_POINT(headers.size() == mergedMapping.size(), "headers <-> regions mapping failed!", ); ioLog.trace("All sequences are processed"); if (!emptySeqNames.isEmpty()) { QString warningMessage; warningMessage.append(FastaFormat::tr("Loaded sequences: %1.\n").arg(sequenceNumber)); 
warningMessage.append(FastaFormat::tr("Skipped sequences: %1.\n").arg(emptySeqNames.size())); warningMessage.append(FastaFormat::tr("The following sequences are empty:\n%1").arg(emptySeqNames.join(",\n"))); os.addWarning(warningMessage); } if (!merge) { return; } U2Sequence seq = seqImporter.finalizeSequenceAndValidate(os); CHECK_OP(os, ); sequenceRef.entityRef = U2EntityRef(dbiRef, seq.id); U1AnnotationUtils::addAnnotations(objects, seqImporter.getCaseAnnotations(), sequenceRef, NULL, fs); objects << new U2SequenceObject(seq.visualName, U2EntityRef(dbiRef, seq.id)); objects << DocumentFormatUtils::addAnnotationsForMergedU2Sequence( sequenceRef, dbiRef, headers, mergedMapping, fs ); if (headers.size() > 1) { writeLockReason = QObject::tr("Document sequences were merged"); } }