void finishSequence(QList<GObject*>& objects, IOAdapter* io, U2OpStatus& os, const U2DbiRef& dbiRef, const QVariantMap& fs, TmpDbiObjects& dbiObjects, U2SequenceImporter& seqImporter){ U2Sequence u2seq = seqImporter.finalizeSequence(os); dbiObjects.objects << u2seq.id; CHECK_OP(os, ); GObjectReference sequenceRef(io->getURL().getURLString(), u2seq.visualName, GObjectTypes::SEQUENCE, U2EntityRef(dbiRef, u2seq.id)); U1AnnotationUtils::addAnnotations(objects, seqImporter.getCaseAnnotations(), sequenceRef, NULL, fs); objects << new U2SequenceObject(u2seq.visualName,U2EntityRef(dbiRef, u2seq.id)); }
void TextObjectTestData::initData() { U2DbiRef dbiRef = TextObjectTestData::getDbiRef(); U2RawData object(dbiRef); object.visualName = "Text"; U2OpStatusImpl os; RawDataUdrSchema::createObject(dbiRef, object, os); SAFE_POINT_OP(os, ); objRef = U2EntityRef(dbiRef, object.id); RawDataUdrSchema::writeContent("text", objRef, os); SAFE_POINT_OP(os, ); }
/**
 * Imports a DNASequence into the database through the supplied importer and
 * wraps the result into a new U2SequenceObject.
 *
 * The database sequence is started under sequence.getName(), while the
 * created GObject is named objName. The new object also receives
 * sequence.info and is appended to objects.
 *
 * @return the created object (also stored in objects), or NULL when any
 *         import step leaves os in a failed state.
 */
U2SequenceObject *importSequence(DNASequence &sequence, const QString &objName, QList<GObject*>& objects, U2SequenceImporter &seqImporter, const U2DbiRef& dbiRef, const QString& folder, U2OpStatus& os) {
    seqImporter.startSequence(dbiRef, folder, sequence.getName(), sequence.circular, os);
    CHECK_OP(os, NULL);

    // The whole sequence is pushed as a single block.
    seqImporter.addBlock(sequence.seq.constData(), sequence.seq.length(), os);
    CHECK_OP(os, NULL);

    U2Sequence importedSeq = seqImporter.finalizeSequenceAndValidate(os);
    // The id is registered before the status check, so it is tracked even if
    // finalization/validation failed.
    TmpDbiObjects tmpObjects(dbiRef, os);
    tmpObjects.objects << importedSeq.id;
    CHECK_OP(os, NULL);

    U2SequenceObject *result = new U2SequenceObject(objName, U2EntityRef(dbiRef, importedSeq.id));
    result->setSequenceInfo(sequence.info);
    objects << result;
    return result;
}
void PhylipPlugin::processCmdlineOptions() { CMDLineRegistry *cmdLineRegistry = AppContext::getCMDLineRegistry(); CHECK(cmdLineRegistry->hasParameter(PhylipCmdlineTask::PHYLIP_CMDLINE), ); CHECK(cmdLineRegistry->hasParameter(CmdlineInOutTaskRunner::OUT_DB_ARG), ); CHECK(cmdLineRegistry->hasParameter(CmdlineInOutTaskRunner::IN_DB_ARG), ); CHECK(cmdLineRegistry->hasParameter(CmdlineInOutTaskRunner::IN_ID_ARG), ); CreatePhyTreeSettings settings = fetchSettings(); QString outDbString = cmdLineRegistry->getParameterValue(CmdlineInOutTaskRunner::OUT_DB_ARG); QString inDbString = cmdLineRegistry->getParameterValue(CmdlineInOutTaskRunner::IN_DB_ARG); QString idString = cmdLineRegistry->getParameterValue(CmdlineInOutTaskRunner::IN_ID_ARG); U2OpStatus2Log os; U2DbiRef outDbiRef = CmdlineInOutTaskRunner::parseDbiRef(outDbString, os); CHECK_OP(os, ); U2DbiRef inDbiRef = CmdlineInOutTaskRunner::parseDbiRef(inDbString, os); CHECK_OP(os, ); U2DataId dataId = CmdlineInOutTaskRunner::parseDataId(idString, inDbiRef, os); CHECK_OP(os, ); Task *t = new PhylipTask(U2EntityRef(inDbiRef, dataId), outDbiRef, settings); connect(AppContext::getPluginSupport(), SIGNAL(si_allStartUpPluginsLoaded()), new TaskStarter(t), SLOT(registerTask())); }
/**
 * Imports the bytes of seq as one sequence into the database referenced by
 * dbiRef and returns a newly allocated U2SequenceObject for it.
 *
 * The destination folder is taken from the DocumentFormat::DBI_FOLDER_HINT
 * entry of hints; U2ObjectDbi::ROOT_FOLDER is used when the hint is absent.
 *
 * @return the new object, or NULL when any import step leaves os failed.
 */
U2SequenceObject * DocumentFormatUtils::addSequenceObject(const U2DbiRef& dbiRef, const QString& name, const QByteArray& seq, bool circular, const QVariantMap& hints, U2OpStatus& os) {
    const QString targetFolder = hints.value(DocumentFormat::DBI_FOLDER_HINT, U2ObjectDbi::ROOT_FOLDER).toString();

    U2SequenceImporter seqImporter;
    seqImporter.startSequence(dbiRef, targetFolder, name, circular, os);
    CHECK_OP(os, NULL);

    // The whole sequence is pushed as a single block.
    seqImporter.addBlock(seq.constData(), seq.length(), os);
    CHECK_OP(os, NULL);

    U2Sequence importedSeq = seqImporter.finalizeSequenceAndValidate(os);
    // The id is registered before the status check, so it is tracked even if
    // finalization/validation failed.
    TmpDbiObjects tmpObjects(dbiRef, os);
    tmpObjects.objects << importedSeq.id;
    CHECK_OP(os, NULL);

    return new U2SequenceObject(name, U2EntityRef(dbiRef, importedSeq.id));
}
/** * FASTQ format specification: http://maq.sourceforge.net/fastq.shtml */ static void load(IOAdapter* io, const U2DbiRef& dbiRef, const QVariantMap& hints, const GUrl& docUrl, QList<GObject*>& objects, U2OpStatus& os, int gapSize, int predictedSize, QString& writeLockReason) { DbiOperationsBlock opBlock(dbiRef, os); CHECK_OP(os, ); Q_UNUSED(opBlock); writeLockReason.clear(); bool merge = gapSize!=-1; QByteArray sequence; QByteArray qualityScores; QStringList headers; QSet<QString> uniqueNames; QVector<U2Region> mergedMapping; QByteArray gapSequence((merge ? gapSize : 0), 0); sequence.reserve(predictedSize); qualityScores.reserve(predictedSize); // for lower case annotations GObjectReference sequenceRef; qint64 sequenceStart = 0; U2SequenceImporter seqImporter(hints, true); const QString folder = hints.value(DocumentFormat::DBI_FOLDER_HINT, U2ObjectDbi::ROOT_FOLDER).toString(); int seqNumber = 0; int progressUpNum = 0; const int objectsCountLimit = hints.contains(DocumentReadingMode_MaxObjectsInDoc) ? hints[DocumentReadingMode_MaxObjectsInDoc].toInt() : -1; const bool settingsMakeUniqueName = !hints.value(DocumentReadingMode_DontMakeUniqueNames, false).toBool(); while (!os.isCoR()) { //read header QString sequenceName = readSequenceName(os, io, '@'); // check for eof while trying to read another FASTQ block if (io->isEof()) { break; } CHECK_OP_BREAK(os); if(sequenceName.isEmpty()){ sequenceName = "Sequence"; } if ((merge == false) || (seqNumber == 0)) { QString objName = sequenceName; if (settingsMakeUniqueName) { objName = (merge) ? 
"Sequence" : TextUtils::variate(sequenceName, "_", uniqueNames); objName.squeeze(); uniqueNames.insert(objName); } seqImporter.startSequence(dbiRef, folder, objName, false, os); CHECK_OP_BREAK(os); } //read sequence if (merge && sequence.length() > 0) { seqImporter.addDefaultSymbolsBlock(gapSize,os); sequenceStart += sequence.length(); sequenceStart+=gapSize; CHECK_OP_BREAK(os); } sequence.clear(); readSequence(os, io, sequence); MemoryLocker lSequence(os, qCeil(sequence.size()/(1000*1000))); CHECK_OP_BREAK(os); Q_UNUSED(lSequence); seqImporter.addBlock(sequence.data(),sequence.length(),os); CHECK_OP_BREAK(os); QString qualSequenceName = readSequenceName(os, io, '+'); if (!qualSequenceName.isEmpty()) { static const QString err = U2::FastqFormat::tr("Not a valid FASTQ file: %1, sequence name differs from quality scores name: %2 and %3"); CHECK_EXT_BREAK(sequenceName == qualSequenceName, os.setError(err.arg(docUrl.getURLString()).arg(sequenceName).arg(qualSequenceName))); } // read qualities qualityScores.clear(); readQuality(os, io, qualityScores, sequence.size()); CHECK_OP_BREAK(os); static const QString err = U2::FastqFormat::tr("Not a valid FASTQ file: %1. Bad quality scores: inconsistent size.").arg(docUrl.getURLString()); CHECK_EXT_BREAK(sequence.length() == qualityScores.length(), os.setError(err)); seqNumber++; progressUpNum++; if (merge) { headers.append(sequenceName); mergedMapping.append(U2Region(sequenceStart, sequence.length() )); } else { if (objectsCountLimit > 0 && objects.size() >= objectsCountLimit) { os.setError(FastqFormat::tr("File \"%1\" contains too many sequences to be displayed. 
" "However, you can process these data using instruments from the menu <i>Tools -> NGS data analysis</i> " "or pipelines built with Workflow Designer.") .arg(io->getURL().getURLString())); break; } U2Sequence u2seq = seqImporter.finalizeSequenceAndValidate(os); CHECK_OP_BREAK(os); sequenceRef = GObjectReference(io->getURL().getURLString(), u2seq.visualName, GObjectTypes::SEQUENCE, U2EntityRef(dbiRef, u2seq.id)); U2SequenceObject* seqObj = new U2SequenceObject(u2seq.visualName, U2EntityRef(dbiRef, u2seq.id)); CHECK_EXT_BREAK(seqObj != NULL, os.setError("U2SequenceObject is NULL")); seqObj->setQuality(DNAQuality(qualityScores)); objects << seqObj; U1AnnotationUtils::addAnnotations(objects, seqImporter.getCaseAnnotations(), sequenceRef, NULL, hints); } if (PROGRESS_UPDATE_STEP == progressUpNum) { progressUpNum = 0; os.setProgress(io->getProgress()); } } CHECK_OP_EXT(os, qDeleteAll(objects); objects.clear(), ); bool emptyObjects = objects.isEmpty(); CHECK_EXT(!emptyObjects || merge, os.setError(Document::tr("Document is empty.")), ); SAFE_POINT(headers.size() == mergedMapping.size(), "headers <-> regions mapping failed!", ); if (!merge) { return; } U2Sequence u2seq = seqImporter.finalizeSequenceAndValidate(os); CHECK_OP(os,); sequenceRef = GObjectReference(io->getURL().getURLString(), u2seq.visualName, GObjectTypes::SEQUENCE, U2EntityRef(dbiRef, u2seq.id)); U1AnnotationUtils::addAnnotations(objects, seqImporter.getCaseAnnotations(), sequenceRef, NULL, hints); objects << new U2SequenceObject(u2seq.visualName, U2EntityRef(dbiRef, u2seq.id)); objects << DocumentFormatUtils::addAnnotationsForMergedU2Sequence(sequenceRef, dbiRef, headers, mergedMapping, hints); if (headers.size() > 1) { writeLockReason = DocumentFormat::MERGED_SEQ_LOCK; } }
complementarySeq.reserve(seq.size()); complSeq = complementarySeq.data(); TextUtils::translate(complTT->getOne2OneMapper(), seq, seq.size(), complSeq); TextUtils::reverse(complSeq, seq.size()); blockCounter = 0; } SAFE_POINT(complSeq != NULL, "complSeq is NULL", ); translatedSeq.append(aminoTT->translate3to1(complSeq[blockCounter], complSeq[blockCounter + 1], complSeq[blockCounter + 2])); } importer.addBlock(translatedSeq.constData(), translatedSeq.size(), stateInfo); U2Sequence u2Seq = importer.finalizeSequence(stateInfo); CHECK_OP(stateInfo, ); results << new U2SequenceObject(u2Seq.visualName, U2EntityRef(dbiRef, u2Seq.id)); currentSeq++; } } void AminoTranslationWorkerFactory::init(){ QMap<Descriptor, DataTypePtr> m; m[BaseSlots::DNA_SEQUENCE_SLOT()] = BaseTypes::DNA_SEQUENCE_TYPE(); DataTypePtr inSet(new MapDataType(Descriptor("regioned.sequence"), m)); DataTypeRegistry* dr = WorkflowEnv::getDataTypeRegistry(); assert(dr); dr->registerEntry(inSet); QList<PortDescriptor*> p;
static void load(IOAdapter* io, const U2DbiRef& dbiRef, const QVariantMap& fs, QList<GObject*>& objects, int gapSize, QString& writeLockReason, U2OpStatus& os) { DbiOperationsBlock opBlock(dbiRef, os); CHECK_OP(os, ); Q_UNUSED(opBlock); static char fastaCommentStartChar = FastaFormat::FASTA_COMMENT_START_SYMBOL; MemoryLocker memoryLocker(os, 1); CHECK_OP(os, ); writeLockReason.clear(); QByteArray readBuff(DocumentFormat::READ_BUFF_SIZE + 1, 0); char* buff = readBuff.data(); qint64 len = 0; bool merge = gapSize != -1; QStringList headers; QSet<QString> uniqueNames; QVector<U2Region> mergedMapping; // for lower case annotations GObjectReference sequenceRef; //skip leading whites if present bool lineOk = true; static QBitArray nonWhites = ~TextUtils::WHITES; io->readUntil(buff, DocumentFormat::READ_BUFF_SIZE, nonWhites, IOAdapter::Term_Exclude, &lineOk); CHECK_EXT(!io->hasError(), os.setError(io->errorString()), ); U2SequenceImporter seqImporter(fs, true); const QString folder = fs.value(DocumentFormat::DBI_FOLDER_HINT, U2ObjectDbi::ROOT_FOLDER).toString(); qint64 sequenceStart = 0; int sequenceNumber = 0; DbiConnection con(dbiRef, os); bool headerReaded = false; QStringList emptySeqNames; const int objectsCountLimit = fs.contains(DocumentReadingMode_MaxObjectsInDoc) ? 
fs[DocumentReadingMode_MaxObjectsInDoc].toInt() : -1; const bool settingsMakeUniqueName = !fs.value(DocumentReadingMode_DontMakeUniqueNames, false).toBool(); while (!os.isCoR()) { //skip start comments and read header if(!headerReaded){ do{ len = io->readLine(buff, DocumentFormat::READ_BUFF_SIZE); CHECK_EXT(!io->hasError(), os.setError(io->errorString()), ); }while(buff[0] == fastaCommentStartChar && len > 0); } if (len == 0 && io->isEof()) { //end if stream break; } CHECK_EXT(!io->hasError(), os.setError(io->errorString()), ); CHECK_EXT_BREAK(lineOk, os.setError(FastaFormat::tr("Line is too long"))); QString headerLine = QString(QByteArray(buff+1, len-1)).trimmed(); CHECK_EXT_BREAK(buff[0] == FastaFormat::FASTA_HEADER_START_SYMBOL, os.setError(FastaFormat::tr("First line is not a FASTA header"))); //read sequence if (sequenceNumber == 0 || !merge) { QString objName = headerLine; if(objName.isEmpty()){ objName = "Sequence"; } if (settingsMakeUniqueName) { objName = (merge) ? "Sequence" : TextUtils::variate(objName, "_", uniqueNames); objName.squeeze(); memoryLocker.tryAcquire(2*objName.size()); CHECK_OP_BREAK(os); uniqueNames.insert(objName); } seqImporter.startSequence(os, dbiRef, folder, objName, false); CHECK_OP_BREAK(os); sequenceRef = GObjectReference(io->getURL().getURLString(), objName, GObjectTypes::SEQUENCE); } if (sequenceNumber >= 1 && merge) { seqImporter.addDefaultSymbolsBlock(gapSize, os); sequenceStart += gapSize; CHECK_OP_BREAK(os); } int sequenceLen = 0; while (!os.isCoR()) { do { len = io->readLine(buff, DocumentFormat::READ_BUFF_SIZE); CHECK_EXT(!io->hasError(), os.setError(io->errorString()), ); } while (len <= 0 && !io->isEof()); CHECK_EXT(!io->hasError(), os.setError(io->errorString()), ); if (len <= 0 && io->isEof()) { break; } CHECK_EXT(!io->hasError(), os.setError(io->errorString()), ); buff[len] = 0; if(buff[0] != fastaCommentStartChar && buff[0] != FastaFormat::FASTA_HEADER_START_SYMBOL){ len = TextUtils::remove(buff, len, 
TextUtils::WHITES); if(len > 0){ seqImporter.addBlock(buff, len, os); sequenceLen += len; } }else if( buff[0] == FastaFormat::FASTA_HEADER_START_SYMBOL){ headerReaded = true; break; } CHECK_OP_BREAK(os); os.setProgress(io->getProgress()); } if (merge) { memoryLocker.tryAcquire(headerLine.size()); CHECK_OP_BREAK(os); headers.append(headerLine); mergedMapping.append(U2Region(sequenceStart, sequenceLen)); } else { if (objectsCountLimit > 0 && objects.size() >= objectsCountLimit) { os.setError(FastaFormat::tr("File \"%1\" contains too many sequences to be displayed. " "However, you can process these data using instruments from the menu <i>Tools -> NGS data analysis</i> " "or pipelines built with Workflow Designer.") .arg(io->getURL().getURLString())); break; } memoryLocker.tryAcquire(800); CHECK_OP_BREAK(os); U2Sequence seq = seqImporter.finalizeSequenceAndValidate(os); if (os.hasError() && os.getError() == U2SequenceImporter::EMPTY_SEQUENCE_ERROR) { os.setError(""); emptySeqNames << headerLine; continue; } sequenceRef.entityRef = U2EntityRef(dbiRef, seq.id); //TODO parse header U2StringAttribute attr(seq.id, DNAInfo::FASTA_HDR, headerLine); con.dbi->getAttributeDbi()->createStringAttribute(attr, os); CHECK_OP_BREAK(os); objects << new U2SequenceObject(seq.visualName, U2EntityRef(dbiRef, seq.id)); CHECK_OP_BREAK(os); U1AnnotationUtils::addAnnotations(objects, seqImporter.getCaseAnnotations(), sequenceRef, NULL, fs); } sequenceStart += sequenceLen; sequenceNumber++; ioLog.trace(QString("Sequence #%1 is processed").arg(sequenceNumber)); } CHECK_OP_EXT(os, qDeleteAll(objects); objects.clear(), ); CHECK_EXT(!objects.isEmpty() || merge, os.setError(Document::tr("Document is empty.")), ); SAFE_POINT(headers.size() == mergedMapping.size(), "headers <-> regions mapping failed!", ); ioLog.trace("All sequences are processed"); if (!emptySeqNames.isEmpty()) { QString warningMessage; warningMessage.append(FastaFormat::tr("Loaded sequences: %1.\n").arg(sequenceNumber)); 
warningMessage.append(FastaFormat::tr("Skipped sequences: %1.\n").arg(emptySeqNames.size())); warningMessage.append(FastaFormat::tr("The following sequences are empty:\n%1").arg(emptySeqNames.join(",\n"))); os.addWarning(warningMessage); } if (!merge) { return; } U2Sequence seq = seqImporter.finalizeSequenceAndValidate(os); CHECK_OP(os, ); sequenceRef.entityRef = U2EntityRef(dbiRef, seq.id); U1AnnotationUtils::addAnnotations(objects, seqImporter.getCaseAnnotations(), sequenceRef, NULL, fs); objects << new U2SequenceObject(seq.visualName, U2EntityRef(dbiRef, seq.id)); objects << DocumentFormatUtils::addAnnotationsForMergedU2Sequence( sequenceRef, dbiRef, headers, mergedMapping, fs ); if (headers.size() > 1) { writeLockReason = QObject::tr("Document sequences were merged"); } }