void GenerateDNASequenceTask::run( ) {
    if ( seed < 0 ) {
        qsrand( QDateTime::currentDateTime( ).toTime_t( ) );
    } else {
        qsrand( seed );
    }
    dbiRef = AppContext::getDbiRegistry( )->getSessionTmpDbiRef( stateInfo );
    CHECK_OP( stateInfo, );
    DbiConnection con( dbiRef, stateInfo );
    CHECK_OP( stateInfo, );

    results.reserve( count );
    for ( int seqCount = 0; seqCount < count; seqCount++ ) {
        U2SequenceImporter seqImporter( QVariantMap( ), true );

        QByteArray sequenceChunk;
        if( window > length ) {
            window = length;
        }

        seqImporter.startSequence(stateInfo, dbiRef, U2ObjectDbi::ROOT_FOLDER, QString( "default" ), false);
        CHECK_OP_BREAK( stateInfo );

        for( int chunkCount = 0; chunkCount < length / window && !isCanceled( );
            chunkCount++ )
        {
            DNASequenceGenerator::generateSequence( baseContent, window, sequenceChunk );
            seqImporter.addBlock( sequenceChunk.constData( ), sequenceChunk.length( ), stateInfo );
            CHECK_OP_BREAK( stateInfo );
            const int currentProgress = int( 100 * ( seqCount + chunkCount * double ( window )
                / double ( length ) ) / double( count ) );
            stateInfo.setProgress( currentProgress );
        }
        // append last chunk
        DNASequenceGenerator::generateSequence( baseContent, length % window, sequenceChunk );
        seqImporter.addBlock( sequenceChunk.constData( ), sequenceChunk.length( ), stateInfo );
        CHECK_OP_BREAK( stateInfo );
        U2Sequence seq = seqImporter.finalizeSequence( stateInfo );
        CHECK_OP_BREAK( stateInfo );

        results.append( seq );
        stateInfo.setProgress( seqCount / count * 100 );
    }
}
QModelIndex GTUtilsSharedDatabaseDocument::getItemIndex(HI::GUITestOpStatus &os, Document *databaseDoc, const QString &itemPath, bool mustExist) {
    Q_UNUSED(os);
    GT_CHECK_RESULT(NULL != databaseDoc, "databaseDoc is NULL", QModelIndex());
    GT_CHECK_RESULT(!itemPath.isEmpty(), "Folder path is empty", QModelIndex());

    const QStringList folders = itemPath.split(U2ObjectDbi::PATH_SEP, QString::SkipEmptyParts);

    QModelIndex itemIndex = GTUtilsProjectTreeView::findIndex(os, databaseDoc->getName());
    CHECK(!folders.isEmpty(), itemIndex);

    GTGlobals::FindOptions options;
    options.depth = 1;
    options.failIfNotFound = mustExist;
    foreach (const QString& folder, folders) {
        itemIndex = GTUtilsProjectTreeView::findIndex(os, folder, itemIndex, options);
        CHECK_OP_BREAK(os);
        CHECK_BREAK(itemIndex.isValid());
    }
示例#3
0
/**
 * FASTQ format specification: http://maq.sourceforge.net/fastq.shtml
 */
static void load(IOAdapter* io, const U2DbiRef& dbiRef, const QVariantMap& hints, const GUrl& docUrl, QList<GObject*>& objects, U2OpStatus& os,
                 int gapSize, int predictedSize, QString& writeLockReason) {
    DbiOperationsBlock opBlock(dbiRef, os);
    CHECK_OP(os, );
    Q_UNUSED(opBlock);
    writeLockReason.clear();

    bool merge = gapSize!=-1;
    QByteArray sequence;
    QByteArray qualityScores;
    QStringList headers;
    QSet<QString> uniqueNames;

    QVector<U2Region> mergedMapping;
    QByteArray gapSequence((merge ? gapSize : 0), 0);
    sequence.reserve(predictedSize);
    qualityScores.reserve(predictedSize);

    // for lower case annotations
    GObjectReference sequenceRef;
    qint64 sequenceStart = 0;

    U2SequenceImporter seqImporter(hints, true);
    const QString folder = hints.value(DocumentFormat::DBI_FOLDER_HINT, U2ObjectDbi::ROOT_FOLDER).toString();
    int seqNumber = 0;
    int progressUpNum = 0;

    const int objectsCountLimit = hints.contains(DocumentReadingMode_MaxObjectsInDoc) ? hints[DocumentReadingMode_MaxObjectsInDoc].toInt() : -1;
    const bool settingsMakeUniqueName = !hints.value(DocumentReadingMode_DontMakeUniqueNames, false).toBool();
    while (!os.isCoR()) {
        //read header
        QString sequenceName = readSequenceName(os, io, '@');
        // check for eof while trying to read another FASTQ block
        if (io->isEof()) {
            break;
        }

        CHECK_OP_BREAK(os);

        if(sequenceName.isEmpty()){
            sequenceName = "Sequence";
        }

        if ((merge == false) || (seqNumber == 0)) {
            QString objName = sequenceName;
            if (settingsMakeUniqueName) {
                objName = (merge) ? "Sequence" : TextUtils::variate(sequenceName, "_", uniqueNames);
                objName.squeeze();
                uniqueNames.insert(objName);
            }
            seqImporter.startSequence(dbiRef, folder, objName, false, os);
            CHECK_OP_BREAK(os);
        }

        //read sequence
        if (merge && sequence.length() > 0) {
            seqImporter.addDefaultSymbolsBlock(gapSize,os);
            sequenceStart += sequence.length();
            sequenceStart+=gapSize;
            CHECK_OP_BREAK(os);
        }

        sequence.clear();
        readSequence(os, io, sequence);
        MemoryLocker lSequence(os, qCeil(sequence.size()/(1000*1000)));
        CHECK_OP_BREAK(os);
        Q_UNUSED(lSequence);

        seqImporter.addBlock(sequence.data(),sequence.length(),os);
        CHECK_OP_BREAK(os);

        QString qualSequenceName = readSequenceName(os, io, '+');
        if (!qualSequenceName.isEmpty()) {
            static const QString err = U2::FastqFormat::tr("Not a valid FASTQ file: %1, sequence name differs from quality scores name: %2 and %3");
            CHECK_EXT_BREAK(sequenceName == qualSequenceName,
                os.setError(err.arg(docUrl.getURLString()).arg(sequenceName).arg(qualSequenceName)));
        }

        // read qualities
        qualityScores.clear();
        readQuality(os, io, qualityScores, sequence.size());
        CHECK_OP_BREAK(os);

        static const QString err = U2::FastqFormat::tr("Not a valid FASTQ file: %1. Bad quality scores: inconsistent size.").arg(docUrl.getURLString());
        CHECK_EXT_BREAK(sequence.length() == qualityScores.length(), os.setError(err));

        seqNumber++;
        progressUpNum++;
        if (merge) {
            headers.append(sequenceName);
            mergedMapping.append(U2Region(sequenceStart, sequence.length() ));
        }
        else {
            if (objectsCountLimit > 0 && objects.size() >= objectsCountLimit) {
                os.setError(FastqFormat::tr("File \"%1\" contains too many sequences to be displayed. "
                    "However, you can process these data using instruments from the menu <i>Tools -> NGS data analysis</i> "
                    "or pipelines built with Workflow Designer.")
                    .arg(io->getURL().getURLString()));
                break;
            }

            U2Sequence u2seq = seqImporter.finalizeSequenceAndValidate(os);
            CHECK_OP_BREAK(os);
            sequenceRef = GObjectReference(io->getURL().getURLString(), u2seq.visualName, GObjectTypes::SEQUENCE, U2EntityRef(dbiRef, u2seq.id));

            U2SequenceObject* seqObj = new U2SequenceObject(u2seq.visualName, U2EntityRef(dbiRef, u2seq.id));
            CHECK_EXT_BREAK(seqObj != NULL, os.setError("U2SequenceObject is NULL"));
            seqObj->setQuality(DNAQuality(qualityScores));
            objects << seqObj;

            U1AnnotationUtils::addAnnotations(objects, seqImporter.getCaseAnnotations(), sequenceRef, NULL, hints);
        }
        if (PROGRESS_UPDATE_STEP == progressUpNum) {
            progressUpNum = 0;
            os.setProgress(io->getProgress());
        }
    }

    CHECK_OP_EXT(os, qDeleteAll(objects); objects.clear(), );
    bool emptyObjects = objects.isEmpty();
    CHECK_EXT(!emptyObjects || merge, os.setError(Document::tr("Document is empty.")), );
    SAFE_POINT(headers.size() == mergedMapping.size(), "headers <-> regions mapping failed!", );

    if (!merge) {
        return;
    }
    U2Sequence u2seq = seqImporter.finalizeSequenceAndValidate(os);
    CHECK_OP(os,);

    sequenceRef = GObjectReference(io->getURL().getURLString(), u2seq.visualName, GObjectTypes::SEQUENCE, U2EntityRef(dbiRef, u2seq.id));

    U1AnnotationUtils::addAnnotations(objects, seqImporter.getCaseAnnotations(), sequenceRef, NULL, hints);
    objects << new U2SequenceObject(u2seq.visualName, U2EntityRef(dbiRef, u2seq.id));
    objects << DocumentFormatUtils::addAnnotationsForMergedU2Sequence(sequenceRef, dbiRef, headers, mergedMapping, hints);
    if (headers.size() > 1) {
        writeLockReason = DocumentFormat::MERGED_SEQ_LOCK;
    }
}
示例#4
0
static void load(IOAdapter* io, const U2DbiRef& dbiRef, const QVariantMap& fs, QList<GObject*>& objects,
                 int gapSize, QString& writeLockReason, U2OpStatus& os)
{
    DbiOperationsBlock opBlock(dbiRef, os);
    CHECK_OP(os, );
    Q_UNUSED(opBlock);

    static char fastaCommentStartChar = FastaFormat::FASTA_COMMENT_START_SYMBOL;

    MemoryLocker memoryLocker(os, 1);
    CHECK_OP(os, );

    writeLockReason.clear();
    QByteArray readBuff(DocumentFormat::READ_BUFF_SIZE + 1, 0);
    char* buff = readBuff.data();
    qint64 len = 0;

    bool merge = gapSize != -1;
    QStringList headers;
    QSet<QString> uniqueNames;
    QVector<U2Region> mergedMapping;

    // for lower case annotations
    GObjectReference sequenceRef;

    //skip leading whites if present
    bool lineOk = true;
    static QBitArray nonWhites = ~TextUtils::WHITES;
    io->readUntil(buff, DocumentFormat::READ_BUFF_SIZE, nonWhites, IOAdapter::Term_Exclude, &lineOk);
    CHECK_EXT(!io->hasError(), os.setError(io->errorString()), );

    U2SequenceImporter seqImporter(fs, true);
    const QString folder = fs.value(DocumentFormat::DBI_FOLDER_HINT, U2ObjectDbi::ROOT_FOLDER).toString();

    qint64 sequenceStart = 0;
    int sequenceNumber = 0;
    DbiConnection con(dbiRef, os);
    bool headerReaded = false;
    QStringList emptySeqNames;

    const int objectsCountLimit = fs.contains(DocumentReadingMode_MaxObjectsInDoc) ? fs[DocumentReadingMode_MaxObjectsInDoc].toInt() : -1;
    const bool settingsMakeUniqueName = !fs.value(DocumentReadingMode_DontMakeUniqueNames, false).toBool();
    while (!os.isCoR()) {
        //skip start comments and read header
        if(!headerReaded){
            do{
                len = io->readLine(buff, DocumentFormat::READ_BUFF_SIZE);
                CHECK_EXT(!io->hasError(), os.setError(io->errorString()), );
            }while(buff[0] == fastaCommentStartChar && len > 0);
        }

        if (len == 0 && io->isEof()) { //end if stream
            break;
        }
        CHECK_EXT(!io->hasError(), os.setError(io->errorString()), );
        CHECK_EXT_BREAK(lineOk, os.setError(FastaFormat::tr("Line is too long")));

        QString headerLine = QString(QByteArray(buff+1, len-1)).trimmed();
        CHECK_EXT_BREAK(buff[0] == FastaFormat::FASTA_HEADER_START_SYMBOL, os.setError(FastaFormat::tr("First line is not a FASTA header")));

        //read sequence
        if (sequenceNumber == 0 || !merge) {
            QString objName = headerLine;
            if(objName.isEmpty()){
                objName = "Sequence";
            }
            if (settingsMakeUniqueName) {
                objName = (merge) ? "Sequence" : TextUtils::variate(objName, "_", uniqueNames);
                objName.squeeze();
                memoryLocker.tryAcquire(2*objName.size());
                CHECK_OP_BREAK(os);
                uniqueNames.insert(objName);
            }
            seqImporter.startSequence(os, dbiRef, folder, objName, false);
            CHECK_OP_BREAK(os);

            sequenceRef = GObjectReference(io->getURL().getURLString(), objName, GObjectTypes::SEQUENCE);
        }
        if (sequenceNumber >= 1 && merge) {
            seqImporter.addDefaultSymbolsBlock(gapSize, os);
            sequenceStart += gapSize;
            CHECK_OP_BREAK(os);
        }
        int sequenceLen = 0;
        while (!os.isCoR()) {
            do {
                len = io->readLine(buff, DocumentFormat::READ_BUFF_SIZE);
                CHECK_EXT(!io->hasError(), os.setError(io->errorString()), );
            } while (len <= 0 && !io->isEof());
            CHECK_EXT(!io->hasError(), os.setError(io->errorString()), );

            if (len <= 0 && io->isEof()) {
                break;
            }
            CHECK_EXT(!io->hasError(), os.setError(io->errorString()), );

            buff[len] = 0;

            if(buff[0] != fastaCommentStartChar && buff[0] != FastaFormat::FASTA_HEADER_START_SYMBOL){
                len = TextUtils::remove(buff, len, TextUtils::WHITES);
                if(len > 0){
                    seqImporter.addBlock(buff, len, os);
                    sequenceLen += len;
                }
            }else if( buff[0] == FastaFormat::FASTA_HEADER_START_SYMBOL){
                headerReaded = true;
                break;
            }

            CHECK_OP_BREAK(os);
            os.setProgress(io->getProgress());
        }

        if (merge) {
            memoryLocker.tryAcquire(headerLine.size());
            CHECK_OP_BREAK(os);
            headers.append(headerLine);
            mergedMapping.append(U2Region(sequenceStart, sequenceLen));
        } else {
            if (objectsCountLimit > 0 && objects.size() >= objectsCountLimit) {
                os.setError(FastaFormat::tr("File \"%1\" contains too many sequences to be displayed. "
                    "However, you can process these data using instruments from the menu <i>Tools -> NGS data analysis</i> "
                    "or pipelines built with Workflow Designer.")
                    .arg(io->getURL().getURLString()));
                break;
            }
            memoryLocker.tryAcquire(800);
            CHECK_OP_BREAK(os);
            U2Sequence seq = seqImporter.finalizeSequenceAndValidate(os);
            if (os.hasError() && os.getError() == U2SequenceImporter::EMPTY_SEQUENCE_ERROR) {
                os.setError("");
                emptySeqNames << headerLine;
                continue;
            }
            sequenceRef.entityRef = U2EntityRef(dbiRef, seq.id);

            //TODO parse header
            U2StringAttribute attr(seq.id, DNAInfo::FASTA_HDR, headerLine);
            con.dbi->getAttributeDbi()->createStringAttribute(attr, os);
            CHECK_OP_BREAK(os);

            objects << new U2SequenceObject(seq.visualName, U2EntityRef(dbiRef, seq.id));
            CHECK_OP_BREAK(os);

            U1AnnotationUtils::addAnnotations(objects, seqImporter.getCaseAnnotations(), sequenceRef, NULL, fs);
        }
        sequenceStart += sequenceLen;
        sequenceNumber++;
        ioLog.trace(QString("Sequence #%1 is processed").arg(sequenceNumber));
    }

    CHECK_OP_EXT(os, qDeleteAll(objects); objects.clear(), );
    CHECK_EXT(!objects.isEmpty() || merge, os.setError(Document::tr("Document is empty.")), );
    SAFE_POINT(headers.size() == mergedMapping.size(), "headers <-> regions mapping failed!", );
    ioLog.trace("All sequences are processed");

    if (!emptySeqNames.isEmpty()) {
        QString warningMessage;
        warningMessage.append(FastaFormat::tr("Loaded sequences: %1.\n").arg(sequenceNumber));
        warningMessage.append(FastaFormat::tr("Skipped sequences: %1.\n").arg(emptySeqNames.size()));
        warningMessage.append(FastaFormat::tr("The following sequences are empty:\n%1").arg(emptySeqNames.join(",\n")));
        os.addWarning(warningMessage);
    }

    if (!merge) {
        return;
    }

    U2Sequence seq = seqImporter.finalizeSequenceAndValidate(os);
    CHECK_OP(os, );
    sequenceRef.entityRef = U2EntityRef(dbiRef, seq.id);

    U1AnnotationUtils::addAnnotations(objects, seqImporter.getCaseAnnotations(), sequenceRef, NULL, fs);
    objects << new U2SequenceObject(seq.visualName, U2EntityRef(dbiRef, seq.id));
    objects << DocumentFormatUtils::addAnnotationsForMergedU2Sequence( sequenceRef, dbiRef, headers, mergedMapping, fs );
    if (headers.size() > 1) {
        writeLockReason = QObject::tr("Document sequences were merged");
    }
}