Пример #1
0
/**
 * Converts found tandem repeats into annotation data.
 *
 * For every tandem one annotation is built whose location lists consecutive
 * regions of tan.repeatLen bases, starting at the tandem offset shifted by
 * seqStart (plus s.reportSeqShift). When showOverlapped is set, additional
 * annotations are built for every start offset in [0, tan.size % tan.repeatLen].
 *
 * @param tandems        tandems to convert
 * @param seqStart       value added to every reported coordinate
 * @param showOverlapped whether the shifted (overlapping) variants are reported too
 *
 * NOTE(review): tan.repeatLen == 0 would divide by zero below; presumably
 * the search never produces such tandems - confirm against the caller.
 */
QList<SharedAnnotationData> FindTandemsToAnnotationsTask::importTandemAnnotations(const QList<Tandem>& tandems, qint64 seqStart, const bool showOverlapped) {

    seqStart += s.reportSeqShift;

    QList<SharedAnnotationData> res;
    foreach(const Tandem& tan, tandems) {
        unsigned offset = 0;
        const unsigned maxOffset = tan.size % tan.repeatLen;
        do{
            SharedAnnotationData ad(new AnnotationData());
            ad->type = U2FeatureTypes::RepeatRegion;
            ad->name = annName;
            const quint32 tandemEnd = tan.offset+tan.size+seqStart;
            quint32 pos = tan.offset+seqStart+offset;
            // one region per whole repeat unit that still fits before the tandem end
            for(; pos<=tandemEnd-tan.repeatLen; pos+=tan.repeatLen){
                ad->location->regions << U2Region(pos, tan.repeatLen);
            }
            if (ad->location->isEmpty()){
                // 'continue' in a do-while jumps straight to the loop condition,
                // so the offset has to be advanced here; otherwise the same empty
                // location would be rebuilt forever when showOverlapped is set.
                offset++;
                continue;
            }
            ad->qualifiers.append(U2Qualifier("num_of_repeats", QString::number(tan.size / tan.repeatLen)));
            ad->qualifiers.append(U2Qualifier("repeat_length", QString::number(tan.repeatLen)));
            ad->qualifiers.append(U2Qualifier("whole_length", QString::number(tan.size)));
            U1AnnotationUtils::addDescriptionQualifier(ad, annDescription);
            res.append(ad);
            offset++;
        }while(showOverlapped && offset<=maxOffset);
    }
Пример #2
0
/**
 * Creates a "Contigs" annotation table that records how contigs were merged
 * into a single sequence.
 *
 * One "contig" annotation is produced per entry of contigNames; its location
 * is the region with the same index in mergedMapping, and it carries the
 * contig name and its ordinal number as qualifiers.
 *
 * @param mergedSequenceRef reference to the merged sequence; when valid, the
 *                          table is related to it with the sequence role
 * @param dbiRef            database the table object is created in
 * @param contigNames       names of the merged contigs
 * @param mergedMapping     regions of the contigs, indexed like contigNames
 * @param hints             only DocumentFormat::DBI_FOLDER_HINT is read here
 * @return newly allocated annotation table object
 */
AnnotationTableObject * DocumentFormatUtils::addAnnotationsForMergedU2Sequence(const GObjectReference& mergedSequenceRef,
                                                                               const U2DbiRef& dbiRef,
                                                                               const QStringList &contigNames,
                                                                               const QVector<U2Region> &mergedMapping,
                                                                               const QVariantMap& hints)
{
    // forward the destination folder, falling back to the root folder
    const QVariant folderHint = hints.value(DocumentFormat::DBI_FOLDER_HINT, U2ObjectDbi::ROOT_FOLDER);
    QVariantMap tableHints;
    tableHints.insert(DocumentFormat::DBI_FOLDER_HINT, folderHint);

    AnnotationTableObject *contigTable = new AnnotationTableObject("Contigs", dbiRef, tableHints);

    // link the table to the merged sequence when the reference is usable
    if (mergedSequenceRef.isValid()) {
        contigTable->addObjectRelation(GObjectRelation(mergedSequenceRef, ObjectRole_Sequence));
    }

    // store the mapping as one annotation per contig
    QList<SharedAnnotationData> contigAnnotations;
    const int contigCount = contigNames.size();
    for (int contigIdx = 0; contigIdx < contigCount; ++contigIdx) {
        SharedAnnotationData contig(new AnnotationData);
        contig->name = QString("contig");
        contig->location->regions << mergedMapping[contigIdx];
        contig->qualifiers << U2Qualifier("name", contigNames.at(contigIdx));
        contig->qualifiers << U2Qualifier("number", QString("%1").arg(contigIdx));
        contigAnnotations.append(contig);
    }
    contigTable->addAnnotations(contigAnnotations);
    return contigTable;
}
Пример #3
0
/**
 * Turns a per-residue secondary structure prediction into annotations.
 *
 * The prediction is split into maximal runs of equal characters. Every run
 * whose character is not 'C' becomes one annotation covering exactly the
 * positions of that run, with the structure type stored as a qualifier.
 *
 * @param predicted      one prediction character per sequence position
 * @param annotationName name assigned to every created annotation
 * @return annotations in order of their position; empty for empty input
 */
QList<SharedAnnotationData> SecStructPredictUtils::saveAlgorithmResultsAsAnnotations(const QByteArray& predicted, const QString& annotationName)
{
    const char emptyCoil = 'C';

    QList<SharedAnnotationData> predictedStructures;
    const int numAcronyms = predicted.length();
    if (numAcronyms == 0) {
        // nothing to annotate; also avoids reading predicted.at(0) out of range
        return predictedStructures;
    }

    int runStart = 0;
    // i == numAcronyms acts as a sentinel so that the last run is closed too
    // and spans up to the end of the prediction
    for (int i = 1; i <= numAcronyms; ++i) {
        const char runChar = predicted.at(runStart);
        if (i < numAcronyms && predicted.at(i) == runChar) {
            continue; // still inside the current run
        }
        if (runChar != emptyCoil) {
            SharedAnnotationData sd(new AnnotationData);
            sd->type = U2FeatureTypes::SeconadaryStructure;
            sd->name = annotationName;
            sd->location->regions.append(U2Region(runStart, i - runStart));
            sd->qualifiers.append(U2Qualifier(BioStruct3D::SecStructTypeQualifierName, getStructNameForCharTag(runChar)));
            predictedStructures.append(sd);
        }
        runStart = i;
    }

    return predictedStructures;
}
Пример #4
0
/**
 * Builds the annotation described by one already tokenized line.
 *
 * A line with an unexpected number of columns sets skipLine, adds a warning
 * to os and returns the still empty annotation. Otherwise the fields are
 * filled one by one; after each field skipLine is re-checked and the
 * partially filled annotation is returned as soon as it is set.
 *
 * @param lineValues column values of the current line
 * @return the annotation data (possibly incomplete when skipLine is set)
 */
SharedAnnotationData Peak2GeneFormatLoader::parseLine(const QStringList &lineValues) {
    SharedAnnotationData annotation(new AnnotationData);
    CHECK_EXT(lineValues.size() == COLUMNS_COUNT,
              skipLine = true; os.addWarning(QString("Incorrect columns count at line %1: expect %2, got %3")
                                             .arg(currentLineNumber).arg(COLUMNS_COUNT).arg(lineValues.size())),
              annotation);

    // chromosome
    annotation->qualifiers.append(U2Qualifier("chrom", getChromName(lineValues)));
    CHECK(!skipLine, annotation);

    // location
    annotation->location->regions.append(getRegion(lineValues));
    CHECK(!skipLine, annotation);

    // annotation name
    annotation->name = getPeakName(lineValues);
    CHECK(!skipLine, annotation);

    // remaining columns are stored as plain qualifiers
    annotation->qualifiers.append(U2Qualifier("score", getPeakScore(lineValues)));
    CHECK(!skipLine, annotation);

    annotation->qualifiers.append(U2Qualifier("NA", getNa(lineValues)));
    CHECK(!skipLine, annotation);

    annotation->qualifiers.append(U2Qualifier("Genes", getGenes(lineValues)));
    CHECK(!skipLine, annotation);

    annotation->qualifiers.append(U2Qualifier("Strand", getStrand(lineValues)));
    CHECK(!skipLine, annotation);

    annotation->qualifiers.append(U2Qualifier("TSS2pCenter", getTss2pCenter(lineValues)));
    CHECK(!skipLine, annotation);

    return annotation;
}
Пример #5
0
void EditQualifierDialog::accept() {
    if (ui->nameEdit->isReadOnly()) {
        QDialog::accept();
        return;
    }
    QString name = simplify(ui->nameEdit->text());
    QString val = simplify(ui->valueEdit->toPlainText());
    if (!Annotation::isValidQualifierName(name)) {
        QMessageBox::critical(this, tr("Error!"), tr("Illegal qualifier name"));
        return;
    }
    if (!Annotation::isValidQualifierValue(val)) {
        QMessageBox::critical(this, tr("Error!"), tr("Illegal qualifier value"));
        return;
    }
    q = U2Qualifier(name, val);
    QDialog::accept();
}
Пример #6
0
/**
 * Converts this search result into annotation data.
 *
 * With splitCircular set, a region that ends beyond seqLen is mapped back
 * onto the sequence: a region starting at or after seqLen is shifted back by
 * seqLen, a region crossing seqLen is split into the part up to seqLen and
 * the remainder starting at 0. In every other case the region is used as is.
 * The strand and a "mismatches" qualifier are always set.
 *
 * @param name          annotation name
 * @param splitCircular whether regions running past seqLen are remapped
 * @param seqLen        sequence length; must not be -1 when splitCircular is set
 * @return the annotation, or an empty SharedAnnotationData on a failed check
 */
SharedAnnotationData FindAlgorithmResult::toAnnotation(const QString &name, bool splitCircular, int seqLen) const {
    SAFE_POINT(!splitCircular || (seqLen != -1), "Sequence length is not set!", SharedAnnotationData());

    SharedAnnotationData annotation(new AnnotationData);
    annotation->name = name;

    const bool runsPastSequenceEnd = splitCircular && (region.endPos() > seqLen);
    if (!runsPastSequenceEnd) {
        annotation->location->regions << region;
    } else if (region.startPos >= seqLen) {
        // the whole region lies beyond the end: shift it back
        annotation->location->regions << U2Region(region.startPos - seqLen, region.length);
    } else {
        SAFE_POINT(region.startPos < seqLen, "Region is not correct", SharedAnnotationData());
        // the region crosses the end: part up to seqLen, then the rest from position 0
        annotation->location->regions << U2Region(region.startPos, seqLen - region.startPos);
        annotation->location->regions << U2Region(0, region.length - (seqLen - region.startPos));
    }

    annotation->setStrand(strand);
    annotation->qualifiers.append(U2Qualifier("mismatches", QString::number(err)));
    return annotation;
}
Пример #7
0
/**
 * Parses one feature annotation. The first line of the feature is expected to
 * be in cbuff already; following lines are read from io into the same buffer.
 *
 * From the first line: the key is taken from 10 characters at cbuff+5, the
 * start position from 5 characters at cbuff+15, the end position from
 * 5 characters at cbuff+22 and the description text from column 34 on.
 *
 * @param io             adapter the continuation lines are read from
 * @param cbuff          line buffer; holds the first feature line on entry
 * @param len            overwritten with the length of each line read here
 * @param READ_BUFF_SIZE capacity passed to the read calls
 * @param si             receives an error for an empty key/start/end or a bad line
 * @param offset         when non-zero, all regions are shifted by it
 * @param seqLen         unused
 * @return the parsed annotation, or an empty SharedAnnotationData when the
 *         key, start or end field is empty
 *
 * NOTE(review): the fixed-column reads of cbuff are not guarded by len here -
 * presumably the caller guarantees a long enough first line; confirm.
 */
SharedAnnotationData SwissProtPlainTextFormat::readAnnotation(IOAdapter* io, char* cbuff, int len, int READ_BUFF_SIZE, U2OpStatus& si, int offset, int seqLen){

    Q_UNUSED(seqLen);
    // 'a' is a plain alias of the object owned by the shared pointer 'f'
    AnnotationData* a = new AnnotationData();
    SharedAnnotationData f(a);
    QString key = QString::fromLatin1(cbuff+5, 10).trimmed();
    if (key.isEmpty()) {
        si.setError(EMBLGenbankAbstractDocument::tr("Annotation name is empty"));
        return SharedAnnotationData();
    }
    a->name = key;
    // these keys additionally get a group qualifier
    if(key == "STRAND" || key == "HELIX" || key == "TURN"){
        a->qualifiers.append(U2Qualifier(GBFeatureUtils::QUALIFIER_GROUP, "Secondary structure"));
    }
    QString start= QString::fromLatin1(cbuff+15, 5).trimmed();
    if(start.isEmpty()){
        si.setError(EMBLGenbankAbstractDocument::tr("Annotation start position is empty"));
        return SharedAnnotationData();
    }
    QString end= QString::fromLatin1(cbuff+22, 5).trimmed();
    if(end.isEmpty()){
        si.setError(EMBLGenbankAbstractDocument::tr("Annotation end position is empty"));
        return SharedAnnotationData();
    }
    a->location->reset();

    // NOTE(review): the toInt() conversions below are not checked for success
    if(key == "DISULFID" && start != end){
        // two single-position regions (start and end) combined with the Order operator
        a->location->op=U2LocationOperator_Order;
        U2Region reg1(start.toInt()-1,1);
        U2Region reg2(end.toInt()-1,1);
        a->location->regions.append(reg1);
        a->location->regions.append(reg2);
    }else{
        // single region: position start-1, length end-start+1
        U2Region reg(start.toInt()-1,end.toInt() - start.toInt()+1);
        a->location->regions.append(reg);
    }

    if (offset!=0) {
        U2Region::shift(offset, a->location->regions);
    }


    // description text: the first line of the buffer from column 34 on
    QString valQStr = QString::fromLatin1(cbuff).split(QRegExp("\\n")).first().mid(34);
    QString nameQStr = "Description";
    bool isDescription=true;

    const QByteArray& aminoQ = GBFeatureUtils::QUALIFIER_AMINO_STRAND;
    const QByteArray& nameQ = GBFeatureUtils::QUALIFIER_NAME;
    //here we have valid key and location;
    //reading qualifiers
    bool lineOk = true;
    // NOTE(review): when this loop ends because readUntil() returns <= 0, the
    // description collected in valQStr is never appended - confirm this is intended
    while ((len = io->readUntil(cbuff, READ_BUFF_SIZE, TextUtils::LINE_BREAKS, IOAdapter::Term_Include, &lineOk) ) > 0)  {
        // a line that is too short, has a non-space at K_COL or a different prefix
        // is not part of this feature: un-read it, store the description and stop
        // (the 'len == 0' test cannot be true here because of the loop condition)
        if (len == 0 || len < QN_COL+1 || cbuff[K_COL]!=' ' || cbuff[0]!=fPrefix[0] || cbuff[1]!=fPrefix[1]) {
            io->skip(-len);
            if(isDescription && !valQStr.isEmpty()){
                isDescription=false;
                a->qualifiers.append(U2Qualifier(nameQStr, valQStr));
            }
            break;
        }
        if (!lineOk) {
            si.setError(EMBLGenbankAbstractDocument::tr("Unexpected line format"));
            break;
        }
        //parse line
        if(cbuff[A_COL] != '/'){//continuation of the description
            valQStr.append(" ");
            valQStr.append(QString::fromLatin1(cbuff).split(QRegExp("\\n")).takeAt(0).mid(34));
        }else{
            for (; QN_COL < len && TextUtils::LINE_BREAKS[(uchar)cbuff[len-1]]; len--){}; //remove line breaks
            int flen = len + readMultilineQualifier(io, cbuff, READ_BUFF_SIZE-len, len == maxAnnotationLineLen, len, si);
            //now the whole feature is in cbuff
            int valStart = A_COL + 1;
            for (; valStart < flen && cbuff[valStart] != '='; valStart++){}; //find '=' and valStart
            if (valStart < flen) {
                valStart++; //skip '=' char
            }
            const QBitArray& WHITE_SPACES = TextUtils::WHITES;
            for (; valStart < flen && WHITE_SPACES[(uchar)cbuff[flen-1]]; flen--){}; //trim value
            // qualifier name: from QN_COL up to the character before valStart;
            // qualifier value: from valStart up to flen
            const char* qname = cbuff + QN_COL;
            int qnameLen = valStart - (QN_COL + 1);
            const char* qval = cbuff + valStart;
            int qvalLen = flen - valStart;
            if (qnameLen == aminoQ.length() && TextUtils::equals(qname, aminoQ.constData(), qnameLen)) {
                // recognized, but nothing is stored for it (handling is commented out)
                //a->aminoFrame = qvalLen == aminoQYes.length() && TextUtils::equals(qval, aminoQYes.constData(), qvalLen) ? TriState_Yes
                //             :  (qvalLen == aminoQNo.length()  && TextUtils::equals(qval, aminoQNo.constData(), qvalLen) ? TriState_No : TriState_Unknown);
            } else if (qnameLen == nameQ.length() && TextUtils::equals(qname, nameQ.constData(), qnameLen)) {
                // the name qualifier replaces the annotation name taken from the key
                a->name = QString::fromLocal8Bit(qval, qvalLen);
            } else {
                // these locals shadow the outer nameQStr/valQStr, which stay untouched
                QString nameQStr = QString::fromLocal8Bit(qname, qnameLen);
                QString valQStr = QString::fromLocal8Bit(qval, qvalLen);
                a->qualifiers.append(U2Qualifier(nameQStr, valQStr));
            }
        }
    }
    return f;
}
Пример #8
0
/**
 * Processes all pending input messages.
 *
 * For every message the sequence is loaded and a single "statistics"
 * annotation covering the whole sequence is built; it gets one qualifier per
 * enabled GC-content parameter, and the resulting annotation table is put to
 * the output.
 *
 * @return a FailTask when the sequence cannot be read, is not nucleic or no
 *         statistic is enabled; NULL otherwise
 */
Task* DNAStatWorker::tick() {
    while (input->hasMessage()) {
        Message msg = getMessageAndSetupScriptValues(input);
        if (msg.isEmpty()) {
            output->transit();
            return NULL;
        }

        // fetch the sequence object referenced by the message
        const QVariantMap data = msg.getData().toMap();
        SharedDbiDataHandler seqId = data.value(BaseSlots::DNA_SEQUENCE_SLOT().getId()).value<SharedDbiDataHandler>();
        QScopedPointer<U2SequenceObject> seqObj(StorageUtils::getSequenceObject(context->getDataStorage(), seqId));
        if (seqObj.isNull()) {
            return NULL;
        }

        U2OpStatusImpl os;
        DNASequence dna = seqObj->getWholeSequence(os);
        CHECK_OP(os, new FailTask(os.getError()));

        if (!dna.alphabet->isNucleic()) {
            return new FailTask(tr("Sequence must be nucleotide"));
        }

        // one annotation spanning the whole sequence collects all statistics
        SharedAnnotationData statAnn(new AnnotationData);
        statAnn->name = "statistics";
        statAnn->location->regions.append(U2Region(0, dna.seq.size()));

        if (actor->getParameter(GCCONTENT)->getAttributeValue<bool>(context)) {
            float total = calcGCContent(dna.seq);
            statAnn->qualifiers.append(U2Qualifier("gc-content", QString::number(total*100) + "%"));
        }

        if (actor->getParameter(GC1CONTENT)->getAttributeValue<bool>(context)) {
            float first = calcGC1Content(dna.seq);
            statAnn->qualifiers.append(U2Qualifier("gc1-content", QString::number(first*100) + "%"));
        }

        if (actor->getParameter(GC2CONTENT)->getAttributeValue<bool>(context)) {
            float second = calcGC2Content(dna.seq);
            statAnn->qualifiers.append(U2Qualifier("gc2-content", QString::number(second*100) + "%"));
        }

        if (actor->getParameter(GC3CONTENT)->getAttributeValue<bool>(context)) {
            float third = calcGC3Content(dna.seq);
            statAnn->qualifiers.append(U2Qualifier("gc3-content", QString::number(third*100) + "%"));
        }

        if (statAnn->qualifiers.isEmpty()) {
            return new FailTask(tr("No statistics was selected"));
        }

        // publish the annotation as an annotation table
        QList<SharedAnnotationData> annotations;
        annotations.append(statAnn);

        const SharedDbiDataHandler tableId = context->getDataStorage()->putAnnotationTable(annotations);
        const QVariant tableValue = qVariantFromValue<SharedDbiDataHandler>(tableId);
        output->put(Message(BaseTypes::ANNOTATION_TABLE_TYPE(), tableValue));
    }

    if (input->isEnded()) {
        setDone();
        output->setEnded();
    }
    return NULL;
}
void ReadCSVAsAnnotationsTask::run() {
    GUrl url(file);
    IOAdapterId ioId = IOAdapterUtils::url2io(url);
    IOAdapterFactory* iof = AppContext::getIOAdapterRegistry()->getIOAdapterFactoryById(ioId);
    QScopedPointer<IOAdapter> io(iof->createIOAdapter());

    if (!io->open(url, IOAdapterMode_Read)) {
        setError(L10N::errorOpeningFileRead(url));
        return;
    }

    QByteArray block(BUFF_SIZE, '\0');
    int blockLen = 0;
    QString text;
    while ((blockLen = io->readBlock(block.data(), BUFF_SIZE)) > 0) {
        int sizeBefore = text.length();
        QString line = QString::fromLocal8Bit(block.data(), blockLen);
        text.append(line);
        if (text.length() != sizeBefore + line.length()) {
            setError(L10N::errorReadingFile(url));
        }
        stateInfo.progress = io->getProgress();
    }
    int maxColumns = 0;
    QList<QStringList> parsedLines = parseLinesIntoTokens(text, config, maxColumns, stateInfo);

    foreach (const QStringList &lineTokens, parsedLines) {
        SharedAnnotationData a(new AnnotationData);
        bool ok = true;
        QString error;
        int startPos = -1;
        int startPosOffset = 0;
        int len = -1;
        int endPos = -1;
        QString groupName;

        for (int column = 0; column < lineTokens.size() && ok; column++) {
            if (column >= config.columns.size()) {
                break;
            }
            const ColumnConfig& columnConf = config.columns.at(column);
            const QString& token = lineTokens.at(column);
            switch(columnConf.role) {
                case ColumnRole_Qualifier:
                    assert(!columnConf.qualifierName.isEmpty());
                    a->qualifiers.append(U2Qualifier(columnConf.qualifierName, token));
                    break;
                case ColumnRole_Name:
                    a->name = token.isEmpty() ? config.defaultAnnotationName : token;
                    ok = Annotation::isValidAnnotationName(a->name);
                    if (!ok) {
                        error = tr("Invalid annotation name: '%1'").arg(a->name);
                    }
                    break;
                case ColumnRole_StartPos:
                    assert(startPos == -1);
                    startPos = token.toInt(&ok) - 1;
                    startPosOffset = columnConf.startPositionOffset;
                    if (!ok) {
                        error = tr("Start offset is not numeric: '%1'").arg(token);
                    }
                    break;
                case ColumnRole_EndPos:
                    assert(endPos == -1);
                    endPos = token.toInt(&ok) + (columnConf.endPositionIsInclusive ? 1 : 0) - 1;
                    if (!ok) {
                        error = tr("End offset is not numeric: '%1'").arg(token);
                    }
                    break;
                case ColumnRole_Length:
                    assert(len == -1);
                    len = token.toInt(&ok);
                    if (!ok) {
                        error = tr("Length is not numeric: '%1'").arg(token);
                    }
                    break;
                case ColumnRole_ComplMark:
                    a->location->strand = (columnConf.complementMark.isEmpty() || token == columnConf.complementMark)
                        ? U2Strand::Complementary : U2Strand::Direct;
                    break;
                case ColumnRole_Group:
                    groupName = token;
                    break;
                default:
                    assert(columnConf.role == ColumnRole_Ignore);
            }
        }

        //add annotation
        if (ok) {
            //set up default name
            if (a->name.isEmpty()) {
                a->name = config.defaultAnnotationName;
            }
            //set up location
            U2Region location;
            if (startPos != -1) {
                location.startPos = startPos + startPosOffset;
                if (endPos != -1) {
                    location.length = endPos - startPos;
                } else {
                    location.length = len;
                }
            } else {
                location.length = len;
                location.startPos = endPos - len;
            }
            if (location.length < 0) {
                location.startPos = location.startPos + location.length;
                location.length = - location.length;
            }
            if (location.startPos < 0 || location.startPos > location.endPos()) {
                algoLog.details(tr("Invalid location: start: %1  len: %2, in line :%3, ignoring")
                    .arg(QString::number(location.startPos)).arg(QString::number(location.length)).arg(lineTokens.join(config.splitToken)));
            } else {
                a->location->regions.append(location);
                result[groupName] << a;
            }
        } else {
            //TODO: make configurable to allow stop parsing on any error!
            algoLog.details(tr("Can't parse line: '%1', error = %2, ignoring").arg(lineTokens.join(config.splitToken)).arg(error));
        }
    }