/**
 * Builds repeat-region annotations for the given tandems.
 *
 * For every tandem one annotation is created per accepted offset; each annotation
 * gets one region of length tan.repeatLen per repeat unit, plus the qualifiers
 * "num_of_repeats", "repeat_length" and "whole_length". Results are accumulated in `res`.
 *
 * @param tandems        tandems to convert
 * @param seqStart       start position added to every region; s.reportSeqShift is added to it first
 * @param showOverlapped when true, the annotation is rebuilt for every offset in
 *                       [0, tan.size % tan.repeatLen]; when false only offset 0 is used
 */
QList<SharedAnnotationData> FindTandemsToAnnotationsTask::importTandemAnnotations(const QList<Tandem>& tandems, qint64 seqStart, const bool showOverlapped) {
    seqStart += s.reportSeqShift;
    QList<SharedAnnotationData> res;
    foreach(const Tandem& tan, tandems) {
        unsigned offset = 0;
        // Number of leftover positions that do not fill a whole repeat unit;
        // used as the upper bound for the shifted variants of the annotation.
        const unsigned maxOffset = tan.size % tan.repeatLen;
        do{
            SharedAnnotationData ad(new AnnotationData());
            ad->type = U2FeatureTypes::RepeatRegion;
            ad->name = annName;
            // Both values are narrowed to quint32 although seqStart is qint64.
            const quint32 tandemEnd = tan.offset+tan.size+seqStart;
            quint32 pos = tan.offset+seqStart+offset;
            // One region per complete repeat unit that still fits before tandemEnd.
            for(; pos<=tandemEnd-tan.repeatLen; pos+=tan.repeatLen){
                ad->location->regions << U2Region(pos, tan.repeatLen);
            }
            if (ad->location->isEmpty()){
                // NOTE(review): `continue` in a do-while jumps straight to the loop condition,
                // so `offset++` below is skipped. With showOverlapped == true the condition
                // stays true and the same offset is retried indefinitely — confirm this path
                // cannot be reached (it requires no repeat unit to fit) or increment first.
                continue;
            }
            ad->qualifiers.append(U2Qualifier("num_of_repeats", QString::number(tan.size / tan.repeatLen)));
            ad->qualifiers.append(U2Qualifier("repeat_length", QString::number(tan.repeatLen)));
            ad->qualifiers.append(U2Qualifier("whole_length", QString::number(tan.size)));
            U1AnnotationUtils::addDescriptionQualifier(ad, annDescription);
            res.append(ad);
            offset++;
        }while(showOverlapped && offset<=maxOffset);
    }
/**
 * Creates the "Contigs" annotation table that records where each contig lies
 * inside a merged sequence.
 *
 * @param mergedSequenceRef reference to the merged sequence; when valid, the new table
 *                          is related to it with ObjectRole_Sequence
 * @param dbiRef            database the table object is created in
 * @param contigNames       contig names, one annotation is produced per name
 * @param mergedMapping     region of each contig; indexed in parallel with contigNames
 * @param hints             only DocumentFormat::DBI_FOLDER_HINT is read
 *                          (U2ObjectDbi::ROOT_FOLDER is used when it is absent)
 * @return the newly allocated annotation table object
 */
AnnotationTableObject * DocumentFormatUtils::addAnnotationsForMergedU2Sequence(const GObjectReference& mergedSequenceRef, const U2DbiRef& dbiRef, const QStringList &contigNames, const QVector<U2Region> &mergedMapping, const QVariantMap& hints) {
    // Forward only the destination folder to the new object.
    QVariantMap tableHints;
    tableHints.insert(DocumentFormat::DBI_FOLDER_HINT,
                      hints.value(DocumentFormat::DBI_FOLDER_HINT, U2ObjectDbi::ROOT_FOLDER));

    AnnotationTableObject *contigTable = new AnnotationTableObject("Contigs", dbiRef, tableHints);

    // Link the table to the merged sequence when the reference is usable.
    if (mergedSequenceRef.isValid()) {
        contigTable->addObjectRelation(GObjectRelation(mergedSequenceRef, ObjectRole_Sequence));
    }

    // Store the mapping as one "contig" annotation per contig name.
    QList<SharedAnnotationData> contigAnnotations;
    const int contigCount = contigNames.size();
    for (int idx = 0; idx < contigCount; ++idx) {
        SharedAnnotationData contig(new AnnotationData);
        contig->name = QString("contig");
        contig->location->regions.append(mergedMapping[idx]);
        contig->qualifiers.append(U2Qualifier("name", contigNames.at(idx)));
        contig->qualifiers.append(U2Qualifier("number", QString("%1").arg(idx)));
        contigAnnotations.append(contig);
    }
    contigTable->addAnnotations(contigAnnotations);

    return contigTable;
}
/**
 * Converts a per-position secondary-structure prediction into annotations.
 *
 * The prediction is scanned as a sequence of runs of equal characters. Every run whose
 * character is not the coil tag 'C' becomes one annotation covering exactly that run,
 * with a BioStruct3D::SecStructTypeQualifierName qualifier set to
 * getStructNameForCharTag(tag).
 *
 * @param predicted      one prediction character per position; may be empty
 * @param annotationName name assigned to every produced annotation
 * @return annotations in order of appearance; empty when `predicted` is empty or
 *         consists of coil characters only
 */
QList<SharedAnnotationData> SecStructPredictUtils::saveAlgorithmResultsAsAnnotations(const QByteArray& predicted, const QString& annotationName) {
    const char emptyCoil = 'C';
    const int numAcronyms = predicted.length();
    QList<SharedAnnotationData> predictedStructures;

    // Walking run by run (instead of comparing neighbours and special-casing the last
    // index) keeps the final run complete: it is neither cut one position short nor
    // dropped when it is a single character. An empty input simply skips the loop,
    // so predicted.at() is never called with an invalid index.
    int runStart = 0;
    while (runStart < numAcronyms) {
        const char runTag = predicted.at(runStart);

        // Find the first position past the current run.
        int runEnd = runStart + 1;
        while (runEnd < numAcronyms && predicted.at(runEnd) == runTag) {
            ++runEnd;
        }

        // Coil runs are not annotated.
        if (runTag != emptyCoil) {
            SharedAnnotationData sd(new AnnotationData);
            sd->type = U2FeatureTypes::SeconadaryStructure;
            sd->name = annotationName;
            sd->location->regions.append(U2Region(runStart, runEnd - runStart));
            sd->qualifiers.append(U2Qualifier(BioStruct3D::SecStructTypeQualifierName, getStructNameForCharTag(runTag)));
            predictedStructures.append(sd);
        }

        runStart = runEnd;
    }

    return predictedStructures;
}
/**
 * Converts the columns of one input line into an annotation.
 *
 * A line with a column count other than COLUMNS_COUNT sets `skipLine`, adds a warning
 * to `os` and returns the (still empty) annotation. After every field is extracted
 * `skipLine` is re-checked and parsing stops at the first field that set it, so the
 * returned annotation may be only partially filled; callers should consult `skipLine`.
 *
 * @param lineValues column values of the current line
 * @return the annotation built so far
 */
SharedAnnotationData Peak2GeneFormatLoader::parseLine(const QStringList &lineValues) {
    SharedAnnotationData annotation(new AnnotationData);

    // Reject lines with an unexpected number of columns up front.
    CHECK_EXT(lineValues.size() == COLUMNS_COUNT,
              skipLine = true;
              os.addWarning(QString("Incorrect columns count at line %1: expect %2, got %3")
                            .arg(currentLineNumber).arg(COLUMNS_COUNT).arg(lineValues.size())),
              annotation);

    // Fields are extracted in a fixed order; each step bails out as soon as skipLine is set.
    annotation->qualifiers.append(U2Qualifier("chrom", getChromName(lineValues)));
    CHECK(!skipLine, annotation);

    annotation->location->regions << getRegion(lineValues);
    CHECK(!skipLine, annotation);

    annotation->name = getPeakName(lineValues);
    CHECK(!skipLine, annotation);

    annotation->qualifiers.append(U2Qualifier("score", getPeakScore(lineValues)));
    CHECK(!skipLine, annotation);

    annotation->qualifiers.append(U2Qualifier("NA", getNa(lineValues)));
    CHECK(!skipLine, annotation);

    annotation->qualifiers.append(U2Qualifier("Genes", getGenes(lineValues)));
    CHECK(!skipLine, annotation);

    annotation->qualifiers.append(U2Qualifier("Strand", getStrand(lineValues)));
    CHECK(!skipLine, annotation);

    annotation->qualifiers.append(U2Qualifier("TSS2pCenter", getTss2pCenter(lineValues)));
    CHECK(!skipLine, annotation);

    return annotation;
}
void EditQualifierDialog::accept() { if (ui->nameEdit->isReadOnly()) { QDialog::accept(); return; } QString name = simplify(ui->nameEdit->text()); QString val = simplify(ui->valueEdit->toPlainText()); if (!Annotation::isValidQualifierName(name)) { QMessageBox::critical(this, tr("Error!"), tr("Illegal qualifier name")); return; } if (!Annotation::isValidQualifierValue(val)) { QMessageBox::critical(this, tr("Error!"), tr("Illegal qualifier value")); return; } q = U2Qualifier(name, val); QDialog::accept(); }
/**
 * Converts this search result into an annotation.
 *
 * @param name          name of the produced annotation
 * @param splitCircular when true, a region that ends past seqLen is mapped back onto
 *                      the sequence: shifted by seqLen if it starts at or after seqLen,
 *                      otherwise split into a tail part and a part starting at 0
 * @param seqLen        sequence length; must not be -1 when splitCircular is true
 * @return the annotation with the result strand and a "mismatches" qualifier;
 *         an empty SharedAnnotationData when a safe-point check fails
 */
SharedAnnotationData FindAlgorithmResult::toAnnotation(const QString &name, bool splitCircular, int seqLen) const {
    SAFE_POINT(!splitCircular || (seqLen != -1), "Sequence length is not set!", SharedAnnotationData());

    SharedAnnotationData annotation(new AnnotationData);
    annotation->name = name;

    const bool crossesSequenceEnd = splitCircular && (region.endPos() > seqLen);
    if (!crossesSequenceEnd) {
        // Plain case: the region is used as is.
        annotation->location->regions.append(region);
    } else if (region.startPos >= seqLen) {
        // The whole region lies beyond the end: move it back by one sequence length.
        annotation->location->regions.append(U2Region(region.startPos - seqLen, region.length));
    } else {
        SAFE_POINT(region.startPos < seqLen, "Region is not correct", SharedAnnotationData());
        // The region wraps around: part up to the sequence end, then the rest from 0.
        const qint64 tailLength = seqLen - region.startPos;
        annotation->location->regions.append(U2Region(region.startPos, tailLength));
        annotation->location->regions.append(U2Region(0, region.length - tailLength));
    }

    annotation->setStrand(strand);
    annotation->qualifiers.append(U2Qualifier("mismatches", QString::number(err)));
    return annotation;
}
/**
 * Parses one annotation record: the first line is expected to be in `cbuff` already,
 * following lines are pulled from `io` until a line that does not belong to the record.
 *
 * Fixed offsets read from the first line: key at cbuff+5 (10 chars), start at cbuff+15
 * (5 chars), end at cbuff+22 (5 chars), description text from column 34 up to the first
 * newline. NOTE(review): the buffer is assumed to be long enough for these offsets —
 * no length check is made here; confirm the caller guarantees it.
 *
 * @param io             adapter the following lines are read from
 * @param cbuff          line buffer; holds the first line on entry and is overwritten by every read
 * @param len            overwritten by each readUntil() result before it is first read
 * @param READ_BUFF_SIZE size passed to readUntil() for `cbuff`
 * @param si             receives an error for an empty key/start/end or an unexpected line format
 * @param offset         when non-zero, all regions are shifted by this amount
 * @param seqLen         unused
 * @return the parsed annotation; a default-constructed SharedAnnotationData when the key,
 *         start or end is empty. After an "Unexpected line format" error the partially
 *         filled annotation is still returned.
 */
SharedAnnotationData SwissProtPlainTextFormat::readAnnotation(IOAdapter* io, char* cbuff, int len, int READ_BUFF_SIZE, U2OpStatus& si, int offset, int seqLen){
    Q_UNUSED(seqLen);
    // `f` shares ownership of `a`; `a` is kept as a raw alias for convenient field access.
    AnnotationData* a = new AnnotationData();
    SharedAnnotationData f(a);
    QString key = QString::fromLatin1(cbuff+5, 10).trimmed();
    if (key.isEmpty()) {
        si.setError(EMBLGenbankAbstractDocument::tr("Annotation name is empty"));
        return SharedAnnotationData();
    }
    a->name = key;
    // These three keys are additionally tagged with a "Secondary structure" group qualifier.
    if(key == "STRAND" || key == "HELIX" || key == "TURN"){
        a->qualifiers.append(U2Qualifier(GBFeatureUtils::QUALIFIER_GROUP, "Secondary structure"));
    }
    QString start= QString::fromLatin1(cbuff+15, 5).trimmed();
    if(start.isEmpty()){
        si.setError(EMBLGenbankAbstractDocument::tr("Annotation start position is empty"));
        return SharedAnnotationData();
    }
    QString end= QString::fromLatin1(cbuff+22, 5).trimmed();
    if(end.isEmpty()){
        si.setError(EMBLGenbankAbstractDocument::tr("Annotation end position is empty"));
        return SharedAnnotationData();
    }
    a->location->reset();
    // The parsed positions are decremented by one (presumably 1-based file coordinates
    // to 0-based regions — confirm). toInt() results are not validated.
    if(key == "DISULFID" && start != end){
        // Two distinct positions: stored as two single-position regions joined with "order".
        a->location->op=U2LocationOperator_Order;
        U2Region reg1(start.toInt()-1,1);
        U2Region reg2(end.toInt()-1,1);
        a->location->regions.append(reg1);
        a->location->regions.append(reg2);
    }else{
        // Single region covering start..end inclusive.
        U2Region reg(start.toInt()-1,end.toInt() - start.toInt()+1);
        a->location->regions.append(reg);
    }
    if (offset!=0) {
        U2Region::shift(offset, a->location->regions);
    }
    // Description text of the first line: everything from column 34 up to the first newline.
    QString valQStr = QString::fromLatin1(cbuff).split(QRegExp("\\n")).first().mid(34);
    QString nameQStr = "Description";
    bool isDescription=true;
    const QByteArray& aminoQ = GBFeatureUtils::QUALIFIER_AMINO_STRAND;
    const QByteArray& nameQ = GBFeatureUtils::QUALIFIER_NAME;
    //here we have valid key and location;
    //reading qualifiers
    bool lineOk = true;
    while ((len = io->readUntil(cbuff, READ_BUFF_SIZE, TextUtils::LINE_BREAKS, IOAdapter::Term_Include, &lineOk) ) > 0) {
        // A line that is too short, has a non-blank at K_COL or does not start with fPrefix
        // ends this record (QN_COL, K_COL, A_COL and fPrefix are defined outside this function).
        if (len == 0 || len < QN_COL+1 || cbuff[K_COL]!=' ' || cbuff[0]!=fPrefix[0] || cbuff[1]!=fPrefix[1]) {
            // presumably rewinds the stream so the line can be re-read by the caller — confirm
            io->skip(-len);
            // NOTE(review): this is the only place the accumulated description is stored;
            // if the loop ends because readUntil() returns <= 0, or via the lineOk error
            // below, the description is not appended — confirm this is intended.
            if(isDescription && !valQStr.isEmpty()){
                isDescription=false;
                a->qualifiers.append(U2Qualifier(nameQStr, valQStr));
            }
            break;
        }
        if (!lineOk) {
            si.setError(EMBLGenbankAbstractDocument::tr("Unexpected line format"));
            break;
        }
        //parse line
        if(cbuff[A_COL] != '/'){//continue of description
            // Continuation text is joined to the description with a single space.
            valQStr.append(" ");
            valQStr.append(QString::fromLatin1(cbuff).split(QRegExp("\\n")).takeAt(0).mid(34));
        }else{
            for (; QN_COL < len && TextUtils::LINE_BREAKS[(uchar)cbuff[len-1]]; len--){}; //remove line breaks
            // flen = length of this line plus whatever readMultilineQualifier() reports as added.
            int flen = len + readMultilineQualifier(io, cbuff, READ_BUFF_SIZE-len, len == maxAnnotationLineLen, len, si);
            //now the whole feature is in cbuff
            int valStart = A_COL + 1;
            for (; valStart < flen && cbuff[valStart] != '='; valStart++){}; //find '=' and valStart
            if (valStart < flen) {
                valStart++; //skip '=' char
            }
            const QBitArray& WHITE_SPACES = TextUtils::WHITES;
            for (; valStart < flen && WHITE_SPACES[(uchar)cbuff[flen-1]]; flen--){}; //trim value
            // Qualifier name: from QN_COL up to (not including) the '='; value: after the '='.
            const char* qname = cbuff + QN_COL;
            int qnameLen = valStart - (QN_COL + 1);
            const char* qval = cbuff + valStart;
            int qvalLen = flen - valStart;
            if (qnameLen == aminoQ.length() && TextUtils::equals(qname, aminoQ.constData(), qnameLen)) {
                // The amino-strand qualifier is recognised but intentionally ignored
                // (the handling below is disabled).
                //a->aminoFrame = qvalLen == aminoQYes.length() && TextUtils::equals(qval, aminoQYes.constData(), qvalLen) ? TriState_Yes
                //             : (qvalLen == aminoQNo.length() && TextUtils::equals(qval, aminoQNo.constData(), qvalLen) ? TriState_No : TriState_Unknown);
            } else if (qnameLen == nameQ.length() && TextUtils::equals(qname, nameQ.constData(), qnameLen)) {
                // The name qualifier overrides the annotation name taken from the key.
                a->name = QString::fromLocal8Bit(qval, qvalLen);
            } else {
                // These locals shadow the outer nameQStr/valQStr; the description state is untouched.
                QString nameQStr = QString::fromLocal8Bit(qname, qnameLen);
                QString valQStr = QString::fromLocal8Bit(qval, qvalLen);
                a->qualifiers.append(U2Qualifier(nameQStr, valQStr));
            }
        }
    }
    return f;
}
/**
 * Processes all pending input messages.
 *
 * For every message the sequence is loaded, checked to be nucleic, and a single
 * "statistics" annotation spanning the whole sequence is built. Each enabled parameter
 * (GCCONTENT, GC1CONTENT, GC2CONTENT, GC3CONTENT) contributes one percentage qualifier.
 * The annotation is stored as an annotation table and sent to the output port.
 *
 * @return a FailTask when the sequence cannot be read, is not nucleic, or no statistic
 *         is enabled; NULL otherwise (including the empty-message and missing-sequence cases)
 */
Task* DNAStatWorker::tick() {
    while (input->hasMessage()) {
        Message inputMessage = getMessageAndSetupScriptValues(input);
        if (inputMessage.isEmpty()) {
            output->transit();
            return NULL;
        }

        // Resolve the sequence object referenced by the message.
        const QVariantMap messageData = inputMessage.getData().toMap();
        SharedDbiDataHandler seqId = messageData.value(BaseSlots::DNA_SEQUENCE_SLOT().getId()).value<SharedDbiDataHandler>();
        QScopedPointer<U2SequenceObject> seqObj(StorageUtils::getSequenceObject(context->getDataStorage(), seqId));
        if (seqObj.isNull()) {
            return NULL;
        }

        U2OpStatusImpl os;
        DNASequence dna = seqObj->getWholeSequence(os);
        CHECK_OP(os, new FailTask(os.getError()));

        if(!dna.alphabet->isNucleic()) {
            return new FailTask(tr("Sequence must be nucleotide"));
        }

        // One annotation over the whole sequence carries all requested statistics.
        SharedAnnotationData statistics(new AnnotationData);
        statistics->name = "statistics";
        statistics->location->regions.append(U2Region( 0, dna.seq.size()));

        if (actor->getParameter(GCCONTENT)->getAttributeValue<bool>(context)) {
            float gcContent = calcGCContent(dna.seq);
            statistics->qualifiers.append(U2Qualifier("gc-content", QString::number(gcContent*100) + "%"));
        }
        if (actor->getParameter(GC1CONTENT)->getAttributeValue<bool>(context)) {
            float gc1Content = calcGC1Content(dna.seq);
            statistics->qualifiers.append(U2Qualifier("gc1-content", QString::number(gc1Content*100) + "%"));
        }
        if (actor->getParameter(GC2CONTENT)->getAttributeValue<bool>(context)) {
            float gc2Content = calcGC2Content(dna.seq);
            statistics->qualifiers.append(U2Qualifier("gc2-content", QString::number(gc2Content*100) + "%"));
        }
        if (actor->getParameter(GC3CONTENT)->getAttributeValue<bool>(context)) {
            float gc3Content = calcGC3Content(dna.seq);
            statistics->qualifiers.append(U2Qualifier("gc3-content", QString::number(gc3Content*100) + "%"));
        }

        // Nothing was enabled: report it instead of emitting an empty annotation.
        if (statistics->qualifiers.isEmpty()) {
            return new FailTask(tr("No statistics was selected"));
        }

        QList<SharedAnnotationData> annotations;
        annotations.append(statistics);

        const SharedDbiDataHandler tableId = context->getDataStorage()->putAnnotationTable(annotations);
        const QVariant tableValue = qVariantFromValue<SharedDbiDataHandler>(tableId);
        output->put(Message(BaseTypes::ANNOTATION_TABLE_TYPE(), tableValue));
    }

    // Propagate end-of-stream once the input is exhausted.
    if (input->isEnded()) {
        setDone();
        output->setEnded();
    }
    return NULL;
}
void ReadCSVAsAnnotationsTask::run() { GUrl url(file); IOAdapterId ioId = IOAdapterUtils::url2io(url); IOAdapterFactory* iof = AppContext::getIOAdapterRegistry()->getIOAdapterFactoryById(ioId); QScopedPointer<IOAdapter> io(iof->createIOAdapter()); if (!io->open(url, IOAdapterMode_Read)) { setError(L10N::errorOpeningFileRead(url)); return; } QByteArray block(BUFF_SIZE, '\0'); int blockLen = 0; QString text; while ((blockLen = io->readBlock(block.data(), BUFF_SIZE)) > 0) { int sizeBefore = text.length(); QString line = QString::fromLocal8Bit(block.data(), blockLen); text.append(line); if (text.length() != sizeBefore + line.length()) { setError(L10N::errorReadingFile(url)); } stateInfo.progress = io->getProgress(); } int maxColumns = 0; QList<QStringList> parsedLines = parseLinesIntoTokens(text, config, maxColumns, stateInfo); foreach (const QStringList &lineTokens, parsedLines) { SharedAnnotationData a(new AnnotationData); bool ok = true; QString error; int startPos = -1; int startPosOffset = 0; int len = -1; int endPos = -1; QString groupName; for (int column = 0; column < lineTokens.size() && ok; column++) { if (column >= config.columns.size()) { break; } const ColumnConfig& columnConf = config.columns.at(column); const QString& token = lineTokens.at(column); switch(columnConf.role) { case ColumnRole_Qualifier: assert(!columnConf.qualifierName.isEmpty()); a->qualifiers.append(U2Qualifier(columnConf.qualifierName, token)); break; case ColumnRole_Name: a->name = token.isEmpty() ? config.defaultAnnotationName : token; ok = Annotation::isValidAnnotationName(a->name); if (!ok) { error = tr("Invalid annotation name: '%1'").arg(a->name); } break; case ColumnRole_StartPos: assert(startPos == -1); startPos = token.toInt(&ok) - 1; startPosOffset = columnConf.startPositionOffset; if (!ok) { error = tr("Start offset is not numeric: '%1'").arg(token); } break; case ColumnRole_EndPos: assert(endPos == -1); endPos = token.toInt(&ok) + (columnConf.endPositionIsInclusive ? 
1 : 0) - 1; if (!ok) { error = tr("End offset is not numeric: '%1'").arg(token); } break; case ColumnRole_Length: assert(len == -1); len = token.toInt(&ok); if (!ok) { error = tr("Length is not numeric: '%1'").arg(token); } break; case ColumnRole_ComplMark: a->location->strand = (columnConf.complementMark.isEmpty() || token == columnConf.complementMark) ? U2Strand::Complementary : U2Strand::Direct; break; case ColumnRole_Group: groupName = token; break; default: assert(columnConf.role == ColumnRole_Ignore); } } //add annotation if (ok) { //set up default name if (a->name.isEmpty()) { a->name = config.defaultAnnotationName; } //set up location U2Region location; if (startPos != -1) { location.startPos = startPos + startPosOffset; if (endPos != -1) { location.length = endPos - startPos; } else { location.length = len; } } else { location.length = len; location.startPos = endPos - len; } if (location.length < 0) { location.startPos = location.startPos + location.length; location.length = - location.length; } if (location.startPos < 0 || location.startPos > location.endPos()) { algoLog.details(tr("Invalid location: start: %1 len: %2, in line :%3, ignoring") .arg(QString::number(location.startPos)).arg(QString::number(location.length)).arg(lineTokens.join(config.splitToken))); } else { a->location->regions.append(location); result[groupName] << a; } } else { //TODO: make configurable to allow stop parsing on any error! algoLog.details(tr("Can't parse line: '%1', error = %2, ignoring").arg(lineTokens.join(config.splitToken)).arg(error)); } }