/**
 * Constructs a row from a database row descriptor and a raw byte buffer.
 *
 * @param rowInDb  row descriptor stored as the initial database state of this row
 * @param rowName  name given to the row's DNASequence
 * @param rawData  bytes handed to MaDbiUtils::splitBytesToCharsAndGaps, which
 *                 fills the character buffer and the gap model used below
 * @param msaData  alignment this row is attached to (stored as-is)
 */
MultipleSequenceAlignmentRowData::MultipleSequenceAlignmentRowData(const U2MsaRow &rowInDb,
                                                                   const QString &rowName,
                                                                   const QByteArray &rawData,
                                                                   MultipleSequenceAlignmentData *msaData)
    : MultipleAlignmentRowData(),
      alignment(msaData),
      initialRowInDb(rowInDb)
{
    // Both outputs are filled by the split call from the raw buffer.
    U2MsaRowGapModel gaps;
    QByteArray chars;
    MaDbiUtils::splitBytesToCharsAndGaps(rawData, chars, gaps);

    sequence = DNASequence(rowName, chars);
    setGapModel(gaps);
}
/**
 * Reads FASTA records from `istr` and registers each of them in `dict`.
 *
 * A line starting with '>' opens a new record. Its name is the header text
 * after '>' up to (not including) the first space; if there is no space, or
 * the space immediately follows '>', the whole remainder of the line is used.
 * All subsequent lines up to the next header or the end of the stream are
 * concatenated into the record's sequence buffer.
 *
 * If `parseSeqOffset` is true and the name contains ':', the text after the
 * first ':' is parsed with atoll as a 1-based position; the stored offset is
 * that value minus one and the name is truncated before the ':'. Names
 * without ':' keep an offset of 0.
 *
 * A record is stored only when `dict[name]` still has an empty `name` field,
 * so the first record with a given name wins. Records with an empty name are
 * skipped. For every stored record a message is written to stderr.
 *
 * On an unrecoverable stream error (badbit) an error is printed to stderr and
 * the process exits with status 1.
 *
 * @param istr            input stream positioned at the start of FASTA data
 * @param dict            dictionary that receives the contigs
 * @param parseSeqOffset  whether to interpret "name:N" headers as described above
 */
void readFasta(istream& istr, SQDict& dict, bool parseSeqOffset) {
    std::string name;
    std::vector<char> buffer;

    while (true) {
        std::string s;
        // Test the result of getline itself rather than eof(): a final line
        // without a trailing newline sets eofbit but still delivers data, and
        // a stream in a failed (non-eof) state must end the loop, not spin.
        const bool gotLine = static_cast<bool>(std::getline(istr, s));
        if (istr.bad()) {
            std::cerr << "error reading FASTA file" << std::endl;
            std::exit(1);
        }

        const bool isHeader = gotLine && !s.empty() && s[0] == '>';
        if (!gotLine || isHeader) {
            if (name.length() > 0) {
                // add previous sequence
                long long int seqOffset = 0;
                if (parseSeqOffset) {
                    const std::string::size_type colon = name.find(':');
                    // Only split when a ':' is really present; otherwise the
                    // offset stays 0 and the name is left untouched.
                    if (colon != std::string::npos) {
                        seqOffset = std::atoll(name.c_str() + colon + 1) - 1;
                        name.erase(colon);
                    }
                }

                Contig& c = dict[name];
                if (c.name.length() == 0) {
                    c.name = name;
                    c.len = buffer.size();
                    c.idx = dict.size() - 1;
                    c.seq[0] = DNASequence(buffer.begin(), buffer.end());
                    c.seqOffset[0] = seqOffset;
                    std::cerr << "added contig [" << c.name << "] of length [" << c.len << "]"
                              << " with start offset [" << c.seqOffset[0] << "]" << std::endl;
                }
            }

            if (!gotLine) {
                break;
            }

            // Header line: the name ends at the first space, if there is one
            // after at least one name character.
            const std::string::size_type space = s.find(' ');
            if (space != std::string::npos && space > 1) {
                name = s.substr(1, space - 1);
            } else {
                name = s.substr(1);
            }
            buffer.clear();
        } else {
            buffer.insert(buffer.end(), s.begin(), s.end());
        }
    }
}
/**
 * Handles completion of the sequence generation subtask.
 *
 * If cfg.saveDoc is set, a new document of the configured format is created,
 * the generated sequences are added to it (as sequences or as an alignment,
 * depending on the object types the format supports) and a SaveDocumentTask
 * for it is stored in saveTask and returned. Otherwise the generated
 * sequences are read from the generation task's dbi and appended to `results`.
 *
 * @return tasks to be run next; empty when nothing has to be saved or when
 *         an error / failed safe-point check occurred
 */
QList<Task*> DNASequenceGeneratorTask::onGenerateTaskFinished( ) {
    QList<Task *> resultTasks;
    SAFE_POINT( generateTask->isFinished( ) && !generateTask->getStateInfo( ).isCoR( ),
        "Invalid task encountered", resultTasks );

    if ( cfg.saveDoc ) {
        // The IO adapter factory is only needed for document creation.
        IOAdapterFactory *iof = AppContext::getIOAdapterRegistry( )->getIOAdapterFactoryById(
            IOAdapterUtils::url2io( cfg.getOutUrlString( ) ) );
        SAFE_POINT( NULL != iof, "Invalid IO adapter factory encountered", resultTasks );

        DocumentFormat *format = AppContext::getDocumentFormatRegistry( )->getFormatById( cfg.getDocumentFormatId( ) );
        SAFE_POINT( NULL != format, "Invalid document format encountered", resultTasks );

        // Validate the format before the document is allocated, so that an
        // unsupported format does not leave an orphaned document behind.
        const QSet<QString> &supportedFormats = format->getSupportedObjectTypes( );
        const bool isSequenceFormat = supportedFormats.contains( GObjectTypes::SEQUENCE );
        SAFE_POINT( isSequenceFormat || supportedFormats.contains( GObjectTypes::MULTIPLE_SEQUENCE_ALIGNMENT ),
            "Unexpected format encountered", resultTasks );

        Document *doc = format->createNewLoadedDocument( iof, cfg.getOutUrlString( ), stateInfo );
        CHECK_OP( stateInfo, resultTasks );

        if ( isSequenceFormat ) {
            addSequencesToSeqDoc( doc );
        } else { // consider alignment format
            addSequencesToMsaDoc( doc );
        }

        saveTask = new SaveDocumentTask( doc, SaveDoc_Overwrite );
        resultTasks << saveTask;
    } else { // TODO: avoid high memory consumption here
        const DNAAlphabet *alp = cfg.getAlphabet( );
        SAFE_POINT( NULL != alp, "Generated sequence has invalid alphabet", resultTasks );

        const U2DbiRef dbiRef = generateTask->getDbiRef( );
        const QString baseSeqName = cfg.getSequenceName( );
        QList<U2Sequence> seqs = generateTask->getResults( );

        for ( int sequenceNum = 0, totalSeqCount = seqs.size( ); sequenceNum < totalSeqCount; ++sequenceNum ) {
            // A numeric suffix is appended only when several sequences were generated.
            const QString seqName = ( 1 < totalSeqCount )
                ? ( baseSeqName + " " + QString::number( sequenceNum + 1 ) )
                : baseSeqName;

            DbiConnection con( dbiRef, stateInfo );
            CHECK_OP( stateInfo, resultTasks );

            const QByteArray seqContent = con.dbi->getSequenceDbi( )->getSequenceData(
                seqs[sequenceNum].id, U2_REGION_MAX, stateInfo );
            // Do not store a sequence whose data could not be read.
            CHECK_OP( stateInfo, resultTasks );

            results << DNASequence( seqName, seqContent, alp );
        }
    }
    return resultTasks;
}