/**
 * Constructs a row from its database record and a raw byte representation.
 *
 * @param rowInDb  database row descriptor; stored as initialRowInDb
 * @param rowName  name given to the DNASequence built for this row
 * @param rawData  raw row bytes; split into characters and a gap model by
 *                 MaDbiUtils::splitBytesToCharsAndGaps
 * @param msaData  alignment this row is associated with; stored as a raw pointer
 *                 (NOTE(review): ownership is not visible here - confirm with callers)
 */
MultipleSequenceAlignmentRowData::MultipleSequenceAlignmentRowData(const U2MsaRow &rowInDb, const QString &rowName, const QByteArray &rawData, MultipleSequenceAlignmentData *msaData)
    : MultipleAlignmentRowData(),
      alignment(msaData),
      initialRowInDb(rowInDb)
{
    // Separate the raw bytes into the character data and the gap description.
    U2MsaRowGapModel rowGaps;
    QByteArray rowChars;
    MaDbiUtils::splitBytesToCharsAndGaps(rawData, rowChars, rowGaps);

    // The sequence is assigned first, then the gap model is applied.
    sequence = DNASequence(rowName, rowChars);
    setGapModel(rowGaps);
}
Esempio n. 2
0
/**
 * Reads FASTA records from a stream and registers each one as a contig.
 *
 * A line starting with '>' begins a new record. The record name is the text
 * after '>' up to the first space (or the rest of the line when there is no
 * space, or when the space immediately follows '>'). All following lines up
 * to the next header or the end of input are concatenated into the record's
 * sequence.
 *
 * @param istr            input stream positioned at the start of the FASTA data
 * @param dict            contig dictionary; a contig is filled in only when the
 *                        entry obtained via dict[name] still has an empty name,
 *                        so a repeated record name does not overwrite the
 *                        first one
 * @param parseSeqOffset  when true, a name of the form "prefix:N" is split at
 *                        the first ':'; the contig is named "prefix" and its
 *                        start offset is N - 1. Names without ':' keep
 *                        offset 0.
 *
 * Terminates the process with exit(1) if the stream reports an unrecoverable
 * read error (badbit).
 */
void
readFasta(istream& istr, SQDict& dict, bool parseSeqOffset)
{
  string name;
  vector<char> buffer;

  while (true) {
    string s;
    // getline() may deliver a final line that lacks a trailing newline and
    // set eofbit in the same call, so end of input is detected by the
    // extraction failing, not by eof(); otherwise that last line is lost.
    const bool gotLine = static_cast<bool>(getline(istr, s));
    if (istr.bad()) {
      cerr << "error reading FASTA file" << endl;
      exit(1);
    }

    const bool isHeader = gotLine && !s.empty() && s[0] == '>';
    if (!gotLine || isHeader) {
      if (name.length() > 0) {
        // add previous sequence
        long long int seqOffset = 0;
        if (parseSeqOffset) {
          // Compare against npos explicitly: storing the result of find() in
          // an int turns "not found" into -1, which would pass a "< length"
          // test and yield a bogus offset for names without ':'.
          const string::size_type colon = name.find(':');
          if (colon != string::npos) {
            seqOffset = atoll(name.c_str() + colon + 1) - 1;
            name = name.substr(0, colon);
          }
        }
        Contig& c = dict[name];
        if (c.name.length() == 0) {
          c.name = name;
          c.len = buffer.size();
          c.idx = dict.size() - 1;
          c.seq[0] = DNASequence(buffer.begin(), buffer.end());
          c.seqOffset[0] = seqOffset;
          cerr << "added contig [" << c.name << "] of length [" << c.len << "]"
               << " with start offset [" << c.seqOffset[0] << "]" << endl;
        }
      }
      if (!gotLine)
        break;

      // Header line: take the text between '>' and the first space. When the
      // space directly follows '>' (or there is none), keep the whole rest of
      // the line, as before.
      const string::size_type space = s.find(' ');
      if (space != string::npos && space > 1) {
        name = s.substr(1, space - 1);
      } else {
        name = s.substr(1);
      }
      buffer.clear();
    } else {
      buffer.insert(buffer.end(), s.begin(), s.end());
    }
  }
}
Esempio n. 3
0
/**
 * Handles completion of the sequence generation subtask.
 *
 * When cfg.saveDoc is set, a new document of the configured format is created,
 * the generated sequences are added to it (as sequences or as an alignment,
 * depending on which object types the format supports), and a
 * SaveDocumentTask for it is scheduled. Otherwise the data of every generated
 * sequence is read from the database and appended to `results`.
 *
 * @return subtasks to run next: the save task when a document is saved,
 *         an empty list otherwise or when any check fails.
 */
QList<Task*> DNASequenceGeneratorTask::onGenerateTaskFinished( ) {
    QList<Task *> resultTasks;
    SAFE_POINT( generateTask->isFinished( ) && !generateTask->getStateInfo( ).isCoR( ),
        "Invalid task encountered", resultTasks );
    IOAdapterFactory *iof = AppContext::getIOAdapterRegistry( )->getIOAdapterFactoryById(
        IOAdapterUtils::url2io( cfg.getOutUrlString( ) ) );

    if (cfg.saveDoc) {
        DocumentFormat *format = AppContext::getDocumentFormatRegistry()->getFormatById(cfg.getDocumentFormatId());
        // The format pointer is dereferenced below, so reject a failed lookup first.
        SAFE_POINT( NULL != format, "Invalid document format encountered", resultTasks );

        // Validate the supported object types before creating the document, so
        // that a failed check does not return with a freshly created document
        // that nobody has taken over.
        const QSet<QString> &supportedFormats = format->getSupportedObjectTypes( );
        const bool isSequenceFormat = supportedFormats.contains( GObjectTypes::SEQUENCE );
        SAFE_POINT( isSequenceFormat
            || supportedFormats.contains( GObjectTypes::MULTIPLE_SEQUENCE_ALIGNMENT ),
            "Unexpected format encountered", resultTasks );

        Document* doc = format->createNewLoadedDocument(iof, cfg.getOutUrlString(), stateInfo);
        CHECK_OP( stateInfo, resultTasks );
        if ( isSequenceFormat ) {
            addSequencesToSeqDoc( doc );
        } else { // alignment format, verified above
            addSequencesToMsaDoc( doc );
        }
        saveTask = new SaveDocumentTask(doc, SaveDoc_Overwrite);
        resultTasks << saveTask;
    } else { // TODO: avoid high memory consumption here
        const DNAAlphabet *alp = cfg.getAlphabet( );
        SAFE_POINT( NULL != alp, "Generated sequence has invalid alphabet", resultTasks );
        const U2DbiRef dbiRef = generateTask->getDbiRef( );
        const QString baseSeqName = cfg.getSequenceName( );
        QList<U2Sequence> seqs = generateTask->getResults( );

        for ( int sequenceNum = 0, totalSeqCount = seqs.size( ); sequenceNum < totalSeqCount;
            ++sequenceNum )
        {
            // A 1-based index is appended to the name only when there are several sequences.
            const QString seqName = ( 1 < totalSeqCount )
                ? ( baseSeqName + " " + QString::number( sequenceNum + 1 ) ) : baseSeqName;

            DbiConnection con( dbiRef, stateInfo );
            CHECK_OP( stateInfo, resultTasks );
            const QByteArray seqContent = con.dbi->getSequenceDbi( )->getSequenceData(
                seqs[sequenceNum].id, U2_REGION_MAX, stateInfo );
            // Do not publish a sequence whose data could not be read.
            CHECK_OP( stateInfo, resultTasks );
            results << DNASequence( seqName, seqContent, alp );
        }
    }
    return resultTasks;
}