Exemplo n.º 1
0
// Program entry point.
//
// Repeatedly refills the shared buffer through readBuff() until `eof` is set
// or readBuff() returns a non-positive value. For every refill:
//   * if findEndOfString() reports -1 (no terminator found), the buffer is
//     discarded with clean() and the next terminated string is marked to be
//     skipped (presumably it is the tail of the over-long string -- the
//     helpers are defined outside this block, confirm against them);
//   * otherwise every terminated string is handed to printReverse(0, end),
//     then dropped from the buffer with normalize(end + 1) while the global
//     `size` is reduced by the same amount.
//
// Note: the parameter names are swapped with respect to the usual
// argc/argv convention; neither parameter is used.
// Always returns 0.
int main(int argv, char* argc[]){
	// True while the next terminated string has to be dropped instead of printed.
	bool skipNext = false;

	while(!eof && readBuff() > 0){
		if(findEndOfString() == -1){
			// No terminator in the current buffer contents: throw them away.
			skipNext = true;
			clean();
			continue;
		}

		// Consume every terminated string currently available in the buffer.
		for(int end = findEndOfString(); end != -1; end = findEndOfString()){
			if(skipNext){
				skipNext = false;
			} else {
				printReverse(0, end);
			}
			// Remove the processed string (terminator included) from the buffer.
			normalize(end + 1);
			size -= end + 1;
		}
	}
	return 0;
}
Exemplo n.º 2
0
// Reads the input line by line through `io` and dispatches every line to the
// parser that matches its record prefix (HEADER, COMPND, SEQRES, HELIX/SHEET/TURN,
// ATOM/HETATM, TER, SPLIT, MODEL, ENDMDL). Lines with any other prefix are ignored.
//
// biostruct - structure handed to the record parsers and finally to
//             updateSecStructChainIndexes().
// ti        - operation status: polled via isCoR() before each line, receives
//             progress (adapter progress scaled by 0.8) and errors.
//
// Errors set here: "Line is too long" when a line has no terminator and the
// adapter is not at end of file; "Some mandatory records are absent" when
// flagAtomRecordPresent is still false after the whole input was read.
void PDBFormat::PDBParser::parseBioStruct3D(BioStruct3D& biostruct, U2OpStatus& ti) {
    // One extra zero-initialised byte on top of the maximum read size.
    QByteArray lineStorage(DocumentFormat::READ_BUFF_SIZE + 1, 0);
    char* const rawLine = lineStorage.data();
    bool isFirstCompnd = true;

    while (!ti.isCoR()) {
        bool terminatorFound = true;
        const qint64 bytesRead = io->readUntil(rawLine, DocumentFormat::READ_BUFF_SIZE, TextUtils::LINE_BREAKS, IOAdapter::Term_Include, &terminatorFound);
        if (bytesRead == 0) {
            break;
        }

        // A missing terminator is acceptable only for the last line of the file.
        if (!(terminatorFound || io->isEof())) {
            ti.setError(U2::PDBFormat::tr("Line is too long"));
            return;
        }

        currentPDBLine = QString(QByteArray(rawLine, bytesRead));
        ti.setProgress(io->getProgress() * 0.8);

        // Record dispatch: the first matching prefix wins, checked in this order.
        if (currentPDBLine.startsWith("HEADER")) {
            parseHeader(biostruct, ti);
        } else if (currentPDBLine.startsWith("COMPND")) {
            parseMacromolecularContent(isFirstCompnd, ti);
            isFirstCompnd = false;
        } else if (currentPDBLine.startsWith("SEQRES")) {
            parseSequence(biostruct, ti);
        } else if (currentPDBLine.startsWith("HELIX ")
                   || currentPDBLine.startsWith("SHEET ")
                   || currentPDBLine.startsWith("TURN  ")) {
            parseSecondaryStructure(biostruct, ti);
        } else if (currentPDBLine.startsWith("ATOM  ") || currentPDBLine.startsWith("HETATM")) {
            parseAtom(biostruct, ti);
        } else if (currentPDBLine.startsWith("TER")) {
            // TER advances the chain counter.
            ++currentChainIndex;
        } else if (currentPDBLine.startsWith("SPLIT ")) {
            parseSplitSection(ti);
        } else if (currentPDBLine.startsWith("MODEL")) {
            // Chain numbering restarts from 1 for every model.
            currentChainIndex = 1;
            parseModel(biostruct, ti);
        } else if (currentPDBLine.startsWith("ENDMDL")) {
            flagMultipleModels = true;
            ++currentModelIndex;
        }
    }

    CHECK_OP(ti,);

    if (!flagAtomRecordPresent) {
        ti.setError(U2::PDBFormat::tr("Some mandatory records are absent"));
    }

    // Runs even when the error above was just set, as in the original flow.
    updateSecStructChainIndexes(biostruct);
}
Exemplo n.º 3
0
// Reads FASTA records from `io` and imports them as sequences into the database
// referenced by `dbiRef`, appending the created objects to `objects`.
//
// Parameters:
//   io              - input adapter; read with readUntil()/readLine().
//   dbiRef          - target database reference.
//   fs              - hints map; the keys read here are DBI_FOLDER_HINT,
//                     DocumentReadingMode_MaxObjectsInDoc and
//                     DocumentReadingMode_DontMakeUniqueNames (it is also passed to
//                     U2SequenceImporter and to the annotation helpers).
//   objects         - output list; receives the created sequence/annotation objects.
//                     If the status has an error when the record loop ends, all
//                     entries are deleted and the list is cleared.
//   gapSize         - -1 disables merging; any other value enables merge mode, where
//                     all records go into a single sequence and
//                     addDefaultSymbolsBlock(gapSize, ...) is called between records.
//   writeLockReason - cleared on entry; set to "Document sequences were merged" when
//                     merge mode processed more than one header.
//   os              - operation status used for errors, warnings, progress and
//                     cancellation polling (isCoR()).
//
// NOTE(review): the CHECK_* / SAFE_POINT macros are defined outside this view; the
// comments below assume CHECK_OP/CHECK_EXT leave the function and the *_BREAK
// variants leave the enclosing loop when their condition fails -- confirm.
static void load(IOAdapter* io, const U2DbiRef& dbiRef, const QVariantMap& fs, QList<GObject*>& objects,
                 int gapSize, QString& writeLockReason, U2OpStatus& os)
{
    DbiOperationsBlock opBlock(dbiRef, os);
    CHECK_OP(os, );
    Q_UNUSED(opBlock);

    static char fastaCommentStartChar = FastaFormat::FASTA_COMMENT_START_SYMBOL;

    MemoryLocker memoryLocker(os, 1);
    CHECK_OP(os, );

    writeLockReason.clear();
    // One extra zero-initialised byte on top of the maximum read size; it makes the
    // `buff[len] = 0` write below valid as long as len <= READ_BUFF_SIZE.
    QByteArray readBuff(DocumentFormat::READ_BUFF_SIZE + 1, 0);
    char* buff = readBuff.data();
    qint64 len = 0;

    // Merge mode is selected purely by the gapSize value.
    bool merge = gapSize != -1;
    // Merge mode only: header text and region of every record, kept in parallel.
    QStringList headers;
    QSet<QString> uniqueNames;
    QVector<U2Region> mergedMapping;

    // for lower case annotations
    GObjectReference sequenceRef;

    //skip leading whites if present
    bool lineOk = true;
    static QBitArray nonWhites = ~TextUtils::WHITES;
    io->readUntil(buff, DocumentFormat::READ_BUFF_SIZE, nonWhites, IOAdapter::Term_Exclude, &lineOk);
    CHECK_EXT(!io->hasError(), os.setError(io->errorString()), );

    U2SequenceImporter seqImporter(fs, true);
    const QString folder = fs.value(DocumentFormat::DBI_FOLDER_HINT, U2ObjectDbi::ROOT_FOLDER).toString();

    // Offset of the current record inside the merged sequence.
    qint64 sequenceStart = 0;
    // Number of records fully processed so far (skipped empty records are not counted).
    int sequenceNumber = 0;
    DbiConnection con(dbiRef, os);
    // Becomes true once the sequence-reading loop has consumed the next record's
    // header line into buff; the header is then taken from buff instead of being
    // read again. It is never reset to false in this function.
    bool headerReaded = false;
    // Headers of records whose sequence turned out to be empty (non-merge mode).
    QStringList emptySeqNames;

    // -1 when the hint is absent; the limit is only enforced for values > 0.
    const int objectsCountLimit = fs.contains(DocumentReadingMode_MaxObjectsInDoc) ? fs[DocumentReadingMode_MaxObjectsInDoc].toInt() : -1;
    const bool settingsMakeUniqueName = !fs.value(DocumentReadingMode_DontMakeUniqueNames, false).toBool();
    // One iteration per FASTA record.
    while (!os.isCoR()) {
        //skip start comments and read header
        if(!headerReaded){
            do{
                len = io->readLine(buff, DocumentFormat::READ_BUFF_SIZE);
                CHECK_EXT(!io->hasError(), os.setError(io->errorString()), );
            }while(buff[0] == fastaCommentStartChar && len > 0);
        }

        if (len == 0 && io->isEof()) { //end of stream
            break;
        }
        CHECK_EXT(!io->hasError(), os.setError(io->errorString()), );
        // NOTE(review): lineOk is written only by the readUntil() call before the
        // loop; the readLine() calls do not receive it, so this check reflects that
        // initial read only -- confirm this is intended.
        CHECK_EXT_BREAK(lineOk, os.setError(FastaFormat::tr("Line is too long")));

        // Header text is everything after the first character, trimmed. It is built
        // before the first character is verified to be the header start symbol.
        QString headerLine = QString(QByteArray(buff+1, len-1)).trimmed();
        CHECK_EXT_BREAK(buff[0] == FastaFormat::FASTA_HEADER_START_SYMBOL, os.setError(FastaFormat::tr("First line is not a FASTA header")));

        //read sequence
        // A new sequence is started for the first record, and for every record when
        // not merging.
        if (sequenceNumber == 0 || !merge) {
            QString objName = headerLine;
            if(objName.isEmpty()){
                objName = "Sequence";
            }
            if (settingsMakeUniqueName) {
                // Merge mode uses the fixed name "Sequence"; otherwise the header is
                // passed through TextUtils::variate() together with uniqueNames.
                objName = (merge) ? "Sequence" : TextUtils::variate(objName, "_", uniqueNames);
                objName.squeeze();
                memoryLocker.tryAcquire(2*objName.size());
                CHECK_OP_BREAK(os);
                uniqueNames.insert(objName);
            }
            seqImporter.startSequence(os, dbiRef, folder, objName, false);
            CHECK_OP_BREAK(os);

            sequenceRef = GObjectReference(io->getURL().getURLString(), objName, GObjectTypes::SEQUENCE);
        }
        // Merge mode, second and later records: add the gap block and account for it
        // in the running offset.
        if (sequenceNumber >= 1 && merge) {
            seqImporter.addDefaultSymbolsBlock(gapSize, os);
            sequenceStart += gapSize;
            CHECK_OP_BREAK(os);
        }
        // Number of symbols added for the current record (after whitespace removal).
        int sequenceLen = 0;
        // Reads the record body line by line until the next header or end of file.
        while (!os.isCoR()) {
            // Keep reading until a line with len > 0 arrives or the adapter reports EOF.
            do {
                len = io->readLine(buff, DocumentFormat::READ_BUFF_SIZE);
                CHECK_EXT(!io->hasError(), os.setError(io->errorString()), );
            } while (len <= 0 && !io->isEof());
            CHECK_EXT(!io->hasError(), os.setError(io->errorString()), );

            if (len <= 0 && io->isEof()) {
                break;
            }
            CHECK_EXT(!io->hasError(), os.setError(io->errorString()), );

            buff[len] = 0;

            // Lines starting with neither the comment nor the header symbol are
            // sequence data. A header line ends the current record and stays in
            // buff/len for the next outer iteration. Comment lines match neither
            // branch and are skipped.
            if(buff[0] != fastaCommentStartChar && buff[0] != FastaFormat::FASTA_HEADER_START_SYMBOL){
                len = TextUtils::remove(buff, len, TextUtils::WHITES);
                if(len > 0){
                    seqImporter.addBlock(buff, len, os);
                    sequenceLen += len;
                }
            }else if( buff[0] == FastaFormat::FASTA_HEADER_START_SYMBOL){
                headerReaded = true;
                break;
            }

            CHECK_OP_BREAK(os);
            os.setProgress(io->getProgress());
        }

        if (merge) {
            // Remember the header and the region this record occupies in the merged
            // sequence; the sequence itself is finalized after the loop.
            memoryLocker.tryAcquire(headerLine.size());
            CHECK_OP_BREAK(os);
            headers.append(headerLine);
            mergedMapping.append(U2Region(sequenceStart, sequenceLen));
        } else {
            // The limit is checked after the record has been read, before it is
            // finalized and added to `objects`.
            if (objectsCountLimit > 0 && objects.size() >= objectsCountLimit) {
                os.setError(FastaFormat::tr("File \"%1\" contains too many sequences to be displayed. "
                    "However, you can process these data using instruments from the menu <i>Tools -> NGS data analysis</i> "
                    "or pipelines built with Workflow Designer.")
                    .arg(io->getURL().getURLString()));
                break;
            }
            // NOTE(review): the meaning of the constant 800 is not visible here.
            memoryLocker.tryAcquire(800);
            CHECK_OP_BREAK(os);
            U2Sequence seq = seqImporter.finalizeSequenceAndValidate(os);
            // An empty sequence is not fatal: the error is cleared, the header is
            // remembered for the warning below and the record is skipped (the
            // `continue` also skips the sequenceStart/sequenceNumber updates).
            if (os.hasError() && os.getError() == U2SequenceImporter::EMPTY_SEQUENCE_ERROR) {
                os.setError("");
                emptySeqNames << headerLine;
                continue;
            }
            sequenceRef.entityRef = U2EntityRef(dbiRef, seq.id);

            //TODO parse header
            // The raw header text is stored as a string attribute of the sequence.
            U2StringAttribute attr(seq.id, DNAInfo::FASTA_HDR, headerLine);
            con.dbi->getAttributeDbi()->createStringAttribute(attr, os);
            CHECK_OP_BREAK(os);

            objects << new U2SequenceObject(seq.visualName, U2EntityRef(dbiRef, seq.id));
            CHECK_OP_BREAK(os);

            U1AnnotationUtils::addAnnotations(objects, seqImporter.getCaseAnnotations(), sequenceRef, NULL, fs);
        }
        sequenceStart += sequenceLen;
        sequenceNumber++;
        ioLog.trace(QString("Sequence #%1 is processed").arg(sequenceNumber));
    }

    // If the status holds an error here, every object created so far is destroyed.
    // NOTE(review): the CHECK_EXT exits on adapter errors inside the loops above do
    // not go through this cleanup -- verify how callers treat `objects` in that case.
    CHECK_OP_EXT(os, qDeleteAll(objects); objects.clear(), );
    // An empty result is an error only when not merging.
    CHECK_EXT(!objects.isEmpty() || merge, os.setError(Document::tr("Document is empty.")), );
    SAFE_POINT(headers.size() == mergedMapping.size(), "headers <-> regions mapping failed!", );
    ioLog.trace("All sequences are processed");

    // Report the skipped empty records as a single warning.
    if (!emptySeqNames.isEmpty()) {
        QString warningMessage;
        warningMessage.append(FastaFormat::tr("Loaded sequences: %1.\n").arg(sequenceNumber));
        warningMessage.append(FastaFormat::tr("Skipped sequences: %1.\n").arg(emptySeqNames.size()));
        warningMessage.append(FastaFormat::tr("The following sequences are empty:\n%1").arg(emptySeqNames.join(",\n")));
        os.addWarning(warningMessage);
    }

    if (!merge) {
        return;
    }

    // Merge mode: finalize the single merged sequence, then add its object and the
    // annotations built from the collected headers/regions.
    U2Sequence seq = seqImporter.finalizeSequenceAndValidate(os);
    CHECK_OP(os, );
    sequenceRef.entityRef = U2EntityRef(dbiRef, seq.id);

    U1AnnotationUtils::addAnnotations(objects, seqImporter.getCaseAnnotations(), sequenceRef, NULL, fs);
    objects << new U2SequenceObject(seq.visualName, U2EntityRef(dbiRef, seq.id));
    objects << DocumentFormatUtils::addAnnotationsForMergedU2Sequence( sequenceRef, dbiRef, headers, mergedMapping, fs );
    // The write-lock reason is set only when more than one record was merged.
    if (headers.size() > 1) {
        writeLockReason = QObject::tr("Document sequences were merged");
    }
}