예제 #1
0
// Constructs the generator from a seed.
//   use_seed  when true, `seed` is used as given; when false, `seed` is
//             ignored and replaced by the value of generateRandomSeed().
//   seed      caller-supplied seed (only meaningful when use_seed is true).
// The chosen seed is handed to construct_common(), which does the actual setup.
MersenneTwister::MersenneTwister(const bool use_seed, size_t seed){
  if (use_seed) {
    construct_common(seed);
    return;
  }
  // No explicit seed requested: overwrite the parameter with a generated one.
  seed = generateRandomSeed();
  construct_common(seed);
}
예제 #2
0
파일: Query.c 프로젝트: GregoryFaust/yaha
/*
 * Reads query records from qFile until one is accepted, then stores it in QS.
 *
 *   AAs  supplies maxQueryLength, wordLen and the fastq flag.
 *   QS   receives the query ID, the forward and reverse sequence buffers
 *        (chars and 4-bit codes), the quality string (fastq only) and the
 *        query length.
 *
 * A record is skipped, with a warning on stderr, when its sequence or quality
 * string is longer than AAs->maxQueryLength, when sequence and quality lengths
 * differ (fastq), or when the sequence is non-empty but shorter than
 * AAs->wordLen.  If no bases are read (the original author notes this is what
 * happens at end of file) the function returns with QS->queryLen == 0.
 *
 * qFile is locked with flockfile() for the duration of the read and unlocked
 * just before the QS fields are finalized.
 */
void readNextQuery(AlignmentArgs_t * AAs, QueryState_t * QS)
{
    // Take the stream lock; it is dropped again right before we return.
    flockfile(qFile);

    // Keep consuming records until one is accepted.
    QS->queryLen = 0;
    for (;;)
    {
        // ---- ID line: everything up to the newline after the ">" or "@". ----
        int idChars = 0;
        int idCh;
        while ((idCh = fgetchar(qFile)) != '\n' && idCh != EOF)
        {
            // Store the char only while it still fits; keep counting regardless
            // so the full length can be reported.  Spaces become underscores
            // to aid further pipeline commands.
            if (idChars < MAX_QUERY_ID_LEN)
            {
                QS->queryID[idChars] = (idCh == ' ') ? '_' : idCh;
            }
            ++idChars;
        }
        if (idChars <= MAX_QUERY_ID_LEN)
        {
            QS->queryIDLen = idChars;
        }
        else
        {
            fprintf(stderr, "Warning, Query Id length of %d exceeds maximum length %d.  Id will be truncated.\n",
                    idChars, MAX_QUERY_ID_LEN);
            QS->queryIDLen = MAX_QUERY_ID_LEN;
        }

        // ---- Sequence: read bases until the next record marker or EOF. ----
        char terminator = AAs->fastq ? '+' : '>';
        int revIdx = AAs->maxQueryLength;   // reverse buffers are filled from the back
        int baseCount = 0;
        BOOL skipQuery = FALSE;
        for (;;)
        {
            int ch = fgetchar(qFile);
            if (ch == EOF || ch == terminator) break;
            // Newlines inside the sequence are ignored.
            if (ch == '\n') continue;
            // Refuse to write past the maximum query length.
            if (baseCount >= AAs->maxQueryLength)
            {
                fprintf(stderr, "Warning.  Query sequence exceeds maximum length of %d.  Query will be skipped.\n",
                        AAs->maxQueryLength);
                // Discard the remainder of this query's sequence.
                readToChar(qFile, terminator, FALSE);
                skipQuery = TRUE;
                break;
            }
            // Each base is recorded four ways: the char and its 4-bit code in
            // the forward buffers, and the complement's char and code in the
            // reverse buffers (the code is what is compared to the compressed
            // reference; the char is for output).
            char fwdCode = map8to4(ch);
            char rcCode = complement4to4(fwdCode);
            --revIdx;
            QS->forwardBuf[baseCount] = ch;
            QS->forwardCodeBuf[baseCount] = fwdCode;
            QS->reverseBufBase[revIdx] = unmap4to8(rcCode);
            QS->reverseCodeBufBase[revIdx] = rcCode;
            ++baseCount;
        }

        // ---- Quality string (fastq input only). ----
        int qualChars = 0;
        if (AAs->fastq)
        {
            // The '+' line may carry extra text; drop the rest of that line.
            readToChar(qFile, '\n', FALSE);
            // A new record is recognized by an '@' that directly follows a newline.
            // Because '@' and newlines can both occur within quality data, this
            // test may not be fully reliable (a doubt raised by the original author).
            char lastCh = 0;
            for (;;)
            {
                int ch = fgetchar(qFile);
                if (ch == EOF) break;
                if (ch == '@' && lastCh == '\n') break;
                lastCh = ch;
                if (ch == '\n') continue;
                if (qualChars >= AAs->maxQueryLength)
                {
                    fprintf(stderr, "Warning.  Quality score sequence exceeds maximum length of %d.  Query will be skipped.\n",
                            AAs->maxQueryLength);
                    // Discard the remainder of the quality string.
                    readToChar(qFile, '@', TRUE);
                    skipQuery = TRUE;
                    break;
                }
                QS->qualBuf[qualChars++] = ch;
            }
            // A well-formed fastq record has one quality char per base.
            if (baseCount != qualChars)
            {
                fprintf(stderr, "Warning.  Query sequence (%d) and quality score sequence (%d) have different lengths in fastq file."
                        "  Query will be skipped.\n", baseCount, qualChars);
                skipQuery = TRUE;
            }
        }

        // Non-empty queries shorter than the word length are rejected too.
        if (0 < baseCount && baseCount < AAs->wordLen)
        {
            fprintf(stderr, "Query length must be at least wordlen bases long. Query will be skipped.\n");
            skipQuery = TRUE;
        }

        if (!skipQuery)
        {
            funlockfile(qFile);
            // The reverse buffers were filled back to front, so the usable data
            // begins at revIdx rather than at the base of each buffer.
            QS->reverseCodeBuf = QS->reverseCodeBufBase + revIdx;
            QS->reverseBuf = QS->reverseBufBase + revIdx;
            QS->queryLen = baseCount;
            // Seed from the query itself so that the stochastic choice among
            // equal alignments is the same regardless of the query's position
            // in the file.
            generateRandomSeed(QS);
            return;
        }
        // Something was wrong with this record: loop around and try the next one.
    }
}
예제 #3
0
// Default constructor: obtains a seed from generateRandomSeed() and passes it
// to construct_common() to set up the generator.
MersenneTwister::MersenneTwister() {
  construct_common(
      generateRandomSeed());
}