/*
 * Read the next usable query record from qFile into QS.
 *
 * AAs supplies the input format (fastq vs. not), the maximum query length
 * and the minimum word length.  QS receives the query ID, the forward and
 * reverse sequence buffers (characters and 4-bit codes), the quality string
 * for fastq input, and the final query length.
 *
 * Records that are too long, too short, or (for fastq) have mismatched
 * sequence/quality lengths are reported on stderr and skipped; reading then
 * resumes with the following record.  A record with no bases -- which is
 * also what end of input looks like -- is accepted with QS->queryLen == 0.
 *
 * qFile is locked with flockfile() on entry and released with
 * funlockfile() once a record has been accepted.
 */
void readNextQuery(AlignmentArgs_t * AAs, QueryState_t * QS) {
    int revStart = 0;    // index in the reverse buffers of the first reverse-strand entry
    int nBases = 0;      // number of bases stored for the current record
    BOOL skip = FALSE;   // TRUE when the current record has to be discarded

    flockfile(qFile);
    QS->queryLen = 0;

    // Keep reading records until one is accepted.
    do {
        // ---- ID line -------------------------------------------------
        // The loops further down stop only after consuming the '>' or '@'
        // that opens the following record, so on later records this line
        // is read without its marker.  How the very first marker in the
        // file is handled is not visible in this function.
        int idChars = 0;
        for (;;) {
            int ch = fgetchar(qFile);
            if (ch == EOF || ch == '\n') {
                break;
            }
            // Store only what fits, but keep counting so the full length
            // can be reported.  Spaces become underscores to make the ID
            // easier to use in later pipeline commands.
            if (idChars < MAX_QUERY_ID_LEN) {
                if (ch == ' ') {
                    QS->queryID[idChars] = '_';
                } else {
                    QS->queryID[idChars] = ch;
                }
            }
            idChars++;
        }
        if (idChars <= MAX_QUERY_ID_LEN) {
            QS->queryIDLen = idChars;
        } else {
            fprintf(stderr, "Warning, Query Id length of %d exceeds maximum length %d. Id will be truncated.\n",
                    idChars, MAX_QUERY_ID_LEN);
            QS->queryIDLen = MAX_QUERY_ID_LEN;
        }

        // ---- Sequence ------------------------------------------------
        // The sequence ends at '+' for fastq input, otherwise at the '>'
        // that starts the next record.
        revStart = AAs->maxQueryLength;
        char terminator = '>';
        if (AAs->fastq) {
            terminator = '+';
        }
        nBases = 0;
        skip = FALSE;
        for (;;) {
            int ch = fgetchar(qFile);
            if (ch == terminator || ch == EOF) {
                break;
            }
            if (ch == '\n') {
                continue;
            }
            if (nBases >= AAs->maxQueryLength) {
                fprintf(stderr, "Warning. Query sequence exceeds maximum length of %d. Query will be skipped.\n",
                        AAs->maxQueryLength);
                // Discard the remainder of the sequence.
                readToChar(qFile, terminator, FALSE);
                skip = TRUE;
                break;
            }
            // The forward strand is written front to back while the
            // complemented strand is written back to front, each as both
            // a character and its 4-bit code.
            char fwdCode = map8to4(ch);
            QS->forwardBuf[nBases] = ch;
            QS->forwardCodeBuf[nBases] = fwdCode;
            revStart--;
            char rcCode = complement4to4(fwdCode);
            QS->reverseBufBase[revStart] = unmap4to8(rcCode);
            QS->reverseCodeBufBase[revStart] = rcCode;
            nBases++;
        }

        // ---- Quality string (fastq only) -----------------------------
        int nQuals = 0;
        if (AAs->fastq) {
            // Drop whatever follows the '+' on its line.
            readToChar(qFile, '\n', FALSE);

            // '@' is a legal quality character, so it only counts as the
            // start of the next record when it directly follows a newline.
            // That test is still a heuristic, as the original author noted.
            char last = 0;
            for (;;) {
                int ch = fgetchar(qFile);
                if (ch == EOF || (ch == '@' && last == '\n')) {
                    break;
                }
                last = ch;
                if (ch == '\n') {
                    continue;
                }
                if (nQuals >= AAs->maxQueryLength) {
                    fprintf(stderr, "Warning. Quality score sequence exceeds maximum length of %d. Query will be skipped.\n",
                            AAs->maxQueryLength);
                    // Discard the remainder of the quality string.
                    readToChar(qFile, '@', TRUE);
                    skip = TRUE;
                    break;
                }
                QS->qualBuf[nQuals] = ch;
                nQuals++;
            }

            // A well-formed fastq record has one quality value per base.
            if (nBases != nQuals) {
                fprintf(stderr, "Warning. Query sequence (%d) and quality score sequence (%d) have different lengths in fastq file."
                        " Query will be skipped.\n", nBases, nQuals);
                skip = TRUE;
            }
        }

        // Non-empty queries shorter than the word length are rejected.
        if (nBases > 0 && nBases < AAs->wordLen) {
            fprintf(stderr, "Query length must be at least wordlen bases long. Query will be skipped.\n");
            skip = TRUE;
        }
    } while (skip);

    funlockfile(qFile);

    // The reverse buffers were filled from the end toward the front, so the
    // usable reverse strand begins at revStart.
    QS->reverseCodeBuf = QS->reverseCodeBufBase + revStart;
    QS->reverseBuf = QS->reverseBufBase + revStart;
    QS->queryLen = nBases;

    // Seed the random choice among equal alignments from the query itself,
    // so the choice does not depend on where the query sits in the file.
    generateRandomSeed(QS);
}
/*
 * Read the next '<'...'>' delimited element from fp into name.
 *
 * NOTE(review): reachChar() and readToChar() are defined elsewhere.
 * Presumably the first skips input up to '<' and the second copies
 * characters into name until '>' -- confirm against their definitions,
 * including how much room name must provide.
 */
void nextElement(char *name, FILE *fp)
{
    /* Position the stream at the opening bracket. */
    reachChar('<', fp);

    /* Collect the element text up to the closing bracket. */
    readToChar(name, '>', fp);
}