Beispiel #1
0
/*
 * Set up the FASTA driver stored inside an input format.
 *
 * The buffered reader is initialized with the input file and the start
 * offset reported by the input format, and the parser state is reset.
 */
void core_fasta_input_init(struct biosal_input_format *input)
{
    struct core_fasta_input *concrete_self;
    char *path;
    uint64_t start;

    concrete_self = (struct core_fasta_input *)biosal_input_format_implementation(input);
    path = biosal_input_format_file(input);

    /*
     * The operations table must be set before the start offset is queried.
     */
    CORE_DEBUGGER_ASSERT(input->operations != NULL);

    start = biosal_input_format_start_offset(input);
    core_buffered_reader_init(&concrete_self->reader, path, start);

    /*
     * No line buffers exist yet, no header is pending and
     * no first entry has been seen.
     */
    concrete_self->has_first = 0;
    concrete_self->has_header = 0;
    concrete_self->next_header = NULL;
    concrete_self->buffer = NULL;
}
Beispiel #2
0
/*
 * Return the offset reported by the buffered reader of the FASTA driver.
 */
uint64_t core_fasta_input_get_offset(struct biosal_input_format *self)
{
    struct core_fasta_input *concrete_self =
        (struct core_fasta_input *)biosal_input_format_implementation(self);

    return core_buffered_reader_get_offset(&concrete_self->reader);
}
Beispiel #3
0
/*
 * Tear down the FASTQ driver: destroy its buffered reader and release
 * the line buffer if one was allocated.
 */
void core_fastq_input_destroy(struct biosal_input_format *input)
{
    struct core_fastq_input *concrete_self =
        (struct core_fastq_input *)biosal_input_format_implementation(input);

    core_buffered_reader_destroy(&concrete_self->reader);

    /*
     * Nothing else to release when the line buffer was never allocated.
     */
    if (concrete_self->buffer == NULL) {
        return;
    }

    core_memory_free(concrete_self->buffer, MEMORY_FASTQ);
    concrete_self->buffer = NULL;
}
Beispiel #4
0
/*
 * Tell whether a FASTQ line is an identifier line.
 *
 * A line that does not start with '@' (including an empty line) is never
 * an identifier. A line that starts with '@' can be either an identifier
 * or a quality string, so the bytes preceding the current position are
 * requested from the buffered reader:
 *
 * - if nothing is available before, the line is an identifier;
 * - otherwise (previous bytes available, or operation not supported by
 *   the reader), the decision is delegated to
 *   core_fastq_input_is_identifier_mock().
 *
 * @param self input format that embeds the FASTQ driver
 * @param line NUL-terminated line to classify (must not be NULL)
 * @return 1 if the line is considered an identifier, 0 if it does not
 *         start with '@', otherwise the result of the mock classifier
 */
int core_fastq_input_is_identifier(struct biosal_input_format *self, const char *line)
{
    /*
     * The buffer has to hold every byte that is requested below.
     * It used to have 2 slots while 3 bytes were requested, which let the
     * reader write past the end of the array.
     */
    char buffer[3];
    int read;
    struct core_fastq_input *fastq;

    fastq = (struct core_fastq_input *)biosal_input_format_implementation(self);

    /*
     * An empty line is rejected here too since line[0] is then '\0'.
     */
    if (line[0] != '@') {
        return 0;
    }

    /*
     * Now, the line is either a quality string
     * or an identifier string since it starts with a @.
     */
    read = core_buffered_reader_get_previous_bytes(&fastq->reader,
                    buffer, (int)sizeof(buffer));

    /*
     * This is an identifier if nothing is available before.
     */
    if (read == 0) {
        return 1;
    }

    /*
     * Either the operation is not supported by the driver (read < 0)
     * or previous bytes exist but are not inspected here.
     * In both cases, fall back on this method call.
     */
    return core_fastq_input_is_identifier_mock(self, line);
}
Beispiel #5
0
/*
 * Set up the FASTQ driver stored inside an input format.
 *
 * The buffered reader is initialized with the input file and the start
 * offset reported by the input format; the line buffer is left
 * unallocated and no first entry has been seen yet.
 */
void core_fastq_input_init(struct biosal_input_format *input)
{
    struct core_fastq_input *concrete_self;
    char *path = biosal_input_format_file(input);
    uint64_t start = biosal_input_format_start_offset(input);

#ifdef BIOSAL_FASTQ_INPUT_DEBUG
    printf("DEBUG core_fastq_input_init %s\n", path);
#endif

    concrete_self = (struct core_fastq_input *)biosal_input_format_implementation(input);
    core_buffered_reader_init(&concrete_self->reader, path, start);

    concrete_self->has_first = 0;
    concrete_self->buffer = NULL;
}
Beispiel #6
0
/*
 * Read the next FASTA entry and store its sequence in `sequence`.
 *
 * The header line is taken from the header saved by the previous call
 * when there is one, otherwise it is read from the buffered reader.
 * On the very first call, lines are skipped until
 * core_fasta_input_check_header() accepts one or the input ends.
 * The following lines are concatenated (without their trailing new line)
 * into `sequence` until a line starting with '>' or the end of the input
 * is reached. A line starting with '>' is saved for the next call.
 *
 * `sequence` is always NUL-terminated on return, including for the last
 * entry of the input and when nothing could be read.
 *
 * @param input    input format that embeds the FASTA driver
 * @param sequence output buffer
 *                 NOTE(review): assumed to hold at least
 *                 BIOSAL_INPUT_MAXIMUM_SEQUENCE_LENGTH bytes; longer
 *                 sequences are truncated to fit -- confirm with callers.
 * @return the value obtained for the header line (length of the saved
 *         header, or what the reader returned for the header line);
 *         0 when no header could be read.
 *         NOTE(review): sequence lines are not counted in the result;
 *         kept as is because callers may rely on it.
 */
uint64_t core_fasta_input_get_sequence(struct biosal_input_format *input,
                char *sequence)
{
    struct core_fasta_input *fasta;

    /* TODO use a dynamic buffer to accept long reads... */
    int maximum_sequence_length = BIOSAL_INPUT_MAXIMUM_SEQUENCE_LENGTH;
    int value;
    int total;
    int position_in_sequence;
    int block_length;
    int available;

    fasta = (struct core_fasta_input *)biosal_input_format_implementation(input);

    /*
     * The line buffers are allocated on the first call.
     */
    if (fasta->buffer == NULL) {
        fasta->buffer = core_memory_allocate(maximum_sequence_length + 1, MEMORY_FASTA);
        fasta->next_header = core_memory_allocate(maximum_sequence_length + 1, MEMORY_FASTA);

        fasta->buffer[0] = '\0';
        fasta->next_header[0] = '\0';
    }

    /*
     * Read name
     */

    if (fasta->has_header) {

        /*
         * The previous call already consumed this header while
         * looking for the end of its own sequence.
         */
        strcpy(fasta->buffer, fasta->next_header);

        value = (int)strlen(fasta->buffer);

        fasta->has_header = 0;

    } else {
        value = core_buffered_reader_read_line(&fasta->reader, fasta->buffer,
                    maximum_sequence_length);

        /* Make sure that this is an identifier.
         */
        if (!fasta->has_first) {

            /*
             * Stop at the end of the input too (value == 0), otherwise
             * an input without any header would loop forever.
             */
            while (value != 0
                            && !core_fasta_input_check_header(input, fasta->buffer)) {

                value = core_buffered_reader_read_line(&fasta->reader, fasta->buffer,
                    maximum_sequence_length);
            }

            fasta->has_first = 1;
        }
    }

    total = value;

    /*
     * Read sequence.
     *
     * Discard any new line symbol too.
     */

    position_in_sequence = 0;

    while (1) {
        value = core_buffered_reader_read_line(&fasta->reader, fasta->buffer,
                    maximum_sequence_length);

        if (value == 0) {
            break;
        }

        /*
         * A new header ends the current entry. Keep it for the next call.
         */
        if (fasta->buffer[0] == '>') {
            strcpy(fasta->next_header, fasta->buffer);
            fasta->has_header = 1;
            break;
        }

        /*
         * Otherwise, add the sequence.
         */

        block_length = (int)strlen(fasta->buffer);

        /*
         * Remove the new line.
         * The length is checked first to avoid reading before the buffer.
         */
        if (block_length > 0
                        && fasta->buffer[block_length - 1] == '\n') {
            --block_length;
        }

        /*
         * Never write past the output buffer; one byte is kept for the
         * terminator. Remaining lines of an oversized entry are still
         * consumed so that the next call starts on a header.
         */
        available = maximum_sequence_length - 1 - position_in_sequence;

        if (block_length > available) {
            block_length = available;
        }

        if (block_length > 0) {
            core_memory_copy(sequence + position_in_sequence,
                            fasta->buffer,
                            block_length);

            position_in_sequence += block_length;
        }
    }

    /*
     * Terminate the sequence in every case, not only when a next header
     * was found: the last entry of the input ends with the end of input.
     */
    sequence[position_in_sequence] = '\0';

    return total;
}
Beispiel #7
0
/*
 * Read the next FASTQ entry (4 lines) and store its DNA sequence in
 * `sequence`, without the trailing new line.
 *
 * The identifier line, the '+' line and the quality line are read into
 * the internal line buffer and discarded. When FIND_IDENTIFIER is defined
 * and no entry has been read yet, lines are skipped until
 * core_fastq_input_is_identifier() accepts one or the input ends.
 *
 * @param input    input format that embeds the FASTQ driver
 * @param sequence output buffer; it receives the sequence line
 * @return the sum of the values returned by the reader for every line
 *         that was read; 0 when nothing could be read
 */
uint64_t core_fastq_input_get_sequence(struct biosal_input_format *input,
                char *sequence)
{
    struct core_fastq_input *fastq;

    /*
     * Input sequence has at least BIOSAL_INPUT_MAXIMUM_SEQUENCE_LENGTH
     * which is currently 512k
     */

    /* TODO use a dynamic buffer to accept long reads... */
    int maximum_sequence_length = BIOSAL_INPUT_MAXIMUM_SEQUENCE_LENGTH;
    int value;
    int length;

    fastq = (struct core_fastq_input *)biosal_input_format_implementation(input);

    /*
     * The line buffer is allocated on the first call.
     */
    if (fastq->buffer == NULL) {
        fastq->buffer = (char *)core_memory_allocate(maximum_sequence_length + 1, MEMORY_FASTQ);
    }

    /*
     * Read name
     */
    length = core_buffered_reader_read_line(&fastq->reader, fastq->buffer,
                    maximum_sequence_length);
    value = length;

#ifdef FIND_IDENTIFIER
    /*
     * If we do not have the first entry yet,
     * make sure that the line is a good line.
     */
    if (!fastq->has_first) {

        /*
         * Give up at the end of the input (length == 0), otherwise
         * an input without any identifier would loop forever.
         */
        while (length != 0
                        && !core_fastq_input_is_identifier(input, fastq->buffer)) {

            length = core_buffered_reader_read_line(&fastq->reader, fastq->buffer,
                    maximum_sequence_length);
            value += length;
        }

        fastq->has_first = 1;
    }
#endif

    /*
     * Read DNA sequence
     */
    length = core_buffered_reader_read_line(&fastq->reader, sequence,
                    maximum_sequence_length);

#ifdef BIOSAL_FASTQ_INPUT_DEBUG_READ_LINE
    printf("FASTQ ReadLine <<%s>>\n", sequence);
#endif

    if (length <= 0) {
        /*
         * Nothing was read (end of input): do not look at
         * sequence[length - 1] and hand back an empty string.
         */
        sequence[0] = '\0';
    } else if (sequence[length - 1] == '\n') {
        /*
         * Remove new line symbol.
         */
        sequence[length - 1] = '\0';
    }

    value += length;

#ifdef BIOSAL_FASTQ_INPUT_DEBUG2
    /*
     * The line buffer is a member of the driver; there is no local
     * variable named buffer.
     */
    printf("DEBUG core_fastq_input_get_sequence %s\n", fastq->buffer);
#endif

    /*
     * Read the + symbol
     */
    value += core_buffered_reader_read_line(&fastq->reader, fastq->buffer,
                    maximum_sequence_length);

    /*
     * Read quality string.
     */
    value += core_buffered_reader_read_line(&fastq->reader, fastq->buffer,
                    maximum_sequence_length);

    return value;
}