void core_fasta_input_init(struct biosal_input_format *input) { char *file; struct core_fasta_input *fasta; uint64_t offset; fasta = (struct core_fasta_input *)biosal_input_format_implementation(input); file = biosal_input_format_file(input); CORE_DEBUGGER_ASSERT(input->operations != NULL); #if 0 printf("DEBUG BEFORE faulty call.\n"); #endif offset = biosal_input_format_start_offset(input); core_buffered_reader_init(&fasta->reader, file, offset); fasta->buffer = NULL; fasta->next_header = NULL; fasta->has_header = 0; fasta->has_first = 0; }
/*
 * Return the offset reported by the FASTA driver's buffered reader.
 */
uint64_t core_fasta_input_get_offset(struct biosal_input_format *self)
{
    struct core_fasta_input *driver =
        (struct core_fasta_input *)biosal_input_format_implementation(self);

    return core_buffered_reader_get_offset(&driver->reader);
}
/*
 * Tear down the FASTQ driver state attached to an input format.
 *
 * The buffered reader is always destroyed. The line buffer is released
 * only if it was allocated, and the pointer is then reset to NULL.
 */
void core_fastq_input_destroy(struct biosal_input_format *input)
{
    struct core_fastq_input *driver =
        (struct core_fastq_input *)biosal_input_format_implementation(input);

    core_buffered_reader_destroy(&driver->reader);

    /* Nothing more to do when the buffer was never allocated. */
    if (driver->buffer == NULL) {
        return;
    }

    core_memory_free(driver->buffer, MEMORY_FASTQ);
    driver->buffer = NULL;
}
/*
 * Decide whether a line is a FASTQ identifier line.
 *
 * @param self input format whose FASTQ driver state is consulted
 * @param line NUL-terminated line to classify (must not be NULL)
 * @return 0 when the line is empty or does not start with '@';
 *         1 when it starts with '@' and the reader reports that no bytes
 *         precede the current position; otherwise the result of
 *         core_fastq_input_is_identifier_mock().
 */
int core_fastq_input_is_identifier(struct biosal_input_format *self, const char *line)
{
    /*
     * Number of preceding bytes requested from the reader. The buffer
     * below is sized from the same constant so that the request can never
     * exceed the storage handed to the reader.
     */
    enum { PREVIOUS_BYTE_COUNT = 3 };

    char buffer[PREVIOUS_BYTE_COUNT];
    int read;
    struct core_fastq_input *fastq;

    fastq = (struct core_fastq_input *)biosal_input_format_implementation(self);

    /*
     * An empty line has line[0] == '\0', so this single test rejects both
     * empty lines and lines that do not start with '@'.
     */
    if (line[0] != '@') {
        return 0;
    }

    /*
     * Now, the line is either a quality string
     * or an identifier string since it starts with a @.
     */
    read = core_buffered_reader_get_previous_bytes(&fastq->reader, buffer,
                    PREVIOUS_BYTE_COUNT);

    /*
     * This is an identifier if nothing is available before.
     */
    if (read == 0) {
        return 1;
    }

    /*
     * Either the operation is not supported by the driver (read < 0) or
     * some bytes precede the line; both cases fall back on the mock
     * implementation.
     */
    return core_fastq_input_is_identifier_mock(self, line);
}
void core_fastq_input_init(struct biosal_input_format *input) { char *file; struct core_fastq_input *fastq; uint64_t offset; file = biosal_input_format_file(input); offset = biosal_input_format_start_offset(input); #ifdef BIOSAL_FASTQ_INPUT_DEBUG printf("DEBUG core_fastq_input_init %s\n", file); #endif fastq = (struct core_fastq_input *)biosal_input_format_implementation(input); core_buffered_reader_init(&fastq->reader, file, offset); fastq->buffer = NULL; fastq->has_first = 0; }
uint64_t core_fasta_input_get_sequence(struct biosal_input_format *input, char *sequence) { struct core_fasta_input *fasta; /* TODO use a dynamic buffer to accept long reads... */ int maximum_sequence_length = BIOSAL_INPUT_MAXIMUM_SEQUENCE_LENGTH; int value; int lines; int total; int position_in_sequence; int is_header; int block_length; fasta = (struct core_fasta_input *)biosal_input_format_implementation(input); if (fasta->buffer == NULL) { fasta->buffer = core_memory_allocate(maximum_sequence_length + 1, MEMORY_FASTA); fasta->next_header= core_memory_allocate(maximum_sequence_length + 1, MEMORY_FASTA); fasta->buffer[0] = '\0'; fasta->next_header[0] = '\0'; } value = 0; total = 0; lines = 0; /* * Read name */ if (fasta->has_header) { strcpy(fasta->buffer, fasta->next_header); value = strlen(fasta->buffer); fasta->has_header = 0; } else { value = core_buffered_reader_read_line(&fasta->reader, fasta->buffer, maximum_sequence_length); /* Make sure that this is an identifier. */ if (!fasta->has_first) { while (!core_fasta_input_check_header(input, fasta->buffer)) { value = core_buffered_reader_read_line(&fasta->reader, fasta->buffer, maximum_sequence_length); } fasta->has_first = 1; } } /* * Add new line. */ if (value) { ++lines; } total += value; /* * Read sequence. * * Discard any new line symbol too. */ position_in_sequence = 0; while (1) { value = core_buffered_reader_read_line(&fasta->reader, fasta->buffer, maximum_sequence_length); if (value == 0) { break; } is_header = 0; if (strlen(fasta->buffer) > 0 && fasta->buffer[0] == '>') { is_header = 1; } if (is_header) { sequence[position_in_sequence] = '\0'; strcpy(fasta->next_header, fasta->buffer); fasta->has_header = 1; break; } /* * Otherwise, add the sequence. */ if (value) { ++lines; } block_length = strlen(fasta->buffer); /* * Remove the new line. 
*/ if (fasta->buffer[block_length - 1] == '\n') { --block_length; } core_memory_copy(sequence + position_in_sequence, fasta->buffer, block_length); position_in_sequence += block_length; } return total; }
uint64_t core_fastq_input_get_sequence(struct biosal_input_format *input, char *sequence) { struct core_fastq_input *fastq; /* * Input sequence has at least BIOSAL_INPUT_MAXIMUM_SEQUENCE_LENGTH * which is currently 512k */ /* TODO use a dynamic buffer to accept long reads... */ int maximum_sequence_length = BIOSAL_INPUT_MAXIMUM_SEQUENCE_LENGTH; int value; int length; fastq = (struct core_fastq_input *)biosal_input_format_implementation(input); if (fastq->buffer == NULL) { fastq->buffer = (char *)core_memory_allocate(maximum_sequence_length + 1, MEMORY_FASTQ); } value = 0; /* * Read name */ value += core_buffered_reader_read_line(&fastq->reader, fastq->buffer, maximum_sequence_length); #ifdef FIND_IDENTIFIER /* * If we do not have the first entry yet, * make sure that the line is a good line. */ if (!fastq->has_first) { while (!core_fastq_input_is_identifier(input, fastq->buffer)) { value += core_buffered_reader_read_line(&fastq->reader, fastq->buffer, maximum_sequence_length); } fastq->has_first = 1; } #endif /* * Read DNA sequence */ length = core_buffered_reader_read_line(&fastq->reader, sequence, maximum_sequence_length); #ifdef BIOSAL_FASTQ_INPUT_DEBUG_READ_LINE printf("FASTQ ReadLine <<%s>>\n", sequence); #endif if (sequence[length - 1] == '\n') { /* * Remove new line symbol. */ sequence[length - 1] = '\0'; } value += length; #ifdef BIOSAL_FASTQ_INPUT_DEBUG2 printf("DEBUG core_fastq_input_get_sequence %s\n", buffer); #endif /* * Read the + symbol */ value += core_buffered_reader_read_line(&fastq->reader, fastq->buffer, maximum_sequence_length); /* * Read quality string. */ value += core_buffered_reader_read_line(&fastq->reader, fastq->buffer, maximum_sequence_length); return value; }