/*
 * Append one entry (name + sequence) to the sequence library `sl`.
 *
 *   sl           library to extend; its parallel arrays (names, seqs,
 *                rev_compl_seqs, weight) are grown by INIT_LIB_SIZE slots
 *                whenever seq_num has reached storage_size.
 *   seq          NUL-terminated sequence text; copied into the library.
 *   seq_id_plus  NUL-terminated entry name; copied into the library and
 *                passed to parse_seq_name() to obtain the entry's weight.
 *   errfrag      text inserted into the warning message to identify the
 *                source of the entry.
 *
 * Returns 0 on success (possibly after appending to sl->warning) and 1 on
 * error, in which case a message has been appended to sl->error.
 *
 * Note that sl->seq_num is incremented before validation, so on an error
 * return the rejected slot is still counted and its allocations are kept.
 */
static int
add_seq_to_seq_lib(seq_lib *sl,
                   char *seq,
                   char *seq_id_plus,
                   const char *errfrag)
{
  int i = sl->seq_num;
  int ss = sl->storage_size;
  char offender;
  char buf[2];

  /* We need to allocate more storage */
  if (i >= ss) {
    ss += INIT_LIB_SIZE;
    sl->storage_size = ss;
    sl->names = (char**)
      p3sl_safe_realloc(sl->names, ss * sizeof(*sl->names));
    sl->seqs = (char**)
      p3sl_safe_realloc(sl->seqs, ss * sizeof(*sl->seqs));
    /* Each array must be grown from its own pointer.  Reallocating
       sl->seqs a second time here would invalidate or alias sl->seqs
       and leak the previous rev_compl_seqs block. */
    sl->rev_compl_seqs = (char**)
      p3sl_safe_realloc(sl->rev_compl_seqs, ss * sizeof(*sl->rev_compl_seqs));
    sl->weight = (double*)
      p3sl_safe_realloc(sl->weight, ss * sizeof(*sl->weight));
  }
  sl->seq_num = i + 1;

  /* Store a private copy of the name and derive the weight from it. */
  sl->names[i] = (char*) p3sl_safe_malloc(strlen(seq_id_plus) + 1);
  strcpy(sl->names[i], seq_id_plus);
  sl->weight[i] = parse_seq_name(sl->names[i]);
  if (sl->weight[i] < 0) {
    p3sl_append_new_chunk(&sl->error, "Illegal weight");
    return 1;
  }

  /* Store a private copy of the sequence.  The reverse-complement slot
     initially points at the very same buffer as the forward slot. */
  sl->rev_compl_seqs[i] = sl->seqs[i]
    = (char*) p3sl_safe_malloc(strlen(seq) + 1);
  strcpy(sl->seqs[i], seq);

  if (sl->seqs[i][0] == '\0') {
    p3sl_append_new_chunk(&sl->error, "Empty sequence in ");
    return 1;
  }

  /* A non-NUL return value is the offending character; it is reported
     as a warning only and does not make the call fail. */
  offender = upcase_and_check_char(sl->seqs[i]);
  if ('\0' != offender) {
    buf[0] = offender;
    buf[1] = '\0';
    p3sl_append(&sl->warning, "Unrecognized character (");
    p3sl_append(&sl->warning, buf);
    p3sl_append(&sl->warning, ") in ");
    p3sl_append(&sl->warning, errfrag);
    p3sl_append(&sl->warning, ", entry ");
    p3sl_append(&sl->warning, seq_id_plus);
  }
  return 0;
}
/******************************************************************************
 * Skip forward in the FASTA file until a '>' is seen at the start of a line,
 * then read and parse the header of the sequence that begins there.
 *
 * The reader's current_position is reset to 0 before scanning.
 *
 * Returns the value reported by the header reader when a '>' was found, and
 * FALSE when the end of the file was reached first.  Calls die() if the
 * stream reports a read error or if no sequence name can be extracted from
 * the header.
 *****************************************************************************/
BOOLEAN_T go_to_next_sequence_in_seq_reader_from_fasta(DATA_BLOCK_READER_T *reader) {

  SEQ_READER_FROM_FASTA_T *fasta_reader
    = (SEQ_READER_FROM_FASTA_T *) get_data_block_reader_data(reader);
  fasta_reader->current_position = 0;

  // Consume characters until a header marker or the end of input.
  int ch;
  for (;;) {
    ch = fgetc(fasta_reader->fasta_file);
    if (ch == EOF) {
      break;
    }
    if (fasta_reader->at_start_of_line == TRUE && ch == '>') {
      // A '>' only starts a header when it is the first character of a line.
      break;
    }
    fasta_reader->at_start_of_line = (ch == '\n') ? TRUE : FALSE;
  }

  // Here ch is either '>' or EOF.
  if (ch != '>') {
    if (ferror(fasta_reader->fasta_file)) {
      // NOTE(review): ferror() yields an error indicator rather than an errno
      // value, so the text produced by strerror() here may not describe the
      // actual failure -- confirm whether errno was intended.
      die(
        "Error reading file:%s.\nError message: %s\n",
        fasta_reader->filename,
        strerror(ferror(fasta_reader->fasta_file))
      );
    }
    // Reached EOF before reaching the start of a sequence.
    return FALSE;
  }

  BOOLEAN_T header_ok = read_seq_header_from_seq_reader_from_fasta(fasta_reader);

  // Genomic coordinates are only looked for when the header was read
  // successfully and the reader has been asked to parse them.
  BOOLEAN_T have_coordinates = FALSE;
  if (header_ok == TRUE && fasta_reader->parse_genomic_coord == TRUE) {
    have_coordinates = parse_genomic_coordinates(fasta_reader);
  }

  // Otherwise fall back to taking the name from the text before the first
  // white space in the header.
  if (have_coordinates == FALSE) {
    BOOLEAN_T have_name = parse_seq_name(fasta_reader);
    if (have_name == FALSE) {
      die(
        "Unable to find sequence name in header %s.\n",
        fasta_reader->sequence_header
      );
    }
  }

  return header_ok;
}