Ejemplo n.º 1
0
/*
 * Append one entry to the sequence library 'sl'.
 *
 *   sl           library to extend; its names, seqs, rev_compl_seqs and
 *                weight arrays are grown by INIT_LIB_SIZE slots when full.
 *   seq          sequence text; a private copy is stored in sl->seqs[i].
 *   seq_id_plus  identifier line of the entry; a private copy is stored in
 *                sl->names[i] and passed to parse_seq_name(), whose result
 *                becomes the entry's weight.
 *   errfrag      text naming where the entry came from; used only in the
 *                warning message.
 *
 * Returns 0 on success.  Returns 1 after appending a message to sl->error
 * when the weight is negative or the sequence is empty.  A non-NUL result
 * from upcase_and_check_char() is reported through sl->warning only and
 * does not change the return value.
 *
 * NOTE(review): sl->seq_num is incremented before validation, so slot i
 * stays counted on both error returns; on the weight error sl->seqs[i] and
 * sl->rev_compl_seqs[i] are left unassigned.  Confirm callers discard the
 * library after an error.
 */
static int
add_seq_to_seq_lib(seq_lib *sl,
		   char *seq,
		   char *seq_id_plus,
		   const char *errfrag) {

  int  i = sl->seq_num;
  int  ss = sl->storage_size;
  char offender;
  char buf[2];

  /* We need to allocate more storage */
  if (i >= ss) {
    ss += INIT_LIB_SIZE;
    sl->storage_size = ss;
    sl->names = (char**) p3sl_safe_realloc(sl->names, ss*sizeof(*sl->names));
    sl->seqs  = (char**) p3sl_safe_realloc(sl->seqs , ss*sizeof(*sl->seqs));
    /* Grow rev_compl_seqs from its OWN buffer.  Reallocating sl->seqs a
       second time would make both members share one array (or leave
       sl->seqs dangling if the block moved).  Assumes rev_compl_seqs
       starts out NULL or heap-allocated like the other arrays. */
    sl->rev_compl_seqs = p3sl_safe_realloc(sl->rev_compl_seqs,
					   ss*sizeof(*sl->rev_compl_seqs));
    sl->weight= (double*) p3sl_safe_realloc(sl->weight,
				   ss*sizeof(*sl->weight));
  }
  sl->seq_num = i + 1;

  /* Store the name first: parse_seq_name() works on the stored copy. */
  sl->names[i] = (char*) p3sl_safe_malloc(strlen(seq_id_plus) + 1);
  strcpy(sl->names[i], seq_id_plus);
  sl->weight[i] = parse_seq_name(sl->names[i]);
  if(sl->weight[i] < 0) {
    p3sl_append_new_chunk(&sl->error, "Illegal weight");
    return 1;
  }

  /* Both per-entry pointers refer to the same copy of the sequence. */
  sl->rev_compl_seqs[i] = sl->seqs[i] = (char*) p3sl_safe_malloc(strlen(seq) + 1);
  strcpy(sl->seqs[i], seq);
  if(strlen(sl->seqs[i]) == 0) {
    p3sl_append_new_chunk(&sl->error, "Empty sequence in ");
    return 1;
  }

  /* A non-NUL return value is the character that failed the check. */
  offender = upcase_and_check_char(sl->seqs[i]);
  if ('\0' != offender) {
    buf[0] = offender;
    buf[1] = '\0';
    p3sl_append(&sl->warning, "Unrecognized character (");
    p3sl_append(&sl->warning, buf);

    p3sl_append(&sl->warning, ") in ");
    p3sl_append(&sl->warning, errfrag);
    p3sl_append(&sl->warning, ", entry ");
    p3sl_append(&sl->warning, seq_id_plus);
  }

  return 0;
}
Ejemplo n.º 2
0
/******************************************************************************
 * Read from the current position in the file to the first symbol after the
 * start of the next sequence. Set the value of the current sequence.
 *
 * The start of a sequence is a '>' read while the reader's at_start_of_line
 * flag is TRUE. Once found, the header is read; if genomic-coordinate
 * parsing is enabled and succeeds, that supplies the sequence identity,
 * otherwise the name is parsed from the header.
 *
 * Returns the result of reading the header (TRUE on success) when a sequence
 * start was found, FALSE if EOF was reached before the next sequence was
 * found. Dies on a read error or if no sequence name can be found in the
 * header.
 *****************************************************************************/
BOOLEAN_T go_to_next_sequence_in_seq_reader_from_fasta(DATA_BLOCK_READER_T *reader) {
  BOOLEAN_T result = FALSE;
  SEQ_READER_FROM_FASTA_T *fasta_reader 
    = (SEQ_READER_FROM_FASTA_T *) get_data_block_reader_data(reader);
  fasta_reader->current_position = 0;
  int c = 0;
  while((c = fgetc(fasta_reader->fasta_file)) != EOF) {
    if (fasta_reader->at_start_of_line == TRUE && c == '>') {
      break;
    }
    else if (c == '\n') {
      fasta_reader->at_start_of_line = TRUE;
    }
    else {
      fasta_reader->at_start_of_line = FALSE;
    }
  }
  // Capture errno immediately: it is only meaningful if the last fgetc()
  // failed, and later library calls may overwrite it.
  int read_errno = errno;
  // At this point c is '>' or EOF
  if (c == '>') {
    BOOLEAN_T found_genomic_coordinates = FALSE;
    result = read_seq_header_from_seq_reader_from_fasta(fasta_reader);
    if (result == TRUE && fasta_reader->parse_genomic_coord == TRUE) {
      // Look for genomic coordinates in header
      found_genomic_coordinates = parse_genomic_coordinates(fasta_reader);
    }
    if (found_genomic_coordinates == FALSE) {
      //  Look for whitespace in header
      //  The sequence name is the string before the white space.
      BOOLEAN_T found_name = FALSE;
      found_name = parse_seq_name(fasta_reader);
      if (found_name == FALSE) {
        die(
            "Unable to find sequence name in header %s.\n",
            fasta_reader->sequence_header
        );
      }
    }
  }
  else if (ferror(fasta_reader->fasta_file)) {
    // ferror() only returns a nonzero "error occurred" indicator, not an
    // error number, so the message must come from the saved errno.
    die(
      "Error reading file:%s.\nError message: %s\n", 
      fasta_reader->filename,
      strerror(read_errno)
    );
  }
  // Otherwise EOF was reached before the start of a sequence:
  // result keeps its initial value of FALSE.
  return result;
}