Beispiel #1
0
/**
 * Convert a sequence file from one format to another.
 *
 * Arguments: <in_path> <out_path> [line_wrap]
 *   in_path    path of the file to read
 *   out_path   path of the file to write; its format (and whether it is
 *              zipped) is guessed from the path
 *   line_wrap  optional unsigned decimal passed to seq_file_open_write()
 *              (defaults to 0)
 *
 * Prints a short summary (entries read, bases read, bytes written) to stdout.
 * Returns EXIT_SUCCESS on success; exits with EXIT_FAILURE on any error.
 */
int main(int argc, char** argv)
{
  if(argc < 3 || argc > 4)
  {
    print_usage();
    // print_usage() is defined elsewhere and presumably does not return;
    // bail out here regardless so argv[1]/argv[2] are never read when absent
    return EXIT_FAILURE;
  }

  char* in_path = argv[1];
  char* out_path = argv[2];

  unsigned long line_wrap = 0;

  if(argc == 4)
  {
    char *line_wrap_str = argv[3];
    char *endptr;
    line_wrap = strtoul(line_wrap_str, &endptr, 10);

    // Reject an empty argument (nothing consumed) and trailing garbage
    if(endptr == line_wrap_str || *endptr != '\0')
    {
      print_usage();
      return EXIT_FAILURE;
    }
  }

  SeqFileType out_file_type = SEQ_UNKNOWN;
  char out_zipped = 0;

  seq_guess_filetype_from_path(out_path, &out_file_type, &out_zipped);

  if(out_file_type == SEQ_UNKNOWN)
  {
    fprintf(stderr, "%s:%i: Sorry, I cannot identify the output file's format "
                    "from its path [file: %s]\n", __FILE__, __LINE__, out_path);
    exit(EXIT_FAILURE);
  }

  // Open (and validate) the input first, so that a bad input path does not
  // leave behind a freshly opened output file
  SeqFile* in_file = seq_file_open(in_path);

  if(in_file == NULL)
  {
    fprintf(stderr, "%s:%i: Couldn't open input file: %s\n",
            __FILE__, __LINE__, in_path);
    exit(EXIT_FAILURE);
  }

  SeqFile* out_file = seq_file_open_write(out_path, out_file_type,
                                          out_zipped, line_wrap);

  if(out_file == NULL)
  {
    fprintf(stderr, "%s:%i: Couldn't open output file: %s\n",
            __FILE__, __LINE__, out_path);
    seq_file_close(in_file);
    exit(EXIT_FAILURE);
  }

  printf(" In : %s [%s]\n", in_path, seq_file_get_type_str(in_file));
  printf(" Out: %s [%s]\n", out_path, seq_file_get_type_str(out_file));

  // Start converting
  size_t bytes_written = 0;
  // One-character string used to pass single bases / quality scores around
  char c[2] = ".";

  if(out_file_type == SEQ_PLAIN)
  {
    // Example reading in an entire entry at a time using seq_read_all_bases()
    StrBuf *bases = strbuf_new();

    while(seq_next_read(in_file))
    {
      while(seq_read_all_bases(in_file, bases))
      {
        bytes_written += seq_file_write_seq(out_file, bases->buff);
      }
    }

    strbuf_free(bases);
  }
  else
  {
    // Example reading in a char at a time using seq_read_base()
    while(seq_next_read(in_file))
    {
      const char* read_name = seq_get_read_name(in_file);
      bytes_written += seq_file_write_name(out_file, read_name);

      unsigned long seq_length = 0;

      while(seq_read_base(in_file, c))
      {
        seq_length++;

        // Check this write's own return value: testing the running total
        // would never detect a failure once anything has been written.
        // A return of 0 is treated as a write failure.
        size_t base_bytes = seq_file_write_seq(out_file, c);

        if(base_bytes == 0)
        {
          fprintf(stderr, "%s:%i: Couldn't write base to file "
                          "[file: %s; line: %lu]\n",
                  __FILE__, __LINE__,
                  seq_get_path(out_file), seq_curr_line_number(in_file));

          exit(EXIT_FAILURE);
        }

        bytes_written += base_bytes;
      }

      if(seq_has_quality_scores(out_file))
      {
        size_t bytes_written_before_qual = bytes_written;

        while(seq_read_qual(in_file, c))
        {
          size_t qual_bytes = seq_file_write_qual(out_file, c);

          if(qual_bytes == 0)
          {
            fprintf(stderr, "%s:%i: Couldn't write quality score to file "
                            "[file: %s; line: %lu]\n",
                    __FILE__, __LINE__, seq_get_path(out_file),
                    seq_curr_line_number(in_file));

            exit(EXIT_FAILURE);
          }

          bytes_written += qual_bytes;
        }

        if(bytes_written == bytes_written_before_qual)
        {
          // No quality scores were read - fill in one '?' per base
          unsigned long i;
          *c = '?';
          for(i = 0; i < seq_length; i++)
            bytes_written += seq_file_write_qual(out_file, c);
        }
      }
    }
  }

  // Collect the statistics before the input handle is closed
  unsigned long seq_total_bases_read = seq_total_bases_passed(in_file);
  unsigned long total_entries = seq_get_read_index(in_file);

  seq_file_close(in_file);
  // Closing the output's return value is added to the byte count
  bytes_written += seq_file_close(out_file);

  printf("%lu entries read\n", total_entries);
  printf("%lu bases read\n", seq_total_bases_read);
  printf("%zu bytes written\n", bytes_written);
  printf("Done. \n");

  return EXIT_SUCCESS;
}
// Read sequences in pairs and hand each pair to `align`.
//
// If path2 is NULL, consecutive pairs of entries are read from path1;
// otherwise one entry is read from each file per pair.
//
// `align` is called as align(seq1, seq2, title1, title2). A title is only set
// when the corresponding file is not of type SEQ_PLAIN, and is NULL until
// then.
//
// Errors (a file that cannot be opened, an unpaired trailing sequence) and an
// empty-input warning are reported on stderr; nothing is returned.
void align_from_file(const char *path1, const char *path2,
                     void (align)(StrBuf*, StrBuf*, const char*, const char*))
{
  SeqFile *sf1 = seq_file_open(path1);
  SeqFile *sf2;

  if(sf1 == NULL)
  {
    fprintf(stderr, "Alignment Error: couldn't open file %s\n", path1);
    fflush(stderr);
    return;
  }

  if(path2 != NULL)
  {
    sf2 = seq_file_open(path2);

    if(sf2 == NULL)
    {
      // Report the file that actually failed, and release the first one
      fprintf(stderr, "Alignment Error: couldn't open file %s\n", path2);
      fflush(stderr);
      seq_file_close(sf1);
      return;
    }
  }
  else
  {
    // Single-file mode: both entries of a pair come from the same handle
    sf2 = sf1;
  }

  StrBuf *entry1_title = strbuf_new();
  StrBuf *entry2_title = strbuf_new();
  StrBuf *entry1_seq = strbuf_new();
  StrBuf *entry2_seq = strbuf_new();

  char *title1 = NULL, *title2 = NULL;

  // Loop while we can read a sequence from the first file
  while(seq_next_read(sf1))
  {
    seq_read_all_bases(sf1, entry1_seq);

    if(seq_file_get_type(sf1) != SEQ_PLAIN)
    {
      strbuf_set(entry1_title, seq_get_read_name(sf1));
      title1 = entry1_title->buff;
    }

    // Every first entry needs a partner
    if(!seq_next_read(sf2))
    {
      fprintf(stderr, "Alignment Error: Odd number of sequences - "
                      "I read in pairs!\n");
      fflush(stderr);
      break;
    }

    seq_read_all_bases(sf2, entry2_seq);

    if(seq_file_get_type(sf2) != SEQ_PLAIN)
    {
      strbuf_set(entry2_title, seq_get_read_name(sf2));
      title2 = entry2_title->buff;
    }

    align(entry1_seq, entry2_seq, title1, title2);
  }

  // warn if no bases read
  if(seq_total_bases_passed(sf1) == 0)
  {
    fprintf(stderr, "Alignment Warning: empty input\n");
    fflush(stderr);
  }

  // Close files (sf2 is a distinct handle only when path2 was given)
  seq_file_close(sf1);

  if(path2 != NULL)
    seq_file_close(sf2);

  // Free memory
  strbuf_free(entry1_title);
  strbuf_free(entry2_title);
  strbuf_free(entry1_seq);
  strbuf_free(entry2_seq);
}