Example #1
0
/*
 * Work out in which orientation the k-mer *k matches the k-mer stored in
 * node e.
 *
 *   k         - k-mer to test
 *   e         - node whose stored k-mer (e->kmer) is compared against
 *   kmer_size - k-mer length passed to the reverse-complement and
 *               to-string helpers
 *
 * Returns forward when *k equals e->kmer, reverse when the reverse
 * complement of *k equals e->kmer. If neither matches, this is treated as a
 * programming error: both k-mers are printed and the process exits with
 * status 1, so the function never returns in that case.
 */
Orientation db_node_get_orientation(BinaryKmer * k, Element * e, short kmer_size)
{
	if (binary_kmer_comparison_operator(e->kmer, *k) == true) {
		return forward;
	}

	BinaryKmer tmp_kmer;

	if (binary_kmer_comparison_operator(e->kmer,
					    *(binary_kmer_reverse_complement
					      (k, kmer_size,
					       &tmp_kmer))) == true) {
		return reverse;
	}

	printf("programming error - you have called  db_node_get_orientation with a kmer that is neither equal to the kmer in this node, nor its rev comp\n");
	/*
	 * The strings are printed with %s, so each buffer needs room for
	 * kmer_size characters plus the terminating NUL (presumably written
	 * by binary_kmer_to_seq).
	 */
	char tmpseq1[kmer_size + 1];
	char tmpseq2[kmer_size + 1];
	printf("Arg 1 Kmer is %s and Arg 2 node kmer is %s\n",
	       binary_kmer_to_seq(k, kmer_size, tmpseq1),
	       binary_kmer_to_seq(&(e->kmer), kmer_size, tmpseq2));
	exit(1);

}
Example #2
0
/*
 * Step from current_node along the edge labelled `edge` and look up the node
 * that is reached.
 *
 *   current_node        - node to step from
 *   current_orientation - orientation in which current_node is being read
 *   next_orientation    - out: orientation of the returned node; written
 *                         only when a node is found
 *   edge                - nucleotide appended at the right end of the k-mer
 *   reverse_edge        - out: last nucleotide of the k-mer when walking in
 *                         reverse, otherwise last nucleotide of its reverse
 *                         complement
 *   db_graph            - graph (hash table) to search
 *
 * Returns the node found in the hash table, or NULL (after emitting a
 * warning) when the key is not present.
 */
dBNode * db_graph_get_next_node(dBNode * current_node, Orientation current_orientation,
                                Orientation * next_orientation,
                                Nucleotide edge, Nucleotide * reverse_edge,dBGraph * db_graph){

    BinaryKmer walk_kmer;
    BinaryKmer scratch_kmer;
    BinaryKmer* revcomp_kmer;
    dBNode * target_node;

    /* Work on a private copy so the node's stored k-mer is never modified. */
    binary_kmer_assignment_operator(walk_kmer, current_node->kmer);

    /* revcomp_kmer is the helper's return value; scratch_kmer is its output buffer. */
    revcomp_kmer = binary_kmer_reverse_complement(&walk_kmer, db_graph->kmer_size, &scratch_kmer);

    if (current_orientation != reverse) {
        *reverse_edge = binary_kmer_get_last_nucleotide(revcomp_kmer);
    } else {
        *reverse_edge = binary_kmer_get_last_nucleotide(&walk_kmer);
        /* Walking in reverse: continue from the reverse complement. */
        binary_kmer_assignment_operator(walk_kmer, *revcomp_kmer);
    }

    /* Extend the k-mer by one base along the requested edge. */
    binary_kmer_left_shift_one_base_and_insert_new_base_at_right_end(&walk_kmer, edge, db_graph->kmer_size);

    /* Look the extended k-mer up; scratch_kmer is reused as the key buffer. */
    target_node = hash_table_find(element_get_key(&walk_kmer, db_graph->kmer_size, &scratch_kmer), db_graph);

    if (target_node == NULL) {
        /* Debug aid: report the key that could not be found. */
        char tmpzamseq[db_graph->kmer_size+1];
        warn("Cannot find %s so get a NULL node\n", binary_kmer_to_seq(&scratch_kmer, db_graph->kmer_size, tmpzamseq));
        return NULL;
    }

    *next_orientation = db_node_get_orientation(&walk_kmer, target_node, db_graph->kmer_size);

    return target_node;

}
Example #3
0
/*
 * Partition the graph into subgraphs, record one seed node per subgraph, and
 * write a consensus contig for every subgraph that is large enough.
 *
 *   graph                      - graph to analyse; node flags are reset
 *                                between the two phases
 *   consensus_contigs_filename - output file for the contigs; an analysis
 *                                table is written to "<name>.analysis"
 *   min_subgraph_kmers         - subgraphs with fewer nodes than this are
 *                                skipped (only logged)
 *
 * Phase 1 traverses every node in the hash table and grows a subgraph from
 * each node that is neither PRUNED nor VISITED. Phase 2 walks forward and
 * backward from each stored seed and writes the joined path as FASTA.
 *
 * Any failure (allocation, file open, NULL node, MAX_SEEDS reached) ends the
 * process via exit(-1).
 *
 * NOTE(review): the three Path objects allocated with path_new() are not
 * released here; no matching destructor is visible from this function -
 * confirm and free them if one exists.
 */
void metacortex_find_subgraphs(dBGraph* graph, char* consensus_contigs_filename, int min_subgraph_kmers)
{
    SubGraphInfo* sub_graphs;
    FILE* fp;
    Path *path_fwd = path_new(MAX_EXPLORE_PATH_LENGTH, graph->kmer_size);
    Path *path_rev = path_new(MAX_EXPLORE_PATH_LENGTH, graph->kmer_size);
    Path *final_path = path_new(MAX_EXPLORE_PATH_LENGTH, graph->kmer_size);
    char seq[256];
    /* +10 leaves room for ".analysis" (9 characters) and the terminating NUL. */
    char analysis_filename[strlen(consensus_contigs_filename) + 10];
    long int total_nodes = 0;
    int n_seeds = 0;
    int i;
    
    snprintf(analysis_filename, sizeof(analysis_filename), "%s.analysis", consensus_contigs_filename);
    log_and_screen_printf("Running metacortex subgraph analysis...\n");
    log_and_screen_printf("          Contig file: %s\n", consensus_contigs_filename);
    log_and_screen_printf("        Analysis file: %s\n", analysis_filename);
    log_and_screen_printf("Minimum subgraph size: %i\n", min_subgraph_kmers);
    
    /* Initialise temporary path array buffers */
    path_array_initialise_buffers(graph->kmer_size);
    
    /* Create a list of subgraphs. The size expression has type size_t, so it
       is cast to match the %d conversion. */
    log_and_screen_printf("Allocating %d Mb to store subgraph information (max %d seeds)...\n", (int)(((MAX_SEEDS * sizeof(SubGraphInfo)) / 1024) / 1024), MAX_SEEDS);
    sub_graphs = calloc(MAX_SEEDS, sizeof(SubGraphInfo));
    if (!sub_graphs) {
        log_and_screen_printf("ERROR: Can't get memory for subgraphs\n");
        exit(-1);
    }

    /* Open the analysis file */
    fp = fopen(analysis_filename, "w");
    if (!fp) {
        log_and_screen_printf("ERROR: Can't open analysis file.\n");
        exit(-1);
    }
        
    /* For each node, if it's not pruned or visited, try and grow a graph.
       This is a nested function (GCC extension) so that it can update
       sub_graphs, n_seeds, total_nodes, seq and fp of the enclosing call. */
    void explore_node(dBNode * node) {
        if (node == NULL) {
            log_and_screen_printf("Error: NULL node passed to explore_node.\n");
            exit(-1);
        }
        
        if (db_node_check_for_any_flag(node, PRUNED | VISITED) == false) {
            int nodes_in_graph;
            
            /* Grow graph from this node, returning the 'best' (highest coverage) node to store as seed point */
            nodes_in_graph = grow_graph_from_node(node, &(sub_graphs[n_seeds].seed_node), graph);
            total_nodes += nodes_in_graph;
            
            if (sub_graphs[n_seeds].seed_node == NULL) {
                printf("ERROR: Seed node is NULL, nodes in graph is %d\n", nodes_in_graph);
            } else {
                /* Write data to analysis file */
                binary_kmer_to_seq(&(node->kmer), graph->kmer_size, seq);            
                fprintf(fp, "%i\t%i\t%ld\t%s\t", n_seeds, nodes_in_graph, total_nodes, seq);
                binary_kmer_to_seq(&(sub_graphs[n_seeds].seed_node->kmer), graph->kmer_size, seq);
                fprintf(fp, "%s\n", seq);

                /* Store nodes in this subgraph */
                sub_graphs[n_seeds].graph_size = nodes_in_graph;
                n_seeds++;
                
                /* Check we've not run out of seed storage - in future, this should dynamically allocate */
                if (n_seeds == MAX_SEEDS) {
                    log_and_screen_printf("Error: MAX_SEEDS exceeded. Quitting.\n");
                    exit(-1);
                }
            }
        }
    }
    
    /* Traverse each node... */
    log_and_screen_printf("Finding subgraphs...\n");
    hash_table_traverse(&explore_node, graph);
    log_and_screen_printf("Finished. Total: %ld\n", total_nodes);
    fclose(fp);    
    
    /* Open consensus contigs file */
    fp = fopen(consensus_contigs_filename, "w");
    if (!fp) {
        log_and_screen_printf("ERROR: Can't open contig file.\n");
        exit(-1);
    }
    
    /* Now go through all the seed points and generate the consensus contigs by walking forward and backward from the seed */
    db_graph_reset_flags(graph);    
    log_and_screen_printf("Outputting contigs...\n");
    log_progress_bar(0);
    long long one_percent = n_seeds/100;
    int percent;
    
    /* Fewer than 100 seeds: update the progress bar on every iteration. */
    if (one_percent < 1) {
        one_percent = 1;
    }
    
    for (i=0; i<n_seeds; i++) {
        if (i % one_percent == 0) {
            /* Multiply in long long so 100 * i cannot overflow int. */
            percent = (int)((100LL * i) / n_seeds);
            log_progress_bar(percent);
        } 
        
        //log_printf("Graph %i\n", i);           
        if (sub_graphs[i].graph_size >= min_subgraph_kmers) {            
            binary_kmer_to_seq(&(sub_graphs[i].seed_node->kmer), graph->kmer_size, seq);
            coverage_walk_get_path(sub_graphs[i].seed_node, forward, NULL, graph, path_fwd);
            coverage_walk_get_path(sub_graphs[i].seed_node, reverse, NULL, graph, path_rev);
            path_reverse(path_fwd, final_path);
            path_append(final_path, path_rev);
            final_path->id = i;
            path_to_fasta(final_path, fp);
            //log_printf("  Seed %s\tFwd path length %i\tRev path length %i\tFinal path length %i\n", seq, path_fwd->length, path_rev->length, final_path->length);
            path_reset(path_fwd);
            perfect_path_get_path(sub_graphs[i].seed_node, forward, &db_node_action_do_nothing, graph, path_fwd);
            //log_printf("\t\tPerfect path fwd length %i\n", path_fwd->length);
            /* The perfect-path call above refilled path_fwd; clear it again so
               the next seed's coverage walk starts from an empty path.
               NOTE(review): redundant but harmless if coverage_walk_get_path
               already resets its output path. */
            path_reset(path_fwd);
            path_reset(path_rev);
            path_reset(final_path);
        } else {
            log_printf("  Number of nodes (%i) too small. Not outputting contig.\n", sub_graphs[i].graph_size);
        }
        
    }
    log_progress_bar(100);
    printf("\n");
    log_and_screen_printf("Finished contig output.\n");    
    fclose(fp);
    
    free(sub_graphs);
}
/*
 * Allocate num_bytes, or report an error and exit with a failure status.
 * At least one byte is always requested, because malloc(0) is allowed to
 * return NULL, which must not be mistaken for running out of memory.
 */
static void* alloc_or_exit(size_t num_bytes)
{
  void *ptr = malloc(num_bytes > 0 ? num_bytes : 1);

  if(ptr == NULL)
  {
    report_error("Out of memory");
    exit(EXIT_FAILURE);
  }

  return ptr;
}

/*
 * Entry point: check (and optionally print) a Cortex binary file.
 *
 * Usage: the last argument is the file to read; any arguments before it must
 * be --print_info, --print_kmers or --parse_kmers (case-insensitive),
 * otherwise print_usage() is called. --parse_kmers also enables print_info.
 *
 * Steps:
 *   1. Read and validate the header: magic word, version, kmer size,
 *      bitfields, colours, and the per-colour information (sample names,
 *      error rates and cleaning info for version >= 6; kmer count and shades
 *      for version >= 7).
 *   2. For versions < 7, derive the expected number of kmers from the file
 *      size.
 *   3. Unless only the header was requested, read every kmer record, check
 *      it (oversized top word, repeated all-'A' kmer, zero coverage) and
 *      optionally print it.
 *
 * Header problems are reported through report_error()/report_warning() and,
 * apart from a bad magic word or an unreadable file, do not stop processing.
 * The process exits with EXIT_FAILURE when the file cannot be opened, a
 * magic word does not match, or memory runs out; otherwise EXIT_SUCCESS.
 */
int main(int argc, char** argv)
{
  char* filepath;

  if(argc < 2)
  {
    print_usage();
  }
  else if(argc > 2)
  {
    print_info = 0;
    print_kmers = 0;
    parse_kmers = 0;

    int i;

    for(i = 1; i < argc-1; i++)
    {
      if(strcasecmp(argv[i], "--print_info") == 0)
      {
        print_info = 1;
      }
      else if(strcasecmp(argv[i], "--print_kmers") == 0)
      {
        print_kmers = 1;
      }
      else if(strcasecmp(argv[i], "--parse_kmers") == 0)
      {
        print_info = 1;
        parse_kmers = 1;
      }
      else
        print_usage();
    }
  }

  filepath = argv[argc-1];

  if(print_info)
    printf("Loading file: %s\n", filepath);

  file_size = get_file_size(filepath);

  // Binary mode: the file holds raw integers, not text
  FILE* fh = fopen(filepath, "rb");

  if(fh == NULL)
  {
    report_error("cannot open file '%s'\n", filepath);
    exit(EXIT_FAILURE);
  }

  if(file_size != -1 && print_info)
  {
    char str[31];
    bytes_to_str(file_size, 0, str);
    printf("File size: %s\n", str);
  }

  buffer = buffer_new(BUFFER_SIZE);

  /*
  // Check sizes
  printf("-- Datatypes --\n");
  printf("int: %i\n", (int)sizeof(int));
  printf("long: %i\n", (int)sizeof(long));
  printf("long long: %i\n", (int)sizeof(long long));
  printf("double: %i\n", (int)sizeof(double));
  printf("long double: %i\n", (int)sizeof(long double));
  */

  if(print_info)
    printf("----\n");

  unsigned int i;

  // Read magic word at the start of header
  char magic_word[7];
  magic_word[6] = '\0';

  my_fread(fh, magic_word, strlen("CORTEX"), "Magic word");

  if(strcmp(magic_word, "CORTEX") != 0)
  {
    fprintf(stderr, "Magic word doesn't match 'CORTEX' (start)\n");
    exit(EXIT_FAILURE);
  }

  // Read version number
  my_fread(fh, &version, sizeof(uint32_t), "binary version");
  my_fread(fh, &kmer_size, sizeof(uint32_t), "kmer size");
  my_fread(fh, &num_of_bitfields, sizeof(uint32_t), "number of bitfields");
  my_fread(fh, &num_of_colours, sizeof(uint32_t), "number of colours");

  if(print_info)
  {
    printf("binary version: %i\n", (int)version);
    printf("kmer size: %i\n", (int)kmer_size);
    printf("bitfields: %i\n", (int)num_of_bitfields);
    printf("colours: %i\n", (int)num_of_colours);
  }

  if(version >= 7)
  {
    my_fread(fh, &expected_num_of_kmers, sizeof(uint64_t), "number of kmers");
    my_fread(fh, &num_of_shades, sizeof(uint32_t), "number of shades");

    if(print_info)
    {
      char tmp[256];
      printf("kmers: %s\n", ulong_to_str(expected_num_of_kmers,tmp));
      printf("shades: %i\n", (int)num_of_shades);
    }
  }

  // Checks

  if(version > 7 || version < 4)
    report_error("Sorry, we only support binary versions 4, 5, 6 & 7\n");

  if(kmer_size % 2 == 0)
    report_error("kmer size is not an odd number\n");

  if(kmer_size < 3)
    report_error("kmer size is less than three\n");

  if(num_of_bitfields * 32 < kmer_size)
    report_error("Not enough bitfields for kmer size\n");

  if((num_of_bitfields-1)*32 >= kmer_size)
    report_error("using more than the minimum number of bitfields\n");

  if(num_of_colours == 0)
    report_error("number of colours is zero\n");

  // x & (x-1) is zero only when x has a single bit set
  if(num_of_shades != 0 && (num_of_shades & (num_of_shades-1)))
    report_error("number of shades is not a power of 2\n");

  //

  // Read array of mean read lengths per colour
  uint32_t *mean_read_lens_per_colour
    = alloc_or_exit(num_of_colours*sizeof(uint32_t));

  my_fread(fh, mean_read_lens_per_colour, sizeof(uint32_t) * num_of_colours,
           "mean read length for each colour");

  // Read array of total seq loaded per colour
  uint64_t *total_seq_loaded_per_colour
    = alloc_or_exit(num_of_colours*sizeof(uint64_t));

  my_fread(fh, total_seq_loaded_per_colour, sizeof(uint64_t) * num_of_colours,
           "total sequance loaded for each colour");

  for(i = 0; i < num_of_colours; i++)
  {
    sum_of_seq_loaded += total_seq_loaded_per_colour[i];
  }

  if(version >= 6)
  {
    sample_names = alloc_or_exit(sizeof(char*) * num_of_colours);

    for(i = 0; i < num_of_colours; i++)
    {
      uint32_t str_length;
      my_fread(fh, &str_length, sizeof(uint32_t), "sample name length");

      if(str_length == 0)
      {
        sample_names[i] = NULL;
      }
      else
      {
        // Widen before adding one: the length comes from the file and
        // UINT32_MAX + 1 would otherwise wrap to zero
        sample_names[i] = alloc_or_exit((size_t)str_length + 1);
        my_fread(fh, sample_names[i], str_length, "sample name");
        sample_names[i][str_length] = '\0';

        // Check sample length is as long as we were told
        size_t sample_name_len = strlen(sample_names[i]);

        if(sample_name_len != str_length)
        {
          // Premature \0 in string
          report_warning("Sample %i name has length %lu but is only %lu chars "
                         "long (premature '\\0')\n",
                         i, (unsigned long)str_length,
                         (unsigned long)sample_name_len);
        }
      }
    }

    seq_error_rates = alloc_or_exit(sizeof(long double) * num_of_colours);
    my_fread(fh, seq_error_rates, sizeof(long double) * num_of_colours,
             "seq error rates");

    cleaning_infos = alloc_or_exit(sizeof(CleaningInfo) * num_of_colours);

    for(i = 0; i < num_of_colours; i++)
    {
      my_fread(fh, &(cleaning_infos[i].tip_cleaning), 1, "tip cleaning");
      my_fread(fh, &(cleaning_infos[i].remove_low_covg_supernodes), 1,
               "remove low covg supernodes");
      my_fread(fh, &(cleaning_infos[i].remove_low_covg_kmers), 1,
               "remove low covg kmers");
      my_fread(fh, &(cleaning_infos[i].cleaned_against_graph), 1,
               "cleaned against graph");

      my_fread(fh, &(cleaning_infos[i].remove_low_covg_supernodes_thresh),
               sizeof(int32_t), "remove low covg supernode threshold");
    
      my_fread(fh, &(cleaning_infos[i].remove_low_covg_kmers_thresh),
               sizeof(int32_t), "remove low covg kmer threshold");

      if(version > 6)
      {
        if(cleaning_infos[i].remove_low_covg_supernodes_thresh < 0)
        {
          report_warning("Binary header gives sample %i a cleaning threshold of "
                         "%i for supernodes (should be >= 0)\n",
                         i, cleaning_infos[i].remove_low_covg_supernodes_thresh);
        }
        if(cleaning_infos[i].remove_low_covg_kmers_thresh < 0)
        {
          report_warning("Binary header gives sample %i a cleaning threshold of "
                         "%i for kmers (should be >= 0)\n",
                         i, cleaning_infos[i].remove_low_covg_kmers_thresh);
        }
      }

      if(!cleaning_infos[i].remove_low_covg_supernodes &&
         cleaning_infos[i].remove_low_covg_supernodes_thresh > 0)
      {
        report_warning("Binary header gives sample %i a cleaning threshold of "
                       "%i for supernodes when no cleaning was performed\n",
                       i, cleaning_infos[i].remove_low_covg_supernodes_thresh);
      }

      if(!cleaning_infos[i].remove_low_covg_kmers &&
         cleaning_infos[i].remove_low_covg_kmers_thresh > 0)
      {
        report_warning("Binary header gives sample %i a cleaning threshold of "
                       "%i for kmers when no cleaning was performed\n",
                       i, cleaning_infos[i].remove_low_covg_kmers_thresh);
      }

      uint32_t name_length;
      my_fread(fh, &name_length, sizeof(uint32_t), "graph name length");

      if(name_length == 0)
      {
        cleaning_infos[i].name_of_graph_clean_against = NULL;
      }
      else
      {
        // Widened for the same reason as the sample name length above
        cleaning_infos[i].name_of_graph_clean_against
          = alloc_or_exit((size_t)name_length + 1);

        my_fread(fh, cleaning_infos[i].name_of_graph_clean_against,
                 name_length, "graph name length");

        cleaning_infos[i].name_of_graph_clean_against[name_length] = '\0';
      
        // Check sample length is as long as we were told
        size_t cleaned_name_len
          = strlen(cleaning_infos[i].name_of_graph_clean_against);

        if(cleaned_name_len != name_length)
        {
          // Premature \0 in string.
          // cleaned_name_len <= name_length, so it fits in unsigned int
          report_warning("Sample [%i] cleaned-against-name has length %u but is "
                         "only %u chars long (premature '\\0')\n",
                         i, (unsigned int)name_length,
                         (unsigned int)cleaned_name_len);
        }
      }
    }
  }

  // Print colour info

  if(print_info)
  {
    for(i = 0; i < num_of_colours; i++)
    {
      printf("-- Colour %i --\n", i);

      if(version >= 6)
      {
        // Version 6 only output.
        // An empty name is stored as NULL, which must not be passed to %s
        printf("  sample name: '%s'\n",
               sample_names[i] == NULL ? "" : sample_names[i]);
      }

      char tmp[32];

      printf("  mean read length: %u\n",
             (unsigned int)mean_read_lens_per_colour[i]);
      printf("  total sequence loaded: %s\n",
             ulong_to_str(total_seq_loaded_per_colour[i], tmp));
      
      if(version >= 6)
      {
        // Version 6 only output
        printf("  sequence error rate: %Lf\n", seq_error_rates[i]);

        printf("  tip clipping: %s\n",
               (cleaning_infos[i].tip_cleaning == 0 ? "no" : "yes"));

        printf("  remove low coverage supernodes: %s [threshold: %i]\n",
               cleaning_infos[i].remove_low_covg_supernodes ? "yes" : "no",
               cleaning_infos[i].remove_low_covg_supernodes_thresh);

        printf("  remove low coverage kmers: %s [threshold: %i]\n",
               cleaning_infos[i].remove_low_covg_kmers ? "yes" : "no",
               cleaning_infos[i].remove_low_covg_kmers_thresh);

        printf("  cleaned against graph: %s [against: '%s']\n",
               cleaning_infos[i].cleaned_against_graph ? "yes" : "no",
               (cleaning_infos[i].name_of_graph_clean_against == NULL
                  ? "" : cleaning_infos[i].name_of_graph_clean_against));
      }
    }

    printf("--\n");
  }

  // Read magic word at the end of header
  my_fread(fh, magic_word, strlen("CORTEX"), "magic word (end)");

  if(strcmp(magic_word, "CORTEX") != 0)
  {
    report_error("magic word doesn't match 'CORTEX' (end): '%s'\n", magic_word);
    exit(EXIT_FAILURE);
  }

  // Calculate number of kmers
  if(version < 7 && file_size != -1)
  {
    size_t bytes_remaining = file_size - num_bytes_read;
    size_t num_bytes_per_kmer = sizeof(uint64_t) * num_of_bitfields +
                                sizeof(uint32_t) * num_of_colours +
                                sizeof(uint8_t) * num_of_colours;

    // A header with zero bitfields and zero colours has already been
    // reported above; skip the estimate rather than divide by zero
    if(num_bytes_per_kmer > 0)
    {
      expected_num_of_kmers = bytes_remaining / num_bytes_per_kmer;

      size_t excess = bytes_remaining -
                      (expected_num_of_kmers * num_bytes_per_kmer);

      if(excess > 0)
      {
        report_error("Excess bytes. Bytes:\n  file size: %lu;\n  for kmers: %lu;"
                     "\n  num kmers: %lu;\n  per kmer: %lu;\n  excess: %lu\n",
                     (unsigned long)file_size, (unsigned long)bytes_remaining,
                     (unsigned long)expected_num_of_kmers,
                     (unsigned long)num_bytes_per_kmer, (unsigned long)excess);
      }
    }
  }

  if(print_info)
  {
    char num_str[50];
    printf("Expected number of kmers: %s\n",
           ulong_to_str(expected_num_of_kmers, num_str));
    printf("----\n");
  }

  // Finished parsing header
  if(!parse_kmers && !print_kmers)
  {
    print_kmer_stats();
    fclose(fh);
    free(mean_read_lens_per_colour);
    free(total_seq_loaded_per_colour);
    exit(EXIT_SUCCESS);
  }


  shade_bytes = num_of_shades >> 3;
  size_t shade_array_bytes = shade_bytes * num_of_colours;

  // Kmer data (shade arrays may be zero-sized; alloc_or_exit copes with that)
  uint64_t* kmer = alloc_or_exit(sizeof(uint64_t) * num_of_bitfields);
  uint32_t* covgs = alloc_or_exit(sizeof(uint32_t) * num_of_colours);
  uint8_t* edges = alloc_or_exit(sizeof(uint8_t) * num_of_colours);
  uint8_t* shade_data = alloc_or_exit(shade_array_bytes);
  uint8_t* shend_data = alloc_or_exit(shade_array_bytes);

  // Convert values to strings.
  // seq is printed with %s, so it needs kmer_size chars plus the '\0'
  char* seq = alloc_or_exit((size_t)kmer_size + 1);
  char kmer_colour_edge_str[9];

  // Check top word of each kmer: two bits per base, so any bit above
  // 2 * (kmer_size % 32) in the first word should be unset
  int bits_in_top_word = 2 * (kmer_size % 32);
  uint64_t top_word_mask = (~(uint64_t)0) << bits_in_top_word;

  size_t num_bytes_per_bkmer = sizeof(uint64_t)*num_of_bitfields;

  // Read kmer in bytes so we can see if there are extra bytes at the end of
  // the file
  size_t bytes_read;

  // while((bytes_read = fread(kmer, 1, num_bytes_per_bkmer, fh)) > 0)
  while((bytes_read = fread_buf(fh, kmer, num_bytes_per_bkmer, buffer)) > 0)
  {
    if(bytes_read != num_bytes_per_bkmer)
    {
      report_error("unusual extra bytes [%i] at the end of the file\n",
                   (int)bytes_read);
      break;
    }
    num_bytes_read += bytes_read;

    my_fread(fh, covgs, sizeof(uint32_t) * num_of_colours, "kmer covg");
    my_fread(fh, edges, sizeof(uint8_t) * num_of_colours, "kmer edges");

    if(version >= 7)
    {
      uint8_t *shades = shade_data, *shends = shend_data;
      for(i = 0; i < num_of_colours; i++)
      {
        my_fread(fh, shades, sizeof(uint8_t) * shade_bytes, "shades");
        my_fread(fh, shends, sizeof(uint8_t) * shade_bytes, "shade ends");
        shades += shade_bytes;
        shends += shade_bytes;
      }
    }

    //
    // Kmer checks
    //

    // Check top bits of kmer (details are printed for the first one only)
    if(kmer[0] & top_word_mask)
    {
      if(num_of_oversized_kmers == 0)
      {
        report_error("oversized kmer [index: %lu]\n",
                     (unsigned long)num_of_kmers_read);

        for(i = 0; i < num_of_bitfields; i++)
        {
          fprintf(stderr, "  word %i: ", i);
          print_binary(stderr, kmer[i]);
          fprintf(stderr, "\n");
        }
      }

      num_of_oversized_kmers++;
    }

    // Check for all-zeros (i.e. all As kmer: AAAAAA)
    uint64_t kmer_words_or = 0;

    for(i = 0; i < num_of_bitfields; i++)
      kmer_words_or |= kmer[i];

    if(kmer_words_or == 0)
    {
      // Reported once, when the second all-'A' kmer is seen
      if(num_of_all_zero_kmers == 1)
      {
        report_error("more than one all 'A's kmers seen [index: %lu]\n",
                     (unsigned long)num_of_kmers_read);
      }

      num_of_all_zero_kmers++;
    }

    // Check covg is 0 for all colours
    for(i = 0; i < num_of_colours && covgs[i] == 0; i++);

    if(i == num_of_colours)
    {
      if(num_of_zero_covg_kmers == 0)
      {
        report_warning("a kmer has zero coverage in all colours [index: %lu]\n",
                       (unsigned long)num_of_kmers_read);
      }

      num_of_zero_covg_kmers++;
    }

    // Print?
    if(print_kmers)
    {
      binary_kmer_to_seq(kmer, seq, kmer_size, num_of_bitfields);
      printf("%s", seq);

      // Print coverages
      for(i = 0; i < num_of_colours; i++)
        printf(" %lu", (unsigned long)covgs[i]);

      // Print edges
      for(i = 0; i < num_of_colours; i++)
        printf(" %s", get_edges_str(edges[i], kmer_colour_edge_str));

      if(version >= 7 && num_of_shades > 0)
      {
        for(i = 0; i < num_of_colours; i++)
        {
          putc(' ', stdout);
          print_colour_shades(shade_data + i*shade_bytes, shend_data + i*shade_bytes);
        }
      }

      putc('\n', stdout);
    }

    num_of_kmers_read++;

    for(i = 0; i < num_of_colours; i++)
      sum_of_covgs_read += covgs[i];
  }

  if(num_of_kmers_read != expected_num_of_kmers)
  {
    report_error("Expected %lu kmers, read %lu\n",
                 (unsigned long)expected_num_of_kmers,
                 (unsigned long)num_of_kmers_read);
  }

  if(print_kmers && print_info)
    printf("----\n");

  // check for various reading errors
  if(errno != 0)
  {
    report_error("errno set [%i]\n", (int)errno);
  }

  int err;
  if((err = ferror(fh)) != 0)
  {
    report_error("occurred after file reading [%i]\n", err);
  }

  // For testing output
  //num_of_bitfields = 2;
  //num_of_kmers_read = 3600000000;
  //num_of_kmers_read = 12345;
  //num_of_kmers_read = 3581787;
  //num_of_kmers_read = 0;

  print_kmer_stats();

  fclose(fh);

  free(kmer);
  free(covgs);
  free(edges);
  free(shade_data);
  free(shend_data);
  free(seq);
  free(mean_read_lens_per_colour);
  free(total_seq_loaded_per_colour);

  buffer_free(buffer);

  if((print_kmers || parse_kmers) && print_info)
  {
    printf("----\n");
    if(num_warnings > 0 || num_errors > 0)
      printf("Warnings: %u; Errors: %u\n", num_warnings, num_errors);
    if(num_errors == 0)
      printf(num_warnings ? "Binary may be ok\n" : "Binary is valid\n");
  }

  exit(EXIT_SUCCESS);
}