Ejemplo n.º 1
0
  // Builds the model from summary statistics: the number of zero
  // observations, the number of positive observations, and the sum and
  // sum-of-logs of the positive observations.  The statistics are stored in
  // the two component models, and each component is moved to its MLE only
  // when the data-size conditions below are met.
  ZIGM::ZeroInflatedGammaModel(int number_of_zeros, int number_of_positives,
                               double sum_of_positives,
                               double sum_of_logs_of_positives)
      : gamma_(new GammaModel),
        binomial_(new BinomialModel),
        zero_threshold_(1e-8),
        log_probabilities_are_current_(false) {
    // A zero sum of positives is only consistent with having no positive
    // observations at all.
    const bool other_positive_stats_are_zero =
        (sum_of_logs_of_positives == 0) && (number_of_positives == 0);
    if (sum_of_positives == 0 && !other_positive_stats_are_zero) {
      report_error(
          "If sum_of_positives is zero, then sum_of_log_positives and "
          "number_of_positives must also be zero.");
    }

    const int number_of_trials = number_of_positives + number_of_zeros;
    gamma_->suf()->set(sum_of_positives, sum_of_logs_of_positives,
                       number_of_positives);
    binomial_->suf()->set(number_of_positives, number_of_trials);

    const bool saw_both_outcomes =
        number_of_positives > 0 && number_of_zeros > 0;
    if (saw_both_outcomes) {
      // The binomial model has a closed form MLE.
      binomial_->mle();
    }

    // The gamma MLE is only attempted with more than one positive value.
    if (number_of_positives <= 1) {
      return;
    }
    try {
      gamma_->mle();
    } catch (...) {
      // Best effort: the model keeps its current gamma parameters.
      report_warning("Warning:  failed to set gamma model to its MLE.");
    }
  }
Ejemplo n.º 2
0
/* Final callback of the sanity check: reports a missing END instruction,
 * warns about declared registers that were never used, and prints the
 * error/warning totals when there are any.  Always returns TRUE.
 */
static boolean
epilog(
   struct tgsi_iterate_context *iter )
{
   struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter;
   uint file = TGSI_FILE_NULL;

   /* index_of_END equal to ~0 is treated as "no END instruction seen".
    */
   if (ctx->index_of_END == ~0) {
      report_error( ctx, "Missing END instruction" );
   }

   /* Walk every register of every file; a register is only reported when it
    * is declared, not used directly, and its file has no indirect use.
    */
   while (file < TGSI_FILE_COUNT) {
      uint reg;

      for (reg = 0; reg < MAX_REGISTERS; reg++) {
         if (!is_register_declared( ctx, file, reg ))
            continue;
         if (is_register_used( ctx, file, reg ))
            continue;
         if (ctx->regs_ind_used[file])
            continue;

         report_warning( ctx, "%s[%u]: Register never used", file_names[file], reg );
      }

      file++;
   }

   /* Totals are printed only when something was reported.
    */
   if (!(ctx->errors == 0 && ctx->warnings == 0)) {
      debug_printf( "%u errors, %u warnings\n", ctx->errors, ctx->warnings );
   }

   return TRUE;
}
Ejemplo n.º 3
0
static boolean
epilog(
   struct tgsi_iterate_context *iter )
{
   struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter;

   /* There must be an END instruction somewhere.
    */
   if (ctx->index_of_END == ~0) {
      report_error( ctx, "Missing END instruction" );
   }

   /* Check if all declared registers were used.
    */
   {
      struct cso_hash_iter iter =
         cso_hash_first_node(ctx->regs_decl);

      while (!cso_hash_iter_is_null(iter)) {
         scan_register *reg = (scan_register *)cso_hash_iter_data(iter);
         if (!is_register_used(ctx, reg) && !is_ind_register_used(ctx, reg)) {
            report_warning( ctx, "%s[%u]: Register never used",
                            file_names[reg->file], reg->indices[0] );
         }
         iter = cso_hash_iter_next(iter);
      }
   }

   /* Print totals, if any.
    */
   if (ctx->errors || ctx->warnings)
      debug_printf( "%u errors, %u warnings\n", ctx->errors, ctx->warnings );

   return TRUE;
}
Ejemplo n.º 4
0
// Reports the accumulated message, together with its message code, when the
// object is destroyed.
//
// Nothing may propagate out of a destructor: since C++11 destructors are
// implicitly noexcept, so an escaping exception ends in std::terminate().
// Every exception type is therefore swallowed here, not only those derived
// from std::exception.
RuntimeWarningAdHoc::~RuntimeWarningAdHoc()
{
    try {
        report_warning(message.str().c_str(), m_messageCode);
    }
    catch (...)
    {
        // Best effort: a warning that cannot be reported is dropped.
    }
}
/* Per-parameter callback (passed to prototype_each_param by process_line).
 * A void PARAM is reported with a warning located by DATA (a struct locus)
 * and re-initialized with the type returned by get_hidden_int().
 * Non-void parameters are left alone.  Always returns CBS_CONT.  */
static enum callback_status
void_to_hidden_int(struct prototype *proto, struct param *param, void *data)
{
	/* Fetched on the first void parameter and reused afterwards.  */
	static struct arg_type_info *hidden_int = NULL;
	struct locus *where = data;

	if (!param_is_void(param))
		return CBS_CONT;

	report_warning(where->filename, where->line_no,
		       "void parameter assumed to be 'hide(int)'");

	if (hidden_int == NULL)
		hidden_int = get_hidden_int();

	/* Passed with own == 0, so the shared type is not owned by PARAM.  */
	param_destroy(param);
	param_init_type(param, hidden_int, 0);

	return CBS_CONT;
}
/* Parse one line BUF of a prototype configuration file into PLIB.
 *
 * Lines that are empty or start with ';' or '#' are ignored.  Lines
 * starting with "typedef" are handed to parse_typedef.  Any other
 * line is parsed as a function prototype: a return type, a name, and
 * a parameter list terminated by ')'.  The resulting prototype is
 * added to PLIB under the parsed name.
 *
 * LOC supplies the file name and line number used in diagnostics.
 * BUF is modified in place while parsing.
 *
 * Returns 0 when the line was consumed (including ignored and typedef
 * lines) and -1 when the prototype could not be parsed or stored.  */
static int
process_line(struct protolib *plib, struct locus *loc, char *buf)
{
	char *str = buf;
	char *tmp;

	debug(3, "Reading line %d of `%s'", loc->line_no, loc->filename);
	eat_spaces(&str);

	/* A comment or empty line.  */
	if (*str == ';' || *str == 0 || *str == '\n' || *str == '#')
		return 0;

	if (strncmp(str, "typedef", 7) == 0) {
		parse_typedef(plib, loc, &str);
		return 0;
	}

	struct prototype fun;
	prototype_init(&fun);

	struct param *extra_param = NULL;
	char *proto_name = NULL;
	int own;
	fun.return_info = parse_lens(plib, loc, &str, NULL, 0, &own, NULL);
	if (fun.return_info == NULL) {
	err:
		/* Common failure exit: release everything acquired so
		 * far.  Later code jumps here with goto.  */
		debug(3, " Skipping line %d", loc->line_no);

		if (extra_param != NULL) {
			param_destroy(extra_param);
			free(extra_param);
		}

		prototype_destroy(&fun);
		free(proto_name);
		return -1;
	}
	fun.own_return_info = own;
	debug(4, " return_type = %d", fun.return_info->type);

	eat_spaces(&str);
	tmp = start_of_arg_sig(str);
	if (tmp == NULL) {
		report_error(loc->filename, loc->line_no, "syntax error");
		goto err;
	}
	/* Terminate the name so that it can be duplicated; parsing of
	 * the parameters resumes right after this position.  */
	*tmp = '\0';

	proto_name = strdup(str);
	if (proto_name == NULL) {
	oom:
		/* Shared exit for failures that are reported through
		 * errno.  */
		report_error(loc->filename, loc->line_no,
			     "%s", strerror(errno));
		goto err;
	}

	str = tmp + 1;
	debug(3, " name = %s", proto_name);

	/* Set once a stop parameter has been pushed for a '+'.  */
	int have_stop = 0;

	while (1) {
		eat_spaces(&str);
		if (*str == ')')
			break;

		if (str[0] == '+') {
			if (have_stop == 0) {
				struct param param;
				param_init_stop(&param);
				if (prototype_push_param(&fun, &param) < 0)
					goto oom;
				have_stop = 1;
			}
			str++;
		}

		int own;
		/* The stop parameter is not counted as an argument.  */
		size_t param_num = prototype_num_params(&fun) - have_stop;
		struct arg_type_info *type
			= parse_lens(plib, loc, &str, &extra_param,
				     param_num, &own, NULL);
		if (type == NULL) {
			report_error(loc->filename, loc->line_no,
				     "unknown argument type");
			goto err;
		}

		struct param param;
		param_init_type(&param, type, own);
		if (prototype_push_param(&fun, &param) < 0)
			goto oom;

		eat_spaces(&str);
		if (*str == ',') {
			str++;
			continue;
		} else if (*str == ')') {
			continue;
		} else {
			/* Drop a trailing newline for the message.
			 * STR may be empty here (the line ended
			 * before ')'), so the length has to be
			 * checked before indexing its last byte.  */
			size_t len = strlen(str);
			if (len > 0 && str[len - 1] == '\n')
				str[len - 1] = '\0';
			report_error(loc->filename, loc->line_no,
				     "syntax error around \"%s\"", str);
			goto err;
		}
	}

	/* We used to allow void parameter as a synonym to an argument
	 * that shouldn't be displayed.  But backends really need to
	 * know the exact type that they are dealing with.  The proper
	 * way to do this these days is to use the hide lens.
	 *
	 * So if there are any voids in the parameter list, show a
	 * warning and assume that they are ints.  If there's a sole
	 * void, assume the function doesn't take any arguments.  The
	 * latter is conservative, we can drop the argument
	 * altogether, instead of fetching and then not showing it,
	 * without breaking any observable behavior.  */
	if (prototype_num_params(&fun) == 1
	    && param_is_void(prototype_get_nth_param(&fun, 0))) {
		if (0)
			/* Don't show this warning.  Pre-0.7.0
			 * ltrace.conf often used this idiom.  This
			 * should be postponed until much later, when
			 * extant uses are likely gone.  */
			report_warning(loc->filename, loc->line_no,
				       "sole void parameter ignored");
		prototype_destroy_nth_param(&fun, 0);
	} else {
		prototype_each_param(&fun, NULL, void_to_hidden_int, loc);
	}

	if (extra_param != NULL) {
		/* A failed push used to be ignored, which dropped the
		 * parameter silently and freed it without destroying
		 * it.  On failure EXTRA_PARAM is still set, so the err
		 * path destroys and frees it.  */
		if (prototype_push_param(&fun, extra_param) < 0)
			goto oom;

		/* The prototype now holds the parameter contents;
		 * only the heap wrapper is released here.  */
		free(extra_param);
		extra_param = NULL;
	}

	if (protolib_add_prototype(plib, proto_name, 1, &fun) < 0) {
		report_error(loc->filename, loc->line_no,
			     "couldn't add prototype: %s",
			     strerror(errno));
		goto err;
	}

	return 0;
}
Ejemplo n.º 7
0
/* Per-instruction callback of the sanity check.
 *
 * Tracks the END instruction, validates the opcode and its operand counts,
 * and runs check_register_usage on every destination, source and indirect
 * register of the instruction.  Always returns TRUE.
 */
static boolean
iter_instruction(
   struct tgsi_iterate_context *iter,
   struct tgsi_full_instruction *inst )
{
   struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter;
   const struct tgsi_opcode_info *info;
   uint dst;
   uint src;

   /* A second END is an error; the index of the latest END is kept either
    * way.
    */
   if (inst->Instruction.Opcode == TGSI_OPCODE_END) {
      if (ctx->index_of_END != ~0) {
         report_error( ctx, "Too many END instructions" );
      }
      ctx->index_of_END = ctx->num_instructions;
   }

   /* Without opcode info nothing else can be checked; note that the
    * instruction counter is not advanced on this path.
    */
   info = tgsi_get_opcode_info( inst->Instruction.Opcode );
   if (!info) {
      report_error( ctx, "(%u): Invalid instruction opcode", inst->Instruction.Opcode );
      return TRUE;
   }

   /* Operand counts must match what the opcode expects.
    */
   if (inst->Instruction.NumDstRegs != info->num_dst) {
      report_error( ctx, "Invalid number of destination operands, should be %u", info->num_dst );
   }
   if (inst->Instruction.NumSrcRegs != info->num_src) {
      report_error( ctx, "Invalid number of source operands, should be %u", info->num_src );
   }

   /* Destination registers.
    */
   for (dst = 0; dst < inst->Instruction.NumDstRegs; dst++) {
      check_register_usage(
         ctx,
         inst->FullDstRegisters[dst].DstRegister.File,
         inst->FullDstRegisters[dst].DstRegister.Index,
         "destination",
         FALSE );
   }

   /* Source registers, plus the indirect register of indirect sources.
    */
   for (src = 0; src < inst->Instruction.NumSrcRegs; src++) {
      uint ind_file;
      int ind_index;

      check_register_usage(
         ctx,
         inst->FullSrcRegisters[src].SrcRegister.File,
         inst->FullSrcRegisters[src].SrcRegister.Index,
         "source",
         (boolean)inst->FullSrcRegisters[src].SrcRegister.Indirect );

      if (!inst->FullSrcRegisters[src].SrcRegister.Indirect)
         continue;

      ind_file = inst->FullSrcRegisters[src].SrcRegisterInd.File;
      ind_index = inst->FullSrcRegisters[src].SrcRegisterInd.Index;
      check_register_usage(
         ctx,
         ind_file,
         ind_index,
         "indirect",
         FALSE );

      /* Only ADDR[0] passes without a warning.
       */
      if (!(ind_file == TGSI_FILE_ADDRESS && ind_index == 0)) {
         report_warning( ctx, "Indirect register not ADDR[0]" );
      }
   }

   ctx->num_instructions++;

   return TRUE;
}
// Entry point: checks the binary file named by the last command line
// argument.
//
// Arguments before the file path may be --print_info, --print_kmers or
// --parse_kmers (the last one also switches print_info on); anything else
// calls print_usage().
//
// The header is read and validated first.  Unless kmers are to be parsed or
// printed, the program then prints the statistics and exits.  Otherwise every
// kmer record is read and checked, and the totals are printed at the end.
//
// Problems found in the file are reported through report_error() and
// report_warning().  Failing to open the file, a wrong magic word and running
// out of memory terminate with EXIT_FAILURE; all other paths end with
// EXIT_SUCCESS.
int main(int argc, char** argv)
{
  char* filepath;

  // NOTE(review): print_usage() presumably does not return -- confirm,
  // since argv[argc-1] is used unconditionally below.
  if(argc < 2)
  {
    print_usage();
  }
  else if(argc > 2)
  {
    print_info = 0;
    print_kmers = 0;
    parse_kmers = 0;

    int i;

    // The last argument is the file path, so options stop at argc-2
    for(i = 1; i < argc-1; i++)
    {
      if(strcasecmp(argv[i], "--print_info") == 0)
      {
        print_info = 1;
      }
      else if(strcasecmp(argv[i], "--print_kmers") == 0)
      {
        print_kmers = 1;
      }
      else if(strcasecmp(argv[i], "--parse_kmers") == 0)
      {
        print_info = 1;
        parse_kmers = 1;
      }
      else
        print_usage();
    }
  }

  filepath = argv[argc-1];

  if(print_info)
    printf("Loading file: %s\n", filepath);

  file_size = get_file_size(filepath);

  // The file holds binary data, so it must not be opened in text mode
  FILE* fh = fopen(filepath, "rb");

  if(fh == NULL)
  {
    report_error("cannot open file '%s'\n", filepath);
    exit(EXIT_FAILURE);
  }

  if(file_size != -1 && print_info)
  {
    char str[31];
    bytes_to_str(file_size, 0, str);
    printf("File size: %s\n", str);
  }

  buffer = buffer_new(BUFFER_SIZE);

  if(print_info)
    printf("----\n");

  unsigned int i;

  // Read magic word at the start of header
  char magic_word[7];
  magic_word[6] = '\0';

  my_fread(fh, magic_word, strlen("CORTEX"), "Magic word");

  if(strcmp(magic_word, "CORTEX") != 0)
  {
    fprintf(stderr, "Magic word doesn't match 'CORTEX' (start)\n");
    exit(EXIT_FAILURE);
  }

  // Read version number
  my_fread(fh, &version, sizeof(uint32_t), "binary version");
  my_fread(fh, &kmer_size, sizeof(uint32_t), "kmer size");
  my_fread(fh, &num_of_bitfields, sizeof(uint32_t), "number of bitfields");
  my_fread(fh, &num_of_colours, sizeof(uint32_t), "number of colours");

  if(print_info)
  {
    printf("binary version: %i\n", (int)version);
    printf("kmer size: %i\n", (int)kmer_size);
    printf("bitfields: %i\n", (int)num_of_bitfields);
    printf("colours: %i\n", (int)num_of_colours);
  }

  // Version 7 adds the kmer count and the number of shades to the header
  if(version >= 7)
  {
    my_fread(fh, &expected_num_of_kmers, sizeof(uint64_t), "number of kmers");
    my_fread(fh, &num_of_shades, sizeof(uint32_t), "number of shades");

    if(print_info)
    {
      char tmp[256];
      printf("kmers: %s\n", ulong_to_str(expected_num_of_kmers,tmp));
      printf("shades: %i\n", (int)num_of_shades);
    }
  }

  // Checks

  if(version > 7 || version < 4)
    report_error("Sorry, we only support binary versions 4, 5, 6 & 7\n");

  if(kmer_size % 2 == 0)
    report_error("kmer size is not an odd number\n");

  if(kmer_size < 3)
    report_error("kmer size is less than three\n");

  if(num_of_bitfields * 32 < kmer_size)
    report_error("Not enough bitfields for kmer size\n");

  if((num_of_bitfields-1)*32 >= kmer_size)
    report_error("using more than the minimum number of bitfields\n");

  if(num_of_colours == 0)
    report_error("number of colours is zero\n");

  // x & (x-1) is zero only for powers of two (and for zero)
  if(num_of_shades != 0 && (num_of_shades & (num_of_shades-1)))
    report_error("number of shades is not a power of 2\n");

  //

  // Read array of mean read lengths per colour
  uint32_t *mean_read_lens_per_colour = malloc(num_of_colours*sizeof(uint32_t));

  my_fread(fh, mean_read_lens_per_colour, sizeof(uint32_t) * num_of_colours,
           "mean read length for each colour");

  // Read array of total seq loaded per colour
  uint64_t *total_seq_loaded_per_colour = malloc(num_of_colours*sizeof(uint64_t));

  my_fread(fh, total_seq_loaded_per_colour, sizeof(uint64_t) * num_of_colours,
           "total sequance loaded for each colour");

  for(i = 0; i < num_of_colours; i++)
  {
    sum_of_seq_loaded += total_seq_loaded_per_colour[i];
  }

  // Version 6 adds sample names, error rates and cleaning info per colour
  if(version >= 6)
  {
    sample_names = malloc(sizeof(char*) * num_of_colours);

    for(i = 0; i < num_of_colours; i++)
    {
      uint32_t str_length;
      my_fread(fh, &str_length, sizeof(uint32_t), "sample name length");

      if(str_length == 0)
      {
        sample_names[i] = NULL;
      }
      else
      {
        sample_names[i] = (char*)malloc((str_length+1) * sizeof(char));
        my_fread(fh, sample_names[i], str_length, "sample name");
        sample_names[i][str_length] = '\0';

        // Check sample length is as long as we were told
        size_t sample_name_len = strlen(sample_names[i]);

        if(sample_name_len != str_length)
        {
          // Premature \0 in string
          // Casts make the arguments match the %lu conversions
          report_warning("Sample %i name has length %lu but is only %lu chars "
                         "long (premature '\\0')\n",
                         i, (unsigned long)str_length,
                         (unsigned long)sample_name_len);
        }
      }
    }

    seq_error_rates = malloc(sizeof(long double) * num_of_colours);
    my_fread(fh, seq_error_rates, sizeof(long double) * num_of_colours,
             "seq error rates");

    cleaning_infos = malloc(sizeof(CleaningInfo) * num_of_colours);

    for(i = 0; i < num_of_colours; i++)
    {
      my_fread(fh, &(cleaning_infos[i].tip_cleaning), 1, "tip cleaning");
      my_fread(fh, &(cleaning_infos[i].remove_low_covg_supernodes), 1,
               "remove low covg supernodes");
      my_fread(fh, &(cleaning_infos[i].remove_low_covg_kmers), 1,
               "remove low covg kmers");
      my_fread(fh, &(cleaning_infos[i].cleaned_against_graph), 1,
               "cleaned against graph");

      my_fread(fh, &(cleaning_infos[i].remove_low_covg_supernodes_thresh),
               sizeof(int32_t), "remove low covg supernode threshold");

      my_fread(fh, &(cleaning_infos[i].remove_low_covg_kmers_thresh),
               sizeof(int32_t), "remove low covg kmer threshold");

      if(version > 6)
      {
        if(cleaning_infos[i].remove_low_covg_supernodes_thresh < 0)
        {
          report_warning("Binary header gives sample %i a cleaning threshold of "
                         "%i for supernodes (should be >= 0)\n",
                         i, cleaning_infos[i].remove_low_covg_supernodes_thresh);
        }
        if(cleaning_infos[i].remove_low_covg_kmers_thresh < 0)
        {
          report_warning("Binary header gives sample %i a cleaning threshold of "
                         "%i for kmers (should be >= 0)\n",
                         i, cleaning_infos[i].remove_low_covg_kmers_thresh);
        }
      }

      if(!cleaning_infos[i].remove_low_covg_supernodes &&
         cleaning_infos[i].remove_low_covg_supernodes_thresh > 0)
      {
        report_warning("Binary header gives sample %i a cleaning threshold of "
                       "%i for supernodes when no cleaning was performed\n",
                       i, cleaning_infos[i].remove_low_covg_supernodes_thresh);
      }

      if(!cleaning_infos[i].remove_low_covg_kmers &&
         cleaning_infos[i].remove_low_covg_kmers_thresh > 0)
      {
        report_warning("Binary header gives sample %i a cleaning threshold of "
                       "%i for kmers when no cleaning was performed\n",
                       i, cleaning_infos[i].remove_low_covg_kmers_thresh);
      }

      uint32_t name_length;
      my_fread(fh, &name_length, sizeof(uint32_t), "graph name length");

      if(name_length == 0)
      {
        cleaning_infos[i].name_of_graph_clean_against = NULL;
      }
      else
      {
        cleaning_infos[i].name_of_graph_clean_against
          = (char*)malloc((name_length + 1) * sizeof(char));

        my_fread(fh, cleaning_infos[i].name_of_graph_clean_against,
                 name_length, "graph name length");

        cleaning_infos[i].name_of_graph_clean_against[name_length] = '\0';

        // Check sample length is as long as we were told
        size_t cleaned_name_len
          = strlen(cleaning_infos[i].name_of_graph_clean_against);

        if(cleaned_name_len != name_length)
        {
          // Premature \0 in string
          // Casts make the arguments match the %u conversions; the string
          // length cannot exceed name_length, so nothing is truncated
          report_warning("Sample [%i] cleaned-against-name has length %u but is "
                         "only %u chars long (premature '\\0')\n",
                         i, (unsigned int)name_length,
                         (unsigned int)cleaned_name_len);
        }
      }
    }
  }

  // Print colour info

  if(print_info)
  {
    for(i = 0; i < num_of_colours; i++)
    {
      printf("-- Colour %i --\n", i);

      if(version >= 6)
      {
        // Version 6 only output
        printf("  sample name: '%s'\n", sample_names[i]);
      }

      char tmp[32];

      printf("  mean read length: %u\n",
             (unsigned int)mean_read_lens_per_colour[i]);
      printf("  total sequence loaded: %s\n",
             ulong_to_str(total_seq_loaded_per_colour[i], tmp));

      if(version >= 6)
      {
        // Version 6 only output
        printf("  sequence error rate: %Lf\n", seq_error_rates[i]);

        printf("  tip clipping: %s\n",
               (cleaning_infos[i].tip_cleaning == 0 ? "no" : "yes"));

        printf("  remove low coverage supernodes: %s [threshold: %i]\n",
               cleaning_infos[i].remove_low_covg_supernodes ? "yes" : "no",
               cleaning_infos[i].remove_low_covg_supernodes_thresh);

        printf("  remove low coverage kmers: %s [threshold: %i]\n",
               cleaning_infos[i].remove_low_covg_kmers ? "yes" : "no",
               cleaning_infos[i].remove_low_covg_kmers_thresh);

        printf("  cleaned against graph: %s [against: '%s']\n",
               cleaning_infos[i].cleaned_against_graph ? "yes" : "no",
               (cleaning_infos[i].name_of_graph_clean_against == NULL
                  ? "" : cleaning_infos[i].name_of_graph_clean_against));
      }
    }

    printf("--\n");
  }

  // Read magic word at the end of header
  my_fread(fh, magic_word, strlen("CORTEX"), "magic word (end)");

  if(strcmp(magic_word, "CORTEX") != 0)
  {
    report_error("magic word doesn't match 'CORTEX' (end): '%s'\n", magic_word);
    exit(EXIT_FAILURE);
  }

  // Calculate number of kmers
  // Before version 7 the header has no kmer count, so it is derived from the
  // number of bytes left in the file
  if(version < 7 && file_size != -1)
  {
    size_t bytes_remaining = file_size - num_bytes_read;
    size_t num_bytes_per_kmer = sizeof(uint64_t) * num_of_bitfields +
                                sizeof(uint32_t) * num_of_colours +
                                sizeof(uint8_t) * num_of_colours;

    expected_num_of_kmers = bytes_remaining / num_bytes_per_kmer;

    size_t excess = bytes_remaining - (expected_num_of_kmers * num_bytes_per_kmer);

    if(excess > 0)
    {
      // Casts make every argument match its %lu conversion
      report_error("Excess bytes. Bytes:\n  file size: %lu;\n  for kmers: %lu;"
                   "\n  num kmers: %lu;\n  per kmer: %lu;\n  excess: %lu\n",
                   (unsigned long)file_size, (unsigned long)bytes_remaining,
                   (unsigned long)expected_num_of_kmers,
                   (unsigned long)num_bytes_per_kmer, (unsigned long)excess);
    }
  }

  if(print_info)
  {
    char num_str[50];
    printf("Expected number of kmers: %s\n",
           ulong_to_str(expected_num_of_kmers, num_str));
    printf("----\n");
  }

  // Finished parsing header
  if(!parse_kmers && !print_kmers)
  {
    print_kmer_stats();
    fclose(fh);
    exit(EXIT_SUCCESS);
  }


  shade_bytes = num_of_shades >> 3;
  size_t shade_array_bytes = shade_bytes * num_of_colours;

  // Kmer data
  uint64_t* kmer = malloc(sizeof(uint64_t) * num_of_bitfields);
  uint32_t* covgs = malloc(sizeof(uint32_t) * num_of_colours);
  uint8_t* edges = malloc(sizeof(uint8_t) * num_of_colours);
  uint8_t* shade_data = malloc(shade_array_bytes);
  uint8_t* shend_data = malloc(shade_array_bytes);

  if(kmer == NULL || covgs == NULL || edges == NULL ||
     shade_data == NULL || shend_data == NULL) {
    report_error("Out of memory");
    // Running out of memory is a failure, not a successful run
    exit(EXIT_FAILURE);
  }

  // Convert values to strings
  // seq is printed with %s below, so it needs room for the terminating '\0'
  char* seq = malloc(sizeof(char) * ((size_t)kmer_size + 1));
  char kmer_colour_edge_str[9];

  if(seq == NULL) {
    report_error("Out of memory");
    exit(EXIT_FAILURE);
  }

  // Check top word of each kmer
  // Each base takes two bits; bits of the first word above those used by the
  // kmer are expected to be zero
  int bits_in_top_word = 2 * (kmer_size % 32);
  uint64_t top_word_mask = (~(uint64_t)0) << bits_in_top_word;

  size_t num_bytes_per_bkmer = sizeof(uint64_t)*num_of_bitfields;

  // Read kmer in bytes so we can see if there are extra bytes at the end of
  // the file
  size_t bytes_read;

  while((bytes_read = fread_buf(fh, kmer, num_bytes_per_bkmer, buffer)) > 0)
  {
    if(bytes_read != num_bytes_per_bkmer)
    {
      report_error("unusual extra bytes [%i] at the end of the file\n",
                   (int)bytes_read);
      break;
    }
    num_bytes_read += bytes_read;

    my_fread(fh, covgs, sizeof(uint32_t) * num_of_colours, "kmer covg");
    my_fread(fh, edges, sizeof(uint8_t) * num_of_colours, "kmer edges");

    if(version >= 7)
    {
      uint8_t *shades = shade_data, *shends = shend_data;
      for(i = 0; i < num_of_colours; i++)
      {
        my_fread(fh, shades, sizeof(uint8_t) * shade_bytes, "shades");
        my_fread(fh, shends, sizeof(uint8_t) * shade_bytes, "shade ends");
        shades += shade_bytes;
        shends += shade_bytes;
      }
    }

    //
    // Kmer checks
    //

    // Check top bits of kmer
    if(kmer[0] & top_word_mask)
    {
      // Only the first oversized kmer is printed in full
      if(num_of_oversized_kmers == 0)
      {
        report_error("oversized kmer [index: %lu]\n",
                     (unsigned long)num_of_kmers_read);

        for(i = 0; i < num_of_bitfields; i++)
        {
          fprintf(stderr, "  word %i: ", i);
          print_binary(stderr, kmer[i]);
          fprintf(stderr, "\n");
        }
      }

      num_of_oversized_kmers++;
    }

    // Check for all-zeros (i.e. all As kmer: AAAAAA)
    uint64_t kmer_words_or = 0;

    for(i = 0; i < num_of_bitfields; i++)
      kmer_words_or |= kmer[i];

    if(kmer_words_or == 0)
    {
      // Reported when the second all-zero kmer is seen
      if(num_of_all_zero_kmers == 1)
      {
        report_error("more than one all 'A's kmers seen [index: %lu]\n",
                     (unsigned long)num_of_kmers_read);
      }

      num_of_all_zero_kmers++;
    }

    // Check covg is 0 for all colours
    for(i = 0; i < num_of_colours && covgs[i] == 0; i++);

    if(i == num_of_colours)
    {
      // Only the first occurrence is reported individually
      if(num_of_zero_covg_kmers == 0)
      {
        report_warning("a kmer has zero coverage in all colours [index: %lu]\n",
                       (unsigned long)num_of_kmers_read);
      }

      num_of_zero_covg_kmers++;
    }

    // Print?
    if(print_kmers)
    {
      binary_kmer_to_seq(kmer, seq, kmer_size, num_of_bitfields);
      printf("%s", seq);

      // Print coverages
      for(i = 0; i < num_of_colours; i++)
        printf(" %lu", (unsigned long)covgs[i]);

      // Print edges
      for(i = 0; i < num_of_colours; i++)
        printf(" %s", get_edges_str(edges[i], kmer_colour_edge_str));

      if(version >= 7 && num_of_shades > 0)
      {
        for(i = 0; i < num_of_colours; i++)
        {
          putc(' ', stdout);
          print_colour_shades(shade_data + i*shade_bytes, shend_data + i*shade_bytes);
        }
      }

      putc('\n', stdout);
    }

    num_of_kmers_read++;

    for(i = 0; i < num_of_colours; i++)
      sum_of_covgs_read += covgs[i];
  }

  if(num_of_kmers_read != expected_num_of_kmers)
  {
    report_error("Expected %lu kmers, read %lu\n",
                 (unsigned long)expected_num_of_kmers,
                 (unsigned long)num_of_kmers_read);
  }

  if(print_kmers && print_info)
    printf("----\n");

  // check for various reading errors
  if(errno != 0)
  {
    report_error("errno set [%i]\n", (int)errno);
  }

  int err;
  if((err = ferror(fh)) != 0)
  {
    report_error("occurred after file reading [%i]\n", err);
  }

  print_kmer_stats();

  fclose(fh);

  free(kmer);
  free(covgs);
  free(edges);
  free(shade_data);
  free(shend_data);
  free(seq);
  free(mean_read_lens_per_colour);
  free(total_seq_loaded_per_colour);

  buffer_free(buffer);

  if((print_kmers || parse_kmers) && print_info)
  {
    printf("----\n");
    if(num_warnings > 0 || num_errors > 0)
      printf("Warnings: %u; Errors: %u\n", num_warnings, num_errors);
    if(num_errors == 0)
      printf(num_warnings ? "Binary may be ok\n" : "Binary is valid\n");
  }

  exit(EXIT_SUCCESS);
}
// Reports the kmer problems counted so far, prints the read totals when
// kmers were parsed or printed, and (with print_info set) prints the memory
// required plus a suggested hash table geometry for loading the binary.
static void print_kmer_stats()
{
  char digits[50];

  // Non-zero when the kmer records themselves were read
  const int kmers_were_read = (print_kmers || parse_kmers);

  if(num_of_all_zero_kmers > 1)
  {
    report_error("%s all-zero-kmers seen\n",
                 ulong_to_str(num_of_all_zero_kmers, digits));
  }

  if(num_of_oversized_kmers > 0)
  {
    report_error("%s oversized kmers seen\n",
                 ulong_to_str(num_of_oversized_kmers, digits));
  }

  if(num_of_zero_covg_kmers > 0)
  {
    report_warning("%s kmers have no coverage in any colour\n",
                   ulong_to_str(num_of_zero_covg_kmers, digits));
  }

  if(kmers_were_read && print_info)
  {
    printf("kmers read: %s\n", ulong_to_str(num_of_kmers_read, digits));
    printf("covgs read: %s\n", ulong_to_str(sum_of_covgs_read, digits));
    printf("seq loaded: %s\n", ulong_to_str(sum_of_seq_loaded, digits));
  }

  // Everything below is informational output only
  if(!print_info)
    return;

  // Memory calculations
  // use expected number of kmers if we haven't read the whole file
  unsigned long kmer_count
    = (kmers_were_read ? num_of_kmers_read : expected_num_of_kmers);

  // Number of hash table entries is 2^mem_height * mem_width
  // Aim for 80% occupancy once loaded
  float extra_space = 10.0/8;
  unsigned long hash_capacity = extra_space * kmer_count;

  // mem_width must be within these boundaries
  unsigned int min_mem_width = 5;
  unsigned int max_mem_width = 50;
  unsigned int min_mem_height = 12;
  // min mem usage = 2^12 * 5 = 20,480 entries = 320.0 KB with k=31,cols=1

  // Start from the smallest height at the largest width
  unsigned long mem_height = min_mem_height;
  unsigned long mem_width = max_mem_width;
  unsigned long hash_entries = (0x1UL << mem_height) * mem_width;

  if(hash_capacity > hash_entries)
  {
    // The starting table is too small: grow the height, then fit the width
    mem_height = Log2((double)hash_capacity / (max_mem_width-1))+0.99;
    mem_height = MIN2(mem_height, 32);
    mem_height = MAX2(mem_height, min_mem_height);

    mem_width = hash_capacity / (0x1UL << mem_height) + 1;

    printf("mem_width: %lu; mem_height: %lu;\n", mem_width, mem_height);

    if(mem_width < min_mem_width)
    {
      // Width fell below the minimum: redo the height for the minimum width
      mem_height = Log2((double)hash_capacity / min_mem_width)+0.99;
      mem_height = MIN2(mem_height, 32);
      mem_height = MAX2(mem_height, min_mem_height);
      mem_width = hash_capacity / (0x1UL << mem_height) + 1;
      mem_width = MAX2(mem_width, min_mem_width);
    }

    hash_entries = (0x1UL << mem_height) * mem_width;
  }

  char required_str[50];
  char suggested_str[50];

  set_memory_required_str(kmer_count, required_str);
  set_memory_required_str(hash_entries, suggested_str);

  printf("Memory required: %s\n", required_str);
  printf("Memory suggested: --mem_width %lu --mem_height %lu\n",
         mem_width, mem_height);

  char entries_str[50];
  ulong_to_str(hash_entries, entries_str);

  printf("  [%s entries; %s memory]\n", entries_str, suggested_str);
}