/**
 * Build a zero-inflated gamma model from sufficient statistics.
 *
 * @param number_of_zeros           count of observations recorded as zero.
 * @param number_of_positives       count of positive observations.
 * @param sum_of_positives          sum of the positive observations.
 * @param sum_of_logs_of_positives  sum of the logs of the positive
 *                                  observations.
 *
 * The statistics are stored in the sufficient statistics of the gamma and
 * binomial components.  The binomial component is moved to its MLE only
 * when both zeros and positives were seen; the gamma component only when
 * more than one positive was seen.  A failure of the gamma MLE is
 * downgraded to a warning.
 */
ZIGM::ZeroInflatedGammaModel(int number_of_zeros, int number_of_positives,
                             double sum_of_positives,
                             double sum_of_logs_of_positives)
    : gamma_(new GammaModel),
      binomial_(new BinomialModel),
      zero_threshold_(1e-8),
      log_probabilities_are_current_(false) {
  // A zero sum of positives is only consistent with having no positives.
  const bool positive_sum_is_zero = (sum_of_positives == 0);
  const bool other_stats_are_nonzero =
      (sum_of_logs_of_positives != 0) || (number_of_positives != 0);
  if (positive_sum_is_zero && other_stats_are_nonzero) {
    report_error(
        "If sum_of_positives is zero, then sum_of_log_positives and "
        "number_of_positives must also be zero.");
  }

  gamma_->suf()->set(sum_of_positives, sum_of_logs_of_positives,
                     number_of_positives);
  binomial_->suf()->set(number_of_positives,
                        number_of_positives + number_of_zeros);

  // The binomial model has a closed form MLE.
  const bool saw_both_outcomes =
      (number_of_positives > 0) && (number_of_zeros > 0);
  if (saw_both_outcomes) {
    binomial_->mle();
  }

  // Nothing more to do unless there are at least two positive observations.
  if (number_of_positives <= 1) {
    return;
  }
  try {
    gamma_->mle();
  } catch (...) {
    report_warning("Warning: failed to set gamma model to its MLE.");
  }
}
static boolean epilog( struct tgsi_iterate_context *iter ) { struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; uint file; /* There must be an END instruction somewhere. */ if (ctx->index_of_END == ~0) { report_error( ctx, "Missing END instruction" ); } /* Check if all declared registers were used. */ for (file = TGSI_FILE_NULL; file < TGSI_FILE_COUNT; file++) { uint i; for (i = 0; i < MAX_REGISTERS; i++) { if (is_register_declared( ctx, file, i ) && !is_register_used( ctx, file, i ) && !ctx->regs_ind_used[file]) { report_warning( ctx, "%s[%u]: Register never used", file_names[file], i ); } } } /* Print totals, if any. */ if (ctx->errors || ctx->warnings) debug_printf( "%u errors, %u warnings\n", ctx->errors, ctx->warnings ); return TRUE; }
static boolean epilog( struct tgsi_iterate_context *iter ) { struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; /* There must be an END instruction somewhere. */ if (ctx->index_of_END == ~0) { report_error( ctx, "Missing END instruction" ); } /* Check if all declared registers were used. */ { struct cso_hash_iter iter = cso_hash_first_node(ctx->regs_decl); while (!cso_hash_iter_is_null(iter)) { scan_register *reg = (scan_register *)cso_hash_iter_data(iter); if (!is_register_used(ctx, reg) && !is_ind_register_used(ctx, reg)) { report_warning( ctx, "%s[%u]: Register never used", file_names[reg->file], reg->indices[0] ); } iter = cso_hash_iter_next(iter); } } /* Print totals, if any. */ if (ctx->errors || ctx->warnings) debug_printf( "%u errors, %u warnings\n", ctx->errors, ctx->warnings ); return TRUE; }
/**
 * Emits the accumulated warning text, together with m_messageCode, through
 * report_warning() when the object is destroyed.
 *
 * Reporting is best-effort: any exception raised while building or
 * reporting the message is discarded so that it can never propagate out of
 * the destructor.
 */
RuntimeWarningAdHoc::~RuntimeWarningAdHoc() {
  try {
    report_warning(message.str().c_str(), m_messageCode);
  } catch (...) {
    // Catch everything, not just std::exception: an exception of any type
    // escaping a destructor would terminate the program (destructors are
    // noexcept by default, and throwing during stack unwinding is fatal).
  }
}
static enum callback_status void_to_hidden_int(struct prototype *proto, struct param *param, void *data) { struct locus *loc = data; if (param_is_void(param)) { report_warning(loc->filename, loc->line_no, "void parameter assumed to be 'hide(int)'"); static struct arg_type_info *type = NULL; if (type == NULL) type = get_hidden_int(); param_destroy(param); param_init_type(param, type, 0); } return CBS_CONT; }
static int process_line(struct protolib *plib, struct locus *loc, char *buf) { char *str = buf; char *tmp; debug(3, "Reading line %d of `%s'", loc->line_no, loc->filename); eat_spaces(&str); /* A comment or empty line. */ if (*str == ';' || *str == 0 || *str == '\n' || *str == '#') return 0; if (strncmp(str, "typedef", 7) == 0) { parse_typedef(plib, loc, &str); return 0; } struct prototype fun; prototype_init(&fun); struct param *extra_param = NULL; char *proto_name = NULL; int own; fun.return_info = parse_lens(plib, loc, &str, NULL, 0, &own, NULL); if (fun.return_info == NULL) { err: debug(3, " Skipping line %d", loc->line_no); if (extra_param != NULL) { param_destroy(extra_param); free(extra_param); } prototype_destroy(&fun); free(proto_name); return -1; } fun.own_return_info = own; debug(4, " return_type = %d", fun.return_info->type); eat_spaces(&str); tmp = start_of_arg_sig(str); if (tmp == NULL) { report_error(loc->filename, loc->line_no, "syntax error"); goto err; } *tmp = '\0'; proto_name = strdup(str); if (proto_name == NULL) { oom: report_error(loc->filename, loc->line_no, "%s", strerror(errno)); goto err; } str = tmp + 1; debug(3, " name = %s", proto_name); int have_stop = 0; while (1) { eat_spaces(&str); if (*str == ')') break; if (str[0] == '+') { if (have_stop == 0) { struct param param; param_init_stop(¶m); if (prototype_push_param(&fun, ¶m) < 0) goto oom; have_stop = 1; } str++; } int own; size_t param_num = prototype_num_params(&fun) - have_stop; struct arg_type_info *type = parse_lens(plib, loc, &str, &extra_param, param_num, &own, NULL); if (type == NULL) { report_error(loc->filename, loc->line_no, "unknown argument type"); goto err; } struct param param; param_init_type(¶m, type, own); if (prototype_push_param(&fun, ¶m) < 0) goto oom; eat_spaces(&str); if (*str == ',') { str++; continue; } else if (*str == ')') { continue; } else { if (str[strlen(str) - 1] == '\n') str[strlen(str) - 1] = '\0'; report_error(loc->filename, loc->line_no, "syntax 
error around \"%s\"", str); goto err; } } /* We used to allow void parameter as a synonym to an argument * that shouldn't be displayed. But backends really need to * know the exact type that they are dealing with. The proper * way to do this these days is to use the hide lens. * * So if there are any voids in the parameter list, show a * warning and assume that they are ints. If there's a sole * void, assume the function doesn't take any arguments. The * latter is conservative, we can drop the argument * altogether, instead of fetching and then not showing it, * without breaking any observable behavior. */ if (prototype_num_params(&fun) == 1 && param_is_void(prototype_get_nth_param(&fun, 0))) { if (0) /* Don't show this warning. Pre-0.7.0 * ltrace.conf often used this idiom. This * should be postponed until much later, when * extant uses are likely gone. */ report_warning(loc->filename, loc->line_no, "sole void parameter ignored"); prototype_destroy_nth_param(&fun, 0); } else { prototype_each_param(&fun, NULL, void_to_hidden_int, loc); } if (extra_param != NULL) { prototype_push_param(&fun, extra_param); free(extra_param); extra_param = NULL; } if (protolib_add_prototype(plib, proto_name, 1, &fun) < 0) { report_error(loc->filename, loc->line_no, "couldn't add prototype: %s", strerror(errno)); goto err; } return 0; }
static boolean iter_instruction( struct tgsi_iterate_context *iter, struct tgsi_full_instruction *inst ) { struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; const struct tgsi_opcode_info *info; uint i; if (inst->Instruction.Opcode == TGSI_OPCODE_END) { if (ctx->index_of_END != ~0) { report_error( ctx, "Too many END instructions" ); } ctx->index_of_END = ctx->num_instructions; } info = tgsi_get_opcode_info( inst->Instruction.Opcode ); if (info == NULL) { report_error( ctx, "(%u): Invalid instruction opcode", inst->Instruction.Opcode ); return TRUE; } if (info->num_dst != inst->Instruction.NumDstRegs) { report_error( ctx, "Invalid number of destination operands, should be %u", info->num_dst ); } if (info->num_src != inst->Instruction.NumSrcRegs) { report_error( ctx, "Invalid number of source operands, should be %u", info->num_src ); } /* Check destination and source registers' validity. * Mark the registers as used. */ for (i = 0; i < inst->Instruction.NumDstRegs; i++) { check_register_usage( ctx, inst->FullDstRegisters[i].DstRegister.File, inst->FullDstRegisters[i].DstRegister.Index, "destination", FALSE ); } for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { check_register_usage( ctx, inst->FullSrcRegisters[i].SrcRegister.File, inst->FullSrcRegisters[i].SrcRegister.Index, "source", (boolean)inst->FullSrcRegisters[i].SrcRegister.Indirect ); if (inst->FullSrcRegisters[i].SrcRegister.Indirect) { uint file; int index; file = inst->FullSrcRegisters[i].SrcRegisterInd.File; index = inst->FullSrcRegisters[i].SrcRegisterInd.Index; check_register_usage( ctx, file, index, "indirect", FALSE ); if (file != TGSI_FILE_ADDRESS || index != 0) report_warning( ctx, "Indirect register not ADDR[0]" ); } } ctx->num_instructions++; return TRUE; }
int main(int argc, char** argv) { char* filepath; if(argc < 2) { print_usage(); } else if(argc > 2) { print_info = 0; print_kmers = 0; parse_kmers = 0; int i; for(i = 1; i < argc-1; i++) { if(strcasecmp(argv[i], "--print_info") == 0) { print_info = 1; } else if(strcasecmp(argv[i], "--print_kmers") == 0) { print_kmers = 1; } else if(strcasecmp(argv[i], "--parse_kmers") == 0) { print_info = 1; parse_kmers = 1; } else print_usage(); } } filepath = argv[argc-1]; if(print_info) printf("Loading file: %s\n", filepath); file_size = get_file_size(filepath); FILE* fh = fopen(filepath, "r"); if(fh == NULL) { report_error("cannot open file '%s'\n", filepath); exit(EXIT_FAILURE); } if(file_size != -1 && print_info) { char str[31]; bytes_to_str(file_size, 0, str); printf("File size: %s\n", str); } buffer = buffer_new(BUFFER_SIZE); /* // Check sizes printf("-- Datatypes --\n"); printf("int: %i\n", (int)sizeof(int)); printf("long: %i\n", (int)sizeof(long)); printf("long long: %i\n", (int)sizeof(long long)); printf("double: %i\n", (int)sizeof(double)); printf("long double: %i\n", (int)sizeof(long double)); */ if(print_info) printf("----\n"); unsigned int i; // Read magic word at the start of header char magic_word[7]; magic_word[6] = '\0'; my_fread(fh, magic_word, strlen("CORTEX"), "Magic word"); if(strcmp(magic_word, "CORTEX") != 0) { fprintf(stderr, "Magic word doesn't match 'CORTEX' (start)\n"); exit(EXIT_FAILURE); } // Read version number my_fread(fh, &version, sizeof(uint32_t), "binary version"); my_fread(fh, &kmer_size, sizeof(uint32_t), "kmer size"); my_fread(fh, &num_of_bitfields, sizeof(uint32_t), "number of bitfields"); my_fread(fh, &num_of_colours, sizeof(uint32_t), "number of colours"); if(print_info) { printf("binary version: %i\n", (int)version); printf("kmer size: %i\n", (int)kmer_size); printf("bitfields: %i\n", (int)num_of_bitfields); printf("colours: %i\n", (int)num_of_colours); } if(version >= 7) { my_fread(fh, &expected_num_of_kmers, sizeof(uint64_t), "number of 
kmers"); my_fread(fh, &num_of_shades, sizeof(uint32_t), "number of shades"); if(print_info) { char tmp[256]; printf("kmers: %s\n", ulong_to_str(expected_num_of_kmers,tmp)); printf("shades: %i\n", (int)num_of_shades); } } // Checks if(version > 7 || version < 4) report_error("Sorry, we only support binary versions 4, 5, 6 & 7\n"); if(kmer_size % 2 == 0) report_error("kmer size is not an odd number\n"); if(kmer_size < 3) report_error("kmer size is less than three\n"); if(num_of_bitfields * 32 < kmer_size) report_error("Not enough bitfields for kmer size\n"); if((num_of_bitfields-1)*32 >= kmer_size) report_error("using more than the minimum number of bitfields\n"); if(num_of_colours == 0) report_error("number of colours is zero\n"); if(num_of_shades != 0 && (num_of_shades & (num_of_shades-1))) report_error("number of shades is not a power of 2\n"); // // Read array of mean read lengths per colour uint32_t *mean_read_lens_per_colour = malloc(num_of_colours*sizeof(uint32_t)); my_fread(fh, mean_read_lens_per_colour, sizeof(uint32_t) * num_of_colours, "mean read length for each colour"); // Read array of total seq loaded per colour uint64_t *total_seq_loaded_per_colour = malloc(num_of_colours*sizeof(uint64_t)); my_fread(fh, total_seq_loaded_per_colour, sizeof(uint64_t) * num_of_colours, "total sequance loaded for each colour"); for(i = 0; i < num_of_colours; i++) { sum_of_seq_loaded += total_seq_loaded_per_colour[i]; } if(version >= 6) { sample_names = malloc(sizeof(char*) * num_of_colours); for(i = 0; i < num_of_colours; i++) { uint32_t str_length; my_fread(fh, &str_length, sizeof(uint32_t), "sample name length"); if(str_length == 0) { sample_names[i] = NULL; } else { sample_names[i] = (char*)malloc((str_length+1) * sizeof(char)); my_fread(fh, sample_names[i], str_length, "sample name"); sample_names[i][str_length] = '\0'; // Check sample length is as long as we were told size_t sample_name_len = strlen(sample_names[i]); if(sample_name_len != str_length) { // Premature 
\0 in string report_warning("Sample %i name has length %lu but is only %lu chars " "long (premature '\\0')\n", i, str_length, sample_name_len); } } } seq_error_rates = malloc(sizeof(long double) * num_of_colours); my_fread(fh, seq_error_rates, sizeof(long double) * num_of_colours, "seq error rates"); cleaning_infos = malloc(sizeof(CleaningInfo) * num_of_colours); for(i = 0; i < num_of_colours; i++) { my_fread(fh, &(cleaning_infos[i].tip_cleaning), 1, "tip cleaning"); my_fread(fh, &(cleaning_infos[i].remove_low_covg_supernodes), 1, "remove low covg supernodes"); my_fread(fh, &(cleaning_infos[i].remove_low_covg_kmers), 1, "remove low covg kmers"); my_fread(fh, &(cleaning_infos[i].cleaned_against_graph), 1, "cleaned against graph"); my_fread(fh, &(cleaning_infos[i].remove_low_covg_supernodes_thresh), sizeof(int32_t), "remove low covg supernode threshold"); my_fread(fh, &(cleaning_infos[i].remove_low_covg_kmers_thresh), sizeof(int32_t), "remove low covg kmer threshold"); if(version > 6) { if(cleaning_infos[i].remove_low_covg_supernodes_thresh < 0) { report_warning("Binary header gives sample %i a cleaning threshold of " "%i for supernodes (should be >= 0)\n", i, cleaning_infos[i].remove_low_covg_supernodes_thresh); } if(cleaning_infos[i].remove_low_covg_kmers_thresh < 0) { report_warning("Binary header gives sample %i a cleaning threshold of " "%i for kmers (should be >= 0)\n", i, cleaning_infos[i].remove_low_covg_kmers_thresh); } } if(!cleaning_infos[i].remove_low_covg_supernodes && cleaning_infos[i].remove_low_covg_supernodes_thresh > 0) { report_warning("Binary header gives sample %i a cleaning threshold of " "%i for supernodes when no cleaning was performed\n", i, cleaning_infos[i].remove_low_covg_supernodes_thresh); } if(!cleaning_infos[i].remove_low_covg_kmers && cleaning_infos[i].remove_low_covg_kmers_thresh > 0) { report_warning("Binary header gives sample %i a cleaning threshold of " "%i for kmers when no cleaning was performed\n", i, 
cleaning_infos[i].remove_low_covg_kmers_thresh); } uint32_t name_length; my_fread(fh, &name_length, sizeof(uint32_t), "graph name length"); if(name_length == 0) { cleaning_infos[i].name_of_graph_clean_against = NULL; } else { cleaning_infos[i].name_of_graph_clean_against = (char*)malloc((name_length + 1) * sizeof(char)); my_fread(fh, cleaning_infos[i].name_of_graph_clean_against, name_length, "graph name length"); cleaning_infos[i].name_of_graph_clean_against[name_length] = '\0'; // Check sample length is as long as we were told size_t cleaned_name_len = strlen(cleaning_infos[i].name_of_graph_clean_against); if(cleaned_name_len != name_length) { // Premature \0 in string report_warning("Sample [%i] cleaned-against-name has length %u but is " "only %u chars long (premature '\\0')\n", i, name_length, cleaned_name_len); } } } } // Print colour info if(print_info) { for(i = 0; i < num_of_colours; i++) { printf("-- Colour %i --\n", i); if(version >= 6) { // Version 6 only output printf(" sample name: '%s'\n", sample_names[i]); } char tmp[32]; printf(" mean read length: %u\n", (unsigned int)mean_read_lens_per_colour[i]); printf(" total sequence loaded: %s\n", ulong_to_str(total_seq_loaded_per_colour[i], tmp)); if(version >= 6) { // Version 6 only output printf(" sequence error rate: %Lf\n", seq_error_rates[i]); printf(" tip clipping: %s\n", (cleaning_infos[i].tip_cleaning == 0 ? "no" : "yes")); printf(" remove low coverage supernodes: %s [threshold: %i]\n", cleaning_infos[i].remove_low_covg_supernodes ? "yes" : "no", cleaning_infos[i].remove_low_covg_supernodes_thresh); printf(" remove low coverage kmers: %s [threshold: %i]\n", cleaning_infos[i].remove_low_covg_kmers ? "yes" : "no", cleaning_infos[i].remove_low_covg_kmers_thresh); printf(" cleaned against graph: %s [against: '%s']\n", cleaning_infos[i].cleaned_against_graph ? "yes" : "no", (cleaning_infos[i].name_of_graph_clean_against == NULL ? 
"" : cleaning_infos[i].name_of_graph_clean_against)); } } printf("--\n"); } // Read magic word at the end of header my_fread(fh, magic_word, strlen("CORTEX"), "magic word (end)"); if(strcmp(magic_word, "CORTEX") != 0) { report_error("magic word doesn't match 'CORTEX' (end): '%s'\n", magic_word); exit(EXIT_FAILURE); } // Calculate number of kmers if(version < 7 && file_size != -1) { size_t bytes_remaining = file_size - num_bytes_read; size_t num_bytes_per_kmer = sizeof(uint64_t) * num_of_bitfields + sizeof(uint32_t) * num_of_colours + sizeof(uint8_t) * num_of_colours; expected_num_of_kmers = bytes_remaining / num_bytes_per_kmer; size_t excess = bytes_remaining - (expected_num_of_kmers * num_bytes_per_kmer); if(excess > 0) { report_error("Excess bytes. Bytes:\n file size: %lu;\n for kmers: %lu;" "\n num kmers: %lu;\n per kmer: %lu;\n excess: %lu\n", file_size, bytes_remaining, expected_num_of_kmers, num_bytes_per_kmer, excess); } } if(print_info) { char num_str[50]; printf("Expected number of kmers: %s\n", ulong_to_str(expected_num_of_kmers, num_str)); printf("----\n"); } // Finished parsing header if(!parse_kmers && !print_kmers) { print_kmer_stats(); fclose(fh); exit(EXIT_SUCCESS); } shade_bytes = num_of_shades >> 3; size_t shade_array_bytes = shade_bytes * num_of_colours; // Kmer data uint64_t* kmer = malloc(sizeof(uint64_t) * num_of_bitfields); uint32_t* covgs = malloc(sizeof(uint32_t) * num_of_colours); uint8_t* edges = malloc(sizeof(uint8_t) * num_of_colours); uint8_t* shade_data = malloc(shade_array_bytes); uint8_t* shend_data = malloc(shade_array_bytes); if(kmer == NULL || covgs == NULL || edges == NULL || shade_data == NULL || shend_data == NULL) { report_error("Out of memory"); exit(EXIT_SUCCESS); } // Convert values to strings char* seq = malloc(sizeof(char) * kmer_size); char kmer_colour_edge_str[9]; // Check top word of each kmer int bits_in_top_word = 2 * (kmer_size % 32); uint64_t top_word_mask = (~(uint64_t)0) << bits_in_top_word; size_t 
num_bytes_per_bkmer = sizeof(uint64_t)*num_of_bitfields; // Read kmer in bytes so we can see if there are extra bytes at the end of // the file size_t bytes_read; // while((bytes_read = fread(kmer, 1, num_bytes_per_bkmer, fh)) > 0) while((bytes_read = fread_buf(fh, kmer, num_bytes_per_bkmer, buffer)) > 0) { if(bytes_read != num_bytes_per_bkmer) { report_error("unusual extra bytes [%i] at the end of the file\n", (int)bytes_read); break; } num_bytes_read += bytes_read; my_fread(fh, covgs, sizeof(uint32_t) * num_of_colours, "kmer covg"); my_fread(fh, edges, sizeof(uint8_t) * num_of_colours, "kmer edges"); if(version >= 7) { uint8_t *shades = shade_data, *shends = shend_data; for(i = 0; i < num_of_colours; i++) { my_fread(fh, shades, sizeof(uint8_t) * shade_bytes, "shades"); my_fread(fh, shends, sizeof(uint8_t) * shade_bytes, "shade ends"); shades += shade_bytes; shends += shade_bytes; } } // // Kmer checks // // Check top bits of kmer if(kmer[0] & top_word_mask) { if(num_of_oversized_kmers == 0) { report_error("oversized kmer [index: %lu]\n", num_of_kmers_read); for(i = 0; i < num_of_bitfields; i++) { fprintf(stderr, " word %i: ", i); print_binary(stderr, kmer[i]); fprintf(stderr, "\n"); } } num_of_oversized_kmers++; } // Check for all-zeros (i.e. all As kmer: AAAAAA) uint64_t kmer_words_or = 0; for(i = 0; i < num_of_bitfields; i++) kmer_words_or |= kmer[i]; if(kmer_words_or == 0) { if(num_of_all_zero_kmers == 1) { report_error("more than one all 'A's kmers seen [index: %lu]\n", num_of_kmers_read); } num_of_all_zero_kmers++; } // Check covg is 0 for all colours for(i = 0; i < num_of_colours && covgs[i] == 0; i++); if(i == num_of_colours) { if(num_of_zero_covg_kmers == 0) { report_warning("a kmer has zero coverage in all colours [index: %lu]\n", num_of_kmers_read); } num_of_zero_covg_kmers++; } // Print? 
if(print_kmers) { binary_kmer_to_seq(kmer, seq, kmer_size, num_of_bitfields); printf("%s", seq); // Print coverages for(i = 0; i < num_of_colours; i++) printf(" %li", (unsigned long)covgs[i]); // Print edges for(i = 0; i < num_of_colours; i++) printf(" %s", get_edges_str(edges[i], kmer_colour_edge_str)); if(version >= 7 && num_of_shades > 0) { for(i = 0; i < num_of_colours; i++) { putc(' ', stdout); print_colour_shades(shade_data + i*shade_bytes, shend_data + i*shade_bytes); } } putc('\n', stdout); } num_of_kmers_read++; for(i = 0; i < num_of_colours; i++) sum_of_covgs_read += covgs[i]; } if(num_of_kmers_read != expected_num_of_kmers) { report_error("Expected %lu kmers, read %lu\n", expected_num_of_kmers, num_of_kmers_read); } if(print_kmers && print_info) printf("----\n"); // check for various reading errors if(errno != 0) { report_error("errno set [%i]\n", (int)errno); } int err; if((err = ferror(fh)) != 0) { report_error("occurred after file reading [%i]\n", err); } // For testing output //num_of_bitfields = 2; //num_of_kmers_read = 3600000000; //num_of_kmers_read = 12345; //num_of_kmers_read = 3581787; //num_of_kmers_read = 0; print_kmer_stats(); fclose(fh); free(kmer); free(covgs); free(edges); free(shade_data); free(shend_data); buffer_free(buffer); if((print_kmers || parse_kmers) && print_info) { printf("----\n"); if(num_warnings > 0 || num_errors > 0) printf("Warnings: %u; Errors: %u\n", num_warnings, num_errors); if(num_errors == 0) printf(num_warnings ? "Binary may be ok\n" : "Binary is valid\n"); } exit(EXIT_SUCCESS); }
static void print_kmer_stats() { char num_str[50]; if(num_of_all_zero_kmers > 1) { report_error("%s all-zero-kmers seen\n", ulong_to_str(num_of_all_zero_kmers, num_str)); } if(num_of_oversized_kmers > 0) { report_error("%s oversized kmers seen\n", ulong_to_str(num_of_oversized_kmers, num_str)); } if(num_of_zero_covg_kmers > 0) { report_warning("%s kmers have no coverage in any colour\n", ulong_to_str(num_of_zero_covg_kmers, num_str)); } if((print_kmers || parse_kmers) && print_info) { printf("kmers read: %s\n", ulong_to_str(num_of_kmers_read, num_str)); printf("covgs read: %s\n", ulong_to_str(sum_of_covgs_read, num_str)); printf("seq loaded: %s\n", ulong_to_str(sum_of_seq_loaded, num_str)); } if(print_info) { // Memory calculations // use expected number of kmers if we haven't read the whole file unsigned long kmer_count = (print_kmers || parse_kmers ? num_of_kmers_read : expected_num_of_kmers); // Number of hash table entries is 2^mem_height * mem_width // Aim for 80% occupancy once loaded float extra_space = 10.0/8; unsigned long hash_capacity = extra_space * kmer_count; // mem_width must be within these boundaries unsigned int min_mem_width = 5; unsigned int max_mem_width = 50; unsigned int min_mem_height = 12; // min mem usage = 2^12 * 5 = 20,480 entries = 320.0 KB with k=31,cols=1 unsigned long mem_height = min_mem_height; unsigned long mem_width = max_mem_width; unsigned long hash_entries = (0x1UL << mem_height) * mem_width; if(hash_capacity > hash_entries) { // Resize mem_height = Log2((double)hash_capacity / (max_mem_width-1))+0.99; mem_height = MIN2(mem_height, 32); mem_height = MAX2(mem_height, min_mem_height); mem_width = hash_capacity / (0x1UL << mem_height) + 1; printf("mem_width: %lu; mem_height: %lu;\n", mem_width, mem_height); if(mem_width < min_mem_width) { // re-calculate mem_height mem_height = Log2((double)hash_capacity / min_mem_width)+0.99; mem_height = MIN2(mem_height, 32); mem_height = MAX2(mem_height, min_mem_height); mem_width = 
hash_capacity / (0x1UL << mem_height) + 1; mem_width = MAX2(mem_width, min_mem_width); } hash_entries = (0x1UL << mem_height) * mem_width; } char min_mem_required[50]; char rec_mem_required[50]; set_memory_required_str(kmer_count, min_mem_required); set_memory_required_str(hash_entries, rec_mem_required); printf("Memory required: %s\n", min_mem_required); printf("Memory suggested: --mem_width %lu --mem_height %lu\n", mem_width, mem_height); char hash_entries_numstr[50]; ulong_to_str(hash_entries, hash_entries_numstr); printf(" [%s entries; %s memory]\n", hash_entries_numstr, rec_mem_required); } }