/*
 * Dissect an Apple IP-over-IEEE 1394 frame.
 *
 * The header read here is 18 bytes: an 8-byte destination EUI-64 at offset 0,
 * an 8-byte source EUI-64 at offset 8 and a 2-byte big-endian type at
 * offset 16.  Everything after the header is handed to the dissector
 * registered for that type in ethertype_subdissector_table, or to the
 * generic data dissector when none accepts it.
 */
static void
dissect_ap1394(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree)
{
  const int dst_off = 0;
  const int src_off = 8;
  const int eui_len = 8;
  const int type_off = 16;
  const int hdr_len = 18;

  proto_tree *hdr_tree = NULL;
  proto_item *hdr_item;
  const guint8 *src_eui;
  const guint8 *dst_eui;
  tvbuff_t *payload_tvb;
  guint16 ether_type;

  col_set_str(pinfo->cinfo, COL_PROTOCOL, "IP/IEEE1394");
  col_clear(pinfo->cinfo, COL_INFO);

  /* The source address is fetched (and published) before the destination. */
  src_eui = tvb_get_ptr(tvb, src_off, eui_len);
  SET_ADDRESS(&pinfo->dl_src, AT_EUI64, eui_len, src_eui);
  SET_ADDRESS(&pinfo->src, AT_EUI64, eui_len, src_eui);

  dst_eui = tvb_get_ptr(tvb, dst_off, eui_len);
  SET_ADDRESS(&pinfo->dl_dst, AT_EUI64, eui_len, dst_eui);
  SET_ADDRESS(&pinfo->dst, AT_EUI64, eui_len, dst_eui);

  if (tree != NULL) {
    hdr_item = proto_tree_add_protocol_format(tree, proto_ap1394, tvb,
        dst_off, hdr_len,
        "Apple IP-over-IEEE 1394, Src: %s, Dst: %s",
        bytes_to_str(src_eui, eui_len), bytes_to_str(dst_eui, eui_len));
    hdr_tree = proto_item_add_subtree(hdr_item, ett_ap1394);
    proto_tree_add_bytes(hdr_tree, hf_ap1394_dst, tvb, dst_off, eui_len, dst_eui);
    proto_tree_add_bytes(hdr_tree, hf_ap1394_src, tvb, src_off, eui_len, src_eui);
  }

  /* The type is needed for sub-dissection even when no tree is built. */
  ether_type = tvb_get_ntohs(tvb, type_off);
  if (tree != NULL) {
    proto_tree_add_uint(hdr_tree, hf_ap1394_type, tvb, type_off, 2, ether_type);
  }

  payload_tvb = tvb_new_subset_remaining(tvb, hdr_len);
  if (dissector_try_uint(ethertype_subdissector_table, ether_type,
                         payload_tvb, pinfo, tree)) {
    return;
  }

  /* Nobody claimed this type: show the payload as raw data. */
  call_dissector(data_handle, payload_tvb, pinfo, tree);
}
/*
 * Redraw the progress line on stderr for a transfer whose total size is
 * not known.
 *
 * sofar - number of bytes handled so far
 * bps   - current rate
 *
 * Nothing is printed unless PROGRESS_CAN_SHOW() is true.  The line starts
 * with '\r' so that each call overwrites the previous one, and ends with a
 * spinner character that advances through "/-\|" on every call.
 */
void progress_unknown(long sofar, long bps)
{
	if(PROGRESS_CAN_SHOW()){
		/*
		 * Unsigned on purpose: this counter is incremented for the whole
		 * life of the process.  A signed counter would eventually
		 * overflow (undefined behaviour) and could produce a negative
		 * index into the spinner string.  Unsigned wrap-around is well
		 * defined, and because the wrap point is a multiple of 4 the
		 * spinner sequence is not disturbed.
		 */
		static unsigned int spin = 0;
		char buf[2][64];

		fprintf(stderr, "\r%s %s %c...",
				bytes_to_str(buf[0], sizeof buf[0], sofar),
				bytes_to_str(buf[1], sizeof buf[1], bps),
				"/-\\|"[spin++ % 4]);
	}
}
/*
 * Format an ATMARP ATM number as a displayable string.
 *
 * ad    - pointer to the raw address bytes
 * ad_tl - combined type/length octet; the length is ad_tl & ATMARP_LEN_MASK
 *         and the ATMARP_IS_E164 bit selects the address format
 *
 * Returns "<No address>" when the length is zero.  For E.164 addresses the
 * bytes are copied verbatim (they are assumed to be ASCII/IA5) into an
 * ep_alloc()ed buffer, truncated to MAX_E164_STR_LEN characters with "..."
 * appended when the address is longer than that.  Any other address is
 * treated as an NSAP and rendered by bytes_to_str().
 */
static const gchar *
atmarpnum_to_str(const guint8 *ad, int ad_tl)
{
  int    ad_len = ad_tl & ATMARP_LEN_MASK;
  gchar *cur;

  if (ad_len == 0)
    return "<No address>";

  if (ad_tl & ATMARP_IS_E164) {
    /*
     * I'm assuming this means it's an ASCII (IA5) string.
     */
    cur = ep_alloc(MAX_E164_STR_LEN+3+1);
    if (ad_len > MAX_E164_STR_LEN) {
      /* Can't show it all. */
      memcpy(cur, ad, MAX_E164_STR_LEN);
      g_snprintf(&cur[MAX_E164_STR_LEN], 3+1, "...");
    } else {
      memcpy(cur, ad, ad_len);
      /*
       * Terminate directly after the copied bytes.  Writing the NUL at
       * ad_len + 1 would leave cur[ad_len] uninitialised, so the string
       * could contain a stray byte or run on past the intended end.
       */
      cur[ad_len] = '\0';
    }
    return cur;
  } else {
    /*
     * NSAP.
     *
     * XXX - break down into subcomponents.
     */
    return bytes_to_str(ad, ad_len);
  }
}
/*
 * Render an AARP protocol address as text.
 *
 * When AARP_PRO_IS_ATALK() accepts the type/length pair the address is
 * formatted by atalkid_to_str(); every other address is shown as raw bytes.
 */
static gchar *
aarpproaddr_to_str(const guint8 *ad, int ad_len, guint16 type)
{
  if (!AARP_PRO_IS_ATALK(type, ad_len)) {
    /* Not an AppleTalk address: fall back to a plain byte dump. */
    return bytes_to_str(ad, ad_len);
  }

  return atalkid_to_str(ad);
}
/*
 * Render an AARP hardware address as text.
 *
 * When AARP_HW_IS_ETHER() accepts the type/length pair the address is
 * formatted by ether_to_str() (Token Ring addresses are the same type of
 * address); every other address is shown as raw bytes.
 */
static gchar *
aarphrdaddr_to_str(const guint8 *ad, int ad_len, guint16 type)
{
  if (!AARP_HW_IS_ETHER(type, ad_len)) {
    /* Unknown hardware type or unexpected length: plain byte dump. */
    return bytes_to_str(ad, ad_len);
  }

  return ether_to_str(ad);
}
// str must be at least 32 bytes long // max lenth: strlen '18,446,744,073,709,551,615.0 GB' + 1 = 32 bytes static void set_memory_required_str(unsigned long num_of_hash_entries, char* str) { // Size of each entry is rounded up to nearest 8 bytes unsigned long num_of_bytes = num_of_hash_entries * round_up_ulong(8*num_of_bitfields + 5*num_of_colours + 1, 8); bytes_to_str(num_of_bytes, 1, str); }
/*
 * Render an ARP protocol address as text.
 *
 * A zero-length address yields "<No address>".  When ARP_PRO_IS_IPv4()
 * accepts the type/length pair the address is formatted by ip_to_str();
 * anything else is shown as raw bytes.
 */
static const gchar *
arpproaddr_to_str(const guint8 *ad, int ad_len, guint16 type)
{
  if (ad_len != 0) {
    if (!ARP_PRO_IS_IPv4(type, ad_len)) {
      /* Not IPv4: plain byte dump. */
      return bytes_to_str(ad, ad_len);
    }
    return ip_to_str(ad);
  }

  return "<No address>";
}
// Print statistics for a path store: first the statistics of its path set,
// then one status line with the kmers-with-paths count, the path count and
// the number of path bytes.
void gpath_store_print_stats(const GPathStore *gpstore)
{
  char nkmers_txt[50], npaths_txt[50], nbytes_txt[50];

  gpath_set_print_stats(&gpstore->gpset);

  // Format the three counters before emitting the summary line
  ulong_to_str(gpstore->num_kmers_with_paths, nkmers_txt);
  ulong_to_str(gpstore->num_paths, npaths_txt);
  bytes_to_str(gpstore->path_bytes, 1, nbytes_txt);

  status("[GPathStore] kmers-with-paths: %s, num paths: %s, path-bytes: %s",
         nkmers_txt, npaths_txt, nbytes_txt);
}
// Build the display string for one end of a stream.
//
// Endpoints of type lbm_uim_instance_stream are shown as the byte string of
// their context instance; every other endpoint is shown as
// "domain:address:port".
QString LBMStreamEntry::formatEndpoint(const packet_info * pinfo, const lbm_uim_stream_endpoint_t * endpoint)
{
    if (endpoint->type != lbm_uim_instance_stream)
    {
        return QString("%1:%2:%3")
            .arg(endpoint->stream_info.dest.domain)
            .arg(address_to_str(pinfo->pool, &(endpoint->stream_info.dest.addr)))
            .arg(endpoint->stream_info.dest.port);
    }

    // Instance stream: the identifier is the raw context instance.
    return QString(bytes_to_str(pinfo->pool,
                                endpoint->stream_info.ctxinst.ctxinst,
                                sizeof(endpoint->stream_info.ctxinst.ctxinst)));
}
// Allocate and initialise a hash table able to hold at least req_capacity
// entries.  The actual capacity, number of buckets and bucket size are
// chosen by hash_table_cap().  The chosen sizes are reported via status()
// before any memory is allocated.
void hash_table_alloc(HashTable *ht, uint64_t req_capacity)
{
  uint64_t nbuckets, ncapacity;
  uint8_t bktsize;

  ncapacity = hash_table_cap(req_capacity, &nbuckets, &bktsize);

  const uint_fast32_t mask = (uint_fast32_t)(nbuckets - 1);
  const size_t nbytes = ncapacity * sizeof(BinaryKmer) +
                        nbuckets * sizeof(uint8_t[2]);

  char nbuckets_txt[100], bktsize_txt[100], capacity_txt[100], nbytes_txt[100];
  ulong_to_str(nbuckets, nbuckets_txt);
  ulong_to_str(bktsize, bktsize_txt);
  ulong_to_str(ncapacity, capacity_txt);
  bytes_to_str(nbytes, 1, nbytes_txt);

  status("[hasht] Allocating table with %s entries, using %s",
         capacity_txt, nbytes_txt);
  status("[hasht] number of buckets: %s, bucket size: %s",
         nbuckets_txt, bktsize_txt);

  // The per-bucket data must start zeroed (hence calloc) so that the first
  // element of each bucket is the 0th position
  BinaryKmer *kmers = ctx_malloc(ncapacity * sizeof(BinaryKmer));
  uint8_t (*const bkt_data)[2] = ctx_calloc(nbuckets, sizeof(uint8_t[2]));

  // Mark every slot as unused
  BinaryKmer *slot, *slots_end = kmers + ncapacity;
  for(slot = kmers; slot < slots_end; slot++)
    *slot = unset_bkmer;

  // Fill a temporary and copy it over the caller's struct in one go
  HashTable tmp = {
    .table = kmers,
    .num_of_buckets = nbuckets,
    .hash_mask = mask,
    .bucket_size = bktsize,
    .capacity = ncapacity,
    .buckets = bkt_data,
    .num_kmers = 0,
    .collisions = {0},
    .seed = rand()};

  memcpy(ht, &tmp, sizeof(tmp));
}

// Release the two arrays owned by the hash table (the table itself and the
// bucket data).  The HashTable struct is not freed.
void hash_table_dealloc(HashTable *hash_table)
{
  ctx_free(hash_table->table);
  ctx_free(hash_table->buckets);
}
void hash_table_print_stats_brief(const HashTable *const ht) { size_t nbytes, nkeybits; double occupancy = (100.0 * ht->num_kmers) / ht->capacity; nbytes = ht->capacity * sizeof(BinaryKmer) + ht->num_of_buckets * sizeof(uint8_t[2]); nkeybits = (size_t)__builtin_ctzl(ht->num_of_buckets); char mem_str[50], num_buckets_str[100], num_entries_str[100], capacity_str[100]; ulong_to_str(ht->num_of_buckets, num_buckets_str); bytes_to_str(nbytes, 1, mem_str); ulong_to_str(ht->capacity, capacity_str); ulong_to_str(ht->num_kmers, num_entries_str); status("[hash] buckets: %s [2^%zu]; bucket size: %zu; " "memory: %s; occupancy: %s / %s (%.2f%%)\n", num_buckets_str, nkeybits, (size_t)ht->bucket_size, mem_str, num_entries_str, capacity_str, occupancy); }
/*
 * Render an ATMARP subaddress as text.
 *
 * ad_tl is the combined type/length octet; only the length part
 * (ad_tl & ATMARP_LEN_MASK) is used.  A zero length yields "<No address>",
 * anything else is shown as raw bytes.
 *
 * The type bit is deliberately ignored: E.164 isn't considered legal in
 * subaddresses (RFC 2225 says that a null or unknown ATM address is
 * indicated by setting the length to 0, in which case the type must be
 * ignored; captures have been seen in which the length of a subaddress is
 * 0 and the type is E.164).
 *
 * XXX - break down into subcomponents?
 */
static const gchar *
atmarpsubaddr_to_str(const guint8 *ad, int ad_tl)
{
  const int sub_len = ad_tl & ATMARP_LEN_MASK;

  if (sub_len != 0) {
    return bytes_to_str(ad, sub_len);
  }

  return "<No address>";
}
// Return the display text for one cell of the UAT.
//
// The field's tostr callback produces a g_malloc'ed buffer and a length.
// Text-like field modes use the buffer as-is; hex-byte fields are rendered
// through bytes_to_str().  An empty string is returned when no UAT is set.
QString UatDialog::fieldString(guint row, guint column)
{
    if (!uat_) {
        return QString();
    }

    QString text;
    void *record = UAT_INDEX_PTR(uat_, row);
    uat_field_t *fld = &uat_->fields[column];
    char *raw;
    guint raw_len;

    fld->cb.tostr(record, &raw, &raw_len, fld->cbdata.tostr, fld->fld_data);

    switch (fld->mode) {
    case PT_TXTMOD_HEXBYTES:
    {
        // The callback's output is binary here; convert it to hex text
        // and release the temporary right after copying it.
        char *hex = bytes_to_str(NULL, (const guint8 *) raw, raw_len);
        text = QString(hex);
        wmem_free(NULL, hex);
        break;
    }
    case PT_TXTMOD_NONE:
    case PT_TXTMOD_STRING:
    case PT_TXTMOD_ENUM:
    case PT_TXTMOD_FILENAME:
    case PT_TXTMOD_DIRECTORYNAME:
        text = raw;
        break;
    default:
        g_assert_not_reached();
        break;
    }

    g_free(raw);
    return text;
}
// Unit test for bytes_to_str(num_bytes, 1, str).
// Each check formats a byte count into `str` and compares the returned
// string with the expected text; on failure the actual text is reported
// via the "Got: %s" message.
static void test_util_bytes_to_str()
{
  test_status("Testing bytes_to_str()");

  // Output buffer shared by every check below
  char str[100];

  // Excess decimal points are trimmed off
  // 14.0MB -> 14MB
  TASSERT2(strcmp(bytes_to_str(14688256,1,str),"14MB") == 0, "Got: %s", str);
  // 1.9GB -> 1.9GB
  TASSERT2(strcmp(bytes_to_str(2040110000,1,str),"1.9GB") == 0, "Got: %s", str);
  // 1.99GB -> 2GB
  TASSERT2(strcmp(bytes_to_str(2140110000,1,str),"2GB") == 0, "Got: %s", str);
  // 1500000 bytes -> 1.4MB
  TASSERT2(strcmp(bytes_to_str(1500000,1,str),"1.4MB") == 0, "Got: %s", str);
  // 0.5GB -> 512MB
  TASSERT2(strcmp(bytes_to_str(536900000,1,str),"512MB") == 0, "Got: %s", str);
  // 1 -> 1B
  TASSERT2(strcmp(bytes_to_str(1,1,str),"1B") == 0, "Got: %s", str);
  // 1023 -> 1,023B (byte counts get a thousands separator)
  TASSERT2(strcmp(bytes_to_str(1023,1,str),"1,023B") == 0, "Got: %s", str);
}
// Entry point of the bubble calling command.
//
// Parses the command line, opens the input graph files (and any link files
// given with 'p'), works out how much memory to use, loads everything into
// a dBGraph, runs invoke_bubble_caller() and writes the result to the
// gzipped output path ("-" when none is given).
//
// argc/argv: command line as passed to getopt_long_only(); the remaining
//            non-option arguments are the graph files to load.
// Returns EXIT_SUCCESS.  Error paths go through die() / cmd_print_usage().
int ctx_bubbles(int argc, char **argv)
{
  // Zero / NULL mean "not set on the command line"; defaults applied below
  size_t nthreads = 0;
  struct MemArgs memargs = MEM_ARGS_INIT;
  const char *out_path = NULL;
  size_t max_allele_len = 0, max_flank_len = 0;
  bool remove_serial_bubbles = true;

  // List of haploid colours
  size_t *hapcols = NULL;
  int nhapcols = 0;
  char *hapcols_arg = NULL;

  // Link files are collected in gpfiles as they are met on the command line
  GPathReader tmp_gpfile;
  GPathFileBuffer gpfiles;
  gpfile_buf_alloc(&gpfiles, 8);

  // Arg parsing
  char cmd[100];
  char shortopts[300];
  cmd_long_opts_to_short(longopts, shortopts, sizeof(shortopts));
  int c;

  // silence error messages from getopt_long
  // opterr = 0;

  while((c = getopt_long_only(argc, argv, shortopts, longopts, NULL)) != -1) {
    // cmd holds a printable form of the current option for error messages
    cmd_get_longopt_str(longopts, c, cmd, sizeof(cmd));
    switch(c) {
      case 0: /* flag set */ break;
      case 'h': cmd_print_usage(NULL); break;
      case 'o': cmd_check(!out_path, cmd); out_path = optarg; break;
      case 'f': cmd_check(!futil_get_force(), cmd); futil_set_force(true); break;
      case 'p':
        // Open the link file now; the reader struct is copied into gpfiles
        memset(&tmp_gpfile, 0, sizeof(GPathReader));
        gpath_reader_open(&tmp_gpfile, optarg);
        gpfile_buf_push(&gpfiles, &tmp_gpfile, 1);
        break;
      case 't': cmd_check(!nthreads, cmd); nthreads = cmd_uint32_nonzero(cmd, optarg); break;
      case 'm': cmd_mem_args_set_memory(&memargs, optarg); break;
      case 'n': cmd_mem_args_set_nkmers(&memargs, optarg); break;
      case 'H': cmd_check(!hapcols_arg, cmd); hapcols_arg = optarg; break;
      case 'A': cmd_check(!max_allele_len, cmd); max_allele_len = cmd_uint32_nonzero(cmd, optarg); break;
      case 'F': cmd_check(!max_flank_len, cmd); max_flank_len = cmd_uint32_nonzero(cmd, optarg); break;
      case 'S': cmd_check(remove_serial_bubbles,cmd); remove_serial_bubbles = false; break;
      case ':': /* BADARG */
      case '?': /* BADCH getopt_long has already printed error */
        // cmd_print_usage(NULL);
        // NOTE(review): there is no break here; presumably die() does not
        // return, otherwise control would fall into abort() below
        die("`"CMD" "SUBCMD" -h` for help. Bad option: %s", argv[optind-1]);
      default: abort();
    }
  }

  // Defaults for unset values
  if(out_path == NULL) out_path = "-";
  if(nthreads == 0) nthreads = DEFAULT_NTHREADS;
  if(max_allele_len == 0) max_allele_len = DEFAULT_MAX_ALLELE;
  if(max_flank_len == 0) max_flank_len = DEFAULT_MAX_FLANK;

  if(optind >= argc) cmd_print_usage("Require input graph files (.ctx)");

  //
  // Open graph files
  //
  const size_t num_gfiles = argc - optind;
  char **graph_paths = argv + optind;
  ctx_assert(num_gfiles > 0);

  GraphFileReader *gfiles = ctx_calloc(num_gfiles, sizeof(GraphFileReader));
  size_t i, ncols, ctx_max_kmers = 0, ctx_sum_kmers = 0;

  ncols = graph_files_open(graph_paths, gfiles, num_gfiles,
                           &ctx_max_kmers, &ctx_sum_kmers);

  // Check graph + paths are compatible
  graphs_gpaths_compatible(gfiles, num_gfiles, gpfiles.b, gpfiles.len, -1);

  //
  // Check haploid colours are valid
  //
  if(hapcols_arg != NULL) {
    // First pass counts the colours, second pass fills the array
    if((nhapcols = range_get_num(hapcols_arg, ncols)) < 0)
      die("Invalid haploid colour list: %s", hapcols_arg);

    hapcols = ctx_calloc(nhapcols, sizeof(hapcols[0]));
    if(range_parse_array(hapcols_arg, hapcols, ncols) < 0)
      die("Invalid haploid colour list: %s", hapcols_arg);
  }

  //
  // Decide on memory
  //
  size_t bits_per_kmer, kmers_in_hash, graph_mem, path_mem, thread_mem;
  char thread_mem_str[100];

  // edges(1bytes) + kmer_paths(8bytes) + in_colour(1bit/col) +
  // visitedfw/rv(2bits/thread)
  // (the kmer_paths pointer is only counted when link files were given)

  bits_per_kmer = sizeof(BinaryKmer)*8 + sizeof(Edges)*8 +
                  (gpfiles.len > 0 ? sizeof(GPath*)*8 : 0) +
                  ncols + 2*nthreads;

  kmers_in_hash = cmd_get_kmers_in_hash(memargs.mem_to_use,
                                        memargs.mem_to_use_set,
                                        memargs.num_kmers,
                                        memargs.num_kmers_set,
                                        bits_per_kmer,
                                        ctx_max_kmers, ctx_sum_kmers,
                                        false, &graph_mem);

  // Thread memory
  thread_mem = roundup_bits2bytes(kmers_in_hash) * 2;
  bytes_to_str(thread_mem * nthreads, 1, thread_mem_str);
  status("[memory] (of which threads: %zu x %zu = %s)\n",
          nthreads, thread_mem, thread_mem_str);

  // Paths memory
  // MIN2 keeps the subtraction from wrapping below zero
  size_t rem_mem = memargs.mem_to_use - MIN2(memargs.mem_to_use, graph_mem+thread_mem);
  path_mem = gpath_reader_mem_req(gpfiles.b, gpfiles.len, ncols, rem_mem, false,
                                  kmers_in_hash, false);

  // Shift path store memory from graphs->paths
  graph_mem -= sizeof(GPath*)*kmers_in_hash;
  path_mem  += sizeof(GPath*)*kmers_in_hash;
  cmd_print_mem(path_mem, "paths");

  size_t total_mem = graph_mem + thread_mem + path_mem;
  cmd_check_mem_limit(memargs.mem_to_use, total_mem);

  //
  // Open output file
  //
  gzFile gzout = futil_gzopen_create(out_path, "w");

  // Allocate memory
  // The kmer size is taken from the first graph file
  dBGraph db_graph;
  db_graph_alloc(&db_graph, gfiles[0].hdr.kmer_size, ncols, 1, kmers_in_hash,
                 DBG_ALLOC_EDGES | DBG_ALLOC_NODE_IN_COL);

  // Paths
  gpath_reader_alloc_gpstore(gpfiles.b, gpfiles.len, path_mem, false, &db_graph);

  //
  // Load graphs
  //
  GraphLoadingPrefs gprefs = graph_loading_prefs(&db_graph);
  gprefs.empty_colours = true;

  for(i = 0; i < num_gfiles; i++) {
    graph_load(&gfiles[i], gprefs, NULL);
    graph_file_close(&gfiles[i]);
    // empty_colours is only true while loading the first file
    gprefs.empty_colours = false;
  }
  ctx_free(gfiles);

  hash_table_print_stats(&db_graph.ht);

  // Load link files
  for(i = 0; i < gpfiles.len; i++)
    gpath_reader_load(&gpfiles.b[i], GPATH_DIE_MISSING_KMERS, &db_graph);

  // Create array of cJSON** from input files
  cJSON **hdrs = ctx_malloc(gpfiles.len * sizeof(cJSON*));
  for(i = 0; i < gpfiles.len; i++) hdrs[i] = gpfiles.b[i].json;

  // Now call variants
  BubbleCallingPrefs call_prefs = {.max_allele_len = max_allele_len,
                                   .max_flank_len = max_flank_len,
                                   .haploid_cols = hapcols,
                                   .nhaploid_cols = nhapcols,
                                   .remove_serial_bubbles = remove_serial_bubbles};

  invoke_bubble_caller(nthreads, &call_prefs,
                       gzout, out_path,
                       hdrs, gpfiles.len,
                       &db_graph);

  status(" saved to: %s\n", out_path);
  gzclose(gzout);
  ctx_free(hdrs);

  // Close input link files
  for(i = 0; i < gpfiles.len; i++)
    gpath_reader_close(&gpfiles.b[i]);
  gpfile_buf_dealloc(&gpfiles);

  ctx_free(hapcols);
  db_graph_dealloc(&db_graph);

  return EXIT_SUCCESS;
}
// Entry point of the rmsubstr command.
//
// Loads every read from the given sequence files into memory, builds a
// kmer occurrence graph over them and prints the reads for which
// _is_substr() returns 0 (or, with the 'v' option, the reads for which it
// returns non-zero).  Reads for which _is_substr() returns -1 are counted
// as bad reads and never printed.
//
// argc/argv: command line as passed to getopt_long_only(); the remaining
//            non-option arguments are the sequence files.
// Returns EXIT_SUCCESS.  Error paths go through die() / cmd_print_usage().
int ctx_rmsubstr(int argc, char **argv)
{
  // Zero / NULL mean "not set on the command line"; defaults applied below
  struct MemArgs memargs = MEM_ARGS_INIT;
  size_t kmer_size = 0, nthreads = 0;
  const char *output_file = NULL;
  seq_format fmt = SEQ_FMT_FASTA;
  bool invert = false;

  // Arg parsing
  char cmd[100], shortopts[100];
  cmd_long_opts_to_short(longopts, shortopts, sizeof(shortopts));
  int c;

  while((c = getopt_long_only(argc, argv, shortopts, longopts, NULL)) != -1) {
    // cmd holds a printable form of the current option for error messages
    cmd_get_longopt_str(longopts, c, cmd, sizeof(cmd));
    switch(c) {
      case 0: /* flag set */ break;
      case 'h': cmd_print_usage(NULL); break;
      case 'f': cmd_check(!futil_get_force(), cmd); futil_set_force(true); break;
      case 'o': cmd_check(!output_file, cmd); output_file = optarg; break;
      case 't': cmd_check(!nthreads, cmd); nthreads = cmd_uint32_nonzero(cmd, optarg); break;
      case 'm': cmd_mem_args_set_memory(&memargs, optarg); break;
      case 'n': cmd_mem_args_set_nkmers(&memargs, optarg); break;
      case 'k': cmd_check(!kmer_size,cmd); kmer_size = cmd_uint32(cmd, optarg); break;
      case 'F': cmd_check(fmt==SEQ_FMT_FASTA, cmd); fmt = cmd_parse_format(cmd, optarg); break;
      case 'v': cmd_check(!invert,cmd); invert = true; break;
      case ':': /* BADARG */
      case '?': /* BADCH getopt_long has already printed error */
        // cmd_print_usage(NULL);
        // NOTE(review): there is no break here; presumably cmd_print_usage()
        // does not return, otherwise control would fall into abort() below
        cmd_print_usage("`"CMD" rmsubstr -h` for help. Bad option: %s", argv[optind-1]);
      default: abort();
    }
  }

  // Defaults
  if(!nthreads) nthreads = DEFAULT_NTHREADS;
  if(!kmer_size) kmer_size = DEFAULT_KMER;

  if(!(kmer_size&1)) cmd_print_usage("Kmer size must be odd");
  if(kmer_size < MIN_KMER_SIZE) cmd_print_usage("Kmer size too small (recompile)");
  if(kmer_size > MAX_KMER_SIZE) cmd_print_usage("Kmer size too large (recompile?)");

  if(optind >= argc)
    cmd_print_usage("Please specify at least one input sequence file (.fq, .fq etc.)");

  // Open every input file up front so a bad path fails before any work
  size_t i, num_seq_files = argc - optind;
  char **seq_paths = argv + optind;
  seq_file_t **seq_files = ctx_calloc(num_seq_files, sizeof(seq_file_t*));

  for(i = 0; i < num_seq_files; i++)
    if((seq_files[i] = seq_open(seq_paths[i])) == NULL)
      die("Cannot read sequence file %s", seq_paths[i]);

  // Estimate number of bases
  // set to -1 if we cannot calc
  int64_t est_num_bases = seq_est_seq_bases(seq_files, num_seq_files);
  if(est_num_bases < 0) {
    warn("Cannot get file sizes, using pipes");
    // Fall back to an estimate derived from the requested number of kmers
    est_num_bases = memargs.num_kmers * IDEAL_OCCUPANCY;
  }

  status("[memory] Estimated number of bases: %li", (long)est_num_bases);

  // Use file sizes to decide on memory

  //
  // Decide on memory
  //
  size_t bits_per_kmer, kmers_in_hash, graph_mem;
  size_t mem_to_use = memargs.mem_to_use;

  bits_per_kmer = sizeof(BinaryKmer)*8 +
                  sizeof(KONodeList) + sizeof(KOccur); // see kmer_occur.h

  // Reserve room for the sequence itself before sizing the hash table
  if(mem_to_use < (size_t)est_num_bases) {
    warn("You probably need at least %zu bytes (> %zu)",
         (size_t)est_num_bases, memargs.mem_to_use);
  } else {
    mem_to_use -= est_num_bases;
  }

  kmers_in_hash = cmd_get_kmers_in_hash(mem_to_use,
                                        memargs.mem_to_use_set,
                                        memargs.num_kmers,
                                        memargs.num_kmers_set,
                                        bits_per_kmer,
                                        0, est_num_bases,
                                        true, &graph_mem);

  // 1 byte per kmer for each base to load sequence files
  size_t total_mem = kmers_in_hash*bits_per_kmer/8 + est_num_bases;

  char memstr[50];
  bytes_to_str(total_mem, 1, memstr);
  status("[memory] total mem with input: %s\n", memstr);

  cmd_check_mem_limit(memargs.mem_to_use, total_mem);

  //
  // Open output file
  //
  if(output_file == NULL) output_file = "-";
  FILE *fout = futil_fopen_create(output_file, "w");

  //
  // Set up memory
  //
  dBGraph db_graph;
  db_graph_alloc(&db_graph, kmer_size, 1, 0, kmers_in_hash, DBG_ALLOC_BKTLOCKS);

  //
  // Load reference sequence into a read buffer
  //
  ReadBuffer rbuf;
  read_buf_alloc(&rbuf, 1024);
  seq_load_all_reads(seq_files, num_seq_files, &rbuf);

  // Check for reads too short
  // (the empty loop stops at the first read shorter than kmer_size)
  for(i = 0; i < rbuf.len && rbuf.b[i].seq.end >= kmer_size; i++) {}
  if(i < rbuf.len)
    warn("Reads shorter than kmer size (%zu) will not be filtered", kmer_size);

  KOGraph kograph = kograph_create(rbuf.b, rbuf.len, true, 0,
                                   nthreads, &db_graph);

  size_t num_reads = rbuf.len, num_reads_printed = 0, num_bad_reads = 0;

  // Loop over reads printing those that are not substrings
  int ret;
  for(i = 0; i < rbuf.len; i++) {
    ret = _is_substr(&rbuf, i, &kograph, &db_graph);
    if(ret == -1) num_bad_reads++;
    else if((ret && invert) || (!ret && !invert)) {
      seqout_print_read(&rbuf.b[i], fmt, fout);
      num_reads_printed++;
    }
  }

  char num_reads_str[100], num_reads_printed_str[100], num_bad_reads_str[100];
  ulong_to_str(num_reads, num_reads_str);
  ulong_to_str(num_reads_printed, num_reads_printed_str);
  ulong_to_str(num_bad_reads, num_bad_reads_str);

  // The percentage is forced to 0.0 when there were no reads at all
  status("Printed %s / %s (%.1f%%) to %s",
         num_reads_printed_str, num_reads_str,
         !num_reads ? 0.0 : (100.0 * num_reads_printed) / num_reads,
         futil_outpath_str(output_file));

  if(num_bad_reads > 0) {
    status("Bad reads: %s / %s (%.1f%%) - no kmer {ACGT} of length %zu",
           num_bad_reads_str, num_reads_str,
           (100.0 * num_bad_reads) / num_reads,
           kmer_size);
  }

  fclose(fout);
  kograph_dealloc(&kograph);

  // Free sequence memory
  for(i = 0; i < rbuf.len; i++) seq_read_dealloc(&rbuf.b[i]);
  read_buf_dealloc(&rbuf);
  ctx_free(seq_files);

  db_graph_dealloc(&db_graph);

  return EXIT_SUCCESS;
}
//Opens or creates the named synchronization object described by
//"sync_interface".
//
//A file derived from "name" is opened/created according to "creation_type"
//and locked. If its size differs from the expected one (an 8-byte id plus
//sync_interface.get_data_size() bytes) a fresh id and the initial data are
//written to it, otherwise both are read back. The id is then converted to
//text and passed to sync_interface.open() as the name of the native object.
//
//On any failure the file handle is closed (if it was opened) and an
//interprocess_exception carrying the last recorded OS error is thrown.
inline void windows_named_sync::open_or_create
   ( create_enum_t creation_type
   , const char *name
   , const permissions &perm
   , windows_named_sync_interface &sync_interface)
{
   std::string aux_str(name);
   m_file_hnd  = winapi::invalid_handle_value;
   //Use a file to emulate POSIX lifetime semantics. After this logic
   //we'll obtain the ID of the native handle to open in aux_str
   {
      create_shared_dir_cleaning_old_and_get_filepath(name, aux_str);
      //Create a file with required permissions.
      //DoOpen -> open_existing, DoCreate -> create_new, anything else -> open_always
      m_file_hnd = winapi::create_file
         ( aux_str.c_str()
         , winapi::generic_read | winapi::generic_write
         , creation_type == DoOpen ? winapi::open_existing :
               (creation_type == DoCreate ? winapi::create_new : winapi::open_always)
         , 0
         , (winapi::interprocess_security_attributes*)perm.get_permissions());

      //Obtain OS error in case something has failed
      error_info err;
      bool success = false;
      if(m_file_hnd != winapi::invalid_handle_value){
         //Now lock the file
         const std::size_t buflen = sync_interface.get_data_size();
         typedef __int64 unique_id_type;
         //Expected file size: the unique id followed by the interface's data
         const std::size_t sizeof_file_info = sizeof(unique_id_type) + buflen;
         winapi::interprocess_overlapped overlapped;
         if(winapi::lock_file_ex
            (m_file_hnd, winapi::lockfile_exclusive_lock, 0, sizeof_file_info, 0, &overlapped)){
            __int64 filesize = 0;
            //Obtain the unique id to open the native semaphore.
            //If file size was created
            if(winapi::get_file_size(m_file_hnd, filesize)){
               unsigned long written_or_read = 0;
               unique_id_type unique_id_val;
               //Unexpected size: (re)initialize the file contents
               if(static_cast<std::size_t>(filesize) != sizeof_file_info){
                  winapi::set_end_of_file(m_file_hnd);
                  //The performance counter value is used as the unique id
                  winapi::query_performance_counter(&unique_id_val);
                  const void *buf = sync_interface.buffer_with_init_data_to_file();
                  //Write unique ID in file. This ID will be used to calculate the semaphore name
                  if(winapi::write_file(m_file_hnd, &unique_id_val, sizeof(unique_id_val), &written_or_read, 0)  &&
                     written_or_read == sizeof(unique_id_val) &&
                     winapi::write_file(m_file_hnd, buf, buflen, &written_or_read, 0) &&
                     written_or_read == buflen ){
                     success = true;
                  }
                  winapi::get_file_size(m_file_hnd, filesize);
                  BOOST_ASSERT(std::size_t(filesize) == sizeof_file_info);
               }
               else{
                  //Expected size: read the id and the stored data back
                  void *buf = sync_interface.buffer_to_store_init_data_from_file();
                  if(winapi::read_file(m_file_hnd, &unique_id_val, sizeof(unique_id_val), &written_or_read, 0)  &&
                     written_or_read == sizeof(unique_id_val) &&
                     winapi::read_file(m_file_hnd, buf, buflen, &written_or_read, 0)  &&
                     written_or_read == buflen   ){
                     success = true;
                  }
               }
               if(success){
                  //Now create a global semaphore name based on the unique id
                  //Buffer is sized for two characters per id byte plus a terminator
                  char unique_id_name[sizeof(unique_id_val)*2+1];
                  std::size_t name_suffix_length = sizeof(unique_id_name);
                  bytes_to_str(&unique_id_val, sizeof(unique_id_val), &unique_id_name[0], name_suffix_length);
                  success = sync_interface.open(creation_type, unique_id_name);
               }
            }

            //Obtain OS error in case something has failed
            err = system_error_code();

            //If this fails we have no possible rollback so don't check the return
            if(!winapi::unlock_file_ex(m_file_hnd, 0, sizeof_file_info, 0, &overlapped)){
               err = system_error_code();
            }
         }
         else{
            //Obtain OS error in case something has failed
            err = system_error_code();
         }
      }
      else{
         err = system_error_code();
      }

      if(!success){
         if(m_file_hnd != winapi::invalid_handle_value){
            winapi::close_handle(m_file_hnd);
            m_file_hnd = winapi::invalid_handle_value;
         }
         //Throw as something went wrong
         throw interprocess_exception(err);
      }
   }
}
// Entry point of the thread command.
//
// Parses the command line into a ReadThreadCmdArgs, splits the available
// memory between the graph and the path store/hash, loads the graph and any
// existing path files, runs generate_paths() over the inputs in batches of
// at most MAX_IO_THREADS, prints statistics and saves the resulting paths
// to args.out_ctp_path.
//
// Returns EXIT_SUCCESS.  Error paths go through die() / cmd_print_usage().
int ctx_thread(int argc, char **argv)
{
  struct ReadThreadCmdArgs args;
  read_thread_args_alloc(&args);
  read_thread_args_parse(&args, argc, argv, longopts, false);

  // Short-hands for the parsed inputs
  GraphFileReader *gfile = &args.gfile;
  GPathFileBuffer *gpfiles = &args.gpfiles;
  CorrectAlnInputBuffer *inputs = &args.inputs;
  size_t i;

  if(args.zero_link_counts && gpfiles->len == 0)
    cmd_print_usage("-0,--zero-paths without -p,--paths <in.ctp> has no meaning");

  // Check each path file only loads one colour
  gpaths_only_for_colour(gpfiles->b, gpfiles->len, 0);

  //
  // Decide on memory
  //
  size_t bits_per_kmer, kmers_in_hash, graph_mem, total_mem;
  size_t path_hash_mem, path_store_mem, path_mem;
  // Passed to gpath_store_alloc() below; true only when paths were loaded
  // and new paths are not to be used as they are added
  bool sep_path_list = (!args.use_new_paths && gpfiles->len > 0);

  bits_per_kmer = sizeof(BinaryKmer)*8 + sizeof(Edges)*8 + sizeof(GPath*)*8 +
                  2 * args.nthreads; // Have traversed

  // false -> don't use mem_to_use to decide how many kmers to store in hash
  // since we need some of that memory for storing paths
  kmers_in_hash = cmd_get_kmers_in_hash(args.memargs.mem_to_use,
                                        args.memargs.mem_to_use_set,
                                        args.memargs.num_kmers,
                                        args.memargs.num_kmers_set,
                                        bits_per_kmer,
                                        gfile->num_of_kmers,
                                        gfile->num_of_kmers,
                                        false, &graph_mem);

  // Paths memory
  size_t min_path_mem = 0;
  gpath_reader_sum_mem(gpfiles->b, gpfiles->len, 1, true, true, &min_path_mem);

  if(graph_mem + min_path_mem > args.memargs.mem_to_use) {
    char buf[50];
    die("Require at least %s memory", bytes_to_str(graph_mem+min_path_mem, 1, buf));
  }

  // Whatever the graph does not need goes to paths (checked above, so the
  // subtraction cannot wrap)
  path_mem = args.memargs.mem_to_use - graph_mem;
  size_t pentry_hash_mem = sizeof(GPEntry)/0.7;
  size_t pentry_store_mem = sizeof(GPath) + 8 + // struct + sequence
                            1 + // in colour
                            sizeof(uint8_t) + // counts
                            sizeof(uint32_t); // kmer length

  size_t max_paths = path_mem / (pentry_store_mem + pentry_hash_mem);
  path_store_mem = max_paths * pentry_store_mem;
  path_hash_mem = max_paths * pentry_hash_mem;
  cmd_print_mem(path_hash_mem, "paths hash");
  cmd_print_mem(path_store_mem, "paths store");

  total_mem = graph_mem + path_mem;
  cmd_check_mem_limit(args.memargs.mem_to_use, total_mem);

  //
  // Open output file
  //
  gzFile gzout = futil_gzopen_create(args.out_ctp_path, "w");

  status("Creating paths file: %s", futil_outpath_str(args.out_ctp_path));

  //
  // Allocate memory
  //
  dBGraph db_graph;
  size_t kmer_size = gfile->hdr.kmer_size;
  db_graph_alloc(&db_graph, kmer_size, 1, 1, kmers_in_hash,
                 DBG_ALLOC_EDGES | DBG_ALLOC_NODE_IN_COL);

  // Split path memory 2:1 between store and hash
  // Create a path store that tracks path counts
  gpath_store_alloc(&db_graph.gpstore,
                    db_graph.num_of_cols,
                    db_graph.ht.capacity,
                    0, path_store_mem, true, sep_path_list);

  // Create path hash table for fast lookup
  gpath_hash_alloc(&db_graph.gphash, &db_graph.gpstore, path_hash_mem);

  if(args.use_new_paths) {
    status("Using paths as they are added (risky)");
  } else {
    status("Not using new paths as they are added (safe)");
  }

  //
  // Start up workers to add paths to the graph
  //
  GenPathWorker *workers;
  workers = gen_paths_workers_alloc(args.nthreads, &db_graph);

  // Setup for loading graphs graph
  LoadingStats gstats;
  loading_stats_init(&gstats);

  // Path statistics
  LoadingStats *load_stats = gen_paths_get_stats(workers);
  CorrectAlnStats *aln_stats = gen_paths_get_aln_stats(workers);

  // Load contig hist distribution
  for(i = 0; i < gpfiles->len; i++) {
    gpath_reader_load_contig_hist(gpfiles->b[i].json,
                                  gpfiles->b[i].fltr.path.b,
                                  file_filter_fromcol(&gpfiles->b[i].fltr, 0),
                                  &aln_stats->contig_histgrm);
  }

  GraphLoadingPrefs gprefs = {.db_graph = &db_graph,
                              .boolean_covgs = false,
                              .must_exist_in_graph = false,
                              .must_exist_in_edges = NULL,
                              .empty_colours = false}; // already loaded paths

  // Load graph, print stats, close file
  graph_load(gfile, gprefs, &gstats);
  hash_table_print_stats_brief(&db_graph.ht);
  graph_file_close(gfile);

  // Load existing paths
  for(i = 0; i < gpfiles->len; i++)
    gpath_reader_load(&gpfiles->b[i], GPATH_DIE_MISSING_KMERS, &db_graph);

  // zero link counts of already loaded links
  if(args.zero_link_counts) {
    status("Zeroing link counts for loaded links");
    gpath_set_zero_nseen(&db_graph.gpstore.gpset);
  }

  if(!args.use_new_paths)
    gpath_store_split_read_write(&db_graph.gpstore);

  // Deal with a set of files at once
  // Can have different numbers of inputs vs threads
  size_t start, end;
  for(start = 0; start < inputs->len; start += MAX_IO_THREADS)
  {
    end = MIN2(inputs->len, start+MAX_IO_THREADS);
    generate_paths(inputs->b+start, end-start, workers, args.nthreads);
  }

  // Print memory statistics
  gpath_hash_print_stats(&db_graph.gphash);
  gpath_store_print_stats(&db_graph.gpstore);

  correct_aln_dump_stats(aln_stats, load_stats,
                         args.dump_seq_sizes,
                         args.dump_frag_sizes,
                         db_graph.ht.num_kmers);

  // Don't need GPathHash anymore
  gpath_hash_dealloc(&db_graph.gphash);

  // Collect the JSON headers of the input path files for the output header
  cJSON **hdrs = ctx_malloc(gpfiles->len * sizeof(cJSON*));
  for(i = 0; i < gpfiles->len; i++) hdrs[i] = gpfiles->b[i].json;

  size_t output_threads = MIN2(args.nthreads, MAX_IO_THREADS);

  // Generate a cJSON header for all inputs
  cJSON *thread_hdr = cJSON_CreateObject();
  cJSON *inputs_hdr = cJSON_CreateArray();
  cJSON_AddItemToObject(thread_hdr, "inputs", inputs_hdr);
  for(i = 0; i < inputs->len; i++)
    cJSON_AddItemToArray(inputs_hdr, correct_aln_input_json_hdr(&inputs->b[i]));

  // Write output file
  gpath_save(gzout, args.out_ctp_path, output_threads, true,
             "thread", thread_hdr, hdrs, gpfiles->len,
             &aln_stats->contig_histgrm, 1,
             &db_graph);

  gzclose(gzout);
  ctx_free(hdrs);

  // Optionally run path checks for debugging
  // gpath_checks_all_paths(&db_graph, args.nthreads);

  // ins_gap, err_gap no longer allocated after this line
  gen_paths_workers_dealloc(workers, args.nthreads);

  // Close and free input files etc.
  read_thread_args_dealloc(&args);
  db_graph_dealloc(&db_graph);

  return EXIT_SUCCESS;
}
/* This function is only called from the IGMP dissector */
/*
 * Dissect an IGAP message starting at `offset` in `tvb` and add it under
 * `parent_tree`.
 *
 * Returns the offset just past the data consumed.  When the IGAP protocol
 * is disabled nothing is dissected and the returned offset covers the rest
 * of the tvb.
 */
int
dissect_igap(tvbuff_t *tvb, packet_info *pinfo, proto_tree *parent_tree, int offset)
{
    proto_tree *tree;
    proto_item *item;
    guint8 type, tsecs, subtype, asize, msize;
    /* One extra byte in each buffer leaves room for a terminating NUL */
    guchar account[ACCOUNT_SIZE+1], message[MESSAGE_SIZE+1];

    if (!proto_is_protocol_enabled(find_protocol_by_id(proto_igap))) {
        /* we are not enabled, skip entire packet to be nice
           to the igmp layer. (so clicking on IGMP will display the data)
           */
        return offset + tvb_length_remaining(tvb, offset);
    }

    item = proto_tree_add_item(parent_tree, proto_igap, tvb, offset, -1, ENC_NA);
    tree = proto_item_add_subtree(item, ett_igap);

    col_set_str(pinfo->cinfo, COL_PROTOCOL, "IGAP");
    col_clear(pinfo->cinfo, COL_INFO);

    /* Message type (1 byte), also shown in the Info column */
    type = tvb_get_guint8(tvb, offset);
    col_add_str(pinfo->cinfo, COL_INFO,
                 val_to_str(type, igap_types, "Unknown Type: 0x%02x"));
    proto_tree_add_uint(tree, hf_type, tvb, offset, 1, type);
    offset += 1;

    /* Max response time (1 byte), displayed in units of 0.1 second */
    tsecs = tvb_get_guint8(tvb, offset);
    proto_tree_add_uint_format_value(tree, hf_max_resp, tvb, offset, 1, tsecs,
        "%.1f sec (0x%02x)", tsecs * 0.1, tsecs);
    offset += 1;

    /* Checksum (2 bytes), handled by the shared IGMP helper */
    igmp_checksum(tree, tvb, hf_checksum, hf_checksum_bad, pinfo, 0);
    offset += 2;

    proto_tree_add_item(tree, hf_maddr, tvb, offset, 4, ENC_BIG_ENDIAN);
    offset += 4;

    proto_tree_add_uint(tree, hf_version, tvb, offset, 1,
        tvb_get_guint8(tvb, offset));
    offset += 1;

    /* Subtype is 1 byte; the following byte is skipped without dissection */
    subtype = tvb_get_guint8(tvb, offset);
    proto_tree_add_uint(tree, hf_subtype, tvb, offset, 1, subtype);
    offset += 2;

    proto_tree_add_uint(tree, hf_challengeid, tvb, offset, 1,
        tvb_get_guint8(tvb, offset));
    offset += 1;

    asize = tvb_get_guint8(tvb, offset);
    proto_tree_add_uint(tree, hf_asize, tvb, offset, 1, asize);
    offset += 1;

    /* Message size is 1 byte; the following two bytes are skipped */
    msize = tvb_get_guint8(tvb, offset);
    proto_tree_add_uint(tree, hf_msize, tvb, offset, 1, msize);
    offset += 3;

    if (asize > 0) {
        if (asize > ACCOUNT_SIZE) {
            /* Bogus account size.
               XXX - flag this? */
            asize = ACCOUNT_SIZE;
        }
        tvb_memcpy(tvb, account, offset, asize);
        account[asize] = '\0';
        proto_tree_add_string(tree, hf_account, tvb, offset, asize, account);
    }
    /* The account field always occupies ACCOUNT_SIZE bytes in the packet */
    offset += ACCOUNT_SIZE;

    if (msize > 0) {
        if (msize > MESSAGE_SIZE) {
            /* Bogus message size.
               XXX - flag this? */
            msize = MESSAGE_SIZE;
        }
        tvb_memcpy(tvb, message, offset, msize);
        /* How the message bytes are shown depends on the subtype */
        switch (subtype) {
        case IGAP_SUBTYPE_PASSWORD_JOIN:
        case IGAP_SUBTYPE_PASSWORD_LEAVE:
            /* Challenge field is user's password */
            message[msize] = '\0';
            proto_tree_add_text(tree, tvb, offset, msize,
                                "User password: %s", message);
            break;
        case IGAP_SUBTYPE_CHALLENGE_RESPONSE_JOIN:
        case IGAP_SUBTYPE_CHALLENGE_RESPONSE_LEAVE:
            /* Challenge field is the results of MD5 calculation */
            proto_tree_add_text(tree, tvb, offset, msize,
                                "Result of MD5 calculation: 0x%s",
                                bytes_to_str(message, msize));
            break;
        case IGAP_SUBTYPE_CHALLENGE:
            /* Challenge field is the challenge value */
            proto_tree_add_text(tree, tvb, offset, msize,
                                "Challenge: 0x%s",
                                bytes_to_str(message, msize));
            break;
        case IGAP_SUBTYPE_AUTH_MESSAGE:
            /* Challenge field indicates the result of the authentication */
            proto_tree_add_text(tree, tvb, offset, msize,
                                "Authentication result: %s (0x%x)",
                                val_to_str_const(message[0], igap_auth_result,
                                "Unknown"), message[0]);
            break;
        case IGAP_SUBTYPE_ACCOUNTING_MESSAGE:
            /* Challenge field indicates the accounting status */
            proto_tree_add_text(tree, tvb, offset, msize,
                                "Accounting status: %s (0x%x)",
                                val_to_str_const(message[0], igap_account_status,
                                "Unknown"), message[0]);
            break;
        default:
            proto_tree_add_text(tree, tvb, offset, msize,
                                "Message: (Unknown)");
        }
    }
    /* The message field always occupies MESSAGE_SIZE bytes in the packet */
    offset += MESSAGE_SIZE;

    /* NOTE(review): the item length is set to the absolute end offset, which
       equals the dissected length only if dissection started at offset 0 -
       confirm against the caller */
    if (item) proto_item_set_len(item, offset);
    return offset;
}
int ctx_sort(int argc, char **argv) { const char *out_path = NULL; struct MemArgs memargs = MEM_ARGS_INIT; // Arg parsing char cmd[100]; char shortopts[300]; cmd_long_opts_to_short(longopts, shortopts, sizeof(shortopts)); int c; // silence error messages from getopt_long // opterr = 0; while((c = getopt_long_only(argc, argv, shortopts, longopts, NULL)) != -1) { cmd_get_longopt_str(longopts, c, cmd, sizeof(cmd)); switch(c) { case 0: /* flag set */ break; case 'h': cmd_print_usage(NULL); break; case 'f': cmd_check(!futil_get_force(), cmd); futil_set_force(true); break; case 'm': cmd_mem_args_set_memory(&memargs, optarg); break; case 'n': cmd_mem_args_set_nkmers(&memargs, optarg); break; case 'o': cmd_check(!out_path, cmd); out_path = optarg; break; case ':': /* BADARG */ case '?': /* BADCH getopt_long has already printed error */ // cmd_print_usage(NULL); die("`"CMD" sort -h` for help. Bad option: %s", argv[optind-1]); default: die("Bad option: [%c]: %s", c, cmd); } } if(optind+1 != argc) cmd_print_usage("Require exactly one input graph file (.ctx)"); const char *ctx_path = argv[optind]; // // Open Graph file // GraphFileReader gfile; memset(&gfile, 0, sizeof(GraphFileReader)); graph_file_open2(&gfile, ctx_path, out_path ? "r" : "r+", true, 0); if(!file_filter_is_direct(&gfile.fltr)) die("Cannot open graph file with a filter ('in.ctx:blah' syntax)"); size_t num_kmers, memory; // Reading from a stream if(gfile.num_of_kmers < 0) { if(!memargs.num_kmers_set) die("If reading from a stream, must give -n <num_kmers>"); num_kmers = memargs.num_kmers; } else num_kmers = gfile.num_of_kmers; // Open output path (if given) FILE *fout = out_path ? 
futil_fopen_create(out_path, "w") : NULL; size_t i; size_t ncols = gfile.hdr.num_of_cols; size_t kmer_mem = sizeof(BinaryKmer) + (sizeof(Edges)+sizeof(Covg))*ncols; memory = (sizeof(char*) + kmer_mem) * num_kmers; char mem_str[50]; bytes_to_str(memory, 1, mem_str); if(memory > memargs.mem_to_use) die("Require at least %s memory", mem_str); status("[memory] Total: %s", mem_str); char *mem = ctx_malloc(kmer_mem * num_kmers); char **kmers = ctx_malloc(num_kmers*sizeof(char*)); // Read in whole file // if(graph_file_fseek(gfile, gfile.hdr_size, SEEK_SET) != 0) die("fseek failed"); size_t nkread = gfr_fread_bytes(&gfile, mem, num_kmers*kmer_mem); if(nkread != num_kmers*kmer_mem) die("Could only read %zu bytes [<%zu]", nkread, num_kmers*kmer_mem); // check we are at the end of the file char tmpc; if(gfr_fread_bytes(&gfile, &tmpc, 1) != 0) { die("More kmers in file than believed (kmers: %zu ncols: %zu).", num_kmers, ncols); } status("Read %zu kmers with %zu colour%s", num_kmers, ncols, util_plural_str(ncols)); for(i = 0; i < num_kmers; i++) kmers[i] = mem + kmer_mem*i; sort_block(kmers, num_kmers); // Print if(out_path != NULL) { // saving to a different destination - write header graph_write_header(fout, &gfile.hdr); } else { // Directly manipulating gfile.fh here, using it to write later // Not doing any more reading if(fseek(gfile.fh, gfile.hdr_size, SEEK_SET) != 0) die("fseek failed"); fout = gfile.fh; } for(i = 0; i < num_kmers; i++) if(fwrite(kmers[i], 1, kmer_mem, fout) != kmer_mem) die("Cannot write to file"); if(out_path) fclose(fout); graph_file_close(&gfile); ctx_free(kmers); ctx_free(mem); return EXIT_SUCCESS; }
/*
 * Append one sequence-analysis item describing a UIM stream packet to
 * seq_info->items.
 *
 * seq_info    - sequence analysis state whose item queue receives the entry
 * pinfo       - packet the entry is built from (addresses, ports, frame)
 * stream_info - UIM stream details (endpoints, channel, SQN, description)
 *
 * The two stream endpoints are put into a fixed order before being rendered
 * into the comment, so the comment does not depend on which side sent the
 * packet: instance endpoints are ordered by memcmp() of the context instance
 * bytes, other endpoints by (domain, address, port).
 *
 * Returns FALSE, adding nothing, when the two endpoints are of different
 * types; TRUE otherwise.
 */
static gboolean lbm_uimflow_add_to_graph(seq_analysis_info_t * seq_info, packet_info * pinfo, const lbm_uim_stream_info_t * stream_info)
{
    lbm_uim_stream_endpoint_t epa;
    lbm_uim_stream_endpoint_t epb;
    seq_analysis_item_t * item;
    gchar * ctxinst1 = NULL;
    gchar * ctxinst2 = NULL;
    gboolean swap_endpoints = FALSE;
    int rc;

    if (stream_info->endpoint_a.type != stream_info->endpoint_b.type)
    {
        return (FALSE);
    }

    /* Decide whether endpoint_a sorts after endpoint_b. */
    if (stream_info->endpoint_a.type == lbm_uim_instance_stream)
    {
        rc = memcmp(stream_info->endpoint_a.stream_info.ctxinst.ctxinst,
                    stream_info->endpoint_b.stream_info.ctxinst.ctxinst,
                    LBM_CONTEXT_INSTANCE_BLOCK_SZ);
        if (rc <= 0)
        {
            swap_endpoints = FALSE;
        }
        else
        {
            swap_endpoints = TRUE;
        }
    }
    else
    {
        if (stream_info->endpoint_a.stream_info.dest.domain < stream_info->endpoint_b.stream_info.dest.domain)
        {
            swap_endpoints = FALSE;
        }
        else if (stream_info->endpoint_a.stream_info.dest.domain > stream_info->endpoint_b.stream_info.dest.domain)
        {
            swap_endpoints = TRUE;
        }
        else
        {
            int compare;

            /* Same domain: fall back to the address, then the port. */
            compare = CMP_ADDRESS(&(stream_info->endpoint_a.stream_info.dest.addr), &(stream_info->endpoint_b.stream_info.dest.addr));
            if (compare < 0)
            {
                swap_endpoints = FALSE;
            }
            else if (compare > 0)
            {
                swap_endpoints = TRUE;
            }
            else
            {
                if (stream_info->endpoint_a.stream_info.dest.port <= stream_info->endpoint_b.stream_info.dest.port)
                {
                    swap_endpoints = FALSE;
                }
                else
                {
                    swap_endpoints = TRUE;
                }
            }
        }
    }

    if (swap_endpoints == FALSE)
    {
        epa = stream_info->endpoint_a;
        epb = stream_info->endpoint_b;
    }
    else
    {
        epb = stream_info->endpoint_a;
        epa = stream_info->endpoint_b;
    }

    /* Zero-fill the item: only some of its fields are assigned below, and
       the rest must not be left holding indeterminate values once the item
       is handed to the queue. */
    item = (seq_analysis_item_t *)g_malloc0(sizeof(seq_analysis_item_t));
    COPY_ADDRESS(&(item->src_addr), &(pinfo->src));
    COPY_ADDRESS(&(item->dst_addr), &(pinfo->dst));
    item->fd = pinfo->fd;
    item->port_src = pinfo->srcport;
    item->port_dst = pinfo->destport;

    /* Label: optional description followed by the sequence number. */
    if (stream_info->description == NULL)
    {
        item->frame_label = g_strdup_printf("(%" G_GUINT32_FORMAT ")", stream_info->sqn);
    }
    else
    {
        item->frame_label = g_strdup_printf("%s (%" G_GUINT32_FORMAT ")", stream_info->description, stream_info->sqn);
    }

    /* Comment: the ordered endpoint pair and the channel. */
    if (epa.type == lbm_uim_instance_stream)
    {
        ctxinst1 = bytes_to_str(pinfo->pool, epa.stream_info.ctxinst.ctxinst, sizeof(epa.stream_info.ctxinst.ctxinst));
        ctxinst2 = bytes_to_str(pinfo->pool, epb.stream_info.ctxinst.ctxinst, sizeof(epb.stream_info.ctxinst.ctxinst));
        item->comment = g_strdup_printf("%s <-> %s [%" G_GUINT64_FORMAT "]",
            ctxinst1,
            ctxinst2,
            stream_info->channel);
    }
    else
    {
        item->comment = g_strdup_printf("%" G_GUINT32_FORMAT ":%s:%" G_GUINT16_FORMAT " <-> %" G_GUINT32_FORMAT ":%s:%" G_GUINT16_FORMAT " [%" G_GUINT64_FORMAT "]",
            epa.stream_info.dest.domain,
            address_to_str(pinfo->pool, &(epa.stream_info.dest.addr)),
            epa.stream_info.dest.port,
            epb.stream_info.dest.domain,
            address_to_str(pinfo->pool, &(epb.stream_info.dest.addr)),
            epb.stream_info.dest.port,
            stream_info->channel);
    }

    item->conv_num = (guint16)LBM_CHANNEL_ID(stream_info->channel);
    item->display = TRUE;
    item->line_style = 1;
    g_queue_push_tail(seq_info->items, item);
    return (TRUE);
}
int ctx_index(int argc, char **argv) { const char *out_path = NULL; size_t block_size = 0, block_kmers = 0; // Arg parsing char cmd[100]; char shortopts[300]; cmd_long_opts_to_short(longopts, shortopts, sizeof(shortopts)); int c; // silence error messages from getopt_long // opterr = 0; while((c = getopt_long_only(argc, argv, shortopts, longopts, NULL)) != -1) { cmd_get_longopt_str(longopts, c, cmd, sizeof(cmd)); switch(c) { case 0: /* flag set */ break; case 'h': cmd_print_usage(NULL); break; case 'o': cmd_check(!out_path, cmd); out_path = optarg; break; case 'b': cmd_check(!block_kmers, cmd); block_kmers = cmd_size_nonzero(cmd, optarg); break; case 's': cmd_check(!block_size, cmd); block_size = cmd_size_nonzero(cmd, optarg); break; case ':': /* BADARG */ case '?': /* BADCH getopt_long has already printed error */ // cmd_print_usage(NULL); die("`"CMD" index -h` for help. Bad option: %s", argv[optind-1]); default: abort(); } } if(optind+1 != argc) cmd_print_usage("Require exactly one input graph file (.ctx)"); if(block_size && block_kmers) cmd_print_usage("Cannot use --block-kmers and --block-size together"); const char *ctx_path = argv[optind]; // // Open Graph file // GraphFileReader gfile; memset(&gfile, 0, sizeof(GraphFileReader)); graph_file_open2(&gfile, ctx_path, "r+", true, 0); if(!file_filter_is_direct(&gfile.fltr)) die("Cannot open graph file with a filter ('in.ctx:blah' syntax)"); // Open output file FILE *fout = out_path ? 
futil_fopen_create(out_path, "w") : stdout; // Start size_t filencols = gfile.hdr.num_of_cols; size_t kmer_size = gfile.hdr.kmer_size; const char *path = file_filter_path(&gfile.fltr); size_t ncols = file_filter_into_ncols(&gfile.fltr); size_t kmer_mem = sizeof(BinaryKmer) + (sizeof(Edges)+sizeof(Covg))*filencols; if(block_size) { block_kmers = block_size / kmer_mem; } else if(!block_size && !block_kmers) { block_size = 4 * ONE_MEGABYTE; block_kmers = block_size / kmer_mem; } // Update block-size block_size = block_kmers * kmer_mem; status("[index] block bytes: %zu kmers: %zu; kmer bytes: %zu, hdr: %zu", block_size, block_kmers, kmer_mem, (size_t)gfile.hdr_size); if(block_kmers == 0) die("Cannot set block_kmers to zero"); // Print header fputs("#block_start\tnext_block\tfirst_kmer\tkmer_idx\tnext_kmer_idx\n", fout); BinaryKmer bkmer = BINARY_KMER_ZERO_MACRO; BinaryKmer prev_bkmer = BINARY_KMER_ZERO_MACRO; Covg *covgs = ctx_malloc(ncols * sizeof(Covg)); Edges *edges = ctx_malloc(ncols * sizeof(Edges)); char bkmerstr[MAX_KMER_SIZE+1]; size_t rem_block = block_size - kmer_mem; // block after first kmer char *tmp_mem = ctx_malloc(rem_block); // Read in file, print index size_t nblocks = 0; size_t bl_bytes = 0, bl_kmers = 0; size_t bl_byte_offset = gfile.hdr_size, bl_kmer_offset = 0; while(1) { if(!graph_file_read(&gfile, &bkmer, covgs, edges)) { status("Read kmer failed"); break; } binary_kmer_to_str(bkmer, kmer_size, bkmerstr); if(nblocks > 0 && !binary_kmer_less_than(prev_bkmer,bkmer)) die("File is not sorted: %s [%s]", bkmerstr, path); // We've already read one kmer entry, read rest of block bl_bytes = kmer_mem + gfr_fread_bytes(&gfile, tmp_mem, rem_block); bl_kmers = 1 + bl_bytes / kmer_mem; fprintf(fout, "%zu\t%zu\t%s\t%zu\t%zu\n", bl_byte_offset, bl_byte_offset+bl_bytes, bkmerstr, bl_kmer_offset, bl_kmer_offset+bl_kmers); bl_byte_offset += bl_bytes; bl_kmer_offset += bl_kmers; nblocks++; if(bl_kmers < block_kmers) { status("last block %zu < %zu; %zu vs %zu", 
bl_kmers, block_kmers, bl_bytes, block_size); break; } prev_bkmer = bkmer; } ctx_free(covgs); ctx_free(edges); ctx_free(tmp_mem); // done char num_kmers_str[50], num_blocks_str[50]; char block_mem_str[50], block_kmers_str[50]; ulong_to_str(bl_kmer_offset, num_kmers_str); ulong_to_str(nblocks, num_blocks_str); bytes_to_str(block_size, 1, block_mem_str); ulong_to_str(block_kmers, block_kmers_str); status("Read %s kmers in %s block%s (block size %s / %s kmers)", num_kmers_str, num_blocks_str, util_plural_str(nblocks), block_mem_str, block_kmers_str); if(fout != stdout) status("Saved to %s", out_path); graph_file_close(&gfile); fclose(fout); return EXIT_SUCCESS; }
int main(int argc, char** argv) { char* filepath; if(argc < 2) { print_usage(); } else if(argc > 2) { print_info = 0; print_kmers = 0; parse_kmers = 0; int i; for(i = 1; i < argc-1; i++) { if(strcasecmp(argv[i], "--print_info") == 0) { print_info = 1; } else if(strcasecmp(argv[i], "--print_kmers") == 0) { print_kmers = 1; } else if(strcasecmp(argv[i], "--parse_kmers") == 0) { print_info = 1; parse_kmers = 1; } else print_usage(); } } filepath = argv[argc-1]; if(print_info) printf("Loading file: %s\n", filepath); file_size = get_file_size(filepath); FILE* fh = fopen(filepath, "r"); if(fh == NULL) { report_error("cannot open file '%s'\n", filepath); exit(EXIT_FAILURE); } if(file_size != -1 && print_info) { char str[31]; bytes_to_str(file_size, 0, str); printf("File size: %s\n", str); } buffer = buffer_new(BUFFER_SIZE); /* // Check sizes printf("-- Datatypes --\n"); printf("int: %i\n", (int)sizeof(int)); printf("long: %i\n", (int)sizeof(long)); printf("long long: %i\n", (int)sizeof(long long)); printf("double: %i\n", (int)sizeof(double)); printf("long double: %i\n", (int)sizeof(long double)); */ if(print_info) printf("----\n"); unsigned int i; // Read magic word at the start of header char magic_word[7]; magic_word[6] = '\0'; my_fread(fh, magic_word, strlen("CORTEX"), "Magic word"); if(strcmp(magic_word, "CORTEX") != 0) { fprintf(stderr, "Magic word doesn't match 'CORTEX' (start)\n"); exit(EXIT_FAILURE); } // Read version number my_fread(fh, &version, sizeof(uint32_t), "binary version"); my_fread(fh, &kmer_size, sizeof(uint32_t), "kmer size"); my_fread(fh, &num_of_bitfields, sizeof(uint32_t), "number of bitfields"); my_fread(fh, &num_of_colours, sizeof(uint32_t), "number of colours"); if(print_info) { printf("binary version: %i\n", (int)version); printf("kmer size: %i\n", (int)kmer_size); printf("bitfields: %i\n", (int)num_of_bitfields); printf("colours: %i\n", (int)num_of_colours); } if(version >= 7) { my_fread(fh, &expected_num_of_kmers, sizeof(uint64_t), "number of 
kmers"); my_fread(fh, &num_of_shades, sizeof(uint32_t), "number of shades"); if(print_info) { char tmp[256]; printf("kmers: %s\n", ulong_to_str(expected_num_of_kmers,tmp)); printf("shades: %i\n", (int)num_of_shades); } } // Checks if(version > 7 || version < 4) report_error("Sorry, we only support binary versions 4, 5, 6 & 7\n"); if(kmer_size % 2 == 0) report_error("kmer size is not an odd number\n"); if(kmer_size < 3) report_error("kmer size is less than three\n"); if(num_of_bitfields * 32 < kmer_size) report_error("Not enough bitfields for kmer size\n"); if((num_of_bitfields-1)*32 >= kmer_size) report_error("using more than the minimum number of bitfields\n"); if(num_of_colours == 0) report_error("number of colours is zero\n"); if(num_of_shades != 0 && (num_of_shades & (num_of_shades-1))) report_error("number of shades is not a power of 2\n"); // // Read array of mean read lengths per colour uint32_t *mean_read_lens_per_colour = malloc(num_of_colours*sizeof(uint32_t)); my_fread(fh, mean_read_lens_per_colour, sizeof(uint32_t) * num_of_colours, "mean read length for each colour"); // Read array of total seq loaded per colour uint64_t *total_seq_loaded_per_colour = malloc(num_of_colours*sizeof(uint64_t)); my_fread(fh, total_seq_loaded_per_colour, sizeof(uint64_t) * num_of_colours, "total sequance loaded for each colour"); for(i = 0; i < num_of_colours; i++) { sum_of_seq_loaded += total_seq_loaded_per_colour[i]; } if(version >= 6) { sample_names = malloc(sizeof(char*) * num_of_colours); for(i = 0; i < num_of_colours; i++) { uint32_t str_length; my_fread(fh, &str_length, sizeof(uint32_t), "sample name length"); if(str_length == 0) { sample_names[i] = NULL; } else { sample_names[i] = (char*)malloc((str_length+1) * sizeof(char)); my_fread(fh, sample_names[i], str_length, "sample name"); sample_names[i][str_length] = '\0'; // Check sample length is as long as we were told size_t sample_name_len = strlen(sample_names[i]); if(sample_name_len != str_length) { // Premature 
\0 in string report_warning("Sample %i name has length %lu but is only %lu chars " "long (premature '\\0')\n", i, str_length, sample_name_len); } } } seq_error_rates = malloc(sizeof(long double) * num_of_colours); my_fread(fh, seq_error_rates, sizeof(long double) * num_of_colours, "seq error rates"); cleaning_infos = malloc(sizeof(CleaningInfo) * num_of_colours); for(i = 0; i < num_of_colours; i++) { my_fread(fh, &(cleaning_infos[i].tip_cleaning), 1, "tip cleaning"); my_fread(fh, &(cleaning_infos[i].remove_low_covg_supernodes), 1, "remove low covg supernodes"); my_fread(fh, &(cleaning_infos[i].remove_low_covg_kmers), 1, "remove low covg kmers"); my_fread(fh, &(cleaning_infos[i].cleaned_against_graph), 1, "cleaned against graph"); my_fread(fh, &(cleaning_infos[i].remove_low_covg_supernodes_thresh), sizeof(int32_t), "remove low covg supernode threshold"); my_fread(fh, &(cleaning_infos[i].remove_low_covg_kmers_thresh), sizeof(int32_t), "remove low covg kmer threshold"); if(version > 6) { if(cleaning_infos[i].remove_low_covg_supernodes_thresh < 0) { report_warning("Binary header gives sample %i a cleaning threshold of " "%i for supernodes (should be >= 0)\n", i, cleaning_infos[i].remove_low_covg_supernodes_thresh); } if(cleaning_infos[i].remove_low_covg_kmers_thresh < 0) { report_warning("Binary header gives sample %i a cleaning threshold of " "%i for kmers (should be >= 0)\n", i, cleaning_infos[i].remove_low_covg_kmers_thresh); } } if(!cleaning_infos[i].remove_low_covg_supernodes && cleaning_infos[i].remove_low_covg_supernodes_thresh > 0) { report_warning("Binary header gives sample %i a cleaning threshold of " "%i for supernodes when no cleaning was performed\n", i, cleaning_infos[i].remove_low_covg_supernodes_thresh); } if(!cleaning_infos[i].remove_low_covg_kmers && cleaning_infos[i].remove_low_covg_kmers_thresh > 0) { report_warning("Binary header gives sample %i a cleaning threshold of " "%i for kmers when no cleaning was performed\n", i, 
cleaning_infos[i].remove_low_covg_kmers_thresh); } uint32_t name_length; my_fread(fh, &name_length, sizeof(uint32_t), "graph name length"); if(name_length == 0) { cleaning_infos[i].name_of_graph_clean_against = NULL; } else { cleaning_infos[i].name_of_graph_clean_against = (char*)malloc((name_length + 1) * sizeof(char)); my_fread(fh, cleaning_infos[i].name_of_graph_clean_against, name_length, "graph name length"); cleaning_infos[i].name_of_graph_clean_against[name_length] = '\0'; // Check sample length is as long as we were told size_t cleaned_name_len = strlen(cleaning_infos[i].name_of_graph_clean_against); if(cleaned_name_len != name_length) { // Premature \0 in string report_warning("Sample [%i] cleaned-against-name has length %u but is " "only %u chars long (premature '\\0')\n", i, name_length, cleaned_name_len); } } } } // Print colour info if(print_info) { for(i = 0; i < num_of_colours; i++) { printf("-- Colour %i --\n", i); if(version >= 6) { // Version 6 only output printf(" sample name: '%s'\n", sample_names[i]); } char tmp[32]; printf(" mean read length: %u\n", (unsigned int)mean_read_lens_per_colour[i]); printf(" total sequence loaded: %s\n", ulong_to_str(total_seq_loaded_per_colour[i], tmp)); if(version >= 6) { // Version 6 only output printf(" sequence error rate: %Lf\n", seq_error_rates[i]); printf(" tip clipping: %s\n", (cleaning_infos[i].tip_cleaning == 0 ? "no" : "yes")); printf(" remove low coverage supernodes: %s [threshold: %i]\n", cleaning_infos[i].remove_low_covg_supernodes ? "yes" : "no", cleaning_infos[i].remove_low_covg_supernodes_thresh); printf(" remove low coverage kmers: %s [threshold: %i]\n", cleaning_infos[i].remove_low_covg_kmers ? "yes" : "no", cleaning_infos[i].remove_low_covg_kmers_thresh); printf(" cleaned against graph: %s [against: '%s']\n", cleaning_infos[i].cleaned_against_graph ? "yes" : "no", (cleaning_infos[i].name_of_graph_clean_against == NULL ? 
"" : cleaning_infos[i].name_of_graph_clean_against)); } } printf("--\n"); } // Read magic word at the end of header my_fread(fh, magic_word, strlen("CORTEX"), "magic word (end)"); if(strcmp(magic_word, "CORTEX") != 0) { report_error("magic word doesn't match 'CORTEX' (end): '%s'\n", magic_word); exit(EXIT_FAILURE); } // Calculate number of kmers if(version < 7 && file_size != -1) { size_t bytes_remaining = file_size - num_bytes_read; size_t num_bytes_per_kmer = sizeof(uint64_t) * num_of_bitfields + sizeof(uint32_t) * num_of_colours + sizeof(uint8_t) * num_of_colours; expected_num_of_kmers = bytes_remaining / num_bytes_per_kmer; size_t excess = bytes_remaining - (expected_num_of_kmers * num_bytes_per_kmer); if(excess > 0) { report_error("Excess bytes. Bytes:\n file size: %lu;\n for kmers: %lu;" "\n num kmers: %lu;\n per kmer: %lu;\n excess: %lu\n", file_size, bytes_remaining, expected_num_of_kmers, num_bytes_per_kmer, excess); } } if(print_info) { char num_str[50]; printf("Expected number of kmers: %s\n", ulong_to_str(expected_num_of_kmers, num_str)); printf("----\n"); } // Finished parsing header if(!parse_kmers && !print_kmers) { print_kmer_stats(); fclose(fh); exit(EXIT_SUCCESS); } shade_bytes = num_of_shades >> 3; size_t shade_array_bytes = shade_bytes * num_of_colours; // Kmer data uint64_t* kmer = malloc(sizeof(uint64_t) * num_of_bitfields); uint32_t* covgs = malloc(sizeof(uint32_t) * num_of_colours); uint8_t* edges = malloc(sizeof(uint8_t) * num_of_colours); uint8_t* shade_data = malloc(shade_array_bytes); uint8_t* shend_data = malloc(shade_array_bytes); if(kmer == NULL || covgs == NULL || edges == NULL || shade_data == NULL || shend_data == NULL) { report_error("Out of memory"); exit(EXIT_SUCCESS); } // Convert values to strings char* seq = malloc(sizeof(char) * kmer_size); char kmer_colour_edge_str[9]; // Check top word of each kmer int bits_in_top_word = 2 * (kmer_size % 32); uint64_t top_word_mask = (~(uint64_t)0) << bits_in_top_word; size_t 
num_bytes_per_bkmer = sizeof(uint64_t)*num_of_bitfields; // Read kmer in bytes so we can see if there are extra bytes at the end of // the file size_t bytes_read; // while((bytes_read = fread(kmer, 1, num_bytes_per_bkmer, fh)) > 0) while((bytes_read = fread_buf(fh, kmer, num_bytes_per_bkmer, buffer)) > 0) { if(bytes_read != num_bytes_per_bkmer) { report_error("unusual extra bytes [%i] at the end of the file\n", (int)bytes_read); break; } num_bytes_read += bytes_read; my_fread(fh, covgs, sizeof(uint32_t) * num_of_colours, "kmer covg"); my_fread(fh, edges, sizeof(uint8_t) * num_of_colours, "kmer edges"); if(version >= 7) { uint8_t *shades = shade_data, *shends = shend_data; for(i = 0; i < num_of_colours; i++) { my_fread(fh, shades, sizeof(uint8_t) * shade_bytes, "shades"); my_fread(fh, shends, sizeof(uint8_t) * shade_bytes, "shade ends"); shades += shade_bytes; shends += shade_bytes; } } // // Kmer checks // // Check top bits of kmer if(kmer[0] & top_word_mask) { if(num_of_oversized_kmers == 0) { report_error("oversized kmer [index: %lu]\n", num_of_kmers_read); for(i = 0; i < num_of_bitfields; i++) { fprintf(stderr, " word %i: ", i); print_binary(stderr, kmer[i]); fprintf(stderr, "\n"); } } num_of_oversized_kmers++; } // Check for all-zeros (i.e. all As kmer: AAAAAA) uint64_t kmer_words_or = 0; for(i = 0; i < num_of_bitfields; i++) kmer_words_or |= kmer[i]; if(kmer_words_or == 0) { if(num_of_all_zero_kmers == 1) { report_error("more than one all 'A's kmers seen [index: %lu]\n", num_of_kmers_read); } num_of_all_zero_kmers++; } // Check covg is 0 for all colours for(i = 0; i < num_of_colours && covgs[i] == 0; i++); if(i == num_of_colours) { if(num_of_zero_covg_kmers == 0) { report_warning("a kmer has zero coverage in all colours [index: %lu]\n", num_of_kmers_read); } num_of_zero_covg_kmers++; } // Print? 
if(print_kmers) { binary_kmer_to_seq(kmer, seq, kmer_size, num_of_bitfields); printf("%s", seq); // Print coverages for(i = 0; i < num_of_colours; i++) printf(" %li", (unsigned long)covgs[i]); // Print edges for(i = 0; i < num_of_colours; i++) printf(" %s", get_edges_str(edges[i], kmer_colour_edge_str)); if(version >= 7 && num_of_shades > 0) { for(i = 0; i < num_of_colours; i++) { putc(' ', stdout); print_colour_shades(shade_data + i*shade_bytes, shend_data + i*shade_bytes); } } putc('\n', stdout); } num_of_kmers_read++; for(i = 0; i < num_of_colours; i++) sum_of_covgs_read += covgs[i]; } if(num_of_kmers_read != expected_num_of_kmers) { report_error("Expected %lu kmers, read %lu\n", expected_num_of_kmers, num_of_kmers_read); } if(print_kmers && print_info) printf("----\n"); // check for various reading errors if(errno != 0) { report_error("errno set [%i]\n", (int)errno); } int err; if((err = ferror(fh)) != 0) { report_error("occurred after file reading [%i]\n", err); } // For testing output //num_of_bitfields = 2; //num_of_kmers_read = 3600000000; //num_of_kmers_read = 12345; //num_of_kmers_read = 3581787; //num_of_kmers_read = 0; print_kmer_stats(); fclose(fh); free(kmer); free(covgs); free(edges); free(shade_data); free(shend_data); buffer_free(buffer); if((print_kmers || parse_kmers) && print_info) { printf("----\n"); if(num_warnings > 0 || num_errors > 0) printf("Warnings: %u; Errors: %u\n", num_warnings, num_errors); if(num_errors == 0) printf(num_warnings ? "Binary may be ok\n" : "Binary is valid\n"); } exit(EXIT_SUCCESS); }
/*
 * Dissect one YAMI parameter: a length-prefixed name, a 32-bit type code and
 * a type-dependent value.  All integers are little-endian; names, strings
 * and binary values are followed by padding up to a multiple of 4 bytes.
 *
 * tvb    - buffer holding the parameter
 * tree   - tree to which the parameter item is added
 * offset - offset of the parameter in tvb
 * par_ti - item of the enclosing parameter list; the parameter name is
 *          appended to its text
 *
 * Nested parameters are dissected recursively.
 *
 * Returns the offset just past the parameter, or -1 if the type code is
 * unknown (in this parameter or in one nested inside it).
 */
static int
dissect_yami_parameter(tvbuff_t *tvb, proto_tree *tree, int offset, proto_item *par_ti)
{
    const int orig_offset = offset;

    proto_tree *yami_param;
    proto_item *ti;

    char *name;
    int name_offset;
    guint32 name_len;

    guint32 type;

    ti = proto_tree_add_item(tree, hf_yami_param, tvb, offset, 0, ENC_NA);
    yami_param = proto_item_add_subtree(ti, ett_yami_param);

    /* Name: 4-byte length, the characters, then padding to 4 bytes */
    name_offset = offset;
    name_len = tvb_get_letohl(tvb, offset);
    offset += 4;

    name = tvb_get_ephemeral_string_enc(tvb, offset, name_len, ENC_ASCII | ENC_NA);
    proto_item_append_text(ti, ": %s", name);
    proto_item_append_text(par_ti, "%s, ", name);
    offset += (name_len + 3) & ~3;
    proto_tree_add_string(yami_param, hf_yami_param_name, tvb, name_offset, offset - name_offset, name);

    type = tvb_get_letohl(tvb, offset);
    proto_tree_add_item(yami_param, hf_yami_param_type, tvb, offset, 4, ENC_LITTLE_ENDIAN);
    offset += 4;

    switch (type) {
        case YAMI_TYPE_BOOLEAN:
        {
            guint32 val = tvb_get_letohl(tvb, offset);

            proto_item_append_text(ti, ", Type: boolean, Value: %s", val ? "True" : "False");
            proto_tree_add_item(yami_param, hf_yami_param_value_bool, tvb, offset, 4, ENC_LITTLE_ENDIAN);
            offset += 4;
            break;
        }

        case YAMI_TYPE_INTEGER:
        {
            gint32 val = tvb_get_letohl(tvb, offset);

            proto_item_append_text(ti, ", Type: integer, Value: %d", val);
            proto_tree_add_item(yami_param, hf_yami_param_value_int, tvb, offset, 4, ENC_LITTLE_ENDIAN);
            offset += 4;
            break;
        }

        case YAMI_TYPE_LONGLONG:
        {
            gint64 val = tvb_get_letoh64(tvb, offset);

            proto_item_append_text(ti, ", Type: long, Value: %" G_GINT64_MODIFIER "d", val);
            proto_tree_add_item(yami_param, hf_yami_param_value_long, tvb, offset, 8, ENC_LITTLE_ENDIAN);
            offset += 8;
            break;
        }

        case YAMI_TYPE_DOUBLE:
        {
            gdouble val = tvb_get_letohieee_double(tvb, offset);

            proto_item_append_text(ti, ", Type: double, Value: %g", val);
            proto_tree_add_item(yami_param, hf_yami_param_value_double, tvb, offset, 8, ENC_LITTLE_ENDIAN);
            offset += 8;
            break;
        }

        case YAMI_TYPE_STRING:
        {
            const int val_offset = offset;
            guint32 val_len;
            char *val;

            val_len = tvb_get_letohl(tvb, offset);
            offset += 4;

            val = tvb_get_ephemeral_string_enc(tvb, offset, val_len, ENC_ASCII | ENC_NA);

            proto_item_append_text(ti, ", Type: string, Value: \"%s\"", val);
            offset += (val_len + 3) & ~3;
            proto_tree_add_string(yami_param, hf_yami_param_value_str, tvb, val_offset, offset - val_offset, val);
            break;
        }

        case YAMI_TYPE_BINARY:
        {
            const int val_offset = offset;
            guint32 val_len;
            const guint8 *val;
            char *repr;

            val_len = tvb_get_letohl(tvb, offset);
            offset += 4;

            val = tvb_get_ptr(tvb, offset, val_len);
            repr = bytes_to_str(val, val_len);

            proto_item_append_text(ti, ", Type: binary, Value: %s", repr);
            offset += (val_len + 3) & ~3;
            proto_tree_add_bytes_format_value(yami_param, hf_yami_param_value_bin, tvb, val_offset, offset - val_offset, val, "%s", repr);
            break;
        }

        case YAMI_TYPE_BOOLEAN_ARRAY:
        {
            guint32 count;
            guint i;
            int j;

            count = tvb_get_letohl(tvb, offset);
            proto_tree_add_item(yami_param, hf_yami_items_count, tvb, offset, 4, ENC_LITTLE_ENDIAN);
            offset += 4;

            proto_item_append_text(ti, ", Type: boolean[], %u items: {", count);

            /* Booleans are packed 32 per little-endian word, lowest bit
               first.  Full words first... */
            for (i = 0; i < count/32; i++) {
                guint32 val = tvb_get_letohl(tvb, offset);

                for (j = 0; j < 32; j++) {
                    /* unsigned constant: 1 << 31 would overflow an int */
                    int r = !!(val & (1U << j));

                    proto_item_append_text(ti, "%s, ", r ? "T" : "F");
                    proto_tree_add_boolean(yami_param, hf_yami_param_value_bool, tvb, offset+(j/8), 1, r);
                }
                offset += 4;
            }

            /* ...then the remaining bits of a final, partly used word */
            if (count % 32) {
                guint32 val = tvb_get_letohl(tvb, offset);
                int tmp = count % 32;

                for (j = 0; j < tmp; j++) {
                    int r = !!(val & (1U << j));

                    proto_item_append_text(ti, "%s, ", r ? "T" : "F");
                    proto_tree_add_boolean(yami_param, hf_yami_param_value_bool, tvb, offset+(j/8), 1, r);
                }
                offset += 4;
            }

            proto_item_append_text(ti, "}");
            break;
        }

        case YAMI_TYPE_INTEGER_ARRAY:
        {
            guint32 count;
            guint i;

            count = tvb_get_letohl(tvb, offset);
            proto_tree_add_item(yami_param, hf_yami_items_count, tvb, offset, 4, ENC_LITTLE_ENDIAN);
            offset += 4;

            proto_item_append_text(ti, ", Type: integer[], %u items: {", count);
            for (i = 0; i < count; i++) {
                gint32 val = tvb_get_letohl(tvb, offset);

                proto_item_append_text(ti, "%d, ", val);
                proto_tree_add_item(yami_param, hf_yami_param_value_int, tvb, offset, 4, ENC_LITTLE_ENDIAN);
                offset += 4;
            }
            proto_item_append_text(ti, "}");
            break;
        }

        case YAMI_TYPE_LONGLONG_ARRAY:
        {
            guint32 count;
            guint i;

            count = tvb_get_letohl(tvb, offset);
            proto_tree_add_item(yami_param, hf_yami_items_count, tvb, offset, 4, ENC_LITTLE_ENDIAN);
            offset += 4;

            proto_item_append_text(ti, ", Type: long long[], %u items: {", count);

            for (i = 0; i < count; i++) {
                gint64 val = tvb_get_letoh64(tvb, offset);

                proto_item_append_text(ti, "%" G_GINT64_MODIFIER "d, ", val);
                proto_tree_add_item(yami_param, hf_yami_param_value_long, tvb, offset, 8, ENC_LITTLE_ENDIAN);
                offset += 8;
            }
            proto_item_append_text(ti, "}");
            break;
        }

        case YAMI_TYPE_DOUBLE_ARRAY:
        {
            guint32 count;
            guint i;

            count = tvb_get_letohl(tvb, offset);
            proto_tree_add_item(yami_param, hf_yami_items_count, tvb, offset, 4, ENC_LITTLE_ENDIAN);
            offset += 4;

            proto_item_append_text(ti, ", Type: double[], %u items: {", count);

            for (i = 0; i < count; i++) {
                gdouble val = tvb_get_letohieee_double(tvb, offset);

                proto_item_append_text(ti, "%g, ", val);
                proto_tree_add_item(yami_param, hf_yami_param_value_double, tvb, offset, 8, ENC_LITTLE_ENDIAN);
                offset += 8;
            }
            proto_item_append_text(ti, "}");
            break;
        }

        case YAMI_TYPE_STRING_ARRAY:
        {
            guint32 count;
            guint i;

            count = tvb_get_letohl(tvb, offset);
            proto_tree_add_item(yami_param, hf_yami_items_count, tvb, offset, 4, ENC_LITTLE_ENDIAN);
            offset += 4;

            proto_item_append_text(ti, ", Type: string[], %u items: {", count);

            for (i = 0; i < count; i++) {
                const int val_offset = offset;
                guint32 val_len;
                char *val;

                val_len = tvb_get_letohl(tvb, offset);
                offset += 4;

                val = tvb_get_ephemeral_string_enc(tvb, offset, val_len, ENC_ASCII | ENC_NA);

                proto_item_append_text(ti, "\"%s\", ", val);
                /* Advance past the string before adding the item, so that
                   the item covers the length, the characters and the
                   padding, as for a scalar string */
                offset += (val_len + 3) & ~3;
                proto_tree_add_string(yami_param, hf_yami_param_value_str, tvb, val_offset, offset - val_offset, val);
            }
            proto_item_append_text(ti, "}");
            break;
        }

        case YAMI_TYPE_BINARY_ARRAY:
        {
            guint32 count;
            guint i;

            count = tvb_get_letohl(tvb, offset);
            proto_tree_add_item(yami_param, hf_yami_items_count, tvb, offset, 4, ENC_LITTLE_ENDIAN);
            offset += 4;

            proto_item_append_text(ti, ", Type: binary[], %u items: {", count);

            for (i = 0; i < count; i++) {
                const int val_offset = offset;
                guint32 val_len;
                const guint8 *val;
                char *repr;

                val_len = tvb_get_letohl(tvb, offset);
                offset += 4;

                val = tvb_get_ptr(tvb, offset, val_len);
                repr = bytes_to_str(val, val_len);

                proto_item_append_text(ti, "%s, ", repr);
                offset += (val_len + 3) & ~3;
                proto_tree_add_bytes_format_value(yami_param, hf_yami_param_value_bin, tvb, val_offset, offset - val_offset, val, "%s", repr);
            }
            proto_item_append_text(ti, "}");
            break;
        }

        case YAMI_TYPE_NESTED:
        {
            guint32 count;
            guint i;

            count = tvb_get_letohl(tvb, offset);
            proto_tree_add_item(yami_param, hf_yami_params_count, tvb, offset, 4, ENC_LITTLE_ENDIAN);
            offset += 4;

            proto_item_append_text(ti, ", Type: nested, %u parameters: ", count);

            for (i = 0; i < count; i++) {
                offset = dissect_yami_parameter(tvb, yami_param, offset, ti);
                /* smth went wrong */
                if (offset == -1)
                    return -1;
            }
            break;
        }

        default:
            /* type is unsigned, so print it as such */
            proto_item_append_text(ti, ", Type: unknown (%u)!", type);
            return -1;
    }

    proto_item_set_len(ti, offset - orig_offset);

    return offset;
}
int ctx_view(int argc, char **argv) { // Arg parsing char cmd[100]; char shortopts[300]; cmd_long_opts_to_short(longopts, shortopts, sizeof(shortopts)); int c; // TODO: // print_action actions[argc]; // bool read_kmers = false; // silence error messages from getopt_long // opterr = 0; while((c = getopt_long_only(argc, argv, shortopts, longopts, NULL)) != -1) { cmd_get_longopt_str(longopts, c, cmd, sizeof(cmd)); switch(c) { case 0: /* flag set */ break; case 'h': cmd_print_usage(NULL); break; case ':': /* BADARG */ case '?': /* BADCH getopt_long has already printed error */ // cmd_print_usage(NULL); cmd_print_usage("`"CMD" "SUBCMD" -h` for help. Bad option: %s", argv[optind-1]); default: cmd_print_usage("Programmer fail. Tell Isaac."); } } if(print_kmers) parse_kmers = 1; bool no_flags = (!print_info && !parse_kmers && !print_kmers); if(no_flags) { print_info = parse_kmers = 1; } if(optind+1 != argc) cmd_print_usage("Require one input graph file (.ctx)"); char *path = argv[optind]; size_t num_errors = 0, num_warnings = 0; GraphFileReader gfile; memset(&gfile, 0, sizeof(gfile)); int ret = graph_file_open(&gfile, path); if(ret == 0) die("Cannot open file: %s", path); if(print_info) { char fsize_str[50]; bytes_to_str((size_t)gfile.file_size, 0, fsize_str); printf("Loading file: %s\n", file_filter_path(&gfile.fltr)); printf("File size: %s\n", fsize_str); printf("----\n"); } size_t i, col, ncols = file_filter_into_ncols(&gfile.fltr); size_t kmer_size = gfile.hdr.kmer_size; ctx_assert(ncols > 0); GraphFileHeader hdr; memset(&hdr, 0, sizeof(hdr)); graph_file_merge_header(&hdr, &gfile); uint64_t nkmers_read = 0, nkmers_loaded = 0; uint64_t num_all_zero_kmers = 0, num_zero_covg_kmers = 0; uint64_t *col_nkmers, *col_sum_covgs; col_nkmers = ctx_calloc(ncols, sizeof(col_nkmers[0])); col_sum_covgs = ctx_calloc(ncols, sizeof(col_sum_covgs[0])); // Print header if(print_info) print_header(&hdr, gfile.num_of_kmers); BinaryKmer bkmer; Covg covgs[ncols], keep_kmer; Edges 
edges[ncols]; bool direct_read = file_filter_is_direct(&gfile.fltr); if(parse_kmers || print_kmers) { if(print_info && print_kmers) printf("----\n"); for(; graph_file_read_reset(&gfile, &bkmer, covgs, edges); nkmers_read++) { // If kmer has no covg in any samples -> don't load keep_kmer = 0; for(col = 0; col < ncols; col++) { col_nkmers[col] += (covgs[col] > 0); col_sum_covgs[col] += covgs[col]; keep_kmer |= covgs[col]; } if(!direct_read && !keep_kmer) continue; nkmers_loaded++; /* Kmer Checks */ // graph_file_read_reset() already checks for: // 1. oversized kmers // 2. kmers with covg 0 in all colours // 3. edges without coverage in a colour // Check for all-zeros (i.e. all As kmer: AAAAAA) uint64_t kmer_words_or = 0; for(i = 0; i < hdr.num_of_bitfields; i++) kmer_words_or |= bkmer.b[i]; if(kmer_words_or == 0) { if(num_all_zero_kmers == 1) { loading_error("more than one all 'A's kmers seen [index: %"PRIu64"]\n", nkmers_read); } num_all_zero_kmers++; } // Check covg is 0 for all colours for(i = 0; i < ncols && covgs[i] == 0; i++); num_zero_covg_kmers += (i == ncols); // Print if(print_kmers) db_graph_print_kmer2(bkmer, covgs, edges, ncols, kmer_size, stdout); } } // check for various reading errors // if(errno != 0) // loading_error("errno set [%i]: %s\n", (int)errno, strerror(errno)); int err = ferror(gfile.fh); if(err != 0) loading_error("occurred after file reading [%i]\n", err); char nstr[50]; if(print_kmers || parse_kmers) { // file_size is set to -1 if we are reading from a stream, // therefore won't be able to check number of kmers read if(gfile.file_size != -1 && nkmers_read != (uint64_t)gfile.num_of_kmers) { loading_warning("Expected %zu kmers, read %zu\n", (size_t)gfile.num_of_kmers, (size_t)nkmers_read); } if(num_all_zero_kmers > 1) { loading_error("%s all-zero-kmers seen\n", ulong_to_str(num_all_zero_kmers, nstr)); } if(num_zero_covg_kmers > 0) { loading_warning("%s kmers have no coverage in any colour\n", ulong_to_str(num_zero_covg_kmers, nstr)); } } 
// Count warnings printed by graph_file_reader.c num_warnings += gfile.error_zero_covg; num_warnings += gfile.error_missing_covg; // Can only print these stats if we're read in the kmers if((print_kmers || parse_kmers) && print_info) { // print kmer coverage per sample printf("\n---- Per colour stats\n"); printf("num. kmers:"); for(col = 0; col < ncols; col++) printf("\t%s", ulong_to_str(col_nkmers[col], nstr)); printf("\n"); printf("sum coverage:"); for(col = 0; col < ncols; col++) printf("\t%s", ulong_to_str(col_sum_covgs[col], nstr)); printf("\n"); printf("kmer coverage:"); for(col = 0; col < ncols; col++) printf("\t%.2f", safe_frac(col_sum_covgs[col], col_nkmers[col])); printf("\n"); // Overall stats uint64_t sum_covgs = 0; double mean_kmer_covg = 0.0; for(col = 0; col < ncols; col++) sum_covgs += col_sum_covgs[col]; mean_kmer_covg = nkmers_loaded ? (double)sum_covgs / nkmers_loaded : 0.0; printf("\n---- Overall stats\n"); printf("Total kmers: %s\n", ulong_to_str(nkmers_loaded, nstr)); printf("Total coverage: %s\n", ulong_to_str(sum_covgs, nstr)); printf("Mean coverage: %s\n", double_to_str(mean_kmer_covg, 2, nstr)); } if(print_info) { // Print memory stats uint64_t mem, capacity, num_buckets, req_capacity; uint8_t bucket_size; req_capacity = (size_t)(gfile.num_of_kmers / IDEAL_OCCUPANCY); capacity = hash_table_cap(req_capacity, &num_buckets, &bucket_size); mem = ht_mem(bucket_size, num_buckets, sizeof(BinaryKmer)*8 + ncols*(sizeof(Covg)+sizeof(Edges))*8); char memstr[100], capacitystr[100], bucket_size_str[100], num_buckets_str[100]; bytes_to_str(mem, 1, memstr); ulong_to_str(capacity, capacitystr); ulong_to_str(bucket_size, bucket_size_str); ulong_to_str(num_buckets, num_buckets_str); size_t mem_height = (size_t)__builtin_ctzl(num_buckets); printf("\n---- Memory\n"); printf("memory required: %s [capacity: %s]\n", memstr, capacitystr); printf(" bucket size: %s; number of buckets: %s\n", bucket_size_str, num_buckets_str); printf(" --kmer_size %zu --mem_height 
%zu --mem_width %i\n", kmer_size, mem_height, bucket_size); } if((print_kmers || parse_kmers) && print_info) { printf("\n----\n"); if(num_warnings > 0 || num_errors > 0) { printf("Warnings: %zu; Errors: %zu\n", (size_t)num_warnings, (size_t)num_errors); } if(num_errors == 0) printf(num_warnings ? "Graph may be ok\n" : "Graph is valid\n"); } ctx_free(col_nkmers); ctx_free(col_sum_covgs); // Close file (which zeros it) graph_file_close(&gfile); graph_header_dealloc(&hdr); return num_errors ? EXIT_FAILURE : EXIT_SUCCESS; }