// Check contig entries match reference // We check that these match the reference just loaded static void brkpnt_check_refs_match(cJSON *json, const ChromHash *genome, const char *path) { cJSON *version = json_hdr_get(json, "format_version", cJSON_Number, path); if(version->valueint <= 2) return; cJSON *command = json_hdr_get_curr_cmd(json, path); cJSON *brkpnts = json_hdr_get(command, "breakpoints", cJSON_Object, path); cJSON *contigs = json_hdr_get(brkpnts, "contigs", cJSON_Array, path); cJSON *contig; size_t num_chroms = 0; for(contig = contigs->child; contig; contig = contig->next, num_chroms++) { cJSON *id = json_hdr_get(contig, "id", cJSON_String, path); cJSON *len = json_hdr_get(contig, "length", cJSON_Number, path); // Check chrom is loaded in ref and of expected length khiter_t k = kh_get(kChromHash, genome, id->valuestring); if(k == kh_end(genome)) warn("Cannot find chrom [%s]", id->valuestring); else { const read_t *r = kh_value(genome, k); if(r->seq.end != (size_t)len->valueint) { warn("Chrom lengths do not match %s input:%li ref:%zu", id->valuestring, len->valueint, r->seq.end); } } } if(num_chroms != kh_size(genome)) { warn("Number of chromosomes differ: %zu in header vs %zu in ref", num_chroms, (size_t)kh_size(genome)); } }
/** * Generate a JSON header object for a .ctp file * @param path path to output file * @param cmdstr name of the command being run, to be used to add @cmdhdr * @param cmdhdr JSON header to add under current command->@cmdstr * If cmdstr and cmdhdr are both NULL they are ignored * @param contig_hist histgram of read contig lengths * @param hist_len length of array contig_hist */ cJSON* gpath_save_mkhdr(const char *path, const char *cmdstr, cJSON *cmdhdr, cJSON **hdrs, size_t nhdrs, const ZeroSizeBuffer *contig_hists, size_t ncols, const dBGraph *db_graph) { ctx_assert(!cmdstr == !cmdhdr); const GPathStore *gpstore = &db_graph->gpstore; const GPathSet *gpset = &gpstore->gpset; // using json_hdr_make_std() assumes the following ctx_assert(gpset->ncols == db_graph->num_of_cols); // Construct cJSON cJSON *jsonhdr = cJSON_CreateObject(); cJSON_AddStringToObject(jsonhdr, "file_format", "ctp"); cJSON_AddNumberToObject(jsonhdr, "format_version", CTP_FORMAT_VERSION); // Add standard cortex header info, including the command being run json_hdr_make_std(jsonhdr, path, hdrs, nhdrs, db_graph, hash_table_nkmers(&db_graph->ht)); // Get first command (this one), and command specific extra info if(cmdstr) { cJSON *cmd = json_hdr_get_curr_cmd(jsonhdr, path); cJSON_AddItemToObject(cmd, cmdstr, cmdhdr); } // Paths info cJSON *paths = cJSON_CreateObject(); cJSON_AddItemToObject(jsonhdr, "paths", paths); // Add command specific header fields cJSON_AddNumberToObject(paths, "num_kmers_with_paths", gpstore->num_kmers_with_paths); cJSON_AddNumberToObject(paths, "num_paths", gpstore->num_paths); cJSON_AddNumberToObject(paths, "path_bytes", gpstore->path_bytes); // Add size distribution cJSON *json_hists = cJSON_CreateArray(); cJSON_AddItemToObject(paths, "contig_hists", json_hists); size_t i; for(i = 0; i < ncols; i++) _gpath_save_contig_hist2json(json_hists, contig_hists[i].b, contig_hists[i].len); return jsonhdr; }
// Check contig entries match reference // We check that these match the reference just loaded static void brkpnt_check_refs_match(cJSON *json, const char *path) { cJSON *version = json_hdr_get(json, "format_version", cJSON_Number, path); if(version->valueint <= 2) return; cJSON *command = json_hdr_get_curr_cmd(json, path); cJSON *brkpnts = json_hdr_get(command, "breakpoints", cJSON_Object, path); cJSON *contigs = json_hdr_get(brkpnts, "contigs", cJSON_Array, path); cJSON *contig; size_t num_chroms = 0; for(contig = contigs->child; contig; contig = contig->next, num_chroms++) { cJSON *id = json_hdr_get(contig, "id", cJSON_String, path); cJSON *len = json_hdr_get(contig, "length", cJSON_Number, path); const char *chrom_name = id->valuestring; long chrom_len = len->valueint; size_t reflen; khiter_t k = kh_get(ChromHash, genome, chrom_name); if(k == kh_end(genome)) die("Cannot find ref chrom: %s", chrom_name); else { reflen = kh_value(genome, k)->seq.end; if(reflen != (size_t)chrom_len) { die("Chrom lengths do not match %s input:%li ref:%zu", chrom_name, chrom_len, reflen); } } } if(num_chroms != chroms.len) { die("Number of chromosomes differ: %zu in header vs %zu in ref", num_chroms, chroms.len); } }