/**
 * Randomised test of binary_seq_reverse_complement() and binary_seq_to_str().
 *
 * Each of the NTESTS rounds:
 *  - fills TLEN bytes with random data and picks a random base count in the
 *    range 0..4*TLEN-1,
 *  - reverse complements a copy and checks, via the string forms, that every
 *    base equals the complement of its mirror-image base,
 *  - reverse complements the copy again and checks that both the raw bytes
 *    and the string form are restored.
 */
static void test_binary_seq_rev_cmp()
{
  test_status("binary_seq_reverse_complement() binary_seq_to_str()");

  uint8_t data[TLEN];
  uint8_t tmp[TLEN];
  char str[4*TLEN+1];
  char rev[4*TLEN+1];
  char restore[4*TLEN+1];
  size_t round, j, k, nbases;

  round = 0;
  while(round < NTESTS)
  {
    // Fresh random input and a random number of bases to use from it
    rand_bytes(data, TLEN);
    nbases = rand() & (4*TLEN-1);
    binary_seq_to_str(data, nbases, str);

    // Work on a copy so the untouched input is kept for later comparison
    memcpy(tmp, data, sizeof(tmp));
    binary_seq_reverse_complement(tmp, nbases);
    binary_seq_to_str(tmp, nbases, rev);

    // Base j of the forward string pairs with base nbases-1-j of the reverse
    for(j = 0; j < nbases; j++)
    {
      k = nbases - 1 - j;
      TASSERT(str[j] == dna_char_complement(rev[k]));
    }

    // A second reverse complement must undo the first
    binary_seq_reverse_complement(tmp, nbases);
    binary_seq_to_str(tmp, nbases, restore);
    TASSERT(memcmp(data, tmp, TLEN) == 0);
    TASSERT(strncmp(str, restore, nbases) == 0);

    round++;
  }
}
/**
 * Round-trip check for binary_seq_from_str() / binary_seq_to_str().
 *
 * Encodes seq with binary_seq_from_str(), decodes the result with
 * binary_seq_to_str() and asserts that the decoded string equals seq.
 *
 * @param seq NUL-terminated sequence to round-trip; may be the empty string
 */
static void _binary_seq_str_test(const char *seq)
{
  size_t len = strlen(seq);
  char str[len+1];
  // A variable length array must have a size greater than zero, so pad by one
  // element to keep the empty-string case (len == 0) well defined.
  uint8_t data[len+1];

  binary_seq_from_str(seq, len, data);
  binary_seq_to_str(data, len, str);
  TASSERT2(strcmp(seq, str) == 0, "1: '%s' vs '%s'", seq, str);
}
/** * Print paths to a string buffer. Paths are sorted before being written. * * @param hkey All paths associated with hkey are written to the buffer * @param sbuf paths are written this string buffer * @param subset is a temp variable that is reused each time * @param nbuf temporary buffer, if not NULL, used to add seq=... to output * @param jposbuf temporary buffer, if not NULL, used to add juncpos=... to output */ void gpath_save_sbuf(hkey_t hkey, StrBuf *sbuf, GPathSubset *subset, dBNodeBuffer *nbuf, SizeBuffer *jposbuf, const dBGraph *db_graph) { ctx_assert(db_graph->num_of_cols == 1 || nbuf == NULL); ctx_assert(db_graph->num_of_cols == 1 || jposbuf == NULL); const GPathStore *gpstore = &db_graph->gpstore; const GPathSet *gpset = &gpstore->gpset; const size_t ncols = gpstore->gpset.ncols; GPath *first_gpath = gpath_store_fetch(gpstore, hkey); const GPath *gpath; size_t i, j, col; // Load and sort paths for given kmer gpath_subset_reset(subset); gpath_subset_load_llist(subset, first_gpath); gpath_subset_sort(subset); if(subset->list.len == 0) return; // Print "<kmer> <npaths>" BinaryKmer bkmer = db_graph->ht.table[hkey]; char bkstr[MAX_KMER_SIZE+1]; binary_kmer_to_str(bkmer, db_graph->kmer_size, bkstr); // strbuf_sprintf(sbuf, "%s %zu\n", bkstr, subset->list.len); strbuf_append_strn(sbuf, bkstr, db_graph->kmer_size); strbuf_append_char(sbuf, ' '); strbuf_append_ulong(sbuf, subset->list.len); strbuf_append_char(sbuf, '\n'); char orchar[2] = {0}; orchar[FORWARD] = 'F'; orchar[REVERSE] = 'R'; const uint8_t *nseenptr; for(i = 0; i < subset->list.len; i++) { gpath = subset->list.b[i]; nseenptr = gpath_set_get_nseen(gpset, gpath); // strbuf_sprintf(sbuf, "%c %zu %u %u", orchar[gpath->orient], klen, // gpath->num_juncs, (uint32_t)nseenptr[0]); strbuf_append_char(sbuf, orchar[gpath->orient]); strbuf_append_char(sbuf, ' '); strbuf_append_ulong(sbuf, gpath->num_juncs); strbuf_append_char(sbuf, ' '); strbuf_append_ulong(sbuf, nseenptr[0]); for(col = 1; col < ncols; col++) 
{ // strbuf_sprintf(sbuf, ",%u", (uint32_t)nseenptr[col]); strbuf_append_char(sbuf, ','); strbuf_append_ulong(sbuf, nseenptr[col]); } strbuf_append_char(sbuf, ' '); strbuf_ensure_capacity(sbuf, sbuf->end + gpath->num_juncs + 2); binary_seq_to_str(gpath->seq, gpath->num_juncs, sbuf->b+sbuf->end); sbuf->end += gpath->num_juncs; if(nbuf) { // Trace this path through the graph // First, find a colour this path is in for(col = 0; col < ncols && !gpath_has_colour(gpath, ncols, col); col++) {} if(col == ncols) die("path is not in any colours"); dBNode node = {.key = hkey, .orient = gpath->orient}; db_node_buf_reset(nbuf); if(jposbuf) size_buf_reset(jposbuf); // indices of junctions in nbuf gpath_fetch(node, gpath, nbuf, jposbuf, col, db_graph); strbuf_append_str(sbuf, " seq="); strbuf_ensure_capacity(sbuf, sbuf->end + db_graph->kmer_size + nbuf->len); sbuf->end += db_nodes_to_str(nbuf->b, nbuf->len, db_graph, sbuf->b+sbuf->end); if(jposbuf) { strbuf_append_str(sbuf, " juncpos="); strbuf_append_ulong(sbuf, jposbuf->b[0]); for(j = 1; j < jposbuf->len; j++) { strbuf_append_char(sbuf, ','); strbuf_append_ulong(sbuf, jposbuf->b[j]); } } } strbuf_append_char(sbuf, '\n'); } } // @subset is a temp variable that is reused each time // @sbuf is a temp variable that is reused each time static inline int _gpath_gzsave_node(hkey_t hkey, StrBuf *sbuf, GPathSubset *subset, dBNodeBuffer *nbuf, SizeBuffer *jposbuf, gzFile gzout, pthread_mutex_t *outlock, const dBGraph *db_graph) { gpath_save_sbuf(hkey, sbuf, subset, nbuf, jposbuf, db_graph); if(sbuf->end > DEFAULT_IO_BUFSIZE) _gpath_save_flush(gzout, sbuf, outlock); return 0; // => keep iterating }