/*
 * Debugging aid: prints the contents of a cram_stats object to stderr.
 *
 * After a "cram_stats:" header, one "\t<value>\t<count>" line is written for
 * every non-zero slot of st->freqs[0..MAX_STAT_VAL-1], followed by one line
 * for every live entry of the optional hash st->h.
 */
void cram_stats_dump(cram_stats *st)
{
    int val;
    khint_t slot;

    fprintf(stderr, "cram_stats:\n");

    /* Values tracked in the direct-indexed array. */
    for (val = 0; val < MAX_STAT_VAL; val++) {
        if (st->freqs[val])
            fprintf(stderr, "\t%d\t%d\n", val, st->freqs[val]);
    }

    /* The hash is optional; nothing more to print without it. */
    if (!st->h)
        return;

    for (slot = kh_begin(st->h); slot != kh_end(st->h); slot++) {
        if (kh_exist(st->h, slot))
            fprintf(stderr, "\t%d\t%d\n", kh_key(st->h, slot), kh_val(st->h, slot));
    }
}
/*
 * Runs the script attached to the instance registered under `id`.
 *
 * Returns 1 when the instance exists and luaL_dofile() reports success,
 * 0 when the id is unknown or luaL_dofile() returns non-zero.
 */
int start(int id)
{
    struct instance *inst;
    khint_t slot = kh_get(m32, states, id);

    if (slot == kh_end(states))
        return 0;

    inst = kh_val(states, slot);
    printf("Starting script: %s\n", inst->path);

    if (luaL_dofile(inst->state, inst->path))
        return 0;

    return 1;
}
/*
 * Token callback: folds one token into the forward index passed as `ctx`.
 *
 * The entry for the token is looked up in idx->hits under the 32-bit value
 * returned by fnv_32a_buf() for the token text; a new zero-initialised entry
 * is created on first sight. The entry's field flags, frequency and position
 * list are then updated, together with the index-wide totalFreq / maxFreq.
 *
 * Always returns 0.
 */
int forwardIndexTokenFunc(void *ctx, Token t)
{
    ForwardIndex *fwd = ctx;
    ForwardIndexEntry *entry = NULL;
    float weight;

    /* The hash table is keyed by our own hash of the string, not the string. */
    u_int32_t termHash = fnv_32a_buf((void *)t.s, t.len, 0);
    khiter_t it = kh_get(32, fwd->hits, termHash);

    if (it != kh_end(fwd->hits)) {
        /* Seen before: reuse the stored entry. */
        entry = kh_val(fwd->hits, it);
    } else {
        int absent;

        entry = calloc(1, sizeof(ForwardIndexEntry));
        entry->docId = fwd->docId;
        entry->flags = 0;
        entry->term = t.s;
        entry->len = t.len;
        entry->vw = NewVarintVectorWriter(4);
        entry->docScore = fwd->docScore;

        it = kh_put(32, fwd->hits, termHash, &absent);
        kh_value(fwd->hits, it) = entry;
    }

    entry->flags |= (t.fieldId & 0xff);

    /* Stem tokens contribute a scaled-down score to the entry frequency... */
    weight = (float)t.score;
    if (t.type == DT_STEM) {
        weight *= STEM_TOKEN_FACTOR;
    }
    entry->freq += weight;

    /* ...but the index-wide total always receives the unscaled score. */
    fwd->totalFreq += (float)t.score;
    fwd->maxFreq = MAX(entry->freq, fwd->maxFreq);

    VVW_Write(entry->vw, t.pos);
    return 0;
}
// // Plugin Interface // static void* begin_call(int id, const char* name) { khint_t key = kh_get(m32, states, id); if (key != kh_end(states)) { // TODO: Keep track of which functions have been registered struct instance* inst = kh_val(states, key); struct call_info *info = call_info_init(inst->state, name); // Push the function to call onto the lua stack lua_getglobal(inst->state, name); info->n_args++; // Push our call_info onto our callstack kv_push(struct call_info*, inst->callstack, info); return inst; } else { return 0;
/*
 * Handles a key/value argument for the instance registered under `id`.
 *
 * Only the "script" key is acted upon: its value pointer is stored (not
 * copied) as the instance's path. Every other key is accepted and ignored.
 *
 * Returns 1 on success or for an ignored key, 0 when the key is "script"
 * but no instance is registered under `id`.
 */
int argument(int id, char *key, char *value)
{
    khint_t slot;
    struct instance *inst;

    if (strcmp("script", key) != 0)
        return 1;

    slot = kh_get(m32, states, id);
    if (slot == kh_end(states))
        return 0;

    inst = kh_val(states, slot);
    inst->path = value;
    return 1;
}
/*
 * Parses a single header line and registers it in the header dictionaries.
 *
 * Lines starting with "##" are handed to bcf_hdr_parse_line2(); a positive
 * result marks a contig line (the result is stored as info[0] of the contig
 * entry), zero marks a FILTER/INFO/FORMAT line whose packed `info` word is
 * stored in the slot selected by its low four bits. A line starting with a
 * single '#' is passed to bcf_hdr_parse_sample_line().
 *
 * Returns 0 on success, -1 if the line does not start with '#', if
 * bcf_hdr_parse_line2() fails, or if the ID string cannot be allocated.
 */
int bcf_hdr_parse1(bcf_hdr_t *h, const char *str)
{
    khint_t k;

    if (*str != '#') return -1;

    if (str[1] == '#') {
        uint32_t info;
        int len, ret, id_beg, id_end;
        char *s;

        len = bcf_hdr_parse_line2(str, &info, &id_beg, &id_end);
        if (len < 0) return -1;

        /* Private copy of the ID; ownership moves to the dictionary when
         * the key is newly inserted, otherwise it is freed below. */
        s = (char*)malloc(id_end - id_beg + 1);
        if (s == NULL) return -1;
        strncpy(s, str + id_beg, id_end - id_beg);
        s[id_end - id_beg] = 0;

        if (len > 0) { // a contig line
            vdict_t *d = (vdict_t*)h->dict[BCF_DT_CTG];
            k = kh_put(vdict, d, s, &ret);
            if (ret == 0) {
                if (hts_verbose >= 2)
                    fprintf(stderr, "[W::%s] Duplicated contig name '%s'. Skipped.\n", __func__, s);
                free(s);
            } else {
                kh_val(d, k) = bcf_idinfo_def;
                kh_val(d, k).id = kh_size(d) - 1;
                kh_val(d, k).info[0] = len;
            }
        } else { // a FILTER/INFO/FORMAT line
            vdict_t *d = (vdict_t*)h->dict[BCF_DT_ID];
            k = kh_put(vdict, d, s, &ret);
            if (ret) { // absent from the dict
                kh_val(d, k) = bcf_idinfo_def;
                kh_val(d, k).info[info&0xf] = info;
                kh_val(d, k).id = kh_size(d) - 1;
            } else {
                /* Key already known: only update this line type's slot. */
                kh_val(d, k).info[info&0xf] = info;
                free(s);
            }
        }
    } else bcf_hdr_parse_sample_line(h, str);

    return 0;
}
/*
 * Computes an occurrence threshold over all entries of the index.
 *
 * Every live entry of every bucket hash contributes one number: 1 when the
 * lowest bit of its key is set, otherwise its stored value truncated to
 * 32 bits. The function returns the element of rank (1 - f) * n among those
 * numbers (as selected by ks_ksmall_uint32_t), plus one.
 *
 * Returns UINT32_MAX when f <= 0, when the index holds no entries (there is
 * nothing to rank), or when the scratch array cannot be allocated.
 */
uint32_t mm_idx_cal_max_occ(const mm_idx_t *mi, float f)
{
    int i;
    size_t n = 0;
    uint32_t thres;
    uint32_t *a;
    khint_t k;

    if (f <= 0.) return UINT32_MAX;

    /* First pass: count entries so the scratch array can be sized. */
    for (i = 0; i < 1<<mi->b; ++i)
        if (mi->B[i].h) n += kh_size((idxhash_t*)mi->B[i].h);

    /* Selecting from an empty array would read out of bounds. */
    if (n == 0) return UINT32_MAX;

    a = (uint32_t*)malloc(n * sizeof *a);
    if (a == NULL) return UINT32_MAX;

    /* Second pass: collect one occurrence count per entry. */
    n = 0;
    for (i = 0; i < 1<<mi->b; ++i) {
        idxhash_t *h = (idxhash_t*)mi->B[i].h;
        if (h == 0) continue;
        for (k = 0; k < kh_end(h); ++k) {
            if (!kh_exist(h, k)) continue;
            a[n++] = kh_key(h, k)&1? 1 : (uint32_t)kh_val(h, k);
        }
    }

    thres = ks_ksmall_uint32_t(n, a, (uint32_t)((1. - f) * n)) + 1;
    free(a);
    return thres;
}
/*
 * Removes one observation of `val` from the statistics.
 *
 * Values in [0, MAX_STAT_VAL) are tracked in st->freqs; any other value is
 * looked up in the optional hash st->h, and its entry is deleted once the
 * count reaches zero. If the value cannot be found, a message is printed to
 * stderr and the sample count is left unchanged.
 */
void cram_stats_del(cram_stats *st, int32_t val)
{
    khint_t slot;

    st->nsamp--;

    /* Small non-negative values live in the direct-indexed array. */
    if (val >= 0 && val < MAX_STAT_VAL) {
        st->freqs[val]--;
        assert(st->freqs[val] >= 0);
        return;
    }

    if (st->h) {
        slot = kh_get(m_i2i, st->h, val);
        if (slot != kh_end(st->h)) {
            kh_val(st->h, slot) -= 1;
            if (kh_val(st->h, slot) == 0)
                kh_del(m_i2i, st->h, slot);
            return;
        }
    }

    /* No hash, or value not present: report and undo the decrement. */
    fprintf(stderr, "Failed to remove val %d from cram_stats\n", val);
    st->nsamp++;
}
/*
 * Appends `record` to the array stored under `key`, creating the entry on
 * first use. The whole operation runs with hash->mutex held.
 *
 * When the key is new, the table keeps its own strdup()'d copy of `key`, so
 * the caller's string need not outlive the call.
 */
void hash_insert(hash_t *hash, char *key, void *record)
{
    int absent;
    khint_t k;

    pthread_mutex_lock(&hash->mutex);

    k = kh_put(str, hash->table, key, &absent);
    if (absent) {
        /* Replace the borrowed key pointer with an owned copy. */
        kh_key(hash->table, k) = strdup(key);
        kh_value(hash->table, k) = array_new(1);
    }

    /* kh_put() already returned the slot for this key, whether it was new
     * or existing, so no second lookup is needed. */
    array_append(kh_val(hash->table, k), record);

    pthread_mutex_unlock(&hash->mutex);
}
int main(int argc, char *argv[]) { char **mem = 0; int i, l, n = 1000000, ret, block_end = 0, curr = 0, c = 0; khash_t(str) *h; h = kh_init(str); if (argc > 1) n = atoi(argv[1]); mem = malloc(sizeof(void*)); mem[0] = malloc(BLOCK_SIZE); // memory buffer to avoid memory fragmentation curr = block_end = 0; for (i = 1; i <= n; ++i) { char buf[16]; int2str(i, 16, buf); khint_t k = kh_put(str, h, buf, &ret); l = strlen(buf) + 1; if (block_end + l > BLOCK_SIZE) { ++curr; block_end = 0; mem = realloc(mem, (curr + 1) * sizeof(void*)); mem[curr] = malloc(BLOCK_SIZE); } memcpy(mem[curr] + block_end, buf, l); kh_key(h, k) = mem[curr] + block_end; block_end += l; kh_val(h, k) = i; } for (i = 1; i <= n; ++i) { char buf[16]; int2str(i, 10, buf); khint_t k = kh_get(str, h, buf); if (k != kh_end(h)) ++c; } printf("%d\n", c); for (ret = 0; ret <= curr; ++ret) free(mem[ret]); free(mem); kh_destroy(str, h); return 0; }
/*
 * Finds (or creates) the render group that `buff` belongs to and returns the
 * group's accumulated vertex buffer.
 *
 * Groups are nested three levels deep:
 *   1. blend group   - keyed by the blend function value, in g_bgs;
 *   2. texture group - keyed by the atlas name of `tex`, else of `font`,
 *                      else NO_ATLAS;
 *   3. shader group  - keyed by a string made of the first vertex's z scaled
 *                      by 10000 ("%05d") followed by the program name.
 * Each level's `num` counter is incremented on every call. A shader group
 * without a buffer gets a new one with the same vertex format as `buff`.
 */
vertex_buffer_t* get_group_buffer(vertex_buffer_t* buff, const texture_t* tex, const font_t* font, const shader_t* program, blend_func blend)
{
    tex_group_t* tg;
    blend_group_t* bg;
    unsigned int bkey;
    khiter_t i,j;
    khash_t(hmsp)* tgs;
    const char* atlas_name = NO_ATLAS;
    char buffer [128];
    vec4* v;
    int z;
    shader_group_t* sg;
    khash_t(hmsp)* sgs;

    if (tex)
        atlas_name = sen_texture_atlas(tex);
    else if (font)
        atlas_name = sen_font_atlas(font);

    /* Level 1: blend group. */
    bkey = (unsigned int)blend;
    i = kh_get(hmip, g_bgs, bkey);
    if (i != kh_end(g_bgs))
        bg = kh_val(g_bgs, i);
    else {
        bg = blend_group_new(bkey);
        kh_insert(hmip, g_bgs, bkey, bg);
    }
    bg->num++;

    /* Level 2: texture/font atlas group. */
    tgs = bg->tgs;
    j = kh_get(hmsp, tgs, atlas_name);
    if (j != kh_end(tgs))
        tg = kh_val(tgs, j);
    else {
        tg = tex_group_new(tex,font);
        kh_insert(hmsp, tgs, atlas_name, tg);
    }
    tg->num++;

    /* Level 3: shader group, keyed by depth + program name. snprintf keeps
     * the key inside `buffer` even for a long program name. */
    v = (vec4*) buff->vertices->items;
    z = (int) (v->z * 10000);
    snprintf (buffer, sizeof buffer, "%05d%s",z,program->name);

    sgs = tg->sgs;
    i = kh_get(hmsp, sgs, buffer);
    if (i != kh_end(sgs))
        sg = kh_val(sgs, i);
    else {
        sg = shader_group_new(program, buff,buffer,z,tg,bg);
        kh_insert(hmsp, sgs, sg->name, sg);
    }

    if (sg->buff == NULL) {
        sg->buff = vertex_buffer_new(vertex_buffer_format(buff));
    }
    sg->num++;
    return sg->buff;
}
// returns: 1 when hdr needs to be synced, 0 otherwise int bcf_hdr_register_hrec(bcf_hdr_t *hdr, bcf_hrec_t *hrec) { // contig int i,j,k, ret; char *str; if ( !strcmp(hrec->key, "contig") ) { hrec->type = BCF_HL_CTG; // Get the contig ID ($str) and length ($j) i = bcf_hrec_find_key(hrec,"length"); if ( i<0 ) return 0; if ( sscanf(hrec->vals[i],"%d",&j)!=1 ) return 0; i = bcf_hrec_find_key(hrec,"ID"); if ( i<0 ) return 0; str = strdup(hrec->vals[i]); // Register in the dictionary vdict_t *d = (vdict_t*)hdr->dict[BCF_DT_CTG]; k = kh_put(vdict, d, str, &ret); if ( !ret ) { free(str); return 0; } // already present kh_val(d, k) = bcf_idinfo_def; kh_val(d, k).id = kh_size(d) - 1; kh_val(d, k).info[0] = i; kh_val(d, k).hrec[0] = hrec; return 1; } if ( !strcmp(hrec->key, "INFO") ) hrec->type = BCF_HL_INFO; else if ( !strcmp(hrec->key, "FILTER") ) hrec->type = BCF_HL_FLT; else if ( !strcmp(hrec->key, "FORMAT") ) hrec->type = BCF_HL_FMT; else return 0; // INFO/FILTER/FORMAT char *id = NULL; int type = -1, num = -1, var = -1; for (i=0; i<hrec->nkeys; i++) { if ( !strcmp(hrec->keys[i], "ID") ) id = hrec->vals[i]; else if ( !strcmp(hrec->keys[i], "Type") ) { if ( !strcmp(hrec->vals[i], "Integer") ) type = BCF_HT_INT; else if ( !strcmp(hrec->vals[i], "Float") ) type = BCF_HT_REAL; else if ( !strcmp(hrec->vals[i], "String") ) type = BCF_HT_STR; else if ( !strcmp(hrec->vals[i], "Flag") ) type = BCF_HT_FLAG; } else if ( !strcmp(hrec->keys[i], "Number") ) { if ( !strcmp(hrec->vals[i],"A") ) var = BCF_VL_A; else if ( !strcmp(hrec->vals[i],"G") ) var = BCF_VL_G; else if ( !strcmp(hrec->vals[i],".") ) var = BCF_VL_VAR; else { sscanf(hrec->vals[i],"%d",&num); var = BCF_VL_FIXED; } if (var != BCF_VL_FIXED) num = 0xfffff; } } uint32_t info = (uint32_t)num<<12 | var<<8 | type<<4 | hrec->type; if ( !id ) return 0; str = strdup(id); vdict_t *d = (vdict_t*)hdr->dict[BCF_DT_ID]; k = kh_put(vdict, d, str, &ret); if ( !ret ) { // already present free(str); kh_val(d, k).info[info&0xf] = info; 
kh_val(d, k).hrec[info&0xf] = hrec; return 1; } kh_val(d, k) = bcf_idinfo_def; kh_val(d, k).info[info&0xf] = info; kh_val(d, k).hrec[info&0xf] = hrec; kh_val(d, k).id = kh_size(d) - 1; return 1; }
int split_pe(int argc, char *argv[], char *progname) { int c, bc_len = -1, ret, i, j, bc_idx, only_count = 0; unsigned num_mismatches = DEFAULT_NUM_MISMATCHES, num_spacer_bases = DEFAULT_NUM_SPACER_BASES, dna_alpha_len = strlen(DNA_ALPHA), num_undetermined = 0; char *out_prefix = NULL, *fn, **sptr, bc_id[1024], bc_seq[1024], bc_seq_cpy[1024]; // hello, buffer overflow clock_t t = clock(); BcRec bc; ArrayBcRec bcs; FILE *fp; kseq_t *seq1, *seq2; khash_t(str) *h = kh_init(str); khint_t k, k2; gzFile *fp1, *fp2; ARRAY_INIT(&bcs, BcRec, 1000); while ((c = getopt(argc, argv, "m:s:o:c")) >= 0) { switch (c) { case 'm': if (sscanf(optarg, "%u", &num_mismatches) != 1) { fprintf(stderr, "Error: option -m expects unsigned int\n"); return -1; } break; case 's': if (sscanf(optarg, "%u", &num_spacer_bases) != 1) { fprintf(stderr, "Error: option -s expects unsigned int\n"); return -1; } break; case 'o': out_prefix = strdup(optarg); break; case 'c': only_count = 1; break; } } if (optind + 3 != argc) { print_pe_usage(progname); return -1; } if (num_mismatches != 0 && num_mismatches != 1) { fprintf(stderr, "Error: argument -m has to be 0 or 1\n"); return -1; } if (out_prefix == NULL) { out_prefix = strdup(DEFAULT_OUTPUT_PREFIX); } for (sptr = argv+optind; sptr-argv<argc; sptr++) { if (access(*sptr, F_OK) == -1) { fprintf(stderr, "Error: file %s does not exist\n", *sptr); return -1; } } fprintf(stderr, "[barcode file: %s]\n", argv[optind]); fprintf(stderr, "[fastq file1: %s]\n", argv[optind+1]); fprintf(stderr, "[fastq file2: %s]\n", argv[optind+2]); fprintf(stderr, "[number of mismatches allowed: %u]\n", num_mismatches); fprintf(stderr, "[number of spacer bases: %u]\n", num_spacer_bases); fprintf(stderr, "[output prefix: %s]\n", out_prefix); fprintf(stderr, "[only count: %s]\n", only_count ? 
"true" : "false"); /* read barcode file */ if ((fp = fopen(argv[optind], "r")) == NULL) { fprintf(stderr, "Error: cannot open barcode file %s\n", argv[optind]); return -1; } while (fscanf(fp, "%s %s", bc_id, bc_seq) == 2) { bc_len = strlen(bc_seq); bc.id = strdup(bc_id); bc.seq = strdup(bc_seq); bc.num_found = 0; if (!only_count) { fn = (char*)calloc(strlen(out_prefix) + 3 + strlen(bc_id) + 6 + 1, sizeof(char)); strcpy(fn, out_prefix); strcat(fn, "_1_"); strcat(fn, bc_id); strcat(fn, ".fq.gz"); bc.fp1 = gzopen(fn, "w"); fn[strlen(out_prefix)+1] = '2'; bc.fp2 = gzopen(fn, "w"); free(fn); } else { bc.fp1 = NULL; bc.fp2 = NULL; } ARRAY_PUSH(&bcs, BcRec, bc); k = kh_put(str, h, strdup(bc_seq), &ret); if (num_mismatches == 0) { kh_val(h, k) = bcs.nextfree - 1; //printf("setting %s to %lu (%s %s)\n", bc_seq, bcs.nextfree - 1, bcs.elems[bcs.nextfree - 1].seq, bcs.elems[bcs.nextfree - 1].id); } else { for (i=0; i<strlen(bc_seq); i++) { strcpy(bc_seq_cpy, bc_seq); for (j=0; j<dna_alpha_len; j++) { bc_seq_cpy[i] = DNA_ALPHA[j]; k = kh_put(str, h, strdup(bc_seq_cpy), &ret); kh_val(h, k) = bcs.nextfree - 1; //printf("setting %s to %lu (%s %s)\n", bc_seq_cpy, bcs.nextfree - 1, bcs.elems[bcs.nextfree - 1].seq, bcs.elems[bcs.nextfree - 1].id); } } } } fclose(fp); if (bc_len == -1) { fprintf(stderr, "Error: could not find any barcodes in file %s\n", argv[optind]); return -1; } fp1 = gzopen(argv[optind+1], "r"); seq1 = kseq_init(fp1); fp2 = gzopen(argv[optind+2], "r"); seq2 = kseq_init(fp2); while (kseq_read(seq1) >= 0) { strncpy(bc_seq, seq1->seq.s, bc_len); k = kh_get(str, h, bc_seq); kseq_read(seq2); strncpy(bc_seq, seq2->seq.s, bc_len); k2 = kh_get(str, h, bc_seq); if (k != kh_end(h) || k2 != kh_end(h)) { bc_idx = k2 != kh_end(h) ? 
kh_val(h, k2) : kh_val(h, k); if (!only_count) { gzprintf(bcs.elems[bc_idx].fp1, "@%s %s\n%s\n+\n%s\n" , seq1->name.s , seq1->comment.s , seq1->seq.s+bc_len+num_spacer_bases , seq1->qual.s+bc_len+num_spacer_bases); gzprintf(bcs.elems[bc_idx].fp2, "@%s %s\n%s\n+\n%s\n" , seq2->name.s , seq2->comment.s , seq2->seq.s+bc_len+num_spacer_bases , seq2->qual.s+bc_len+num_spacer_bases); } bcs.elems[bc_idx].num_found += 2; } else { num_undetermined += 2; } } gzclose(fp1); gzclose(fp2); kseq_destroy(seq1); kseq_destroy(seq2); for (i=0; i<bcs.nextfree; i++) { printf("%s\t%s\t%u\n", bcs.elems[i].id, bcs.elems[i].seq, bcs.elems[i].num_found); if (!only_count) { gzclose(bcs.elems[i].fp1); gzclose(bcs.elems[i].fp2); } } printf("UNDETERMINED\tNONE\t%u\n", num_undetermined); ARRAY_FREE(&bcs); kh_destroy(str, h); fprintf(stderr, "[CPU time: %.2f sec]\n", (float)(clock() - t) / CLOCKS_PER_SEC); return 0; }
/*
 * Looks up `str` in the VM's symbol table.
 * Returns the stored value, or NULL when the string is not present.
 */
pointer symbol_find(VM, char *str)
{
    khiter_t slot = kh_get(STR, vm->symbol_table, str);

    if (slot == kh_end(vm->symbol_table))
        return NULL;

    return kh_val(vm->symbol_table, slot);
}
/*
 * Mark phase of the collector: marks everything reachable from the roots.
 *
 * Roots are the chain of contexts (fp, sp and irep of each), the arena, and
 * the globals, dynamic environment, top continuation and features values.
 *
 * Weak tables are then scanned repeatedly: a value is marked only when its
 * key is alive and the value is an object that is not alive yet. Marking a
 * value may make further keys alive, so the scan restarts until a full pass
 * marks nothing.
 */
static void gc_mark_phase(pic_state *pic)
{
    struct context *cxt;
    size_t idx, revived;

    assert(pic->heap->weaks == NULL);

    /* context */
    for (cxt = pic->cxt; cxt != NULL; cxt = cxt->prev) {
        if (cxt->fp)
            gc_mark_object(pic, (struct object *)cxt->fp);
        if (cxt->sp)
            gc_mark_object(pic, (struct object *)cxt->sp);
        if (cxt->irep)
            gc_mark_object(pic, (struct object *)cxt->irep);
    }

    /* arena */
    for (idx = 0; idx < pic->ai; ++idx)
        gc_mark_object(pic, (struct object *)pic->arena[idx]);

    /* global variables */
    gc_mark(pic, pic->globals);

    /* dynamic environment */
    gc_mark(pic, pic->dyn_env);

    /* top continuation */
    gc_mark(pic, pic->halt);

    /* features */
    gc_mark(pic, pic->features);

    /* weak maps: iterate to a fixed point */
    do {
        struct weak *w;

        revived = 0;
        for (w = pic->heap->weaks; w != NULL; w = w->prev) {
            khash_t(weak) *tbl = &w->hash;
            int slot;

            for (slot = kh_begin(tbl); slot != kh_end(tbl); ++slot) {
                struct object *wkey;
                pic_value wval;

                if (! kh_exist(tbl, slot))
                    continue;

                wkey = kh_key(tbl, slot);
                wval = kh_val(tbl, slot);
                if (is_alive(wkey) && obj_p(pic, wval) && ! is_alive(obj_ptr(pic, wval))) {
                    gc_mark(pic, wval);
                    ++revived;
                }
            }
        }
    } while (revived > 0);
}
int stk_mutfa(int argc, char *argv[]) { khash_t(reg) *h = kh_init(reg); gzFile fp; kseq_t *seq; kstream_t *ks; int l, i, dret; kstring_t *str; khint_t k; if (argc < 3) { fprintf(stderr, "Usage: seqtk mutfa <in.fa> <in.snp>\n\n"); fprintf(stderr, "Note: <in.snp> contains at least four columns per line which are:\n"); fprintf(stderr, " 'chr 1-based-pos any base-changed-to'.\n"); return 1; } // read the list str = calloc(1, sizeof(kstring_t)); fp = strcmp(argv[2], "-")? gzopen(argv[2], "r") : gzdopen(fileno(stdin), "r"); ks = ks_init(fp); while (ks_getuntil(ks, 0, str, &dret) >= 0) { char *s = strdup(str->s); int beg = 0, ret; reglist_t *p; k = kh_get(reg, h, s); if (k == kh_end(h)) { k = kh_put(reg, h, s, &ret); memset(&kh_val(h, k), 0, sizeof(reglist_t)); } p = &kh_val(h, k); if (ks_getuntil(ks, 0, str, &dret) > 0) beg = atol(str->s) - 1; // 2nd col ks_getuntil(ks, 0, str, &dret); // 3rd col ks_getuntil(ks, 0, str, &dret); // 4th col // skip the rest of the line if (dret != '\n') while ((dret = ks_getc(ks)) > 0 && dret != '\n'); if (isalpha(str->s[0]) && str->l == 1) { if (p->n == p->m) { p->m = p->m? p->m<<1 : 4; p->a = realloc(p->a, p->m * 8); } p->a[p->n++] = (uint64_t)beg<<32 | str->s[0]; } } ks_destroy(ks); gzclose(fp); free(str->s); free(str); // mutfa fp = strcmp(argv[1], "-")? gzopen(argv[1], "r") : gzdopen(fileno(stdin), "r"); seq = kseq_init(fp); while ((l = kseq_read(seq)) >= 0) { reglist_t *p; k = kh_get(reg, h, seq->name.s); if (k != kh_end(h)) { p = &kh_val(h, k); for (i = 0; i < p->n; ++i) { int beg = p->a[i]>>32; if (beg < seq->seq.l) seq->seq.s[beg] = (int)p->a[i]; } } printf(">%s", seq->name.s); for (i = 0; i < l; ++i) { if (i%60 == 0) putchar('\n'); putchar(seq->seq.s[i]); } putchar('\n'); }
/* Cache callback: stores `val` as the value of the srcsessmap entry at iterator `it`. */
static void cachessess_set_val_cb(cache_iter_t it, cache_val_t val) { kh_val(srcsessmap, it) = val; }
/* composition */
/*
 * seqtk "comp" command: prints composition counters for each sequence, or
 * for each region of a sequence when a region file is given.
 *
 * Options: -u sets upper_only (only bases for which isupper() is true are
 * counted); -r FILE loads regions through stk_reg_read().
 *
 * Regions are 64-bit values with the start in the upper 32 bits and the end
 * in the lower 32 bits. Without -r a single dummy region is used whose value
 * is set to the sequence length, i.e. start 0 and end l.
 *
 * One line is printed per region with the 11 counters in cnt[]; the usage
 * text names the output columns.
 *
 * Returns 1 after printing usage when no input file is given, 0 otherwise.
 *
 * NOTE(review): region bounds are not checked against the sequence length
 * here -- confirm that the region file is trusted or validated elsewhere.
 */
int stk_comp(int argc, char *argv[])
{
	gzFile fp;
	kseq_t *seq;
	int l, c, upper_only = 0;
	reghash_t *h = 0;
	reglist_t dummy;

	while ((c = getopt(argc, argv, "ur:")) >= 0) {
		switch (c) {
			case 'u': upper_only = 1; break;
			case 'r': h = stk_reg_read(optarg); break;
		}
	}
	if (argc == optind) {
		fprintf(stderr, "Usage: seqtk comp [-u] [-r in.bed] <in.fa>\n\n");
		fprintf(stderr, "Output format: chr, length, #A, #C, #G, #T, #2, #3, #4, #CpG, #tv, #ts, #CpG-ts\n");
		return 1;
	}
	/* "-" selects standard input. */
	fp = (strcmp(argv[optind], "-") == 0)? gzdopen(fileno(stdin), "r") : gzopen(argv[optind], "r");
	seq = kseq_init(fp);
	/* One-element region list used when no region file was given. */
	dummy.n= dummy.m = 1; dummy.a = calloc(1, 8);
	while ((l = kseq_read(seq)) >= 0) {
		int i, k;
		reglist_t *p = 0;
		if (h) {
			/* With a region file, sequences without regions are skipped (p stays 0). */
			khint_t k = kh_get(reg, h, seq->name.s);
			if (k != kh_end(h)) p = &kh_val(h, k);
		} else {
			p = &dummy;
			dummy.a[0] = l; /* upper 32 bits zero: region [0, l) */
		}
		for (k = 0; p && k < p->n; ++k) {
			int beg = p->a[k]>>32, end = p->a[k]&0xffffffff;
			/* l* = previous base, n* = next base; for each: the character,
			 * its seq_nt16_table code, and bitcnt_table of that code. */
			int la, lb, lc, na, nb, nc, cnt[11];
			if (beg > 0) la = seq->seq.s[beg-1], lb = seq_nt16_table[la], lc = bitcnt_table[lb];
			else la = 'a', lb = -1, lc = 0;
			na = seq->seq.s[beg]; nb = seq_nt16_table[na]; nc = bitcnt_table[nb];
			memset(cnt, 0, 11 * sizeof(int));
			for (i = beg; i < end; ++i) {
				int is_CpG = 0, a, b, c;
				/* Slide the window: current <- next, next <- base at i+1. */
				a = na; b = nb; c = nc;
				na = seq->seq.s[i+1]; nb = seq_nt16_table[na]; nc = bitcnt_table[nb];
				if (b == 2 || b == 10) { // C or Y
					if (nb == 4 || nb == 5) is_CpG = 1;
				} else if (b == 4 || b == 5) { // G or R
					if (lb == 2 || lb == 10) is_CpG = 1;
				}
				if (upper_only == 0 || isupper(a)) {
					if (c > 1) ++cnt[c+2];                     /* codes with 2, 3 or 4 bits set -> cnt[4..6] */
					if (c == 1) ++cnt[seq_nt16to4_table[b]];   /* single-bit codes */
					if (b == 10 || b == 5) ++cnt[9];
					else if (c == 2) {
						++cnt[8];
					}
					if (is_CpG) {
						++cnt[7];
						if (b == 10 || b == 5) ++cnt[10];
					}
				}
				la = a; lb = b; lc = c;
			}
			if (h) printf("%s\t%d\t%d", seq->name.s, beg, end);
			else printf("%s\t%d", seq->name.s, l);
			for (i = 0; i < 11; ++i) printf("\t%d", cnt[i]);
			putchar('\n');
		}
		fflush(stdout);
	}
	free(dummy.a);
	kseq_destroy(seq);
	gzclose(fp);
	return 0;
}
/*
 * Writes all output headers, then copies every record of the merged input
 * to the output file selected by the record's RG tag.
 *
 * Records whose RG tag is missing or not present in state->rg_hash go to the
 * unaccounted file; if there is no unaccounted file this is an error.
 *
 * Returns true when the whole input was processed, false on the first error
 * (header or record write failure, read error, out of memory, or a read
 * that cannot be assigned to any output). A message is printed to pysamerr
 * in every failure case.
 */
static bool split(state_t* state)
{
    if (state->unaccounted_file && sam_hdr_write(state->unaccounted_file, state->unaccounted_header) != 0) {
        fprintf(pysamerr, "Could not write output file header\n");
        return false;
    }
    size_t i;
    for (i = 0; i < state->output_count; i++) {
        if (sam_hdr_write(state->rg_output_file[i], state->rg_output_header[i]) != 0) {
            fprintf(pysamerr, "Could not write output file header\n");
            return false;
        }
    }

    bam1_t* file_read = bam_init1();
    if (file_read == NULL) {
        fprintf(pysamerr, "Out of memory\n");
        return false;
    }

    // sam_read1() returns >= 0 for a record, -1 at end of stream and
    // < -1 on error; the two terminating cases are told apart after the loop.
    int r;
    while ((r = sam_read1(state->merged_input_file, state->merged_input_header, file_read)) >= 0) {
        // Get RG tag from read and look it up in hash to find file to output it to
        uint8_t* tag = bam_aux_get(file_read, "RG");
        khiter_t iter;
        if ( tag != NULL ) {
            char* rg = bam_aux2Z(tag);
            iter = kh_get_c2i(state->rg_hash, rg);
        } else {
            iter = kh_end(state->rg_hash);
        }

        if (iter != kh_end(state->rg_hash)) {
            // Known read group: write to its own output file
            int rg_index = kh_val(state->rg_hash,iter);
            if (sam_write1(state->rg_output_file[rg_index], state->rg_output_header[rg_index], file_read) < 0) {
                fprintf(pysamerr, "Could not write to output file\n");
                bam_destroy1(file_read);
                return false;
            }
        } else if (state->unaccounted_file == NULL) {
            // No place to put this read: fail
            if (tag) {
                fprintf(pysamerr, "Read \"%s\" with unaccounted for tag \"%s\".\n", bam_get_qname(file_read), bam_aux2Z(tag));
            } else {
                fprintf(pysamerr, "Read \"%s\" has no RG tag.\n", bam_get_qname(file_read));
            }
            bam_destroy1(file_read);
            return false;
        } else {
            if (sam_write1(state->unaccounted_file, state->unaccounted_header, file_read) < 0) {
                fprintf(pysamerr, "Could not write to unaccounted output file\n");
                bam_destroy1(file_read);
                return false;
            }
        }
    }

    bam_destroy1(file_read);

    if (r < -1) {
        fprintf(pysamerr, "Could not read from input file\n");
        return false;
    }
    return true;
}
// Set the initial state static state_t* init(parsed_opts_t* opts) { state_t* retval = calloc(sizeof(state_t), 1); if (!retval) { fprintf(pysamerr, "Out of memory"); return NULL; } retval->merged_input_file = sam_open(opts->merged_input_name, "rb"); if (!retval->merged_input_file) { fprintf(pysamerr, "Could not open input file (%s)\n", opts->merged_input_name); free(retval); return NULL; } retval->merged_input_header = sam_hdr_read(retval->merged_input_file); if (opts->unaccounted_name) { if (opts->unaccounted_header_name) { samFile* hdr_load = sam_open(opts->unaccounted_header_name, "r"); if (!hdr_load) { fprintf(pysamerr, "Could not open unaccounted header file (%s)\n", opts->unaccounted_header_name); cleanup_state(retval); return NULL; } retval->unaccounted_header = sam_hdr_read(hdr_load); sam_close(hdr_load); } else { retval->unaccounted_header = bam_hdr_dup(retval->merged_input_header); } retval->unaccounted_file = sam_open(opts->unaccounted_name, "wb"); if (retval->unaccounted_file == NULL) { fprintf(pysamerr, "Could not open unaccounted output file: %s\n", opts->unaccounted_name); cleanup_state(retval); return NULL; } } // Open output files for RGs if (!count_RG(retval->merged_input_header, &retval->output_count, &retval->rg_id)) return NULL; if (opts->verbose) fprintf(pysamerr, "@RG's found %zu\n",retval->output_count); retval->rg_output_file = (samFile**)calloc(retval->output_count, sizeof(samFile*)); retval->rg_output_header = (bam_hdr_t**)calloc(retval->output_count, sizeof(bam_hdr_t*)); retval->rg_hash = kh_init_c2i(); if (!retval->rg_output_file || !retval->rg_output_header) { fprintf(pysamerr, "Could not allocate memory for output file array. Out of memory?"); cleanup_state(retval); return NULL; } char* dirsep = strrchr(opts->merged_input_name, '/'); char* input_base_name = strdup(dirsep? 
dirsep+1 : opts->merged_input_name); if (!input_base_name) { fprintf(pysamerr, "Out of memory\n"); cleanup_state(retval); return NULL; } char* extension = strrchr(input_base_name, '.'); if (extension) *extension = '\0'; size_t i; for (i = 0; i < retval->output_count; i++) { char* output_filename = NULL; if ( ( output_filename = expand_format_string(opts->output_format_string, input_base_name, retval->rg_id[i], i) ) == NULL) { fprintf(pysamerr, "Error expanding output filename format string.\r\n"); cleanup_state(retval); free(input_base_name); return NULL; } retval->rg_output_file[i] = sam_open(output_filename, "wb"); if (retval->rg_output_file[i] == NULL) { fprintf(pysamerr, "Could not open output file: %s\r\n", output_filename); cleanup_state(retval); free(input_base_name); return NULL; } // Record index in hash int ret; khiter_t iter = kh_put_c2i(retval->rg_hash, retval->rg_id[i], &ret); kh_val(retval->rg_hash,iter) = i; // Set and edit header retval->rg_output_header[i] = bam_hdr_dup(retval->merged_input_header); if ( !filter_header_rg(retval->rg_output_header[i], retval->rg_id[i]) ) { fprintf(pysamerr, "Could not rewrite header for file: %s\r\n", output_filename); cleanup_state(retval); free(output_filename); free(input_base_name); return NULL; } free(output_filename); } free(input_base_name); return retval; }
int stk_maskseq(int argc, char *argv[]) { khash_t(reg) *h = kh_init(reg); gzFile fp; kseq_t *seq; int l, i, j, c, is_complement = 0, is_lower = 0; khint_t k; while ((c = getopt(argc, argv, "cl")) >= 0) { switch (c) { case 'c': is_complement = 1; break; case 'l': is_lower = 1; break; } } if (argc - optind < 2) { fprintf(pysamerr, "Usage: seqtk maskseq [-cl] <in.fa> <in.bed>\n\n"); fprintf(pysamerr, "Options: -c mask the complement regions\n"); fprintf(pysamerr, " -l soft mask (to lower cases)\n"); return 1; } h = stk_reg_read(argv[optind+1]); // maskseq fp = strcmp(argv[optind], "-")? gzopen(argv[optind], "r") : gzdopen(fileno(stdin), "r"); seq = kseq_init(fp); while ((l = kseq_read(seq)) >= 0) { k = kh_get(reg, h, seq->name.s); if (k == kh_end(h)) { // not found in the hash table if (is_complement) { for (j = 0; j < l; ++j) seq->seq.s[j] = is_lower? tolower(seq->seq.s[j]) : 'N'; } } else { reglist_t *p = &kh_val(h, k); if (!is_complement) { for (i = 0; i < p->n; ++i) { int beg = p->a[i]>>32, end = p->a[i]; if (beg >= seq->seq.l) { fprintf(pysamerr, "[maskseq] start position >= the sequence length.\n"); continue; } if (end >= seq->seq.l) end = seq->seq.l; if (is_lower) for (j = beg; j < end; ++j) seq->seq.s[j] = tolower(seq->seq.s[j]); else for (j = beg; j < end; ++j) seq->seq.s[j] = 'N'; } } else { int8_t *mask = calloc(seq->seq.l, 1); for (i = 0; i < p->n; ++i) { int beg = p->a[i]>>32, end = p->a[i]; if (end >= seq->seq.l) end = seq->seq.l; for (j = beg; j < end; ++j) mask[j] = 1; } for (j = 0; j < l; ++j) if (mask[j] == 0) seq->seq.s[j] = is_lower? tolower(seq->seq.s[j]) : 'N'; free(mask); } } printf(">%s", seq->name.s); for (j = 0; j < seq->seq.l; ++j) { if (j%60 == 0) putchar('\n'); putchar(seq->seq.s[j]); } putchar('\n'); }
/*
 * Returns the length recorded in the index for the sequence named `seq`,
 * or -1 when the name is not present in the index.
 */
int faidx_seq_len(const faidx_t *fai, const char *seq)
{
    int len = -1;
    khint_t slot = kh_get(s, fai->hash, seq);

    if (slot != kh_end(fai->hash))
        len = kh_val(fai->hash, slot).len;

    return len;
}
int stk_subseq(int argc, char *argv[]) { khash_t(reg) *h = kh_init(reg); gzFile fp; kseq_t *seq; int l, i, j, c, is_tab = 0, line = 1024; khint_t k; while ((c = getopt(argc, argv, "tl:")) >= 0) { switch (c) { case 't': is_tab = 1; break; case 'l': line = atoi(optarg); break; } } if (optind + 2 > argc) { fprintf(stderr, "\n"); fprintf(stderr, "Usage: seqtk subseq [options] <in.fa> <in.bed>|<name.list>\n\n"); fprintf(stderr, "Options: -t TAB delimited output\n"); fprintf(stderr, " -l INT sequence line length [%d]\n\n", line); fprintf(stderr, "Note: Use 'samtools faidx' if only a few regions are intended.\n\n"); return 1; } h = stk_reg_read(argv[optind+1]); // subseq fp = strcmp(argv[optind], "-")? gzopen(argv[optind], "r") : gzdopen(fileno(stdin), "r"); seq = kseq_init(fp); while ((l = kseq_read(seq)) >= 0) { reglist_t *p; k = kh_get(reg, h, seq->name.s); if (k == kh_end(h)) continue; p = &kh_val(h, k); for (i = 0; i < p->n; ++i) { int beg = p->a[i]>>32, end = p->a[i]; if (beg >= seq->seq.l) { fprintf(stderr, "[subseq] %s: %d >= %ld\n", seq->name.s, beg, seq->seq.l); continue; } if (end > seq->seq.l) end = seq->seq.l; if (is_tab == 0) { printf("%c%s", seq->qual.l == seq->seq.l? '@' : '>', seq->name.s); if (beg > 0 || (int)p->a[i] != INT_MAX) { if (end == INT_MAX) { if (beg) printf(":%d", beg+1); } else printf(":%d-%d", beg+1, end); } } else printf("%s\t%d\t", seq->name.s, beg + 1); if (end > seq->seq.l) end = seq->seq.l; for (j = 0; j < end - beg; ++j) { if (is_tab == 0 && j % line == 0) putchar('\n'); putchar(seq->seq.s[j + beg]); } putchar('\n'); if (seq->qual.l != seq->seq.l || is_tab) continue; printf("+"); for (j = 0; j < end - beg; ++j) { if (j % line == 0) putchar('\n'); putchar(seq->qual.s[j + beg]); } putchar('\n'); } } // free kseq_destroy(seq); gzclose(fp); stk_reg_destroy(h); return 0; }
size_t sen_render_flush(int clear_buff) { // gl_check_error(); //_logfi("1"); blend_group_t* bg; khint_t i,k,j; size_t total = 0; khash_t(hmsp)* tgs; camera_t* cam = sen_camera(); tex_group_t* tg; khash_t(hmsp)* sgs; shader_group_t* sg; vector_clear(zsorter); for (k = kh_begin(g_bgs); k != kh_end(g_bgs); ++k) { if (!kh_exist(g_bgs,k)) continue; bg = kh_val(g_bgs, k); if (bg->num == 0) { kh_del(hmip,g_bgs,k); continue; } tgs = bg->tgs; // set_blending( (blend_func) (kh_key(g_bgs, k)) ); for (i = kh_begin(tgs); i != kh_end(tgs); ++i) { if (!kh_exist(tgs,i)) continue; tg = kh_val(tgs, i); if (tg->num == 0) { kh_del(hmsp,tgs,i); continue; } /* if (tg->tex) sen_texture_bind(tg->tex); else if (tg->font) sen_font_bind(tg->font); */ sgs = tg->sgs; for (j = kh_begin(sgs); j != kh_end(sgs); ++j) { if (!kh_exist(sgs,j)) continue; sg = kh_val(sgs, j); if (sg->num == 0 || !sg->buff) { kh_del(hmsp,sgs,j); continue; } if (sg->buff) { /* sen_shader_use(sg->program); { if (tg->tex || tg->font) sen_uniform_1iN(sg->program, "u_tex0", 0); sen_uniform_m4fN(sg->program, "u_mvp", cam->view_proj.data); vertex_buffer_render( sg->buff, GL_TRIANGLES); total+=vertex_buffer_size(sg->buff); if (clear_buff) vertex_buffer_clear( sg->buff ); //sen_shader_use(NULL); }*/ vector_push_back( zsorter, &sg ); } sg->num = 0; } tg->num = 0; } bg->num = 0; } if (zsorter->size > 0) vector_sort(zsorter, zcmp); for (j = 0; j < zsorter->size; j++) { shader_group_t* sg = *(shader_group_t**)vector_get(zsorter, j); // _logfi("%s %d",sg->name, sg->z); set_blending( (blend_func) (sg->bg->key) ); if (sg->tg->tex) sen_texture_bind(sg->tg->tex); else if (sg->tg->font) sen_font_bind(sg->tg->font); sen_shader_use(sg->program); { if (sg->tg->tex || sg->tg->font) sen_uniform_1iN(sg->program, "u_tex0", 0); sen_uniform_m4fN(sg->program, "u_mvp", sg->z > 9500 ? 
cam->proj.data : cam->view_proj.data); vertex_buffer_render( sg->buff, GL_TRIANGLES); total+=vertex_buffer_size(sg->buff); if (clear_buff) vertex_buffer_clear( sg->buff ); //sen_shader_use(NULL); } } // _logfi("-------------------------------------------------"); return total; }
/* Cache callback: returns the value of the srcsessmap entry at iterator `it`. */
static cache_val_t cachessess_get_val_cb(cache_iter_t it) { return kh_val(srcsessmap, it); }
/* Cache callback: returns the value of the certmap entry at iterator `it`. */
static cache_val_t cachefkcrt_get_val_cb(cache_iter_t it) { return kh_val(certmap, it); }
/*
 * Stores `val` under `key` in `table`, overwriting the value of an
 * existing entry with the same key.
 */
void table_insert(VM, pointer table, pointer key, pointer val)
{
    int absent;
    khiter_t slot = kh_put(ARC, AR_TABLE(table), key, &absent);

    /* The slot is valid for both new and existing keys. */
    kh_val(AR_TABLE(table), slot) = val;
}
/* Cache callback: stores `val` as the value of the certmap entry at iterator `it`. */
static void cachefkcrt_set_val_cb(cache_iter_t it, cache_val_t val) { kh_val(certmap, it) = val; }
/*-------Table-------*/

/*
 * Looks up `key` in `table`.
 * Returns the stored value, or NULL when the key is not present.
 */
pointer table_find(VM, pointer table, pointer key)
{
    khiter_t slot = kh_get(ARC, AR_TABLE(table), key);

    if (slot == kh_end(AR_TABLE(table)))
        return NULL;

    return kh_val(AR_TABLE(table), slot);
}
/* * Computes entropy from integer frequencies for various encoding methods and * picks the best encoding. * * FIXME: we could reuse some of the code here for the actual encoding * parameters too. Eg the best 'k' for SUBEXP or the code lengths for huffman. * * Returns the best codec to use. */ enum cram_encoding cram_stats_encoding(cram_fd *fd, cram_stats *st) { enum cram_encoding best_encoding = E_NULL; int best_size = INT_MAX, bits; int nvals, i, ntot = 0, max_val = 0, min_val = INT_MAX, k; int *vals = NULL, *freqs = NULL, vals_alloc = 0, *codes; //cram_stats_dump(st); /* Count number of unique symbols */ for (nvals = i = 0; i < MAX_STAT_VAL; i++) { if (!st->freqs[i]) continue; if (nvals >= vals_alloc) { vals_alloc = vals_alloc ? vals_alloc*2 : 1024; vals = realloc(vals, vals_alloc * sizeof(int)); freqs = realloc(freqs, vals_alloc * sizeof(int)); if (!vals || !freqs) { if (vals) free(vals); if (freqs) free(freqs); return E_HUFFMAN; // Cannot do much else atm } } vals[nvals] = i; freqs[nvals] = st->freqs[i]; ntot += freqs[nvals]; if (max_val < i) max_val = i; if (min_val > i) min_val = i; nvals++; } if (st->h) { khint_t k; int i; for (k = kh_begin(st->h); k != kh_end(st->h); k++) { if (!kh_exist(st->h, k)) continue; if (nvals >= vals_alloc) { vals_alloc = vals_alloc ? 
vals_alloc*2 : 1024; vals = realloc(vals, vals_alloc * sizeof(int)); freqs = realloc(freqs, vals_alloc * sizeof(int)); if (!vals || !freqs) return E_HUFFMAN; // Cannot do much else atm } i = kh_key(st->h, k); vals[nvals]=i; freqs[nvals] = kh_val(st->h, k); ntot += freqs[nvals]; if (max_val < i) max_val = i; if (min_val > i) min_val = i; nvals++; } } st->nvals = nvals; assert(ntot == st->nsamp); if (nvals <= 1) { free(vals); free(freqs); return E_HUFFMAN; } if (fd->verbose > 1) fprintf(stderr, "Range = %d..%d, nvals=%d, ntot=%d\n", min_val, max_val, nvals, ntot); /* Theoretical entropy */ // if (fd->verbose > 1) { // double dbits = 0; // for (i = 0; i < nvals; i++) { // dbits += freqs[i] * log((double)freqs[i]/ntot); // } // dbits /= -log(2); // if (fd->verbose > 1) // fprintf(stderr, "Entropy = %f\n", dbits); // } if (nvals > 1 && ntot > 256) { #if 0 /* * CRUDE huffman estimator. Round to closest and round up from 0 * to 1 bit. * * With and without ITF8 incase we have a few discrete values but with * large magnitude. * * Note rans0/arith0 and Z_HUFFMAN_ONLY vs internal huffman can be * compared in this way, but order-1 (eg rans1) or maybe LZ77 modes * may detect the correlation of high bytes to low bytes in multi- * byte values. So this predictor breaks down. */ double dbits = 0; // entropy + ~huffman double dbitsH = 0; double dbitsE = 0; // external entropy + ~huffman double dbitsEH = 0; int F[256] = {0}, n = 0; double e = 0; // accumulated error bits for (i = 0; i < nvals; i++) { double x; int X; unsigned int v = vals[i]; //Better encoding would cope with sign. //v = ABS(vals[i])*2+(vals[i]<0); if (!(v & ~0x7f)) { F[v] += freqs[i], n+=freqs[i]; } else if (!(v & ~0x3fff)) { F[(v>>8) |0x80] += freqs[i]; F[ v &0xff] += freqs[i], n+=2*freqs[i]; } else if (!(v & ~0x1fffff)) {