/*
 * Builds an array holding the keys of `hash`.
 *
 * Every table entry carries an index `n`; each key is written to slot `n`
 * of the result, so the output follows those indices rather than the
 * bucket order.  A missing or empty table yields a fresh empty array.
 */
mrb_value
mrb_hash_keys(mrb_state *mrb, mrb_value hash)
{
  khash_t(ht) *tbl = RHASH_TBL(hash);
  khiter_t it;
  mrb_value keys;
  mrb_value *slots;

  if (tbl == NULL || kh_size(tbl) == 0) {
    return mrb_ary_new(mrb);
  }

  keys = mrb_ary_new_capa(mrb, kh_size(tbl));
  /* Store nil in the last slot before filling the slots directly;
     presumably this extends the array to its final length -- TODO confirm. */
  mrb_ary_set(mrb, keys, kh_size(tbl)-1, mrb_nil_value());
  slots = RARRAY_PTR(keys);

  for (it = kh_begin(tbl); it != kh_end(tbl); it++) {
    mrb_hash_value entry;

    if (!kh_exist(tbl, it)) continue;

    entry = kh_value(tbl, it);
    slots[entry.n] = kh_key(tbl, it);
  }

  return keys;
}
/*
 * Returns true when every key stored in `hash` is also present in `dt`
 * and the two associated values compare equal under mrb_equal; returns
 * false at the first key that is missing or whose values differ.
 *
 * Only the entries of `hash` are walked; the sizes of the two tables are
 * not compared here.  `recur` is accepted but not used by this body.
 */
static mrb_value
recursive_eql(mrb_state *mrb, mrb_value hash, mrb_value dt, int recur)
{
  khash_t(ht) *lhs = RHASH_TBL(hash);
  khash_t(ht) *rhs = RHASH_TBL(dt);
  khiter_t li;

  for (li = kh_begin(lhs); li != kh_end(lhs); li++) {
    mrb_value key;
    khiter_t ri;

    if (!kh_exist(lhs, li)) continue;

    key = kh_key(lhs, li);
    ri = kh_get(ht, rhs, key);
    if (ri == kh_end(rhs)) {
      /* key missing on the other side */
      return mrb_false_value();
    }
    if (!mrb_equal(mrb, kh_value(lhs, li), kh_value(rhs, ri))) {
      return mrb_false_value();
    }
  }

  return mrb_true_value();
}
/*
 * Shared implementation of hash equality.
 *
 *   hash1, hash2 - the operands; hash1 is expected to be a hash.
 *   eql          - non-zero selects strict comparison (mrb_eql) for the
 *                  stored values, zero selects mrb_equal.
 *
 * Returns a Ruby boolean:
 *   - true if both operands are the same object;
 *   - if hash2 is not a hash: false unless it responds to `to_hash`, in
 *     which case the comparison is delegated to hash2 (hash2 <op> hash1);
 *   - otherwise true iff both tables have the same number of entries and
 *     every key of hash1 exists in hash2 with an equal value.
 *
 * A missing (NULL) table is treated as an empty hash.
 */
static mrb_value
hash_equal(mrb_state *mrb, mrb_value hash1, mrb_value hash2, int eql)
{
  khash_t(ht) *h1, *h2;
  khiter_t k1, k2;
  khint_t n1, n2;

  if (mrb_obj_equal(mrb, hash1, hash2)) return mrb_true_value();
  if (!mrb_hash_p(hash2)) {
    if (!mrb_respond_to(mrb, hash2, mrb_intern_lit(mrb, "to_hash"))) {
      return mrb_false_value();
    }
    /* Wrap the result as a boolean: a Fixnum 0 would be truthy in Ruby. */
    if (eql)
      return mrb_bool_value(mrb_eql(mrb, hash2, hash1));
    else
      return mrb_bool_value(mrb_equal(mrb, hash2, hash1));
  }

  h1 = RHASH_TBL(hash1);
  h2 = RHASH_TBL(hash2);
  n1 = h1 ? kh_size(h1) : 0;
  n2 = h2 ? kh_size(h2) : 0;
  if (n1 != n2) return mrb_false_value();
  if (n1 == 0) return mrb_true_value();   /* both empty (allocated or not) */

  for (k1 = kh_begin(h1); k1 != kh_end(h1); k1++) {
    mrb_value key;
    int same;

    if (!kh_exist(h1, k1)) continue;

    key = kh_key(h1, k1);
    k2 = kh_get(ht, mrb, h2, key);
    if (k2 == kh_end(h2)) return mrb_false_value();

    /* honour the requested comparison mode for the stored values too */
    if (eql)
      same = mrb_eql(mrb, kh_value(h1, k1), kh_value(h2, k2));
    else
      same = mrb_equal(mrb, kh_value(h1, k1), kh_value(h2, k2));
    if (!same) return mrb_false_value();
  }

  return mrb_true_value();
}
/*
 *  call-seq:
 *     obj.instance_variables    -> array
 *
 *  Lists the names of the instance variables currently set on the
 *  receiver.  Declaring an accessor alone does not create the variable,
 *  so it is not listed until something has been assigned.
 *
 *     class Fred
 *       attr_accessor :a1
 *       def initialize
 *         @iv = 3
 *       end
 *     end
 *     Fred.new.instance_variables   #=> [:@iv]
 *
 *  Only table entries whose name starts with '@' and whose value is not
 *  MRB_TT_UNDEF are reported.
 *  NOTE(review): the example above shows a symbol, but this body pushes
 *  the names as strings (mrb_str_new_cstr) -- confirm which is intended.
 */
mrb_value
mrb_obj_instance_variables(mrb_state *mrb, mrb_value self)
{
  kh_iv_t *tbl = RCLASS_IV_TBL(self);
  mrb_value names;
  int slot;

  names = mrb_ary_new(mrb);
  if (!tbl) {
    return names;
  }

  for (slot = 0; slot < kh_end(tbl); slot++) {
    const char *name;

    if (!kh_exist(tbl, slot)) continue;

    name = mrb_sym2name(mrb, kh_key(tbl, slot));
    if (*name != '@') continue;
    if (mrb_type(kh_value(tbl, slot)) == MRB_TT_UNDEF) continue;

    mrb_ary_push(mrb, names, mrb_str_new_cstr(mrb, name));
  }

  return names;
}
/*
 * Creates a shallow copy of `hash`.
 *
 * The copy receives:
 *   - every key/value pair of the source table, together with the entry's
 *     ordering index `n`, so the duplicate enumerates its keys in the same
 *     order as the source;
 *   - the MRB_HASH_DEFAULT / MRB_HASH_PROC_DEFAULT flags of the source;
 *   - the source's `ifnone` value (stored in the "ifnone" instance
 *     variable) when it is not nil.
 *
 * Returns the new hash object.
 */
static mrb_value
mrb_hash_dup(mrb_state *mrb, mrb_value hash)
{
  struct RHash* ret;
  khash_t(ht) *h, *ret_h;
  khiter_t k, ret_k;
  mrb_value ifnone, vret;

  h = RHASH_TBL(hash);
  ret = (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class);
  ret->ht = kh_init(ht, mrb);

  if (h && kh_size(h) > 0) {
    ret_h = ret->ht;

    for (k = kh_begin(h); k != kh_end(h); k++) {
      int ai;

      if (!kh_exist(h, k)) continue;

      /* KEY() may allocate; keep the GC arena from growing per entry */
      ai = mrb_gc_arena_save(mrb);
      ret_k = kh_put(ht, mrb, ret_h, KEY(kh_key(h, k)));
      mrb_gc_arena_restore(mrb, ai);

      kh_val(ret_h, ret_k).v = kh_val(h, k).v;
      /* Carry the source index over instead of renumbering in bucket
         order, which would scramble the key order of the duplicate. */
      kh_val(ret_h, ret_k).n = kh_val(h, k).n;
    }
  }

  if (MRB_RHASH_DEFAULT_P(hash)) {
    ret->flags |= MRB_HASH_DEFAULT;
  }
  if (MRB_RHASH_PROCDEFAULT_P(hash)) {
    ret->flags |= MRB_HASH_PROC_DEFAULT;
  }

  vret = mrb_obj_value(ret);
  ifnone = RHASH_IFNONE(hash);
  if (!mrb_nil_p(ifnone)) {
    mrb_iv_set(mrb, vret, mrb_intern_lit(mrb, "ifnone"), ifnone);
  }
  return vret;
}
/**
 * Compute an occurrence threshold from the index.
 *
 * One value is collected per hash entry of every bucket (1 when the low
 * bit of the key is set, otherwise the stored value truncated to 32 bits).
 * The result is the element at rank floor((1 - f) * n) of those n values,
 * plus one.
 *
 * @param mi  index to inspect
 * @param f   fraction in (0, 1]; values <= 0 disable the threshold
 *
 * @return the threshold, or UINT32_MAX when f <= 0, when the index holds
 *         no entries, or when the scratch array cannot be allocated
 */
uint32_t mm_idx_cal_max_occ(const mm_idx_t *mi, float f)
{
	int i;
	size_t n = 0, rank;
	uint32_t thres;
	uint32_t *a;
	khint_t k;
	double pos;

	if (f <= 0.) return UINT32_MAX;

	for (i = 0; i < 1<<mi->b; ++i)
		if (mi->B[i].h) n += kh_size((idxhash_t*)mi->B[i].h);
	if (n == 0) return UINT32_MAX; /* nothing to select from */

	a = (uint32_t*)malloc(n * sizeof(uint32_t));
	if (a == NULL) return UINT32_MAX; /* out of memory: fall back to "no threshold" */

	n = 0;
	for (i = 0; i < 1<<mi->b; ++i) {
		idxhash_t *h = (idxhash_t*)mi->B[i].h;
		if (h == 0) continue;
		for (k = 0; k < kh_end(h); ++k) {
			if (!kh_exist(h, k)) continue;
			a[n++] = kh_key(h, k)&1? 1 : (uint32_t)kh_val(h, k);
		}
	}

	/* keep the selection rank inside [0, n-1]: f > 1 would make it
	 * negative and a vanishingly small f rounds (1 - f) up to 1 */
	pos = (1. - f) * n;
	rank = pos <= 0. ? 0 : (size_t)pos;
	if (rank >= n) rank = n - 1;

	thres = ks_ksmall_uint32_t(n, a, rank) + 1;
	free(a);
	return thres;
}
/*
 * Looks up the symbol that names class `c`.
 *
 * If `c` has a non-nil "__classid__" instance variable, its symbol is
 * returned.  Otherwise the iv table of `outer` (mrb->object_class when
 * `outer` is NULL) is searched for an entry whose value is `c` itself,
 * and the key of that entry is returned.
 *
 * Returns 0 when no name can be found (no "__classid__", and no matching
 * entry or no table in `outer`).
 * NOTE(review): 0 is used here as the "no symbol" value; confirm callers
 * treat it that way.
 */
static mrb_sym
class_sym(mrb_state *mrb, struct RClass *c, struct RClass *outer)
{
  mrb_value name;
  khash_t(iv)* h;
  khiter_t k;

  name = mrb_obj_iv_get(mrb, (struct RObject*)c, mrb_intern(mrb, "__classid__"));
  if (!mrb_nil_p(name)) {
    return SYM2ID(name);
  }

  if (!outer) outer = mrb->object_class;
  h = outer->iv;
  if (!h) return 0;   /* nothing defined in the enclosing scope */

  for (k = kh_begin(h); k != kh_end(h); k++) {
    mrb_value v;

    if (!kh_exist(h,k)) continue;
    v = kh_value(h,k);
    if (mrb_type(v) == c->tt && mrb_class_ptr(v) == c) {
      return kh_key(h,k);
    }
  }

  /* not found: do not read a symbol out of the nil `name` value */
  return 0;
}
/*
 * Calls `func` for every live entry of the instance-variable table `t`,
 * passing the key, the value and the opaque pointer `p`.
 *
 * The callback's result steers the walk:
 *   > 0  stop immediately; iv_foreach returns FALSE
 *   < 0  delete the current entry and keep going
 *   = 0  keep going
 *
 * Returns TRUE when the table is NULL or the walk ran to the end.
 */
static mrb_bool
iv_foreach(mrb_state *mrb, iv_tbl *t, iv_foreach_func *func, void *p)
{
  khash_t(iv) *tbl;
  khiter_t it;

  if (t == NULL) {
    return TRUE;
  }

  tbl = &t->h;
  for (it = kh_begin(tbl); it != kh_end(tbl); it++) {
    int verdict;

    if (!kh_exist(tbl, it)) continue;

    verdict = (*func)(mrb, kh_key(tbl, it), kh_value(tbl, it), p);
    if (verdict > 0) {
      return FALSE;
    }
    if (verdict < 0) {
      kh_del(iv, mrb, tbl, it);
    }
  }

  return TRUE;
}
/*
 * Returns a new hash object whose table contains a copy of every entry
 * of `hash`.  Keys go through KEY() before insertion; values are copied
 * as stored.
 */
mrb_value
mrb_hash_dup(mrb_state *mrb, mrb_value hash)
{
  khash_t(ht) *src;
  khash_t(ht) *dst;
  struct RHash *copy;
  khiter_t si;

  src = RHASH_TBL(hash);
  copy = (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class);
  copy->ht = kh_init(ht, mrb);

  if (kh_size(src) > 0) {
    dst = copy->ht;

    for (si = kh_begin(src); si != kh_end(src); si++) {
      khiter_t di;

      if (!kh_exist(src, si)) continue;

      di = kh_put(ht, dst, KEY(kh_key(src, si)));
      kh_val(dst, di) = kh_val(src, si);
    }
  }

  return mrb_obj_value(copy);
}
/*
 * Python entry point: releases the epoll instance registered under the
 * integer file descriptor given as the only argument.
 *
 * Steps: look the descriptor up, close the event data with ev_close(),
 * free every handler stored in the instance's handler table, destroy that
 * table, remove the instance from pyep_ht and free it.
 *
 * Returns None on success.  Returns NULL with an exception set when the
 * argument cannot be parsed, when the descriptor is unknown (KeyError) or
 * when ev_close() reports failure (SystemError).
 *
 * NOTE(review): PyErr_BadArgument() replaces whatever exception
 * PyArg_ParseTuple already raised -- confirm this is intended.
 */
static PyObject* pyext_epoll_free(PyObject *self,PyObject *args){
    struct pyep_data *entry;
    khiter_t it;
    int epfd;

    if(!PyArg_ParseTuple(args,"i",&epfd)){
	PyErr_BadArgument();
	return NULL;
    }

    entry = pyep_getby_epfd(epfd);
    if(entry == NULL){
	PyErr_SetString(PyExc_KeyError,"epoll file descriptor not found");
	return NULL;
    }

    if(ev_close(&entry->evdata)){
	PyErr_SetString(PyExc_SystemError,"epoll free failed");
	return NULL;
    }

    /* release every handler, then the table that held them */
    for(it = kh_begin(entry->evhdr_ht);it != kh_end(entry->evhdr_ht);it++){
	if(!kh_exist(entry->evhdr_ht,it)){
	    continue;
	}
	free((struct ev_header*)kh_value(entry->evhdr_ht,it));
    }
    kh_destroy(ptr,entry->evhdr_ht);

    /* unregister the instance itself */
    it = kh_get(ptr,pyep_ht,epfd);
    kh_del(ptr,pyep_ht,it);
    free(entry);

    Py_RETURN_NONE;
}
/*
 * Appends the instance variables of `obj` to the inspect string `str`
 * and closes it with ">".
 *
 * When `recur` is set only " ..." is appended.  Otherwise each variable
 * is written as "name=<inspect of value>"; the first one is preceded by
 * a space, later ones by ", ".  A leading '-' in `str` marks "nothing
 * written yet" and is turned into '#' when the first variable is emitted;
 * the first character is forced to '#' again before returning.
 *
 * Returns `str`.
 */
static mrb_value
inspect_obj(mrb_state *mrb, mrb_value obj, mrb_value str, int recur)
{
  if (recur) {
    mrb_str_cat2(mrb, str, " ...");
  }
  else {
    kh_iv_t *ivs = RCLASS_IV_TBL(obj);
    khiter_t it;

    if (ivs) {
      for (it = kh_begin(ivs); it != kh_end(ivs); it++) {
        const char *separator;
        mrb_sym name;
        mrb_value held;

        if (!kh_exist(ivs, it)) continue;

        name = kh_key(ivs, it);
        held = kh_value(ivs, it);

        if (RSTRING_PTR(str)[0] == '-') {
          /* first variable: replace the placeholder */
          RSTRING_PTR(str)[0] = '#';
          separator = " ";
        }
        else {
          separator = ", ";
        }
        mrb_str_cat2(mrb, str, separator);
        mrb_str_cat2(mrb, str, mrb_sym2name(mrb, name));
        mrb_str_cat2(mrb, str, "=");
        mrb_str_append(mrb, str, mrb_inspect(mrb, held));
      }
    }
  }

  mrb_str_cat2(mrb, str, ">");
  RSTRING_PTR(str)[0] = '#';

  return str;
}
/*
 * Copies every entry of s2's environment into s1's environment.
 *
 * s1's environment is created on demand.  s2 is never modified; a
 * missing environment in s2 simply means there is nothing to copy.
 *
 * Returns STRM_OK on success.  Returns STRM_NG when s1's environment
 * cannot be created, when an insertion fails, or when a key of s2 is
 * already present in s1 (entries copied before that point stay in s1).
 */
int
strm_env_copy(strm_state* s1, strm_state* s2)
{
  strm_env *e1 = s1->env;
  strm_env *e2 = s2->env;
  khiter_t k, kk;
  int r;

  if (!e1) {
    e1 = s1->env = kh_init(env);
    if (!e1) return STRM_NG;
  }
  if (!e2) {
    /* nothing to copy; do not touch s1->env again */
    return STRM_OK;
  }

  for (k = kh_begin(e2); k != kh_end(e2); k++) {
    if (!kh_exist(e2, k)) continue;

    kk = kh_put(env, e1, kh_key(e2, k), &r);
    /* r=0  key is present in the hash table */
    /* r=-1 operation failed */
    if (r <= 0) return STRM_NG;
    kh_value(e1, kk) = kh_value(e2, k);
  }
  return STRM_OK;
}
/*
 * Scans every BAM file registered in bamNFOp and writes clusters of
 * soft-clipped reads to "./test.txt".
 *
 * For each file: the header is read, ChrIsHum[] is filled from the isHum
 * field of the matching chrNFOp entry, and the index is loaded.  Every
 * non-secondary read that has a soft clip of at least
 * myConfig.minGrepSlen and lies on a target with a non-zero ChrIsHum
 * entry is looked up together with its mate (getPairedSam) and pushed
 * into the current pierCluster.  When sam_plp_push() returns non-zero
 * the cluster is printed (ranges plus one SAM line per read) and a new
 * cluster is started.
 *
 * Returns exit_code: 0, or 1 once sam_format1() has failed.  Open/header
 * failures return EXIT_FAILURE and an index failure returns 1 directly.
 *
 * NOTE(review): the result of fopen() is not checked before `fs` is used.
 * NOTE(review): the early returns leave `fs`, `in`, `h`, the bam1_t
 * buffers and ChrIsHum unreleased.
 */
int do_grep() {
#ifdef DEBUGa
	printf("[!]do_grep\n");
#endif
	BamInfo_t *pbam;
	kh_cstr_t BamID;
	khiter_t ki, bami;
	kstring_t ks1 = { 0, 0, NULL };
	kstring_t ks2 = { 0, 0, NULL };
	kstring_t ks3 = { 0, 0, NULL };
	samFile *in;
	bam_hdr_t *h;
	hts_idx_t *idx;
	bam1_t *b, *d, *d2, *bR1, *bR2, *bR3;
	bR1 = bam_init1();
	bR2 = bam_init1();
	bR3 = bam_init1();
	//htsFile *out;
	//hts_opt *in_opts = NULL, *out_opts = NULL;
	int r = 0, exit_code = 0;
	kvec_t(bam1_t) R1, R2, RV;
	pierCluster_t *pierCluster;
	//samdat_t tmp_samdat;
	FILE *fs = fopen("./test.txt","w");
	/* one pass per registered BAM file */
	for (bami = kh_begin(bamNFOp); bami != kh_end(bamNFOp); ++bami) {
		//printf(">[%d]:\n",bami);
		if (kh_exist(bamNFOp, bami)) {
			kv_init(R1);
			kv_init(R2);
			kv_init(RV);
			//tmp_samdat = (const samdat_t){ 0 };
			//memset(&tmp_samdat,0,sizeof(samdat_t));
			//printf("-[%d]:\n",bami);
			BamID = kh_key(bamNFOp, bami);
			pbam = &kh_value(bamNFOp, bami);
			fprintf(stderr, "%u [%s]=%s\t%u %u\n",bami,BamID,pbam->fileName,pbam->insertSize,pbam->SD);

			in = sam_open(pbam->fileName, "r");
			if (in == NULL) {
				fprintf(stderr, "[x]Error opening \"%s\"\n", pbam->fileName);
				return EXIT_FAILURE;
			}
			h = sam_hdr_read(in);
			/* out = hts_open("-", "w"); if (out == NULL) { fprintf(stderr, "[x]Error opening standard output\n"); return EXIT_FAILURE; } if (sam_hdr_write(out, h) < 0) { fprintf(stderr, "[!]Error writing output header.\n"); exit_code = 1; } */
			/* per-target flag copied from ChrInfo_t.isHum, indexed by target id */
			int8_t *ChrIsHum;
			if (h == NULL) {
				fprintf(stderr, "[x]Couldn't read header for \"%s\"\n", pbam->fileName);
				return EXIT_FAILURE;
			} else {
				ChrIsHum = malloc(h->n_targets * sizeof(int8_t));
				for (int32_t i=0; i < h->n_targets; ++i) {
					//ChrIsHum[i] = -1;
					ki = kh_get(chrNFO, chrNFOp, h->target_name[i]);
					if (ki == kh_end(chrNFOp)) {
						errx(4,"[x]Cannot find ChrID for [%s] !",h->target_name[i]);
					} else {
						ChrInfo_t * tmp = &kh_value(chrNFOp, ki);
						ChrIsHum[i] = tmp->isHum;
						//printf(">>> %d Chr:%s %d\n",i,h->target_name[i],ChrIsHum[i]);
					}
				}
			}
			h->ignore_sam_err = 0;
			b = bam_init1();
			d = bam_init1();
			d2 = bam_init1();
			if ((idx = sam_index_load(in, pbam->fileName)) == 0) {
				fprintf(stderr, "[E::%s] fail to load the BAM index\n", __func__);
				return 1;
			}
			pierCluster = sam_plp_init();
			while ((r = sam_read1(in, h, b)) >= 0) {
				/* `flag` first records "has a long enough soft clip",
				   then is reused as a bit set: 1 = read accepted, 2 = mate found */
				int8_t flag = false;
				const bam1_core_t *c = &b->core;
				if (c->flag & BAM_FSECONDARY) continue;
				if (c->n_cigar) {
					uint32_t *cigar = bam_get_cigar(b);
					for (int i = 0; i < c->n_cigar; ++i) {
						if (bam_cigar_opchr(cigar[i])=='S') { // soft clipping
							if ( bam_cigar_oplen(cigar[i]) >= myConfig.minGrepSlen ) {
								flag = true;
							}
						}
					}
				}
				/* NOTE(review): c->tid and c->mtid index ChrIsHum[] unchecked; confirm neither can be negative here */
				if (flag && ChrIsHum[c->tid]) { // Now, skip Virus items.
					//bam_copy1(bR1, b);
					flag = 0; // recycle
					//int enoughMapQ = 0;
					//kstring_t ks = { 0, 0, NULL };
					/*if (sam_format1(h, b, &ks1) < 0) { fprintf(stderr, "Error writing output.\n"); exit_code = 1; break; } else*/
					if ((c->mtid == c->tid && ChrIsHum[c->tid]) || (ChrIsHum[c->tid] ^ ChrIsHum[c->mtid])) { // Only grep those mapped on same Human ChrID, or on different species (one side on the virus).
						//printf(">[%s]\n",ks_str(&ks1));
						flag |= 1;
						//tmp_samdat.b = bam_dup1(b);
						//kv_push(samdat_t,R1,tmp_samdat);
						/*if (checkMapQ(ChrIsHum, b, true)) { ++enoughMapQ; }*/
					}
					if (getPairedSam(in, idx, b, d) != 0) {
						flag &= ~1;
						continue;
					} else {
						flag |= 2;
						/*if (checkMapQ(ChrIsHum, d, false)) { ++enoughMapQ; }*/
						/*if (c->flag & BAM_FSECONDARY) { if (getPairedSam(in, idx, d, d2) == 0) { //sam_format1(h, d2, &ks3); flag |= 4; if (checkMapQ(ChrIsHum, d2, false)) { ++enoughMapQ; } } }*/
					}
					/* For a BAM_FSECONDARY (256) read, jumping twice gives the same result as reading the SA entry.
					>[sf95_Ref_48245009_48245108_48245208_Vir_-_2000_2044_R_100_90 353 chr2 13996555 0 50S40M chr18 48245109 0ACACAACAATGTTCCGGAGACTCTAAGGCCTCCCGATACAGAGCAGAGGCCACACACACACACACCATGGAATACTATTCAGCCAAAAAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC NM:i:0 MD:Z:40 AS:i:40 XS:i:40 RG:Z:Fsimout_mB SA:Z:rgi|59585|emb|X04615.1|,2000,-,40S46M4S,60,0; YC:Z:CT YD:Z:f]
					-[sf95_Ref_48245009_48245108_48245208_Vir_-_2000_2044_R_100_90 177 chr18 48245109 9 40S50M gi|59585|emb|X04615.1|2000 0 GTTCCGGAGACTCTAAGGCCTCCCGATACAGAGCAGAGGCCACACACACACACACCATGGAATACTATTCAGCCAAAAAAAGGAATTCAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC NM:i:0 MD:Z:50 AS:i:50 XS:i:46 RG:Z:Fsimout_mB SA:Z:rgi|59585|emb|X04615.1|,2000,+,50S40M,9,0; YC:Z:GA YD:Z:f]
					+[sf95_Ref_48245009_48245108_48245208_Vir_-_2000_2044_R_100_90 113 gi|59585|emb|X04615.1| 2000 60 40S46M4S chr18 48245109 0 TTTTTTGGCTGAATAGTATTCCATGGTGTGTGTGTGTGTGGCCTCTGCTCTGTATCGGGAGGCCTTAGAGTCTCCGGAACATTGTTGTGT CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC NM:i:0 MD:Z:46 AS:i:46 XS:i:27 RG:Z:Fsimout_mB SA:Z:fchr2,13996555,+,50S40M,0,0; YC:Z:CT YD:Z:r]
					*/
					/*if (sam_format1(h, d, &ks2) < 0) { fprintf(stderr, "Error writing output.\n"); exit_code = 1; break; }*/
					/* both bits set: read accepted and mate retrieved */
					if (((flag & 3) == 3) /*&& enoughMapQ >= myConfig.samples*/) {
						/*printf(">%d[%s]\n",checkMapQ(ChrIsHum, b, true),ks_str(&ks1)); printf("-%d[%s]\n",checkMapQ(ChrIsHum, d, false),ks_str(&ks2)); if (flag & 4) { printf("+%d[%s]\n",checkMapQ(ChrIsHum, d2, false),ks_str(&ks3)); } printf("<--%d\n",enoughMapQ);*/
						if (sam_plp_push(ChrIsHum, pierCluster, b) == 0) {
							//printf("--HumRange=%s:%d-%d\n", h->target_name[(pierCluster->HumanRange).tid], (pierCluster->HumanRange).pos, (pierCluster->HumanRange).endpos);
							/* also keep the mate when it lies on a target whose ChrIsHum entry is zero */
							if ((!ChrIsHum[(d->core).tid]) && (flag & 2)) sam_plp_push(ChrIsHum, pierCluster, d);
							//if ((!ChrIsHum[(d2->core).tid]) && (flag & 4)) sam_plp_push(ChrIsHum, pierCluster, d2);
						} else {
							/* push refused (presumably the read does not fit the current
							   cluster -- TODO confirm): dump the cluster and start a new one */
							//print
							fprintf(fs,"[%s]\nHumRange=%s:%d-%d\n", BamID, h->target_name[(pierCluster->HumanRange).tid], (pierCluster->HumanRange).pos, (pierCluster->HumanRange).endpos);
							fprintf(fs,"VirRange=%s:%d-%d\n", h->target_name[(pierCluster->VirusRange).tid], (pierCluster->VirusRange).pos, (pierCluster->VirusRange).endpos);
							for (size_t i=0; i<kv_size(pierCluster->Reads);++i) {
								bam1_t *bi = kv_A(pierCluster->Reads, i);
								if (sam_format1(h, bi, &ks1) < 0) {
									fprintf(stderr, "Error writing output.\n");
									exit_code = 1;
									break;
								} else {
									fprintf(fs,"%s\n",ks1.s);
								}
							}
							fprintf(fs,"\n");
							//printf("HumRange=%s:%d-%d\n", h->target_name[(pierCluster->HumanRange).tid], (pierCluster->HumanRange).pos, (pierCluster->HumanRange).endpos);
							//fflush(fs);
							sam_plp_dectroy(pierCluster);
							pierCluster = sam_plp_init();
						}
					}
				}
				/*char *qname = bam_get_qname(b); if (sam_write1(out, h, b) < 0) { fprintf(stderr, "[x]Error writing output.\n"); exit_code = 1; break; }*/
			}
			/* r = sam_close(out); // stdout can only be closed once if (r < 0) { fprintf(stderr, "Error closing output.\n"); exit_code = 1; } */
			/* per-file cleanup */
			hts_idx_destroy(idx);
			bam_destroy1(b);
			bam_destroy1(d);
			bam_destroy1(d2);
			bam_hdr_destroy(h);
			r = sam_close(in);
			free(ChrIsHum);
#ifdef DEBUGa
			fflush(NULL);
			//pressAnyKey();
#endif
			sam_plp_dectroy(pierCluster);
			//printf("<[%d]:\n",bami);
		}
	}
	fclose(fs);
	getPairedSam(NULL, NULL, NULL, NULL); // sam_close(fp2);
	//printf("---[%d]---\n",exit_code);
	bam_destroy1(bR1);
	bam_destroy1(bR2);
	bam_destroy1(bR3);
	ks_release(&ks1);
	ks_release(&ks2);
	ks_release(&ks3);
	return exit_code;
}
/* * Computes entropy from integer frequencies for various encoding methods and * picks the best encoding. * * FIXME: we could reuse some of the code here for the actual encoding * parameters too. Eg the best 'k' for SUBEXP or the code lengths for huffman. * * Returns the best codec to use. */ enum cram_encoding cram_stats_encoding(cram_fd *fd, cram_stats *st) { enum cram_encoding best_encoding = E_NULL; int best_size = INT_MAX, bits; int nvals, i, ntot = 0, max_val = 0, min_val = INT_MAX, k; int *vals = NULL, *freqs = NULL, vals_alloc = 0, *codes; //cram_stats_dump(st); /* Count number of unique symbols */ for (nvals = i = 0; i < MAX_STAT_VAL; i++) { if (!st->freqs[i]) continue; if (nvals >= vals_alloc) { vals_alloc = vals_alloc ? vals_alloc*2 : 1024; vals = realloc(vals, vals_alloc * sizeof(int)); freqs = realloc(freqs, vals_alloc * sizeof(int)); if (!vals || !freqs) { if (vals) free(vals); if (freqs) free(freqs); return E_HUFFMAN; // Cannot do much else atm } } vals[nvals] = i; freqs[nvals] = st->freqs[i]; ntot += freqs[nvals]; if (max_val < i) max_val = i; if (min_val > i) min_val = i; nvals++; } if (st->h) { khint_t k; int i; for (k = kh_begin(st->h); k != kh_end(st->h); k++) { if (!kh_exist(st->h, k)) continue; if (nvals >= vals_alloc) { vals_alloc = vals_alloc ? 
vals_alloc*2 : 1024; vals = realloc(vals, vals_alloc * sizeof(int)); freqs = realloc(freqs, vals_alloc * sizeof(int)); if (!vals || !freqs) return E_HUFFMAN; // Cannot do much else atm } i = kh_key(st->h, k); vals[nvals]=i; freqs[nvals] = kh_val(st->h, k); ntot += freqs[nvals]; if (max_val < i) max_val = i; if (min_val > i) min_val = i; nvals++; } } st->nvals = nvals; assert(ntot == st->nsamp); if (nvals <= 1) { free(vals); free(freqs); return E_HUFFMAN; } if (fd->verbose > 1) fprintf(stderr, "Range = %d..%d, nvals=%d, ntot=%d\n", min_val, max_val, nvals, ntot); /* Theoretical entropy */ // if (fd->verbose > 1) { // double dbits = 0; // for (i = 0; i < nvals; i++) { // dbits += freqs[i] * log((double)freqs[i]/ntot); // } // dbits /= -log(2); // if (fd->verbose > 1) // fprintf(stderr, "Entropy = %f\n", dbits); // } if (nvals > 1 && ntot > 256) { #if 0 /* * CRUDE huffman estimator. Round to closest and round up from 0 * to 1 bit. * * With and without ITF8 incase we have a few discrete values but with * large magnitude. * * Note rans0/arith0 and Z_HUFFMAN_ONLY vs internal huffman can be * compared in this way, but order-1 (eg rans1) or maybe LZ77 modes * may detect the correlation of high bytes to low bytes in multi- * byte values. So this predictor breaks down. */ double dbits = 0; // entropy + ~huffman double dbitsH = 0; double dbitsE = 0; // external entropy + ~huffman double dbitsEH = 0; int F[256] = {0}, n = 0; double e = 0; // accumulated error bits for (i = 0; i < nvals; i++) { double x; int X; unsigned int v = vals[i]; //Better encoding would cope with sign. //v = ABS(vals[i])*2+(vals[i]<0); if (!(v & ~0x7f)) { F[v] += freqs[i], n+=freqs[i]; } else if (!(v & ~0x3fff)) { F[(v>>8) |0x80] += freqs[i]; F[ v &0xff] += freqs[i], n+=2*freqs[i]; } else if (!(v & ~0x1fffff)) {
/*
 * Existence callback for the certificate cache: reports whether slot
 * `it` of `certmap` holds a live entry (non-zero) or not (zero).
 */
static int
cachefkcrt_exist_cb(cache_iter_t it)
{
	const int occupied = kh_exist(certmap, it);

	return occupied;
}
/*
 * Turns the raw fragment-length hash of `pHist` into sorted arrays and
 * summary statistics.
 *
 * - fragLen[] receives every key of the raw hash, sorted with
 *   CompareFragLenBin; freq[] receives the matching stored counts.
 *   Both arrays are reallocated (to twice the needed size) when the
 *   current capacity is too small.
 * - median is the first sorted length whose cumulative fraction reaches
 *   0.5; mean and stdev are computed against modeCount[0] as the total.
 * - Each value in the raw hash is overwritten with
 *   round(-10 * log10(tail)), where tail is the smaller of the cumulative
 *   fraction and its complement.
 */
static void TGM_FragLenHistToMature(TGM_FragLenHist* pHist)
{
    khash_t(fragLen)* pTable = pHist->rawHist;

    pHist->size = kh_size(pTable);

    /* grow both arrays together when the histogram no longer fits */
    if (pHist->size > pHist->capacity)
    {
        free(pHist->fragLen);
        free(pHist->freq);

        pHist->capacity = 2 * pHist->size;

        pHist->fragLen = (uint32_t*) malloc(pHist->capacity * sizeof(uint32_t));
        if(pHist->fragLen == NULL)
            TGM_ErrQuit("ERROR: Not enough memory for the storage of the fragment length array in the fragment length histogram object.\n");

        pHist->freq = (uint64_t*) malloc(pHist->capacity * sizeof(uint64_t));
        if(pHist->freq == NULL)
            TGM_ErrQuit("ERROR: Not enough memory for the storage of the frequency array in the fragment length histogram object.\n");
    }

    /* collect the keys, then sort them */
    unsigned int nBins = 0;
    for (khiter_t slot = kh_begin(pTable); slot != kh_end(pTable); ++slot)
    {
        if (!kh_exist(pTable, slot))
            continue;

        pHist->fragLen[nBins] = kh_key(pTable, slot);
        ++nBins;
    }

    qsort(pHist->fragLen, pHist->size, sizeof(uint32_t), CompareFragLenBin);

    uint64_t totalFreq = pHist->modeCount[0];
    double cumFreq = 0.0;
    double totalFragLen = 0.0;
    TGM_Bool foundMedian = FALSE;

    for (unsigned int bin = 0; bin != pHist->size; ++bin)
    {
        khiter_t slot = kh_get(fragLen, pTable, pHist->fragLen[bin]);
        if (slot == kh_end(pTable))
            TGM_ErrQuit("ERROR: Cannot find the fragment length frequency from the hash table.\n");

        pHist->freq[bin] = kh_value(pTable, slot);

        totalFragLen += pHist->fragLen[bin] * pHist->freq[bin];
        cumFreq += pHist->freq[bin];

        double cdf = cumFreq / totalFreq;
        if (!foundMedian && cdf >= 0.5)
        {
            pHist->median = pHist->fragLen[bin];
            foundMedian = TRUE;
        }

        /* fold onto the nearer tail before converting to a score */
        if (cdf > 0.5)
            cdf = 1.0 - cdf;

        uint32_t fragLenQual = DoubleRoundToInt(-10.0 * log10(cdf));
        kh_value(pTable, slot) = fragLenQual;
    }

    pHist->mean = totalFragLen / totalFreq;

    pHist->stdev = 0.0;
    for (unsigned int bin = 0; bin != pHist->size; ++bin)
    {
        pHist->stdev += (double) pHist->freq[bin] * pow(pHist->mean - pHist->fragLen[bin], 2);
    }

    if (totalFreq != 1)
        pHist->stdev = sqrt(pHist->stdev / (double) (totalFreq - 1));
}
static int output_stats_and_reset(struct bgpcorsaro_pfxmonitor_state_t *state, uint32_t interval_start) { khiter_t k; khiter_t p; khiter_t a; int khret; uint8_t pfx_visible; uint32_t unique_pfxs = 0; khash_t(peer_asn_map) * pam; /* origin_asn -> num peer ASns*/ khash_t(asn_count_map) *asn_np = NULL; if ((asn_np = kh_init(asn_count_map)) == NULL) { return -1; } /* for each prefix go through all peers */ for (k = kh_begin(state->pfx_info); k != kh_end(state->pfx_info); ++k) { if (kh_exist(state->pfx_info, k) == 0) { continue; } /* reset counters */ kh_clear(asn_count_map, asn_np); /* get peer-asn map for this prefix */ pam = kh_value(state->pfx_info, k); /* save the origin asn visibility (i.e. how many peers' ASns * observe such information */ /* for each peer, go through all origins */ for (p = kh_begin(pam); p != kh_end(pam); ++p) { if (kh_exist(pam, p) == 0) { continue; } /* increment the counter for this ASN */ if ((a = kh_get(asn_count_map, asn_np, kh_value(pam, p))) == kh_end(asn_np)) { a = kh_put(asn_count_map, asn_np, kh_value(pam, p), &khret); kh_value(asn_np, a) = 1; } else { kh_value(asn_np, a)++; } } /* now asn_np has a complete count of the number of peers' ASns that observed each origin ASN */ /* count the prefix and origins if their visibility * is above the threshold */ pfx_visible = 0; for (a = kh_begin(asn_np); a != kh_end(asn_np); ++a) { if (kh_exist(asn_np, a) == 0) { continue; } /* the information is accounted only if it is * consistent on at least threshold peers' ASns */ if (kh_value(asn_np, a) >= state->peer_asns_th) { pfx_visible = 1; bgpstream_id_set_insert(state->unique_origins, kh_key(asn_np, a)); } } /* updating counters */ unique_pfxs += pfx_visible; } DUMP_METRIC(unique_pfxs, state->interval_start, "%s.%s.%s.%s", state->metric_prefix, PLUGIN_NAME, state->ip_space_name, "prefixes_cnt"); DUMP_METRIC(bgpstream_id_set_size(state->unique_origins), state->interval_start, "%s.%s.%s.%s", state->metric_prefix, PLUGIN_NAME, state->ip_space_name, 
"origin_ASns_cnt"); bgpstream_id_set_clear(state->unique_origins); kh_destroy(asn_count_map, asn_np); return 0; }
/*
 * Rebuilds the sorting state `srt` for the records that all active
 * readers hold at chromosome `chr`, position `min_pos`.
 *
 * Outline of what the body does:
 *   1. On first use, set the pairing option, initialise the scores and
 *      create the two string->int hashes.
 *   2. Free the keys left from the previous call and clear both hashes
 *      and the group/variant/variant-set counters.
 *   3. For every active reader, turn each buffered line at this position
 *      into a "REF>ALT[,REF>ALT...]" string, register it as a variant
 *      (a numeric suffix is appended when the same reader already owns a
 *      variant with that string), and assign the reader to the group
 *      identified by grp_create_key().
 *   4. Build per-variant group bitmasks, create one variant set per
 *      variant, reset the pairing matrix and counts.
 *   5. Repeatedly take the variant set with the highest count, merge it
 *      with the best-scoring set whose mask does not overlap, or push it
 *      out with push_vset() when no partner is found.
 *   6. Record `chr` and `min_pos` in `srt`.
 */
static void bcf_sr_sort_set(bcf_srs_t *readers, sr_sort_t *srt, const char *chr, int min_pos)
{
    if ( !srt->grp_str2int )
    {
        // first time here, initialize
        if ( !srt->pair )
        {
            if ( readers->collapse==COLLAPSE_NONE ) readers->collapse = BCF_SR_PAIR_EXACT;
            bcf_sr_set_opt(readers, BCF_SR_PAIR_LOGIC, readers->collapse);
        }
        bcf_sr_init_scores(srt);
        srt->grp_str2int = khash_str2int_init();
        srt->var_str2int = khash_str2int_init();
    }

    // the hash keys are heap strings owned by the hashes: free them before clearing
    int k;
    khash_t(str2int) *hash;
    hash = srt->grp_str2int;
    for (k=0; k < kh_end(hash); k++)
        if ( kh_exist(hash,k) ) free((char*)kh_key(hash,k));
    hash = srt->var_str2int;
    for (k=0; k < kh_end(hash); k++)
        if ( kh_exist(hash,k) ) free((char*)kh_key(hash,k));
    kh_clear(str2int, srt->grp_str2int);
    kh_clear(str2int, srt->var_str2int);
    srt->ngrp = srt->nvar = srt->nvset = 0;

    // scratch group, filled per reader and handed over to srt->grp[] when its key is new
    grp_t grp;
    memset(&grp,0,sizeof(grp_t));

    // group VCFs into groups, each with a unique combination of variants in the duplicate lines
    int ireader,ivar,irec,igrp,ivset,iact;
    for (ireader=0; ireader<readers->nreaders; ireader++) srt->vcf_buf[ireader].nrec = 0;
    for (iact=0; iact<srt->nactive; iact++)
    {
        ireader = srt->active[iact];
        bcf_sr_t *reader = &readers->readers[ireader];
        int rid = bcf_hdr_name2id(reader->header, chr);
        grp.nvar = 0;
        hts_expand(int,reader->nbuffer,srt->moff,srt->off);
        srt->noff = 0;
        srt->str.l = 0;
        // buffered lines are visited from index 1 up to and including nbuffer
        for (irec=1; irec<=reader->nbuffer; irec++)
        {
            bcf1_t *line = reader->buffer[irec];
            if ( line->rid!=rid || line->pos!=min_pos ) break;

            // append this line's variant string to srt->str, ';'-separated, remembering its offset
            if ( srt->str.l ) kputc(';',&srt->str);
            srt->off[srt->noff++] = srt->str.l;
            size_t beg = srt->str.l;
            for (ivar=1; ivar<line->n_allele; ivar++)
            {
                if ( ivar>1 ) kputc(',',&srt->str);
                kputs(line->d.allele[0],&srt->str);
                kputc('>',&srt->str);
                kputs(line->d.allele[ivar],&srt->str);
            }
            if ( line->n_allele==1 )
            {
                kputs(line->d.allele[0],&srt->str);
                kputsn(">.",2,&srt->str);
            }

            // Create new variant or attach to existing one. But careful, there can be duplicate
            // records with the same POS,REF,ALT (e.g. in dbSNP-b142)
            char *var_str = beg + srt->str.s;
            int ret, var_idx = 0, var_end = srt->str.l;
            while ( 1 )
            {
                ret = khash_str2int_get(srt->var_str2int, var_str, &ivar);
                if ( ret==-1 ) break;

                var_t *var = &srt->var[ivar];
                if ( var->vcf[var->nvcf-1] != ireader ) break;

                // same reader already has this string: retry with a numeric suffix appended
                srt->str.l = var_end;
                kputw(var_idx, &srt->str);
                var_str = beg + srt->str.s;
                var_idx++;
            }
            if ( ret==-1 )
            {
                ivar = srt->nvar++;
                hts_expand0(var_t,srt->nvar,srt->mvar,srt->var);
                srt->var[ivar].nvcf = 0;
                khash_str2int_set(srt->var_str2int, strdup(var_str), ivar);
                free(srt->var[ivar].str);   // possible left-over from the previous position
            }
            var_t *var = &srt->var[ivar];
            var->nalt = line->n_allele - 1;
            var->type = bcf_get_variant_types(line);
            // cut the suffix off again so var->str holds the plain variant string
            srt->str.s[var_end] = 0;
            if ( ret==-1 )
                var->str = strdup(var_str);

            int mvcf = var->mvcf;
            var->nvcf++;
            hts_expand0(int*, var->nvcf, var->mvcf, var->vcf);
            // keep var->rec the same length as var->vcf whenever the latter grew
            if ( mvcf != var->mvcf ) var->rec = (bcf1_t **) realloc(var->rec,sizeof(bcf1_t*)*var->mvcf);
            var->vcf[var->nvcf-1] = ireader;
            var->rec[var->nvcf-1] = line;

            grp.nvar++;
            hts_expand(var_t,grp.nvar,grp.mvar,grp.var);
            grp.var[grp.nvar-1] = ivar;
        }
        char *grp_key = grp_create_key(srt);
        int ret = khash_str2int_get(srt->grp_str2int, grp_key, &igrp);
        if ( ret==-1 )
        {
            // new group: it takes ownership of grp's contents and of grp_key
            igrp = srt->ngrp++;
            hts_expand0(grp_t, srt->ngrp, srt->mgrp, srt->grp);
            free(srt->grp[igrp].var);
            srt->grp[igrp] = grp;
            srt->grp[igrp].key = grp_key;
            khash_str2int_set(srt->grp_str2int, grp_key, igrp);
            memset(&grp,0,sizeof(grp_t));
        }
        else
            free(grp_key);
        srt->grp[igrp].nvcf++;
    }
    free(grp.var);

    // initialize bitmask - which groups is the variant present in
    for (ivar=0; ivar<srt->nvar; ivar++)
    {
        srt->var[ivar].mask = kbs_resize(srt->var[ivar].mask, srt->ngrp);
        kbs_clear(srt->var[ivar].mask);
    }
    for (igrp=0; igrp<srt->ngrp; igrp++)
    {
        for (ivar=0; ivar<srt->grp[igrp].nvar; ivar++)
        {
            int i = srt->grp[igrp].var[ivar];
            kbs_insert(srt->var[i].mask, igrp);
        }
    }

    // create the initial list of variant sets
    for (ivar=0; ivar<srt->nvar; ivar++)
    {
        ivset = srt->nvset++;
        hts_expand0(varset_t, srt->nvset, srt->mvset, srt->vset);

        varset_t *vset = &srt->vset[ivset];
        vset->nvar = 1;
        hts_expand0(var_t, vset->nvar, vset->mvar, vset->var);
        vset->var[vset->nvar-1] = ivar;
        var_t *var = &srt->var[ivar];
        vset->cnt = var->nvcf;
        vset->mask = kbs_resize(vset->mask, srt->ngrp);
        kbs_clear(vset->mask);
        kbs_bitwise_or(vset->mask, var->mask);

        // translate the VCF_* variant type bits into SR_* bits (MNPs are counted as SR_SNP)
        int type = 0;
        if ( var->type==VCF_REF ) type |= SR_REF;
        else
        {
            if ( var->type & VCF_SNP ) type |= SR_SNP;
            if ( var->type & VCF_MNP ) type |= SR_SNP;
            if ( var->type & VCF_INDEL ) type |= SR_INDEL;
            if ( var->type & VCF_OTHER ) type |= SR_OTHER;
        }
        var->type = type;
    }
#if DEBUG_VSETS
    debug_vsets(srt);
#endif

    // initialize the pairing matrix
    hts_expand(int, srt->ngrp*srt->nvset, srt->mpmat, srt->pmat);
    hts_expand(int, srt->nvset, srt->mcnt, srt->cnt);
    memset(srt->pmat, 0, sizeof(*srt->pmat)*srt->ngrp*srt->nvset);
    for (ivset=0; ivset<srt->nvset; ivset++)
    {
        varset_t *vset = &srt->vset[ivset];
        for (igrp=0; igrp<srt->ngrp; igrp++) srt->pmat[ivset*srt->ngrp+igrp] = 0;
        srt->cnt[ivset] = vset->cnt;
    }

    // pair the lines
    while ( srt->nvset )
    {
#if DEBUG_VSETS
        fprintf(stderr,"\n");
        debug_vsets(srt);
#endif

        // pick the variant set with the largest count
        int imax = 0;
        for (ivset=1; ivset<srt->nvset; ivset++)
            if ( srt->cnt[imax] < srt->cnt[ivset] ) imax = ivset;

        // find the best-scoring partner whose group mask does not overlap with imax
        int ipair = -1;
        uint32_t max_score = 0;
        for (ivset=0; ivset<srt->nvset; ivset++)
        {
            if ( kbs_logical_and(srt->vset[imax].mask,srt->vset[ivset].mask) ) continue;   // cannot be merged
            uint32_t score = pairing_score(srt, imax, ivset);
            // fprintf(stderr,"score: %d %d, logic=%d \t..\t %u\n", imax,ivset,srt->pair,score);
            if ( max_score < score ) { max_score = score; ipair = ivset; }
        }

        // merge rows creating a new variant set this way
        if ( ipair!=-1 && ipair!=imax )
        {
            imax = merge_vsets(srt, imax, ipair);
            continue;
        }

        // no partner found: emit the set (presumably push_vset also removes it from srt->vset -- TODO confirm)
        push_vset(srt, imax);
    }

    srt->chr = chr;
    srt->pos = min_pos;
}
/*
 * Mark phase of the collector.
 *
 * Marks, in order: the fp/sp/irep objects of every context in the chain,
 * every object in the arena, then the globals, the dynamic environment,
 * the top continuation and the feature list.
 *
 * Finally the weak maps reachable through pic->heap->weaks are processed:
 * for each entry whose key is alive but whose object value is not yet
 * alive, the value is marked.  The pass is repeated until a full sweep
 * over all weak maps marks nothing new.
 */
static void
gc_mark_phase(pic_state *pic)
{
  struct context *frame;
  size_t idx, newly_marked;

  assert(pic->heap->weaks == NULL);

  /* context */
  for (frame = pic->cxt; frame != NULL; frame = frame->prev) {
    if (frame->fp) {
      gc_mark_object(pic, (struct object *)frame->fp);
    }
    if (frame->sp) {
      gc_mark_object(pic, (struct object *)frame->sp);
    }
    if (frame->irep) {
      gc_mark_object(pic, (struct object *)frame->irep);
    }
  }

  /* arena */
  for (idx = 0; idx < pic->ai; ++idx) {
    gc_mark_object(pic, (struct object *)pic->arena[idx]);
  }

  /* global variables */
  gc_mark(pic, pic->globals);

  /* dynamic environment */
  gc_mark(pic, pic->dyn_env);

  /* top continuation */
  gc_mark(pic, pic->halt);

  /* features */
  gc_mark(pic, pic->features);

  /* weak maps: iterate to a fixed point */
  do {
    struct weak *map;

    newly_marked = 0;

    for (map = pic->heap->weaks; map != NULL; map = map->prev) {
      khash_t(weak) *tbl = &map->hash;
      int it;

      for (it = kh_begin(tbl); it != kh_end(tbl); ++it) {
        struct object *key;
        pic_value val;

        if (! kh_exist(tbl, it))
          continue;

        key = kh_key(tbl, it);
        val = kh_val(tbl, it);

        if (! is_alive(key))
          continue;
        if (! obj_p(pic, val) || is_alive(obj_ptr(pic, val)))
          continue;

        gc_mark(pic, val);
        ++newly_marked;
      }
    }
  } while (newly_marked > 0);
}
/*
 * Removes duplicates among the buffered single-end records.
 *
 * Records with a non-negative score are grouped by a 32-bit key built
 * from their position: (rpos << 1 | 1) when rpos >= 0, otherwise
 * (core.pos << 1).  Records beyond `mpos` are left for a later call.
 * Within each group the record with the highest positive score keeps its
 * BAM record; all others are destroyed.  Every grouped record gets its
 * score set to -1.
 */
static void rmdupse_buf(buffer_t *buf)
{
	khash_t(32) *groups;
	khint_t slot;
	int mpos, upper, i;

	/* position limit: last buffered record when the buffer is full */
	if (buf->x == buf->n) mpos = buf->buf[buf->x-1].b->core.pos;
	else mpos = 0x7fffffff;
	upper = (buf->x < 0)? buf->n : buf->x;

	// fill the hash table
	groups = kh_init(32);
	for (i = 0; i < upper; ++i) {
		elem_t *rec = buf->buf + i;
		listelem_t *members;
		uint32_t key;
		int absent;

		if (rec->score < 0) continue;
		if (rec->rpos >= 0) {
			if (rec->rpos > mpos) continue;
			key = (uint32_t)rec->rpos<<1 | 1;
		} else {
			if (rec->b->core.pos >= mpos) continue;
			key = (uint32_t)rec->b->core.pos<<1;
		}

		slot = kh_put(32, groups, key, &absent);
		members = &kh_val(groups, slot);
		if (absent != 0) { // new key: start a one-element list
			members->m = members->n = 1;
			members->a = (int*)calloc(members->m, sizeof(int));
			members->a[0] = i;
		} else { // present in the hash table: append, doubling when full
			if (members->n == members->m) {
				members->m <<= 1;
				members->a = (int*)realloc(members->a, members->m * sizeof(int));
			}
			members->a[members->n++] = i;
		}
	}

	// rmdup
	for (slot = kh_begin(groups); slot < kh_end(groups); ++slot) {
		listelem_t *members;
		int best_score = 0, best_at = -1;

		if (!kh_exist(groups, slot)) continue;
		members = &kh_val(groups, slot);

		// get the max
		for (i = 0; i < members->n; ++i) {
			int score = buf->buf[members->a[i]].score;
			if (score > best_score) {
				best_score = score;
				best_at = i;
			}
		}

		// mark the elements
		for (i = 0; i < members->n; ++i) {
			elem_t *rec = &buf->buf[members->a[i]];
			rec->score = -1;
			if (i == best_at) continue;
			bam_destroy1(rec->b);
			rec->b = 0;
		}

		// free
		free(members->a);
	}
	kh_destroy(32, groups);
}
/*
 * Reports whether bucket `idx` of `map` currently holds an entry:
 * non-zero if it does, zero otherwise.
 */
int git_strmap_has_data(git_strmap *map, size_t idx)
{
	int occupied = kh_exist(map, idx);

	return occupied;
}
/**
 * Tells whether `key` is present in `map`.
 *
 * kh_get() returns kh_end() for a missing key.  That value is not a
 * valid bucket index, so it must be compared against kh_end() rather
 * than passed to kh_exist(), which would read past the flag array.
 *
 * @param map  map to search
 * @param key  key to look up
 * @return true if the key is stored in the map, false otherwise
 */
bool HashMap_exists(HashMap* map, bstring key)
{
    khint_t k = kh_get(str, map->h, bdata(key));

    return k != kh_end(map->h);
}
int main_samview(int argc, char *argv[]) { int index; for(index = 0; index < argc; index++) { printf("The %d is %s\n",index,argv[index]); } getchar();return 0; int c, is_header = 0, is_header_only = 0, ret = 0, compress_level = -1, is_count = 0; int is_long_help = 0, n_threads = 0; int64_t count = 0; samFile *in = 0, *out = 0, *un_out=0; bam_hdr_t *header = NULL; char out_mode[5], out_un_mode[5], *out_format = ""; char *fn_in = 0, *fn_out = 0, *fn_list = 0, *q, *fn_un_out = 0; sam_global_args ga = SAM_GLOBAL_ARGS_INIT; samview_settings_t settings = { .rghash = NULL, .min_mapQ = 0, .flag_on = 0, .flag_off = 0, .min_qlen = 0, .remove_B = 0, .subsam_seed = 0, .subsam_frac = -1., .library = NULL, .bed = NULL, }; static const struct option lopts[] = { SAM_OPT_GLOBAL_OPTIONS('-', 0, 'O', 0, 'T'), { "threads", required_argument, NULL, '@' }, { NULL, 0, NULL, 0 } }; /* parse command-line options */ strcpy(out_mode, "w"); strcpy(out_un_mode, "w"); while ((c = getopt_long(argc, argv, "SbBcCt:h1Ho:O:q:f:F:ul:r:?T:R:L:s:@:m:x:U:", lopts, NULL)) >= 0) { switch (c) { case 's': if ((settings.subsam_seed = strtol(optarg, &q, 10)) != 0) { srand(settings.subsam_seed); settings.subsam_seed = rand(); } settings.subsam_frac = strtod(q, &q); break; case 'm': settings.min_qlen = atoi(optarg); break; case 'c': is_count = 1; break; case 'S': break; case 'b': out_format = "b"; break; case 'C': out_format = "c"; break; case 't': fn_list = strdup(optarg); break; case 'h': is_header = 1; break; case 'H': is_header_only = 1; break; case 'o': fn_out = strdup(optarg); break; case 'U': fn_un_out = strdup(optarg); break; case 'f': settings.flag_on |= strtol(optarg, 0, 0); break; case 'F': settings.flag_off |= strtol(optarg, 0, 0); break; case 'q': settings.min_mapQ = atoi(optarg); break; case 'u': compress_level = 0; break; case '1': compress_level = 1; break; case 'l': settings.library = strdup(optarg); break; case 'L': if ((settings.bed = bed_read(optarg)) == NULL) { print_error_errno("view", 
"Could not read file \"%s\"", optarg); ret = 1; goto view_end; } break; case 'r': if (add_read_group_single("view", &settings, optarg) != 0) { ret = 1; goto view_end; } break; case 'R': if (add_read_groups_file("view", &settings, optarg) != 0) { ret = 1; goto view_end; } break; /* REMOVED as htslib doesn't support this //case 'x': out_format = "x"; break; //case 'X': out_format = "X"; break; */ case '?': is_long_help = 1; break; case 'B': settings.remove_B = 1; break; case '@': n_threads = strtol(optarg, 0, 0); break; case 'x': { if (strlen(optarg) != 2) { fprintf(stderr, "main_samview: Error parsing -x auxiliary tags should be exactly two characters long.\n"); return usage(stderr, EXIT_FAILURE, is_long_help); } settings.remove_aux = (char**)realloc(settings.remove_aux, sizeof(char*) * (++settings.remove_aux_len)); settings.remove_aux[settings.remove_aux_len-1] = optarg; } break; default: if (parse_sam_global_opt(c, optarg, lopts, &ga) != 0) return usage(stderr, EXIT_FAILURE, is_long_help); break; } } if (compress_level >= 0 && !*out_format) out_format = "b"; if (is_header_only) is_header = 1; // File format auto-detection first if (fn_out) sam_open_mode(out_mode+1, fn_out, NULL); if (fn_un_out) sam_open_mode(out_un_mode+1, fn_un_out, NULL); // Overridden by manual -b, -C if (*out_format) out_mode[1] = out_un_mode[1] = *out_format; out_mode[2] = out_un_mode[2] = '\0'; // out_(un_)mode now 1 or 2 bytes long, followed by nul. if (compress_level >= 0) { char tmp[2]; tmp[0] = compress_level + '0'; tmp[1] = '\0'; strcat(out_mode, tmp); strcat(out_un_mode, tmp); } if (argc == optind && isatty(STDIN_FILENO)) return usage(stdout, EXIT_SUCCESS, is_long_help); // potential memory leak... fn_in = (optind < argc)? 
argv[optind] : "-"; // generate the fn_list if necessary if (fn_list == 0 && ga.reference) fn_list = samfaipath(ga.reference); // open file handlers if ((in = sam_open_format(fn_in, "r", &ga.in)) == 0) { print_error_errno("view", "failed to open \"%s\" for reading", fn_in); ret = 1; goto view_end; } if (fn_list) { if (hts_set_fai_filename(in, fn_list) != 0) { fprintf(stderr, "[main_samview] failed to use reference \"%s\".\n", fn_list); ret = 1; goto view_end; } } if ((header = sam_hdr_read(in)) == 0) { fprintf(stderr, "[main_samview] fail to read the header from \"%s\".\n", fn_in); ret = 1; goto view_end; } if (settings.rghash) { // FIXME: I do not know what "bam_header_t::n_text" is for... char *tmp; int l; tmp = drop_rg(header->text, settings.rghash, &l); free(header->text); header->text = tmp; header->l_text = l; } if (!is_count) { if ((out = sam_open_format(fn_out? fn_out : "-", out_mode, &ga.out)) == 0) { print_error_errno("view", "failed to open \"%s\" for writing", fn_out? fn_out : "standard output"); ret = 1; goto view_end; } if (fn_list) { if (hts_set_fai_filename(out, fn_list) != 0) { fprintf(stderr, "[main_samview] failed to use reference \"%s\".\n", fn_list); ret = 1; goto view_end; } } if (*out_format || is_header || out_mode[1] == 'b' || out_mode[1] == 'c' || (ga.out.format != sam && ga.out.format != unknown_format)) { if (sam_hdr_write(out, header) != 0) { fprintf(stderr, "[main_samview] failed to write the SAM header\n"); ret = 1; goto view_end; } } if (fn_un_out) { if ((un_out = sam_open_format(fn_un_out, out_un_mode, &ga.out)) == 0) { print_error_errno("view", "failed to open \"%s\" for writing", fn_un_out); ret = 1; goto view_end; } if (fn_list) { if (hts_set_fai_filename(un_out, fn_list) != 0) { fprintf(stderr, "[main_samview] failed to use reference \"%s\".\n", fn_list); ret = 1; goto view_end; } } if (*out_format || is_header || out_un_mode[1] == 'b' || out_un_mode[1] == 'c' || (ga.out.format != sam && ga.out.format != unknown_format)) { if 
(sam_hdr_write(un_out, header) != 0) { fprintf(stderr, "[main_samview] failed to write the SAM header\n"); ret = 1; goto view_end; } } } } if (n_threads > 1) { if (out) hts_set_threads(out, n_threads); } if (is_header_only) goto view_end; // no need to print alignments if (optind + 1 >= argc) { // convert/print the entire file bam1_t *b = bam_init1(); int r; while ((r = sam_read1(in, header, b)) >= 0) { // read one alignment from `in' if (!process_aln(header, b, &settings)) { if (!is_count) { if (check_sam_write1(out, header, b, fn_out, &ret) < 0) break; } count++; } else { if (un_out) { if (check_sam_write1(un_out, header, b, fn_un_out, &ret) < 0) break; } } } if (r < -1) { fprintf(stderr, "[main_samview] truncated file.\n"); ret = 1; } bam_destroy1(b); } else { // retrieve alignments in specified regions int i; bam1_t *b; hts_idx_t *idx = sam_index_load(in, fn_in); // load index if (idx == 0) { // index is unavailable fprintf(stderr, "[main_samview] random alignment retrieval only works for indexed BAM or CRAM files.\n"); ret = 1; goto view_end; } b = bam_init1(); for (i = optind + 1; i < argc; ++i) { int result; hts_itr_t *iter = sam_itr_querys(idx, header, argv[i]); // parse a region in the format like `chr2:100-200' if (iter == NULL) { // region invalid or reference name not found int beg, end; if (hts_parse_reg(argv[i], &beg, &end)) fprintf(stderr, "[main_samview] region \"%s\" specifies an unknown reference name. Continue anyway.\n", argv[i]); else fprintf(stderr, "[main_samview] region \"%s\" could not be parsed. 
Continue anyway.\n", argv[i]); continue; } // fetch alignments while ((result = sam_itr_next(in, iter, b)) >= 0) { if (!process_aln(header, b, &settings)) { if (!is_count) { if (check_sam_write1(out, header, b, fn_out, &ret) < 0) break; } count++; } else { if (un_out) { if (check_sam_write1(un_out, header, b, fn_un_out, &ret) < 0) break; } } } hts_itr_destroy(iter); if (result < -1) { fprintf(stderr, "[main_samview] retrieval of region \"%s\" failed due to truncated file or corrupt BAM index file\n", argv[i]); ret = 1; break; } } bam_destroy1(b); hts_idx_destroy(idx); // destroy the BAM index } view_end: if (is_count && ret == 0) printf("%" PRId64 "\n", count); // close files, free and return if (in) check_sam_close("view", in, fn_in, "standard input", &ret); if (out) check_sam_close("view", out, fn_out, "standard output", &ret); if (un_out) check_sam_close("view", un_out, fn_un_out, "file", &ret); free(fn_list); free(fn_out); free(settings.library); free(fn_un_out); sam_global_args_free(&ga); if ( header ) bam_hdr_destroy(header); if (settings.bed) bed_destroy(settings.bed); if (settings.rghash) { khint_t k; for (k = 0; k < kh_end(settings.rghash); ++k) if (kh_exist(settings.rghash, k)) free((char*)kh_key(settings.rghash, k)); kh_destroy(rg, settings.rghash); } if (settings.remove_aux_len) { free(settings.remove_aux); } return ret; } static int usage(FILE *fp, int exit_status, int is_long_help) { fprintf(fp, "\n" "Usage: samtools view [options] <in.bam>|<in.sam>|<in.cram> [region ...]\n" "\n" "Options:\n" // output options " -b output BAM\n" " -C output CRAM (requires -T)\n" " -1 use fast BAM compression (implies -b)\n" " -u uncompressed BAM output (implies -b)\n" " -h include header in SAM output\n" " -H print SAM header only (no alignments)\n" " -c print only the count of matching records\n" " -o FILE output file name [stdout]\n" " -U FILE output reads not selected by filters to FILE [null]\n" // extra input " -t FILE FILE listing reference names and lengths 
(see long help) [null]\n" // read filters " -L FILE only include reads overlapping this BED FILE [null]\n" " -r STR only include reads in read group STR [null]\n" " -R FILE only include reads with read group listed in FILE [null]\n" " -q INT only include reads with mapping quality >= INT [0]\n" " -l STR only include reads in library STR [null]\n" " -m INT only include reads with number of CIGAR operations consuming\n" " query sequence >= INT [0]\n" " -f INT only include reads with all bits set in INT set in FLAG [0]\n" " -F INT only include reads with none of the bits set in INT set in FLAG [0]\n" // read processing " -x STR read tag to strip (repeatable) [null]\n" " -B collapse the backward CIGAR operation\n" " -s FLOAT integer part sets seed of random number generator [0];\n" " rest sets fraction of templates to subsample [no subsampling]\n" // general options " -@, --threads INT\n" " number of BAM/CRAM compression threads [0]\n" " -? print long help, including note about region specification\n" " -S ignored (input format is auto-detected)\n"); sam_global_opt_help(fp, "-.O.T"); fprintf(fp, "\n"); if (is_long_help) fprintf(fp, "Notes:\n" "\n" "1. This command now auto-detects the input format (BAM/CRAM/SAM).\n" " Further control over the CRAM format can be specified by using the\n" " --output-fmt-option, e.g. to specify the number of sequences per slice\n" " and to use avoid reference based compression:\n" "\n" "\tsamtools view -C --output-fmt-option seqs_per_slice=5000 \\\n" "\t --output-fmt-option no_ref -o out.cram in.bam\n" "\n" " Options can also be specified as a comma separated list within the\n" " --output-fmt value too. For example this is equivalent to the above\n" "\n" "\tsamtools view --output-fmt cram,seqs_per_slice=5000,no_ref \\\n" "\t -o out.cram in.bam\n" "\n" "2. The file supplied with `-t' is SPACE/TAB delimited with the first\n" " two fields of each line consisting of the reference name and the\n" " corresponding sequence length. 
The `.fai' file generated by \n" " `samtools faidx' is suitable for use as this file. This may be an\n" " empty file if reads are unaligned.\n" "\n" "3. SAM->BAM conversion: samtools view -bT ref.fa in.sam.gz\n" "\n" "4. BAM->SAM conversion: samtools view -h in.bam\n" "\n" "5. A region should be presented in one of the following formats:\n" " `chr1', `chr2:1,000' and `chr3:1000-2,000'. When a region is\n" " specified, the input alignment file must be a sorted and indexed\n" " alignment (BAM/CRAM) file.\n" "\n" "6. Option `-u' is preferred over `-b' when the output is piped to\n" " another samtools command.\n" "\n"); return exit_status; }
/** Implements the end_interval function of the plugin API
 *
 * Does nothing (returns 0) until at least CORSARO_DOS_INTERVAL has elapsed
 * since the last dump.  Otherwise walks the attack hash: expired vectors are
 * deleted and freed, vectors classified as attacks are collected and dumped
 * (ASCII or binary, depending on the mode of the output file) and every
 * surviving vector has its interval stats reset.
 *
 * @param corsaro   corsaro instance whose plugin state is used
 * @param int_end   interval that is ending
 * @return 0 on success (including "nothing to dump yet"), -1 on failure
 */
int corsaro_dos_end_interval(corsaro_t *corsaro, corsaro_interval_t *int_end)
{
  int this_interval = int_end->time-STATE(corsaro)->first_interval;

  khiter_t i;
  int j;
  attack_vector_t *vector;
  attack_vector_t **attack_arr = NULL;
  int attack_arr_cnt = 0;
  size_t attack_arr_cap;

  uint8_t gbuf[12];
  uint8_t cntbuf[4];

  if(this_interval < CORSARO_DOS_INTERVAL)
    {
      /* we haven't run for long enough to dump */
      return 0;
    }

  /* we either have hit exactly the right amount of time,
     or we have gone for too long, dump now and reset the counter */
  STATE(corsaro)->first_interval = int_end->time;

  /* this is an interval we care about */

  /* malloc an array big enough to hold the entire hash even though we wont
     need it to be that big.  An empty hash needs no array at all; skipping
     the call avoids treating a NULL result of malloc(0) as a failure */
  attack_arr_cap = kh_size(STATE(corsaro)->attack_hash);
  if(attack_arr_cap > 0 &&
     (attack_arr = malloc(sizeof(*attack_arr) * attack_arr_cap)) == NULL)
    {
      corsaro_log(__func__, corsaro,
		  "could not malloc array for attack vectors");
      return -1;
    }

  /* classify the flows and dump the attack ones */
  for(i = kh_begin(STATE(corsaro)->attack_hash);
      i != kh_end(STATE(corsaro)->attack_hash); ++i)
    {
      if(kh_exist(STATE(corsaro)->attack_hash, i))
	{
	  vector = kh_key(STATE(corsaro)->attack_hash, i);

	  if(attack_vector_is_expired(vector, int_end->time) != 0)
	    {
	      kh_del(av, STATE(corsaro)->attack_hash, i);
	      attack_vector_free(vector);
	      vector = NULL;
	    }
	  else if(attack_vector_is_attack(corsaro, vector, int_end->time) != 0)
	    {
	      /* this is an attack */
	      /* add it to the attack array so we can know how many
		 before we dump it */
	      attack_arr[attack_arr_cnt] = vector;
	      attack_arr_cnt++;
	    }
	  else
	    {
	      attack_vector_reset(vector);
	    }
	}
    }

  corsaro_io_write_interval_start(corsaro, STATE(corsaro)->outfile,
				  &corsaro->interval_start);
  if(corsaro->global_file != NULL)
    {
      corsaro_io_write_plugin_start(corsaro, corsaro->global_file,
				    PLUGIN(corsaro));
    }

  if(CORSARO_FILE_MODE(STATE(corsaro)->outfile) == CORSARO_FILE_MODE_ASCII)
    {
      if(corsaro->global_file != NULL)
	{
	  /* global stats */
	  /* dump the number of mismatched packets and vectors */
	  /* the counts are cast so the arguments match the PRIu32 format */
	  corsaro_file_printf(corsaro, corsaro->global_file,
			      "mismatch: %"PRIu32"\n"
			      "attack_vectors: %"PRIu32"\n"
			      "non-attack_vectors: %"PRIu32"\n",
			      STATE(corsaro)->number_mismatched_packets,
			      (uint32_t)attack_arr_cnt,
			      (uint32_t)(kh_size(STATE(corsaro)->attack_hash)
					 -attack_arr_cnt));
	}

      /* dump the number of vectors */
      corsaro_file_printf(corsaro, STATE(corsaro)->outfile, "%"PRIu32"\n",
			  (uint32_t)attack_arr_cnt);

      /* dump the vectors */
      for(j = 0; j < attack_arr_cnt; j++)
	{
	  if(ascii_dump(corsaro, attack_arr[j]) != 0)
	    {
	      corsaro_log(__func__, corsaro, "could not dump hash");
	      goto err;
	    }
	  /* reset the interval stats */
	  attack_vector_reset(attack_arr[j]);
	}
    }
  else if(CORSARO_FILE_MODE(STATE(corsaro)->outfile) == CORSARO_FILE_MODE_BINARY)
    {
      if(corsaro->global_file != NULL)
	{
	  /* global stats */
	  bytes_htonl(&gbuf[0], STATE(corsaro)->number_mismatched_packets);
	  bytes_htonl(&gbuf[4], attack_arr_cnt);
	  bytes_htonl(&gbuf[8],
		      kh_size(STATE(corsaro)->attack_hash)-attack_arr_cnt);
	  if(corsaro_file_write(corsaro, corsaro->global_file,
				&gbuf[0], 12) != 12)
	    {
	      corsaro_log(__func__, corsaro,
			  "could not dump global stats to file");
	      goto err;
	    }
	}

      /* dump the number of vectors */
      bytes_htonl(&cntbuf[0], attack_arr_cnt);
      if(corsaro_file_write(corsaro, STATE(corsaro)->outfile,
			    &cntbuf[0], 4) != 4)
	{
	  corsaro_log(__func__, corsaro,
		      "could not dump vector count to file");
	  goto err;
	}

      /* dump the vectors */
      for(j = 0; j < attack_arr_cnt; j++)
	{
	  if(binary_dump(corsaro, attack_arr[j]) != 0)
	    {
	      corsaro_log(__func__, corsaro, "could not dump hash");
	      goto err;
	    }
	  attack_vector_reset(attack_arr[j]);
	}
    }
  else
    {
      corsaro_log(__func__, corsaro, "invalid mode");
      goto err;
    }

  if(corsaro->global_file != NULL)
    {
      corsaro_io_write_plugin_end(corsaro, corsaro->global_file,
				  PLUGIN(corsaro));
    }
  corsaro_io_write_interval_end(corsaro, STATE(corsaro)->outfile, int_end);

  STATE(corsaro)->number_mismatched_packets = 0;

  free(attack_arr);

  /* if we are rotating, now is when we should do it */
  if(corsaro_is_rotate_interval(corsaro))
    {
      /* close the current file */
      if(STATE(corsaro)->outfile != NULL)
	{
	  corsaro_file_close(corsaro, STATE(corsaro)->outfile);
	  STATE(corsaro)->outfile = NULL;
	}
    }

  return 0;

 err:
  /* every failure after the allocation lands here so the scratch array
     is released */
  free(attack_arr);
  return -1;
}
/*
 * Run one collection cycle on `pic`.
 *
 * Steps, in order:
 *   1. mark from the context chain, the arena, the globals and pic->halt;
 *   2. rescan the attribute tables on pic->gc_attrs until a pass marks
 *      nothing new;
 *   3. delete attribute entries whose key is not alive, unlinking each
 *      table from pic->gc_attrs as it is processed;
 *   4. delete oblist entries whose symbol is not alive;
 *   5. walk the object list: unmark the living, finalize and free the rest.
 *
 * Does nothing after the assertion when pic->gc_enable is false.
 */
void
pic_gc(pic_state *pic)
{
  khash_t(oblist) *symbols = &pic->oblist;
  struct context *cxt;
  struct attr *attr;
  struct object *obj, *prev, *next;
  struct symbol *sym;
  size_t i, marked;
  int it;

  assert(pic->gc_attrs == NULL);

  if (! pic->gc_enable) {
    return;
  }

  /* scan objects */
  for (cxt = pic->cxt; cxt != NULL; cxt = cxt->prev) {
    if (cxt->fp) {
      gc_mark_object(pic, (struct object *)cxt->fp);
    }
    if (cxt->sp) {
      gc_mark_object(pic, (struct object *)cxt->sp);
    }
    if (cxt->irep) {
      gc_mark_object(pic, (struct object *)cxt->irep);
    }
    gc_mark(pic, cxt->conts);
  }
  for (i = 0; i < pic->ai; ++i) {
    gc_mark_object(pic, (struct object *)pic->arena[i]);
  }
  gc_mark(pic, pic->globals);
  gc_mark(pic, pic->halt);

  /*
   * scan weak references: a value is marked only when its key is alive and
   * the value is an object that is not alive yet; repeat until stable
   */
  do {
    marked = 0;

    for (attr = pic->gc_attrs; attr != NULL; attr = attr->prev) {
      khash_t(attr) *h = &attr->hash;

      for (it = kh_begin(h); it != kh_end(h); ++it) {
        struct object *key;
        pic_value val;

        if (! kh_exist(h, it))
          continue;

        key = kh_key(h, it);
        val = kh_val(h, it);

        if (is_alive(key) && pic_obj_p(pic, val)
            && ! is_alive((struct object *) pic_ptr(pic, val))) {
          gc_mark(pic, val);
          ++marked;
        }
      }
    }
  } while (marked > 0);

  /* reclaim dead weak references */
  for (; pic->gc_attrs != NULL; pic->gc_attrs = pic->gc_attrs->prev) {
    khash_t(attr) *h = &pic->gc_attrs->hash;

    for (it = kh_begin(h); it != kh_end(h); ++it) {
      if (! kh_exist(h, it))
        continue;

      obj = kh_key(h, it);
      if (! is_alive(obj)) {
        kh_del(attr, h, it);
      }
    }
  }

  /* drop oblist entries whose symbol did not survive */
  for (it = kh_begin(symbols); it != kh_end(symbols); ++it) {
    if (! kh_exist(symbols, it))
      continue;

    sym = kh_val(symbols, it);
    if (sym && ! is_alive((struct object *)sym)) {
      kh_del(oblist, symbols, it);
    }
  }

  /* reclaim dead objects */
  prev = &pic->gc_head;
  obj = prev->next;
  while (obj != &pic->gc_head) {
    next = obj->next;

    if (is_alive(obj)) {
      unmark(obj);
      prev = obj;               /* survivor becomes the new predecessor */
    } else {
      gc_finalize_object(pic, obj);
      pic_free(pic, obj);
      prev->next = next;        /* unlink; predecessor stays the same */
    }
    obj = next;
  }
}
ERR_VALUE kmer_freq_distribution(const PROGRAM_OPTIONS *Options, const uint32_t KMerSize, const ONE_READ *Reads, const size_t ReadCount) { int err; size_t maxValue = 0; khiter_t it; size_t kmerCount = 0; char *kmerString = NULL; khash_t(kc) *table = kh_init(kc); ERR_VALUE ret = ERR_INTERNAL_ERROR; ret = utils_calloc(KMerSize + 1, sizeof(char), &kmerString); if (ret == ERR_SUCCESS) { const ONE_READ *r = Reads; kmerString[KMerSize] = '\0'; for (size_t i = 0; i < ReadCount; ++i) { const READ_PART *p = &r->Part; read_split(r); if (p->ReadSequenceLength >= KMerSize) { for (size_t j = 0; j < p->ReadSequenceLength - KMerSize + 1; ++j) { char *s = NULL; memcpy(kmerString, p->ReadSequence + j, KMerSize*sizeof(char)); ret = utils_copy_string(kmerString, &s); if (ret == ERR_SUCCESS) { it = kh_put(kc, table, s, &err); switch (err) { case 0: kh_value(table, it) += 1; if (kh_value(table, it) > maxValue) maxValue = kh_value(table, it); utils_free(s); break; case 1: case 2: kh_value(table, it) = 1; break; default: ret = ERR_OUT_OF_MEMORY; break; } ++kmerCount; if (ret != ERR_SUCCESS) utils_free(s); } if (ret != ERR_SUCCESS) break; } } if (ret != ERR_SUCCESS) break; ++r; } if (ret == ERR_SUCCESS) { size_t *freqArray = NULL; ++maxValue; ret = utils_calloc(maxValue, sizeof(size_t), &freqArray); if (ret == ERR_SUCCESS) { memset(freqArray, 0, maxValue*sizeof(size_t)); for (it = kh_begin(table); it != kh_end(table); ++it) { if (kh_exist(table, it)) ++freqArray[kh_value(table, it)]; } for (size_t i = 0; i < maxValue; ++i) { if (freqArray[i] > 0) fprintf(stdout, "%Iu, %Iu, %lf\n", i, freqArray[i], (double)freqArray[i]*100/ (double)kmerCount); } utils_free(freqArray); } } utils_free(kmerString); } for (size_t i = kh_begin(table); i < kh_end(table); ++i) { if (kh_exist(table, i)) utils_free(kh_key(table, i)); } kh_destroy(kc, table); return ret; }
// Print every entry of the global seg_counter table as
// "<key as 4 hex digits> <value right-aligned in 6 columns> |".
// The entry is followed by a newline when its bucket index is a multiple
// of 16 and by a single space otherwise.
void printSegCounter()
{
    khint_t slot;

    for (slot = kh_begin(seg_counter); slot != kh_end(seg_counter); ++slot) {
        const char *tail;

        // skip buckets that hold no data
        if (!kh_exist(seg_counter, slot))
            continue;

        tail = (slot % 16 == 0) ? "\n" : " ";
        printf("%04X %*d |%s",
               kh_key(seg_counter, slot),
               6, kh_value(seg_counter, slot),
               tail);
    }
}
int main(int argc, char *argv[]) { int c, i, n, ret, res; int tid, pos, *n_plp; cmdopt_t o; bam_mplp_t mplp; const bam_pileup1_t **plp; aux_t **data; bam_hdr_t *h = 0; sv_t sv1; qual_sum_t qual2; khiter_t k_iter; khash_t(sv_hash) *sv_h = kh_init(sv_hash); khash_t(sv_geno) *geno_h = kh_init(sv_geno); khash_t(colmap) *smp_cols; khash_t(ped) *ped_h = 0; mempool_t *mp; char **samples; o.min_q = 40; o.min_s = 80; o.min_len = 150; o.min_dp = 10; o.bed = 0, o.fnped = 0, o.mi_prob=0.005; while ((c = getopt(argc, argv, "hq:s:l:d:b:p:m:")) >= 0) { if (c == 'h') { usage(stderr, &o); return 0; } else if (c == 'q') o.min_q = atoi(optarg); else if (c == 's') o.min_s = atoi(optarg); else if (c == 'l') o.min_len = atoi(optarg); else if (c == 'd') o.min_dp = atoi(optarg); else if (c == 'p') o.fnped = optarg; else if (c == 'm') o.mi_prob = atof(optarg); else if (c == 'b') { if ((o.bed = bed_read(optarg)) == NULL) { return -1; } } } if (o.mi_prob < 0.0000000000001 || o.mi_prob > 0.1) { fprintf(stderr, "Error. Probability of a mendelian inconsistency must be between 0.1 and 0.0000000000001.\n"); } if (argc - optind < 1) { usage(stderr, &o); return 1; } // Open files and initalize aux data // n = argc - optind; data = calloc(n, sizeof(aux_t*)); samples = (char**)malloc(n * sizeof(char*)); for (i = 0; i < n; ++i) { data[i] = calloc(1, sizeof (aux_t)); data[i]->fp = sam_open(argv[optind + i], "r"); if (!data[i]->fp) { fprintf(stderr, "Input file \"%s\" could not be opened.\n", argv[optind + 1]); return 1; } data[i]->min_mapq = o.min_q; data[i]->min_as = o.min_s; data[i]->min_len = o.min_len; data[i]->hdr = sam_hdr_read(data[i]->fp); if (!data[i]->hdr) { fprintf(stderr, "Could not read the header for input file \"%s\".\n", argv[optind + 1]); return 1; } samples[i] = find_sample(data[i]->hdr, &res); if (!samples[i]) { fprintf(stderr, "Warning. No sample name detected for bam %s. 
Using filename\n", argv[optind + i]); samples[i] = argv[optind + i]; } } h = data[0]->hdr; smp_cols = map_samples(samples, n); if (o.fnped) { if ((ped_h = read_ped(o.fnped, smp_cols)) == 0) { return -1; } } // The core data processing loop // mplp = bam_mplp_init(n, read_bam, (void**)data); n_plp = calloc(n, sizeof(int)); // n_plp[i] is the number of covering reads from the i-th BAM plp = calloc(n, sizeof(bam_pileup1_t*)); // plp[i] points to the array of covering reads in mplp //quals = (qual_vec_t*)calloc(n, sizeof(qual_vec_t)); mp = mp_init(); while ((ret = bam_mplp_auto(mplp, &tid, &pos, n_plp, plp)) > 0) { // iterate of positions with coverage int n_sv; if (o.bed && tid >= 0 && !bed_overlap(o.bed, h->target_name[tid], pos, pos+1)) continue; n_sv = plp2sv(h, tid, pos, n, n_plp, plp, sv_h); if (n_sv > 1) { fprintf(stderr, "Warning: more than two alleles detected at %s:%d\n", h->target_name[tid], pos); } if (n_sv) { fprintf(stderr, "SV detected at %d:%d\n", tid, pos); for (k_iter = kh_begin(sv_h); k_iter != kh_end(sv_h); ++k_iter) { if (kh_exist(sv_h, k_iter)) { sv1 = kh_value(sv_h, k_iter); fprintf(stderr, "SV tid1=%d, tid2=%d, pos1=%d, pos2=%d, ori1=%d, ori2=%d, allele=%d\n", sv1.tid1, sv1.tid2, sv1.pos1, sv1.pos2, sv1.ori1, sv1.ori2, sv1.allele); } } res = get_qual_data(h, tid, pos, n, n_plp, plp, n_sv + 1, sv_h, geno_h, mp); if (res < 0) { fprintf(stderr, "Error collecting quality data from reads\n"); return -1; } kh_clear(sv_hash, sv_h); } } print_header(h, optind, n, argv); genotype_sv(h, n, geno_h, o.min_dp, ped_h, o.mi_prob); free(n_plp); free(plp); bam_mplp_destroy(mplp); mp_destroy(mp); if (o.bed) bed_destroy(o.bed); for (i = 0; i < n; ++i) { bam_hdr_destroy(data[i]->hdr); sam_close(data[i]->fp); free(data[i]); free(samples[i]); } free(data); free(samples); kh_destroy(sv_hash, sv_h); kh_destroy(sv_geno, geno_h); kh_destroy(colmap, smp_cols); kh_destroy(ped, ped_h); return 0; }
/*
 * Callback reporting whether iterator position `it` of the global
 * srcsessmap table holds an entry; returns the result of kh_exist().
 */
static int cachessess_exist_cb(cache_iter_t it)
{
    const int present = kh_exist(srcsessmap, it);

    return present;
}
size_t sen_render_flush(int clear_buff) { // gl_check_error(); //_logfi("1"); blend_group_t* bg; khint_t i,k,j; size_t total = 0; khash_t(hmsp)* tgs; camera_t* cam = sen_camera(); tex_group_t* tg; khash_t(hmsp)* sgs; shader_group_t* sg; vector_clear(zsorter); for (k = kh_begin(g_bgs); k != kh_end(g_bgs); ++k) { if (!kh_exist(g_bgs,k)) continue; bg = kh_val(g_bgs, k); if (bg->num == 0) { kh_del(hmip,g_bgs,k); continue; } tgs = bg->tgs; // set_blending( (blend_func) (kh_key(g_bgs, k)) ); for (i = kh_begin(tgs); i != kh_end(tgs); ++i) { if (!kh_exist(tgs,i)) continue; tg = kh_val(tgs, i); if (tg->num == 0) { kh_del(hmsp,tgs,i); continue; } /* if (tg->tex) sen_texture_bind(tg->tex); else if (tg->font) sen_font_bind(tg->font); */ sgs = tg->sgs; for (j = kh_begin(sgs); j != kh_end(sgs); ++j) { if (!kh_exist(sgs,j)) continue; sg = kh_val(sgs, j); if (sg->num == 0 || !sg->buff) { kh_del(hmsp,sgs,j); continue; } if (sg->buff) { /* sen_shader_use(sg->program); { if (tg->tex || tg->font) sen_uniform_1iN(sg->program, "u_tex0", 0); sen_uniform_m4fN(sg->program, "u_mvp", cam->view_proj.data); vertex_buffer_render( sg->buff, GL_TRIANGLES); total+=vertex_buffer_size(sg->buff); if (clear_buff) vertex_buffer_clear( sg->buff ); //sen_shader_use(NULL); }*/ vector_push_back( zsorter, &sg ); } sg->num = 0; } tg->num = 0; } bg->num = 0; } if (zsorter->size > 0) vector_sort(zsorter, zcmp); for (j = 0; j < zsorter->size; j++) { shader_group_t* sg = *(shader_group_t**)vector_get(zsorter, j); // _logfi("%s %d",sg->name, sg->z); set_blending( (blend_func) (sg->bg->key) ); if (sg->tg->tex) sen_texture_bind(sg->tg->tex); else if (sg->tg->font) sen_font_bind(sg->tg->font); sen_shader_use(sg->program); { if (sg->tg->tex || sg->tg->font) sen_uniform_1iN(sg->program, "u_tex0", 0); sen_uniform_m4fN(sg->program, "u_mvp", sg->z > 9500 ? 
cam->proj.data : cam->view_proj.data); vertex_buffer_render( sg->buff, GL_TRIANGLES); total+=vertex_buffer_size(sg->buff); if (clear_buff) vertex_buffer_clear( sg->buff ); //sen_shader_use(NULL); } } // _logfi("-------------------------------------------------"); return total; }