/*
 * GC mark phase for a hash object: marks every key and every value held in
 * the hash's khash table. Does nothing when the table pointer is NULL.
 */
void
mrb_gc_mark_hash(mrb_state *mrb, struct RHash *hash)
{
  khash_t(ht) *tbl = hash->ht;
  khiter_t it;

  if (!tbl) return;

  it = kh_begin(tbl);
  while (it != kh_end(tbl)) {
    /* only occupied buckets carry a live key/value pair */
    if (kh_exist(tbl, it)) {
      mrb_value entry_key = kh_key(tbl, it);
      mrb_value entry_val = kh_value(tbl, it);

      mrb_gc_mark_value(mrb, entry_key);
      mrb_gc_mark_value(mrb, entry_val);
    }
    it++;
  }
}
/*
 * Finish callback: emits one two-element array [key, value] downstream for
 * every occupied entry of the table kept in the stream's private data.
 * The `data` argument is not used. Always returns STRM_OK.
 */
static int
rbk_finish(strm_stream* strm, strm_value data)
{
  struct rbk_data *state = strm->data;
  khiter_t it = kh_begin(state->tbl);

  while (it != kh_end(state->tbl)) {
    if (kh_exist(state->tbl, it)) {
      strm_value pair[2];

      pair[0] = kh_key(state->tbl, it);
      pair[1] = kh_value(state->tbl, it);
      strm_emit(strm, strm_ary_new(pair, 2), NULL);
    }
    it++;
  }
  return STRM_OK;
}
/*
 * Returns a new array holding every value stored in `hash`, in table
 * iteration order. A hash without a table yields an empty array.
 */
static mrb_value
mrb_hash_values(mrb_state *mrb, mrb_value hash)
{
  khash_t(ht) *tbl = RHASH_TBL(hash);
  khiter_t it;
  mrb_value result;

  if (!tbl) return mrb_ary_new(mrb);

  /* reserve room for all entries up front */
  result = mrb_ary_new_capa(mrb, kh_size(tbl));
  it = kh_begin(tbl);
  while (it != kh_end(tbl)) {
    if (kh_exist(tbl, it)) {
      mrb_value item = kh_value(tbl, it);

      mrb_ary_push(mrb, result, item);
    }
    it++;
  }
  return result;
}
/*
 * Builds a BAM header from a reference-name hash.
 *
 * Each hash value is read twice: truncated to int it is used as the target
 * index, and shifted right by 32 bits it is used as the target length.
 * The key pointers are stored in the header as-is (not copied).
 * Returns the newly initialised header after building its name hash.
 */
static bam_header_t *hash2header(const kh_ref_t *hash)
{
	bam_header_t *hdr = bam_header_init();
	khiter_t it;

	hdr->n_targets = kh_size(hash);
	hdr->target_name = (char**)calloc(kh_size(hash), sizeof(char*));
	hdr->target_len = (uint32_t*)calloc(kh_size(hash), 4);

	for (it = kh_begin(hash); it != kh_end(hash); ++it) {
		int idx;

		if (!kh_exist(hash, it))
			continue;
		idx = (int)kh_value(hash, it);               /* low bits: target index */
		hdr->target_name[idx] = (char*)kh_key(hash, it);
		hdr->target_len[idx] = kh_value(hash, it)>>32; /* high bits: length */
	}

	bam_init_header_hash(hdr);
	return hdr;
}
/*
 * Returns a new array with the values of `hash`. Values that are not
 * special constants are duplicated with mrb_obj_dup before being pushed;
 * special constants are pushed unchanged. A hash without a table yields
 * an empty array.
 */
static mrb_value
mrb_hash_values(mrb_state *mrb, mrb_value hash)
{
  khash_t(ht) *tbl = RHASH_TBL(hash);
  mrb_value result = mrb_ary_new(mrb);
  khiter_t it;

  if (!tbl) return result;

  it = kh_begin(tbl);
  while (it != kh_end(tbl)) {
    if (kh_exist(tbl, it)) {
      mrb_value item = kh_value(tbl, it);

      if (!mrb_special_const_p(item)) {
        item = mrb_obj_dup(mrb, item);
      }
      mrb_ary_push(mrb, result, item);
    }
    it++;
  }
  return result;
}
/*
 * Debug helper: prints every non-zero frequency of `st` to stderr as
 * "<value>\t<count>" lines, first from the fixed freqs[] array and then,
 * if present, from the overflow hash st->h.
 */
void cram_stats_dump(cram_stats *st)
{
    int val;
    khint_t it;

    fprintf(stderr, "cram_stats:\n");

    for (val = 0; val < MAX_STAT_VAL; val++) {
        if (st->freqs[val])
            fprintf(stderr, "\t%d\t%d\n", val, st->freqs[val]);
    }

    if (!st->h)
        return;

    for (it = kh_begin(st->h); it != kh_end(st->h); it++) {
        if (kh_exist(st->h, it))
            fprintf(stderr, "\t%d\t%d\n", kh_key(st->h, it), kh_val(st->h, it));
    }
}
/*
 * Returns true if any value stored in `hash` compares equal (mrb_equal)
 * to `value`; false otherwise, including when the hash has no table.
 */
static mrb_value
mrb_hash_has_valueWithvalue(mrb_state *mrb, mrb_value hash, mrb_value value)
{
  khash_t(ht) *tbl = RHASH_TBL(hash);
  khiter_t it;

  if (!tbl) return mrb_false_value();

  for (it = kh_begin(tbl); it != kh_end(tbl); it++) {
    if (kh_exist(tbl, it) && mrb_equal(mrb, kh_value(tbl, it), value)) {
      return mrb_true_value();
    }
  }
  return mrb_false_value();
}
// Called when the manager sends a complete message void messageArrivedFromManager() { printf ("Message arrived: >%s< for >%s<\r\n", commandMessage, commandClientId); // See if the client is connected, if so immediately forward khiter_t k = kh_get(clientStatuses, clientStatuses, (char*)commandClientId); // Find it in the hash if (k != kh_end(clientStatuses)) { // Was it in the hash? clientStatus* status = kh_value(clientStatuses, k); // Grab the clientStatus from the hash snprintf(httpResponse, HTTP_RESPONSE_SIZE, HTTP_TEMPLATE, commandMessageLen, commandMessage); // Compose the response message write(status->io.fd, httpResponse, strlen(httpResponse)); // Send it closeConnection((ev_io*)status); // Close the conn return; } // If not, add to a queue khiter_t q = kh_get(queue, queue, (char*)commandClientId); // See if this client is already in the queue if (q == kh_end(queue)) { printf("Creating queue for %s\r\n", commandClientId); // This client needs to be added to the queue // First make a new list klist_t(messages) *newMessageList = kl_init(messages); *kl_pushp(messages, newMessageList) = strdup((char*)commandMessage); // Add the message to the list // Now make a new hash entry pointing to this new list int ret; q = kh_put(queue, queue, strdup((char*)commandClientId), &ret); kh_value(queue, q) = newMessageList; } else { printf("Adding to the queue for %s\r\n", commandClientId); // This client is in the queue already eg it has a hash entry // Pushp puts this message at the end of the queue, so that shift will grab the oldest first (like a FIFO) *kl_pushp(messages, kh_value(queue, q)) = strdup((char*)commandMessage); } // Now do a printout of the hash list for (khiter_t qi = kh_begin(queue); qi < kh_end(queue); qi++) { if (kh_exist(queue, qi)) { printf("Queue for %s\n", kh_key(queue,qi)); klist_t(messages) *list = kh_value(queue, qi); kliter_t(messages) *li; for (li = kl_begin(list); li != kl_end(list); li = kl_next(li)) printf("%s\n", kl_val(li)); 
printf("----\n"); } } }
/*
 * Releases a BCF header and everything it owns: the keys of each of the
 * three dictionaries, the dictionaries themselves, the id arrays, all
 * header records, the sample array, the scratch string and the text.
 *
 * Passing NULL is a no-op, so error paths can call this unconditionally.
 */
void bcf_hdr_destroy(bcf_hdr_t *h)
{
    int i;
    khint_t k;

    if (!h) return;

    for (i = 0; i < 3; ++i) {
        vdict_t *d = (vdict_t*)h->dict[i];
        if (d == 0) continue;
        /* dictionary keys are freed here before the table itself */
        for (k = kh_begin(d); k != kh_end(d); ++k)
            if (kh_exist(d, k)) free((char*)kh_key(d, k));
        kh_destroy(vdict, d);
        free(h->id[i]);
    }
    for (i = 0; i < h->nhrec; i++)
        bcf_hrec_destroy(h->hrec[i]);
    if (h->nhrec) free(h->hrec);
    if (h->samples) free(h->samples);
    free(h->mem.s);
    free(h->text);
    free(h);
}
/*
 * Reverse lookup of a symbol's name by scanning the name->symbol table.
 *
 * lenp must be a pointer to a size_t variable; it receives the name length,
 * or 0 when the symbol is not found. Returns the name pointer stored in the
 * table, or NULL when the symbol is missing.
 */
const char*
mrb_sym2name_len(mrb_state *mrb, mrb_sym sym, size_t *lenp)
{
  khash_t(n2s) *tbl = mrb->name2sym;
  khiter_t it;
  symbol_name found;

  for (it = kh_begin(tbl); it != kh_end(tbl); it++) {
    if (!kh_exist(tbl, it)) continue;
    if (kh_value(tbl, it) != sym) continue;

    found = kh_key(tbl, it);
    *lenp = found.len;
    return found.name;
  }

  *lenp = 0;
  return NULL;  /* missing */
}
int merge_interval(kh_pos_t* positions_read, char *max_chromosome_merged, unsigned long max_position_merged, char **chromosome_order, int num_chromosomes, vcf_file_t **files, shared_options_data_t *shared_options_data, merge_options_data_t *options_data, list_t *output_list) { int num_entries = 0; #pragma omp parallel for num_threads(shared_options_data->num_threads) reduction(+:num_entries) for (int k = kh_begin(positions_read); k < kh_end(positions_read); k++) { if (kh_exist(positions_read, k)) { array_list_t *records_in_position = kh_value(positions_read, k); assert(records_in_position); vcf_record_t *record = ((vcf_record_file_link*) array_list_get(0, records_in_position))->record; vcf_record_file_link **links = NULL; int num_links = 0; // Remove positions prior to the last chromosome:position to merge int cmp_chrom = compare_chromosomes(record->chromosome, max_chromosome_merged, chromosome_order, num_chromosomes); if (cmp_chrom < 0 || (cmp_chrom == 0 && compare_positions(record->position, max_position_merged) <= 0)) { links = records_in_position->items; num_links = records_in_position->size; } // Launch merge if (num_links > 0) { // printf("links[0] = %s:%ld in file %s\n", links[0]->record->chromosome, links[0]->record->position, links[0]->file->filename); int err_code = 0; vcf_record_t *merged = merge_position(links, num_links, files, options_data->num_files, options_data, &err_code); if (!err_code) { list_item_t *item = list_item_new(k, MERGED_RECORD, merged); list_insert_item(item, output_list); num_entries += 1; } // Free empty nodes (lists of records in the same position) array_list_free(records_in_position, vcf_record_file_link_free); kh_del(pos, positions_read, k); } } // End kh_exist } return num_entries; }
/*
 * Destroys an integer-keyed hash table.
 *
 * If a value destructor (free_value_fn) is set, it is invoked on every
 * stored value before the underlying khash table and the wrapper itself
 * are released. Passing NULL is a no-op.
 */
void int_htable_destroy(int_htable *ht)
{
    khiter_t k;

    if (!ht) {
        return;
    }

    /* The original started the scan with kh_begin(sh->htable); `sh` is not
     * declared in this function, so the table is named consistently as `ht`. */
    for (k = kh_begin(ht->htable); k != kh_end(ht->htable); ++k) {
        if (kh_exist(ht->htable, k)) {
            if (ht->free_value_fn) {
                ht->free_value_fn(kh_value(ht->htable, k));
            }
        }
    }

    kh_destroy(int, ht->htable);
    free(ht);
}
/*
 * Applies the given procedure to each key of the dictionary.
 * Arguments are fetched with the "ld" format (a procedure and a dictionary).
 * Only keys are passed to the procedure; the result is the undefined value.
 */
static pic_value
pic_dict_dictionary_for_each(pic_state *pic)
{
  struct pic_proc *proc;
  struct pic_dict *dict;
  khash_t(dict) *tbl;
  khiter_t pos;

  pic_get_args(pic, "ld", &proc, &dict);

  tbl = &dict->hash;
  pos = kh_begin(tbl);
  while (pos != kh_end(tbl)) {
    if (kh_exist(tbl, pos)) {
      pic_apply1(pic, proc, pic_obj_value(kh_key(tbl, pos)));
    }
    ++pos;
  }

  return pic_undef_value();
}
/*
 * Calls `func` for every entry of the instance-variable table.
 *
 * The callback result steers the walk:
 *   > 0  stop immediately and return FALSE
 *   < 0  delete the current entry and keep going
 *   = 0  keep going
 * Returns TRUE when the whole table was visited.
 */
static mrb_bool
iv_foreach(mrb_state *mrb, iv_tbl *t, iv_foreach_func *func, void *p)
{
  khash_t(iv) *tbl = &t->h;
  khiter_t it;
  int rc;

  if (!tbl) return TRUE;

  for (it = kh_begin(tbl); it != kh_end(tbl); it++) {
    if (!kh_exist(tbl, it)) continue;

    rc = (*func)(mrb, kh_key(tbl, it), kh_value(tbl, it), p);
    if (rc > 0) return FALSE;
    if (rc < 0) {
      kh_del(iv, tbl, it);
    }
  }
  return TRUE;
}
/*
 * Compares two hashes.
 *
 * - Identical objects are equal.
 * - If hash2 is not a hash: false unless it responds to to_hash, otherwise
 *   the comparison is delegated to hash2 (mrb_eql when `eql` is set,
 *   mrb_equal otherwise) and its result is returned as a fixnum.
 * - Two hashes are equal when both have no table, or when they have the
 *   same size and every key of hash1 exists in hash2 with an mrb_equal value.
 */
static mrb_value
hash_equal(mrb_state *mrb, mrb_value hash1, mrb_value hash2, int eql)
{
  khash_t(ht) *lhs, *rhs;
  khiter_t li, ri;
  mrb_value key;

  if (mrb_obj_equal(mrb, hash1, hash2)) return mrb_true_value();

  if (!mrb_hash_p(hash2)) {
    if (!mrb_respond_to(mrb, hash2, mrb_intern(mrb, "to_hash"))) {
      return mrb_false_value();
    }
    if (eql) {
      return mrb_fixnum_value(mrb_eql(mrb, hash2, hash1));
    }
    return mrb_fixnum_value(mrb_equal(mrb, hash2, hash1));
  }

  lhs = RHASH_TBL(hash1);
  rhs = RHASH_TBL(hash2);

  /* table-less hashes are only equal to other table-less hashes */
  if (!lhs) {
    if (rhs) return mrb_false_value();
    return mrb_true_value();
  }
  if (!rhs) return mrb_false_value();
  if (kh_size(lhs) != kh_size(rhs)) return mrb_false_value();

  for (li = kh_begin(lhs); li != kh_end(lhs); li++) {
    if (!kh_exist(lhs, li)) continue;

    key = kh_key(lhs, li);
    ri = kh_get(ht, rhs, key);
    if (ri == kh_end(rhs)) return mrb_false_value();
    if (!mrb_equal(mrb, kh_value(lhs, li), kh_value(rhs, ri))) {
      return mrb_false_value();
    }
  }
  return mrb_true_value();
}
/*
 * Returns an array of the hash's keys, each placed at the index recorded in
 * the `n` field of its table entry. An absent or empty table yields an
 * empty array.
 */
mrb_value
mrb_hash_keys(mrb_state *mrb, mrb_value hash)
{
  khash_t(ht) *tbl = RHASH_TBL(hash);
  khiter_t it;
  mrb_value result, *slots;

  if (!tbl || kh_size(tbl) == 0) return mrb_ary_new(mrb);

  result = mrb_ary_new_capa(mrb, kh_size(tbl));
  /* writing the last index up front sizes the array so that every slot
     can then be written directly through the raw pointer */
  mrb_ary_set(mrb, result, kh_size(tbl)-1, mrb_nil_value());
  slots = RARRAY_PTR(result);

  it = kh_begin(tbl);
  while (it != kh_end(tbl)) {
    if (kh_exist(tbl, it)) {
      mrb_value entry_key = kh_key(tbl, it);
      mrb_hash_value entry = kh_value(tbl, it);

      slots[entry.n] = entry_key;
    }
    it++;
  }
  return result;
}
/*
 * Returns true when every key of `hash` is present in `dt` with a value
 * that is mrb_equal to the one in `hash`; false on the first mismatch.
 * The `recur` argument is not consulted.
 */
static mrb_value
recursive_eql(mrb_state *mrb, mrb_value hash, mrb_value dt, int recur)
{
  khash_t(ht) *lhs = RHASH_TBL(hash);
  khash_t(ht) *rhs = RHASH_TBL(dt);
  khiter_t li, ri;
  mrb_value key;

  for (li = kh_begin(lhs); li != kh_end(lhs); li++) {
    if (!kh_exist(lhs, li)) continue;

    key = kh_key(lhs, li);
    ri = kh_get(ht, rhs, key);
    if (ri == kh_end(rhs)) return mrb_false_value();
    if (!mrb_equal(mrb, kh_value(lhs, li), kh_value(rhs, ri))) {
      return mrb_false_value();
    }
  }
  return mrb_true_value();
}
/*
 * Method body: takes one object argument and returns true if any entry of
 * the receiver hash stores a value (field `v`) that is mrb_equal to it.
 * Returns false otherwise, including when the hash has no table.
 */
static mrb_value
mrb_hash_has_value(mrb_state *mrb, mrb_value hash)
{
  mrb_value wanted;
  khash_t(ht) *tbl;
  khiter_t it;

  mrb_get_args(mrb, "o", &wanted);
  tbl = RHASH_TBL(hash);

  if (!tbl) return mrb_false_value();

  for (it = kh_begin(tbl); it != kh_end(tbl); it++) {
    if (kh_exist(tbl, it) && mrb_equal(mrb, kh_value(tbl, it).v, wanted)) {
      return mrb_true_value();
    }
  }
  return mrb_false_value();
}
/*
 * Creates a copy of `hash`.
 *
 * Allocates a new hash object with a fresh table, inserts every key of the
 * source (wrapped with KEY()), copies each value and assigns each new entry
 * the position `n` equal to its insertion order in the copy. The default /
 * proc-default flags are carried over, and a non-nil ifnone value is stored
 * in the copy's "ifnone" instance variable.
 */
static mrb_value
mrb_hash_dup(mrb_state *mrb, mrb_value hash)
{
  struct RHash *copy;
  khash_t(ht) *src, *dst;
  khiter_t si, di;
  mrb_value ifnone, result;

  src = RHASH_TBL(hash);
  copy = (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class);
  copy->ht = kh_init(ht, mrb);

  if (src && kh_size(src) > 0) {
    dst = copy->ht;
    si = kh_begin(src);
    while (si != kh_end(src)) {
      if (kh_exist(src, si)) {
        /* the arena is saved and restored around the insertion */
        int ai = mrb_gc_arena_save(mrb);
        di = kh_put(ht, mrb, dst, KEY(kh_key(src, si)));
        mrb_gc_arena_restore(mrb, ai);

        kh_val(dst, di).v = kh_val(src, si).v;
        kh_val(dst, di).n = kh_size(dst)-1;
      }
      si++;
    }
  }

  if (MRB_RHASH_DEFAULT_P(hash)) {
    copy->flags |= MRB_HASH_DEFAULT;
  }
  if (MRB_RHASH_PROCDEFAULT_P(hash)) {
    copy->flags |= MRB_HASH_PROC_DEFAULT;
  }

  result = mrb_obj_value(copy);
  ifnone = RHASH_IFNONE(hash);
  if (!mrb_nil_p(ifnone)) {
    mrb_iv_set(mrb, result, mrb_intern_lit(mrb, "ifnone"), ifnone);
  }
  return result;
}
/*
 * Finds the symbol under which class `c` is known.
 *
 * If the class has a non-nil "__classid__" instance variable, that value is
 * converted with SYM2ID and returned. Otherwise the iv table of `outer`
 * (or of Object when `outer` is NULL) is scanned for an entry whose value is
 * exactly `c`, and that entry's key is returned. When nothing matches, the
 * nil "__classid__" value is converted with SYM2ID and returned.
 */
static mrb_sym
class_sym(mrb_state *mrb, struct RClass *c, struct RClass *outer)
{
  khash_t(iv) *tbl;
  khiter_t it;
  mrb_value candidate;
  mrb_value name;

  name = mrb_obj_iv_get(mrb, (struct RObject*)c, mrb_intern(mrb, "__classid__"));
  if (!mrb_nil_p(name)) return SYM2ID(name);

  if (!outer) outer = mrb->object_class;
  tbl = outer->iv;

  for (it = kh_begin(tbl); it != kh_end(tbl); it++) {
    if (!kh_exist(tbl, it)) continue;

    candidate = kh_value(tbl, it);
    if (mrb_type(candidate) == c->tt && mrb_class_ptr(candidate) == c) {
      return kh_key(tbl, it);
    }
  }
  return SYM2ID(name);
}
/*
 * Appends the instance variables of `obj` to the inspect string `str`.
 *
 * On recursion only " ..." is appended. Otherwise each variable is appended
 * as "name=<inspect of value>". While the first character of `str` is still
 * '-', the next variable is treated as the first one: the marker is replaced
 * by '#' and a space is used as separator; later variables are separated by
 * ", ". In all cases ">" is appended and the first character is forced to
 * '#'. Returns `str`.
 */
static mrb_value
inspect_obj(mrb_state *mrb, mrb_value obj, mrb_value str, int recur)
{
  if (recur) {
    mrb_str_cat2(mrb, str, " ...");
  }
  else {
    kh_iv_t *tbl = RCLASS_IV_TBL(obj);
    khiter_t it;

    if (tbl) {
      it = kh_begin(tbl);
      while (it != kh_end(tbl)) {
        if (kh_exist(tbl, it)) {
          mrb_sym ivname = kh_key(tbl, it);
          mrb_value ivval = kh_value(tbl, it);

          if (RSTRING_PTR(str)[0] != '-') {
            mrb_str_cat2(mrb, str, ", ");
          }
          else {
            /* first element: flip the marker so later ones get a comma */
            RSTRING_PTR(str)[0] = '#';
            mrb_str_cat2(mrb, str, " ");
          }
          mrb_str_cat2(mrb, str, mrb_sym2name(mrb, ivname));
          mrb_str_cat2(mrb, str, "=");
          mrb_str_append(mrb, str, mrb_inspect(mrb, ivval));
        }
        it++;
      }
    }
  }

  mrb_str_cat2(mrb, str, ">");
  RSTRING_PTR(str)[0] = '#';
  return str;
}
/*
 * Returns a new hash object containing a copy of every key/value pair of
 * `hash`. Keys are wrapped with KEY() on insertion and values are copied
 * as-is.
 *
 * The source table may be absent (RHASH_TBL yields NULL for a hash that has
 * no table yet); in that case the result is simply an empty hash instead of
 * dereferencing a NULL table.
 */
mrb_value
mrb_hash_dup(mrb_state *mrb, mrb_value hash)
{
  struct RHash* ret;
  khash_t(ht) *h, *ret_h;
  khiter_t k, ret_k;

  h = RHASH_TBL(hash);
  ret = (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class);
  ret->ht = kh_init(ht, mrb);

  /* guard against a table-less source before reading its size */
  if (h && kh_size(h) > 0) {
    ret_h = ret->ht;

    for (k = kh_begin(h); k != kh_end(h); k++) {
      if (kh_exist(h,k)) {
        ret_k = kh_put(ht, ret_h, KEY(kh_key(h,k)));
        kh_val(ret_h, ret_k) = kh_val(h,k);
      }
    }
  }

  return mrb_obj_value(ret);
}
/*
 * Copies every binding of s2's environment into s1's environment.
 *
 * A missing environment table on either state is created first. The table
 * created for s2 is stored in s2->env; the original stored it in s1->env,
 * which replaced (and leaked) the table just created for s1 while the copy
 * loop kept writing into the orphaned one.
 *
 * Returns STRM_OK on success, or STRM_NG as soon as a key cannot be
 * inserted: kh_put reporting 0 (key already present in s1) or -1
 * (operation failed). Bindings copied before the failure stay in s1.
 */
int
strm_env_copy(strm_state* s1, strm_state* s2)
{
  strm_env *e1 = s1->env;
  strm_env *e2 = s2->env;
  khiter_t k, kk;
  int r;

  if (!e1) {
    e1 = s1->env = kh_init(env);
  }
  if (!e2) {
    e2 = s2->env = kh_init(env);
  }
  for (k = kh_begin(e2); k != kh_end(e2); k++) {
    if (kh_exist(e2, k)) {
      kk = kh_put(env, e1, kh_key(e2, k), &r);
      if (r <= 0) return STRM_NG; /* r=0  key is present in the hash table */
                                  /* r=-1 operation failed */
      kh_value(e1, kk) = kh_value(e2, k);
    }
  }
  return STRM_OK;
}
/*
 * Python entry point: releases the epoll wrapper registered under the given
 * epoll file descriptor (single int argument).
 *
 * Raises (returns NULL with an exception set) when the argument cannot be
 * parsed, when the descriptor is not registered (KeyError), or when
 * ev_close() fails (SystemError). On success every ev_header stored in the
 * wrapper's handler table is freed, the table is destroyed, the wrapper is
 * removed from pyep_ht and freed, and None is returned.
 */
static PyObject* pyext_epoll_free(PyObject *self,PyObject *args){
    int epfd;
    struct pyep_data *pyep;
    khiter_t it;

    if(!PyArg_ParseTuple(args,"i",&epfd)){
        PyErr_BadArgument();
        return NULL;
    }

    pyep = pyep_getby_epfd(epfd);
    if(pyep == NULL){
        PyErr_SetString(PyExc_KeyError,"epoll file descriptor not found");
        return NULL;
    }

    if(ev_close(&pyep->evdata)){
        PyErr_SetString(PyExc_SystemError,"epoll free failed");
        return NULL;
    }

    /* release every handler record, then the table that held them */
    it = kh_begin(pyep->evhdr_ht);
    while(it != kh_end(pyep->evhdr_ht)){
        if(kh_exist(pyep->evhdr_ht,it)){
            free((struct ev_header*)kh_value(pyep->evhdr_ht,it));
        }
        it++;
    }
    kh_destroy(ptr,pyep->evhdr_ht);

    /* unregister the wrapper itself */
    it = kh_get(ptr,pyep_ht,epfd);
    kh_del(ptr,pyep_ht,it);
    free(pyep);

    Py_INCREF(Py_None);
    return Py_None;
}
/*
 * Prints every occupied entry of seg_counter as "KEY(hex) COUNT |".
 * An entry is followed by a newline when its bucket index is a multiple of
 * 16, and by a space otherwise.
 */
void printSegCounter() {
    khint_t slot = kh_begin(seg_counter);

    while (slot != kh_end(seg_counter)) {
        if (kh_exist(seg_counter, slot)) {   // bucket holds data
            const char *sep = (slot % 16 == 0) ? "\n" : " ";
            printf("%04X %*d |%s", kh_key(seg_counter, slot), 6, kh_value(seg_counter, slot), sep);
        }
        ++slot;
    }
}
/*
 * Scans every BAM file registered in bamNFOp for soft-clipped reads on human
 * chromosomes, pairs each with its mate via getPairedSam(), and accumulates
 * them into a pierCluster. Whenever sam_plp_push() refuses a read (returns
 * non-zero), the current cluster is written to ./test.txt (human range,
 * virus range, then every read in SAM text form) and a new cluster is
 * started.
 *
 * Returns exit_code (0, or 1 if formatting a read failed) after processing
 * all files; returns EXIT_FAILURE / 1 immediately when a file, its header or
 * its index cannot be opened.
 *
 * NOTE(review): the early returns leave `fs`, `in`, `h`, `ChrIsHum`, the
 * bam1_t buffers and the kstrings unreleased, and the results of fopen() and
 * malloc() are used without a NULL check — confirm whether that is acceptable.
 * NOTE(review): ks2, ks3, bR1..bR3, R1/R2/RV and d2 are initialised and
 * released here but never otherwise used in the visible code.
 */
int do_grep() {
#ifdef DEBUGa
	printf("[!]do_grep\n");
#endif
	BamInfo_t *pbam;
	kh_cstr_t BamID;
	khiter_t ki, bami;
	kstring_t ks1 = { 0, 0, NULL };
	kstring_t ks2 = { 0, 0, NULL };
	kstring_t ks3 = { 0, 0, NULL };
	samFile *in;
	bam_hdr_t *h;
	hts_idx_t *idx;
	bam1_t *b, *d, *d2, *bR1, *bR2, *bR3;
	bR1 = bam_init1();
	bR2 = bam_init1();
	bR3 = bam_init1();
	int r = 0, exit_code = 0;
	kvec_t(bam1_t) R1, R2, RV;
	pierCluster_t *pierCluster;
	/* Cluster dump file; opened once and shared by all input BAMs. */
	FILE *fs = fopen("./test.txt","w");
	for (bami = kh_begin(bamNFOp); bami != kh_end(bamNFOp); ++bami) {
		if (kh_exist(bamNFOp, bami)) {
			kv_init(R1);
			kv_init(R2);
			kv_init(RV);
			BamID = kh_key(bamNFOp, bami);
			pbam = &kh_value(bamNFOp, bami);
			fprintf(stderr, "%u [%s]=%s\t%u %u\n",bami,BamID,pbam->fileName,pbam->insertSize,pbam->SD);
			in = sam_open(pbam->fileName, "r");
			if (in == NULL) {
				fprintf(stderr, "[x]Error opening \"%s\"\n", pbam->fileName);
				return EXIT_FAILURE;
			}
			h = sam_hdr_read(in);
			/* Per-target flag table: ChrIsHum[tid] is copied from the isHum
			 * field of the chrNFOp entry for that target name. */
			int8_t *ChrIsHum;
			if (h == NULL) {
				fprintf(stderr, "[x]Couldn't read header for \"%s\"\n", pbam->fileName);
				return EXIT_FAILURE;
			} else {
				ChrIsHum = malloc(h->n_targets * sizeof(int8_t));
				for (int32_t i=0; i < h->n_targets; ++i) {
					ki = kh_get(chrNFO, chrNFOp, h->target_name[i]);
					if (ki == kh_end(chrNFOp)) {
						/* Every target of the BAM must be known; errx() terminates. */
						errx(4,"[x]Cannot find ChrID for [%s] !",h->target_name[i]);
					} else {
						ChrInfo_t * tmp = &kh_value(chrNFOp, ki);
						ChrIsHum[i] = tmp->isHum;
					}
				}
			}
			h->ignore_sam_err = 0;
			b = bam_init1();
			d = bam_init1();
			d2 = bam_init1();
			if ((idx = sam_index_load(in, pbam->fileName)) == 0) {
				fprintf(stderr, "[E::%s] fail to load the BAM index\n", __func__);
				return 1;
			}
			pierCluster = sam_plp_init();
			while ((r = sam_read1(in, h, b)) >= 0) {
				/* `flag` first means "has a long enough soft clip"; it is then
				 * reset and reused as a bit set: bit 0 = read accepted,
				 * bit 1 = mate fetched. */
				int8_t flag = false;
				const bam1_core_t *c = &b->core;
				if (c->flag & BAM_FSECONDARY) continue;
				if (c->n_cigar) {
					uint32_t *cigar = bam_get_cigar(b);
					for (int i = 0; i < c->n_cigar; ++i) {
						if (bam_cigar_opchr(cigar[i])=='S') {	// soft clipping
							if ( bam_cigar_oplen(cigar[i]) >= myConfig.minGrepSlen ) {
								flag = true;
							}
						}
					}
				}
				if (flag && ChrIsHum[c->tid]) {	// Now, skip Virus items.
					flag = 0;	// recycle
					if ((c->mtid == c->tid && ChrIsHum[c->tid]) || (ChrIsHum[c->tid] ^ ChrIsHum[c->mtid])) {
						// Only keep reads whose mate maps to the same human chromosome,
						// or to a different species (i.e. one mate lies on the virus).
						flag |= 1;
					}
					if (getPairedSam(in, idx, b, d) != 0) {
						/* Mate could not be fetched: drop this read. */
						flag &= ~1;
						continue;
					} else {
						flag |= 2;
					}
					/*
					 * For a BAM_FSECONDARY (256) read, jumping to the mate twice has
					 * the same effect as reading the SA tag. The original note showed
					 * three alignments of one simulated read as an example: flag 353
					 * on chr2 (50S40M), flag 177 on chr18 (40S50M) and flag 113 on the
					 * virus contig gi|59585|emb|X04615.1| (40S46M4S), whose SA tags
					 * point at one another.
					 */
					if (((flag & 3) == 3) /*&& enoughMapQ >= myConfig.samples*/) {
						if (sam_plp_push(ChrIsHum, pierCluster, b) == 0) {
							/* Read joined the current cluster; add its mate too when the
							 * mate is on a non-human target. */
							if ((!ChrIsHum[(d->core).tid]) && (flag & 2)) sam_plp_push(ChrIsHum, pierCluster, d);
						} else {
							/* Read was rejected by the current cluster: dump the cluster
							 * and start a fresh one.
							 * NOTE(review): the rejected read `b` is not pushed into the
							 * new cluster — confirm this is intended. */
							fprintf(fs,"[%s]\nHumRange=%s:%d-%d\n", BamID, h->target_name[(pierCluster->HumanRange).tid], (pierCluster->HumanRange).pos, (pierCluster->HumanRange).endpos);
							fprintf(fs,"VirRange=%s:%d-%d\n", h->target_name[(pierCluster->VirusRange).tid], (pierCluster->VirusRange).pos, (pierCluster->VirusRange).endpos);
							for (size_t i=0; i<kv_size(pierCluster->Reads);++i) {
								bam1_t *bi = kv_A(pierCluster->Reads, i);
								if (sam_format1(h, bi, &ks1) < 0) {
									fprintf(stderr, "Error writing output.\n");
									exit_code = 1;
									break;
								} else {
									fprintf(fs,"%s\n",ks1.s);
								}
							}
							fprintf(fs,"\n");
							sam_plp_dectroy(pierCluster);
							pierCluster = sam_plp_init();
						}
					}
				}
			}
			/* Per-file teardown. */
			hts_idx_destroy(idx);
			bam_destroy1(b);
			bam_destroy1(d);
			bam_destroy1(d2);
			bam_hdr_destroy(h);
			r = sam_close(in);
			free(ChrIsHum);
#ifdef DEBUGa
			fflush(NULL);
#endif
			/* NOTE(review): the last cluster is destroyed without being written
			 * to `fs` — confirm this is intended. */
			sam_plp_dectroy(pierCluster);
		}
	}
	fclose(fs);
	/* Presumably releases state cached inside getPairedSam (the original
	 * comment reads "sam_close(fp2)") — TODO confirm. */
	getPairedSam(NULL, NULL, NULL, NULL);
	bam_destroy1(bR1);
	bam_destroy1(bR2);
	bam_destroy1(bR3);
	ks_release(&ks1);
	ks_release(&ks2);
	ks_release(&ks3);
	return exit_code;
}
ERR_VALUE kmer_freq_distribution(const PROGRAM_OPTIONS *Options, const uint32_t KMerSize, const ONE_READ *Reads, const size_t ReadCount) { int err; size_t maxValue = 0; khiter_t it; size_t kmerCount = 0; char *kmerString = NULL; khash_t(kc) *table = kh_init(kc); ERR_VALUE ret = ERR_INTERNAL_ERROR; ret = utils_calloc(KMerSize + 1, sizeof(char), &kmerString); if (ret == ERR_SUCCESS) { const ONE_READ *r = Reads; kmerString[KMerSize] = '\0'; for (size_t i = 0; i < ReadCount; ++i) { const READ_PART *p = &r->Part; read_split(r); if (p->ReadSequenceLength >= KMerSize) { for (size_t j = 0; j < p->ReadSequenceLength - KMerSize + 1; ++j) { char *s = NULL; memcpy(kmerString, p->ReadSequence + j, KMerSize*sizeof(char)); ret = utils_copy_string(kmerString, &s); if (ret == ERR_SUCCESS) { it = kh_put(kc, table, s, &err); switch (err) { case 0: kh_value(table, it) += 1; if (kh_value(table, it) > maxValue) maxValue = kh_value(table, it); utils_free(s); break; case 1: case 2: kh_value(table, it) = 1; break; default: ret = ERR_OUT_OF_MEMORY; break; } ++kmerCount; if (ret != ERR_SUCCESS) utils_free(s); } if (ret != ERR_SUCCESS) break; } } if (ret != ERR_SUCCESS) break; ++r; } if (ret == ERR_SUCCESS) { size_t *freqArray = NULL; ++maxValue; ret = utils_calloc(maxValue, sizeof(size_t), &freqArray); if (ret == ERR_SUCCESS) { memset(freqArray, 0, maxValue*sizeof(size_t)); for (it = kh_begin(table); it != kh_end(table); ++it) { if (kh_exist(table, it)) ++freqArray[kh_value(table, it)]; } for (size_t i = 0; i < maxValue; ++i) { if (freqArray[i] > 0) fprintf(stdout, "%Iu, %Iu, %lf\n", i, freqArray[i], (double)freqArray[i]*100/ (double)kmerCount); } utils_free(freqArray); } } utils_free(kmerString); } for (size_t i = kh_begin(table); i < kh_end(table); ++i) { if (kh_exist(table, i)) utils_free(kh_key(table, i)); } kh_destroy(kc, table); return ret; }
/* * Computes entropy from integer frequencies for various encoding methods and * picks the best encoding. * * FIXME: we could reuse some of the code here for the actual encoding * parameters too. Eg the best 'k' for SUBEXP or the code lengths for huffman. * * Returns the best codec to use. */ enum cram_encoding cram_stats_encoding(cram_fd *fd, cram_stats *st) { enum cram_encoding best_encoding = E_NULL; int best_size = INT_MAX, bits; int nvals, i, ntot = 0, max_val = 0, min_val = INT_MAX, k; int *vals = NULL, *freqs = NULL, vals_alloc = 0, *codes; //cram_stats_dump(st); /* Count number of unique symbols */ for (nvals = i = 0; i < MAX_STAT_VAL; i++) { if (!st->freqs[i]) continue; if (nvals >= vals_alloc) { vals_alloc = vals_alloc ? vals_alloc*2 : 1024; vals = realloc(vals, vals_alloc * sizeof(int)); freqs = realloc(freqs, vals_alloc * sizeof(int)); if (!vals || !freqs) { if (vals) free(vals); if (freqs) free(freqs); return E_HUFFMAN; // Cannot do much else atm } } vals[nvals] = i; freqs[nvals] = st->freqs[i]; ntot += freqs[nvals]; if (max_val < i) max_val = i; if (min_val > i) min_val = i; nvals++; } if (st->h) { khint_t k; int i; for (k = kh_begin(st->h); k != kh_end(st->h); k++) { if (!kh_exist(st->h, k)) continue; if (nvals >= vals_alloc) { vals_alloc = vals_alloc ? 
vals_alloc*2 : 1024; vals = realloc(vals, vals_alloc * sizeof(int)); freqs = realloc(freqs, vals_alloc * sizeof(int)); if (!vals || !freqs) return E_HUFFMAN; // Cannot do much else atm } i = kh_key(st->h, k); vals[nvals]=i; freqs[nvals] = kh_val(st->h, k); ntot += freqs[nvals]; if (max_val < i) max_val = i; if (min_val > i) min_val = i; nvals++; } } st->nvals = nvals; assert(ntot == st->nsamp); if (nvals <= 1) { free(vals); free(freqs); return E_HUFFMAN; } if (fd->verbose > 1) fprintf(stderr, "Range = %d..%d, nvals=%d, ntot=%d\n", min_val, max_val, nvals, ntot); /* Theoretical entropy */ // if (fd->verbose > 1) { // double dbits = 0; // for (i = 0; i < nvals; i++) { // dbits += freqs[i] * log((double)freqs[i]/ntot); // } // dbits /= -log(2); // if (fd->verbose > 1) // fprintf(stderr, "Entropy = %f\n", dbits); // } if (nvals > 1 && ntot > 256) { #if 0 /* * CRUDE huffman estimator. Round to closest and round up from 0 * to 1 bit. * * With and without ITF8 incase we have a few discrete values but with * large magnitude. * * Note rans0/arith0 and Z_HUFFMAN_ONLY vs internal huffman can be * compared in this way, but order-1 (eg rans1) or maybe LZ77 modes * may detect the correlation of high bytes to low bytes in multi- * byte values. So this predictor breaks down. */ double dbits = 0; // entropy + ~huffman double dbitsH = 0; double dbitsE = 0; // external entropy + ~huffman double dbitsEH = 0; int F[256] = {0}, n = 0; double e = 0; // accumulated error bits for (i = 0; i < nvals; i++) { double x; int X; unsigned int v = vals[i]; //Better encoding would cope with sign. //v = ABS(vals[i])*2+(vals[i]<0); if (!(v & ~0x7f)) { F[v] += freqs[i], n+=freqs[i]; } else if (!(v & ~0x3fff)) { F[(v>>8) |0x80] += freqs[i]; F[ v &0xff] += freqs[i], n+=2*freqs[i]; } else if (!(v & ~0x1fffff)) {
/* Cache callback: returns the begin iterator of the source session map. */
static cache_iter_t
cachessess_begin_cb(void)
{
	cache_iter_t first = kh_begin(srcsessmap);

	return first;
}
/* Cache callback: returns the begin iterator of the certificate map. */
static cache_iter_t
cachefkcrt_begin_cb(void)
{
	cache_iter_t first = kh_begin(certmap);

	return first;
}