Exemple #1
0
/*
 * Marks every key and every value stored in the hash's table by passing
 * each of them to mrb_gc_mark_value. A hash whose table pointer is NULL
 * has nothing to mark.
 */
void
mrb_gc_mark_hash(mrb_state *mrb, struct RHash *hash)
{
  khash_t(ht) *tbl = hash->ht;
  khiter_t it;

  if (!tbl) return;

  it = kh_begin(tbl);
  while (it != kh_end(tbl)) {
    /* only occupied buckets hold a valid key/value pair */
    if (kh_exist(tbl, it)) {
      mrb_value entry_key = kh_key(tbl, it);
      mrb_value entry_val = kh_value(tbl, it);

      mrb_gc_mark_value(mrb, entry_key);
      mrb_gc_mark_value(mrb, entry_val);
    }
    it++;
  }
}
Exemple #2
0
/*
 * Walks every occupied bucket of the table held in the stream's data and
 * emits one two-element array {key, value} per entry via strm_emit.
 * The `data` argument is not used. Always returns STRM_OK.
 */
static int
rbk_finish(strm_stream* strm, strm_value data)
{
  struct rbk_data *state = strm->data;
  khiter_t it = kh_begin(state->tbl);

  while (it != kh_end(state->tbl)) {
    if (kh_exist(state->tbl, it)) {
      strm_value pair[2];

      pair[0] = kh_key(state->tbl, it);
      pair[1] = kh_value(state->tbl, it);
      strm_emit(strm, strm_ary_new(pair, 2), NULL);
    }
    it++;
  }
  return STRM_OK;
}
Exemple #3
0
/*
 * Returns a new array containing every value stored in `hash`, in bucket
 * order. A hash without a table yields an empty array; otherwise the array
 * is created with capacity kh_size(table).
 */
static mrb_value
mrb_hash_values(mrb_state *mrb, mrb_value hash)
{
  khash_t(ht) *tbl = RHASH_TBL(hash);
  mrb_value result;
  khiter_t it;

  if (!tbl) {
    return mrb_ary_new(mrb);
  }

  result = mrb_ary_new_capa(mrb, kh_size(tbl));
  for (it = kh_begin(tbl); it != kh_end(tbl); it++) {
    mrb_value elem;

    if (!kh_exist(tbl, it)) continue;   /* empty or deleted bucket */
    elem = kh_value(tbl, it);
    mrb_ary_push(mrb, result, elem);
  }
  return result;
}
Exemple #4
0
/*
 * Builds a BAM header from a reference hash.
 *
 * The header gets one target per hash entry. For each entry the hash value
 * supplies both the slot index (the value cast to int) and the target length
 * (the value shifted right by 32). The key pointer itself is stored as the
 * target name; it is not copied. bam_init_header_hash() is called on the
 * header before it is returned.
 */
static bam_header_t *hash2header(const kh_ref_t *hash)
{
	bam_header_t *hdr = bam_header_init();
	khiter_t it;

	hdr->n_targets = kh_size(hash);
	hdr->target_name = (char**)calloc(kh_size(hash), sizeof(char*));
	hdr->target_len = (uint32_t*)calloc(kh_size(hash), 4);

	it = kh_begin(hash);
	while (it != kh_end(hash)) {
		if (kh_exist(hash, it)) {
			int slot = (int)kh_value(hash, it);
			hdr->target_name[slot] = (char*)kh_key(hash, it);
			hdr->target_len[slot] = kh_value(hash, it)>>32;
		}
		++it;
	}

	bam_init_header_hash(hdr);
	return hdr;
}
Exemple #5
0
/*
 * Returns a new array with the values of `hash`. Every value for which
 * mrb_special_const_p() is false is duplicated with mrb_obj_dup() before
 * being pushed; other values are pushed as-is. A hash without a table
 * yields the empty array.
 */
static mrb_value
mrb_hash_values(mrb_state *mrb, mrb_value hash)
{
  khash_t(ht) *tbl = RHASH_TBL(hash);
  mrb_value result = mrb_ary_new(mrb);
  khiter_t it;

  if (!tbl) return result;

  it = kh_begin(tbl);
  while (it != kh_end(tbl)) {
    if (kh_exist(tbl, it)) {
      mrb_value elem = kh_value(tbl, it);

      if (!mrb_special_const_p(elem)) {
        elem = mrb_obj_dup(mrb, elem);
      }
      mrb_ary_push(mrb, result, elem);
    }
    it++;
  }
  return result;
}
/*
 * Debug helper: prints the contents of a cram_stats to stderr.
 * After a "cram_stats:" banner it writes one "\t<value>\t<count>" line for
 * each non-zero slot of st->freqs[], followed by one line per occupied
 * bucket of the hash st->h when that hash is present.
 */
void cram_stats_dump(cram_stats *st) {
    int val;
    khint_t it;

    fprintf(stderr, "cram_stats:\n");

    for (val = 0; val < MAX_STAT_VAL; val++) {
        if (st->freqs[val])
            fprintf(stderr, "\t%d\t%d\n", val, st->freqs[val]);
    }

    if (!st->h)
        return;

    for (it = kh_begin(st->h); it != kh_end(st->h); it++) {
        if (kh_exist(st->h, it))
            fprintf(stderr, "\t%d\t%d\n", kh_key(st->h, it), kh_val(st->h, it));
    }
}
Exemple #7
0
/*
 * Returns the true value if any value stored in `hash` compares equal to
 * `value` under mrb_equal(), and the false value otherwise (including when
 * the hash has no table).
 */
static mrb_value
mrb_hash_has_valueWithvalue(mrb_state *mrb, mrb_value hash, mrb_value value)
{
  khash_t(ht) *tbl = RHASH_TBL(hash);
  khiter_t it;

  if (!tbl) {
    return mrb_false_value();
  }

  for (it = kh_begin(tbl); it != kh_end(tbl); it++) {
    if (kh_exist(tbl, it) && mrb_equal(mrb, kh_value(tbl,it), value)) {
      return mrb_true_value();
    }
  }

  return mrb_false_value();
}
Exemple #8
0
// Called when the manager sends a complete message
void messageArrivedFromManager() {
	printf ("Message arrived: >%s< for >%s<\r\n", commandMessage, commandClientId);

	// See if the client is connected, if so immediately forward
	khiter_t k = kh_get(clientStatuses, clientStatuses, (char*)commandClientId); // Find it in the hash
	if (k != kh_end(clientStatuses)) { // Was it in the hash?
		clientStatus* status = kh_value(clientStatuses, k); // Grab the clientStatus from the hash
		snprintf(httpResponse, HTTP_RESPONSE_SIZE, HTTP_TEMPLATE, commandMessageLen, commandMessage); // Compose the response message
		write(status->io.fd, httpResponse, strlen(httpResponse)); // Send it
		closeConnection((ev_io*)status); // Close the conn
		return;
	}

	// If not, add to a queue
	khiter_t q = kh_get(queue, queue, (char*)commandClientId); // See if this client is already in the queue
	if (q == kh_end(queue)) {
		printf("Creating queue for %s\r\n", commandClientId);
		// This client needs to be added to the queue
		// First make a new list
		klist_t(messages) *newMessageList = kl_init(messages);
		*kl_pushp(messages, newMessageList) = strdup((char*)commandMessage); // Add the message to the list
		// Now make a new hash entry pointing to this new list
		int ret;
		q = kh_put(queue, queue, strdup((char*)commandClientId), &ret);
		kh_value(queue, q) = newMessageList;
	} else {
		printf("Adding to the queue for %s\r\n", commandClientId);
		// This client is in the queue already eg it has a hash entry
		// Pushp puts this message at the end of the queue, so that shift will grab the oldest first (like a FIFO)
		*kl_pushp(messages, kh_value(queue, q)) = strdup((char*)commandMessage);
	}

	// Now do a printout of the hash list
	for (khiter_t qi = kh_begin(queue); qi < kh_end(queue); qi++) {
		if (kh_exist(queue, qi)) {
			printf("Queue for %s\n", kh_key(queue,qi));
			klist_t(messages) *list = kh_value(queue, qi);
			kliter_t(messages) *li;
			for (li = kl_begin(list); li != kl_end(list); li = kl_next(li))
				printf("%s\n", kl_val(li));
			printf("----\n");
		}
	}
}
Exemple #9
0
/*
 * Releases a BCF header and everything it owns that is visible here:
 * for each of the three dictionaries that is non-NULL, the key strings,
 * the dictionary itself and the matching h->id[] array; then every header
 * record, the hrec array (only when nhrec is non-zero), the samples array,
 * the mem.s and text buffers, and finally the header struct.
 */
void bcf_hdr_destroy(bcf_hdr_t *h)
{
	int slot;

	for (slot = 0; slot < 3; ++slot) {
		vdict_t *dict = (vdict_t*)h->dict[slot];
		khint_t it;

		if (dict != 0) {
			/* keys are owned by the dictionary and must be freed first */
			for (it = kh_begin(dict); it != kh_end(dict); ++it) {
				if (kh_exist(dict, it))
					free((char*)kh_key(dict, it));
			}
			kh_destroy(vdict, dict);
			free(h->id[slot]);
		}
	}

	for (slot = 0; slot < h->nhrec; slot++) {
		bcf_hrec_destroy(h->hrec[slot]);
	}
	if (h->nhrec) {
		free(h->hrec);
	}
	free(h->samples);   /* free(NULL) is a no-op */
	free(h->mem.s);
	free(h->text);
	free(h);
}
Exemple #10
0
/*
 * Reverse lookup of a symbol: scans the name2sym table for an entry whose
 * value equals `sym`.
 *
 * lenp must point to a size_t variable. On a hit it receives the name's
 * length and the name pointer is returned; on a miss it is set to 0 and
 * NULL is returned.
 */
const char*
mrb_sym2name_len(mrb_state *mrb, mrb_sym sym, size_t *lenp)
{
  khash_t(n2s) *tbl = mrb->name2sym;
  khiter_t it;

  for (it = kh_begin(tbl); it != kh_end(tbl); it++) {
    symbol_name found;

    if (!kh_exist(tbl, it)) continue;
    if (kh_value(tbl, it) != sym) continue;

    found = kh_key(tbl, it);
    *lenp = found.len;
    return found.name;
  }

  *lenp = 0;
  return NULL;  /* missing */
}
Exemple #11
0
/*
 * Merges every position stored in `positions_read` that is not after
 * max_chromosome_merged:max_position_merged.
 *
 * For each such position the linked records are passed to merge_position();
 * on success the merged record is wrapped in a list item (id = bucket index,
 * type MERGED_RECORD) and inserted into `output_list`. The record list of
 * every processed position is freed and its entry removed from the hash.
 *
 * Returns the number of merged records inserted into `output_list`.
 *
 * Concurrency: the buckets are processed by an OpenMP parallel loop.
 * kh_del() rewrites the hash's shared flag words and size counter, so it
 * must not run while other threads are testing kh_exist() on neighbouring
 * buckets. Processed buckets are therefore only tagged (value set to NULL,
 * a per-bucket write) inside the parallel region and are deleted in a
 * serial pass afterwards.
 * NOTE(review): list_insert_item() and merge_position() are called
 * concurrently; this assumes they are thread-safe -- confirm.
 */
int merge_interval(kh_pos_t* positions_read, char *max_chromosome_merged, unsigned long max_position_merged,
                    char **chromosome_order, int num_chromosomes, vcf_file_t **files, 
                    shared_options_data_t *shared_options_data, merge_options_data_t *options_data, list_t *output_list) {
    int num_entries = 0;

    #pragma omp parallel for num_threads(shared_options_data->num_threads) reduction(+:num_entries)
    for (int k = kh_begin(positions_read); k < kh_end(positions_read); k++) {
        if (kh_exist(positions_read, k)) {
            array_list_t *records_in_position = kh_value(positions_read, k);
            assert(records_in_position);
            
            vcf_record_t *record = ((vcf_record_file_link*) array_list_get(0, records_in_position))->record;
            vcf_record_file_link **links = NULL;
            int num_links = 0;
            
            // Select only positions up to the last chromosome:position to merge
            int cmp_chrom = compare_chromosomes(record->chromosome, max_chromosome_merged, chromosome_order, num_chromosomes);
            if (cmp_chrom < 0 || (cmp_chrom == 0 && compare_positions(record->position, max_position_merged) <= 0)) {
                links = records_in_position->items;
                num_links = records_in_position->size;
            }
            
            // Launch merge
            if (num_links > 0) {
                int err_code = 0;
                vcf_record_t *merged = merge_position(links, num_links, files, options_data->num_files, options_data, &err_code);
                
                if (!err_code) {
                    list_item_t *item = list_item_new(k, MERGED_RECORD, merged);
                    list_insert_item(item, output_list);
                    num_entries += 1;
                }
                
                // Free the list of records in this position and tag the bucket
                // for removal; the actual kh_del happens after the parallel loop.
                array_list_free(records_in_position, vcf_record_file_link_free);
                kh_value(positions_read, k) = NULL;
            }
        } // End kh_exist
    }

    // Serial pass: drop every bucket tagged above. Live buckets never hold a
    // NULL value (see the assert), so NULL unambiguously means "processed".
    for (khint_t k = kh_begin(positions_read); k != kh_end(positions_read); k++) {
        if (kh_exist(positions_read, k) && kh_value(positions_read, k) == NULL) {
            kh_del(pos, positions_read, k);
        }
    }

    return num_entries;
}
Exemple #12
0
/*
 * Destroys an int_htable: calls ht->free_value_fn (when set) on every stored
 * value, destroys the underlying khash and frees the wrapper struct.
 * Passing NULL is a no-op.
 */
void
int_htable_destroy(int_htable *ht)
{
    khiter_t k;

    if (!ht) {
        return;
    }

    /* Only walk the table when there is something to call per value. */
    if (ht->free_value_fn) {
        /* Was kh_begin(sh->htable): `sh` is not declared in this function.
         * It only compiled because kh_begin() ignores its argument. */
        for (k = kh_begin(ht->htable); k != kh_end(ht->htable); ++k) {
            if (kh_exist(ht->htable, k)) {
                ht->free_value_fn(kh_value(ht->htable,k));
            }
        }
    }
    kh_destroy(int, ht->htable);
    free(ht);
}
Exemple #13
0
/*
 * Reads a procedure and a dictionary from the call arguments, then applies
 * the procedure once to each key of the dictionary (wrapped with
 * pic_obj_value). Returns the undefined value.
 */
static pic_value
pic_dict_dictionary_for_each(pic_state *pic)
{
  struct pic_proc *callback;
  struct pic_dict *dictionary;
  khash_t(dict) *tbl;
  khiter_t pos;

  pic_get_args(pic, "ld", &callback, &dictionary);

  tbl = &dictionary->hash;
  pos = kh_begin(tbl);
  while (pos != kh_end(tbl)) {
    if (kh_exist(tbl, pos)) {
      pic_apply1(pic, callback, pic_obj_value(kh_key(tbl, pos)));
    }
    ++pos;
  }

  return pic_undef_value();
}
Exemple #14
0
/*
 * Calls `func` with (mrb, key, value, p) for every entry of the table.
 *
 * The callback's return value steers the walk:
 *   > 0  stop immediately and return FALSE
 *   < 0  delete the current entry and keep going
 *   = 0  keep going
 * Returns TRUE when the walk reaches the end of the table.
 */
static mrb_bool
iv_foreach(mrb_state *mrb, iv_tbl *t, iv_foreach_func *func, void *p)
{
    khash_t(iv) *tbl = &t->h;
    khiter_t it;

    if (!tbl) return TRUE;

    for (it = kh_begin(tbl); it != kh_end(tbl); it++) {
        int verdict;

        if (!kh_exist(tbl, it)) continue;

        verdict = (*func)(mrb, kh_key(tbl, it), kh_value(tbl, it), p);
        if (verdict > 0) return FALSE;
        if (verdict < 0) {
            kh_del(iv, tbl, it);
        }
    }
    return TRUE;
}
Exemple #15
0
/*
 * Compares two hashes.
 *
 * - Identical objects are equal.
 * - If hash2 is not a hash: false unless it responds to "to_hash", in which
 *   case the comparison is delegated to mrb_eql (when `eql` is non-zero) or
 *   mrb_equal, with hash2 on the left, and the result is returned as a fixnum.
 * - Two hashes are equal when both lack a table, or when both have tables of
 *   the same size and every key of hash1 exists in hash2 with a value that
 *   is mrb_equal to hash1's value.
 */
static mrb_value
hash_equal(mrb_state *mrb, mrb_value hash1, mrb_value hash2, int eql)
{
  khash_t(ht) *lhs, *rhs;
  khiter_t li, ri;

  if (mrb_obj_equal(mrb, hash1, hash2)) return mrb_true_value();

  if (!mrb_hash_p(hash2)) {
    if (!mrb_respond_to(mrb, hash2, mrb_intern(mrb, "to_hash"))) {
      return mrb_false_value();
    }
    if (eql) {
      return mrb_fixnum_value(mrb_eql(mrb, hash2, hash1));
    }
    return mrb_fixnum_value(mrb_equal(mrb, hash2, hash1));
  }

  lhs = RHASH_TBL(hash1);
  rhs = RHASH_TBL(hash2);

  if (!lhs) {
    return rhs ? mrb_false_value() : mrb_true_value();
  }
  if (!rhs) return mrb_false_value();
  if (kh_size(lhs) != kh_size(rhs)) return mrb_false_value();

  for (li = kh_begin(lhs); li != kh_end(lhs); li++) {
    mrb_value key;

    if (!kh_exist(lhs, li)) continue;

    key = kh_key(lhs,li);
    ri = kh_get(ht, rhs, key);
    if (ri == kh_end(rhs)) {
      return mrb_false_value();   /* key missing on the right */
    }
    if (!mrb_equal(mrb, kh_value(lhs,li), kh_value(rhs,ri))) {
      return mrb_false_value();   /* values differ */
    }
  }
  return mrb_true_value();
}
Exemple #16
0
/*
 * Returns a new array holding the keys of `hash`.
 *
 * Each key is written to the array slot given by the `n` field stored with
 * its value (presumably the entry's insertion index -- confirm). A missing
 * or empty table yields an empty array.
 */
mrb_value
mrb_hash_keys(mrb_state *mrb, mrb_value hash)
{
    khash_t(ht) *tbl = RHASH_TBL(hash);
    mrb_value keys;
    mrb_value *slots;
    khiter_t it;

    if (!tbl || kh_size(tbl) == 0) {
        return mrb_ary_new(mrb);
    }

    keys = mrb_ary_new_capa(mrb, kh_size(tbl));
    /* Store nil in the last index before writing through the raw pointer. */
    mrb_ary_set(mrb, keys, kh_size(tbl)-1, mrb_nil_value());
    slots = RARRAY_PTR(keys);

    for (it = kh_begin(tbl); it != kh_end(tbl); it++) {
        mrb_hash_value entry;

        if (!kh_exist(tbl, it)) continue;

        entry = kh_value(tbl,it);
        slots[entry.n] = kh_key(tbl,it);
    }
    return keys;
}
Exemple #17
0
/*
 * Returns the true value when every key of `hash` is also present in `dt`
 * with a value that is mrb_equal to the one in `hash`; the false value
 * otherwise. The `recur` argument is not used.
 */
static mrb_value
recursive_eql(mrb_state *mrb, mrb_value hash, mrb_value dt, int recur)
{
  khash_t(ht) *lhs = RHASH_TBL(hash);
  khash_t(ht) *rhs = RHASH_TBL(dt);
  khiter_t li, ri;

  for (li = kh_begin(lhs); li != kh_end(lhs); li++) {
    mrb_value key;

    if (!kh_exist(lhs, li)) continue;

    key = kh_key(lhs,li);
    ri = kh_get(ht, rhs, key);
    if (ri == kh_end(rhs)) {
      return mrb_false_value();   /* key absent from the other hash */
    }
    if (!mrb_equal(mrb, kh_value(lhs,li), kh_value(rhs,ri))) {
      return mrb_false_value();   /* same key, different value */
    }
  }
  return mrb_true_value();
}
Exemple #18
0
/*
 * Reads one object argument and reports whether any entry of `hash` stores
 * a value (the `.v` field of the entry) that is mrb_equal to it.
 * Returns the false value when the hash has no table.
 */
static mrb_value
mrb_hash_has_value(mrb_state *mrb, mrb_value hash)
{
  mrb_value wanted;
  khash_t(ht) *tbl;
  khiter_t it;

  mrb_get_args(mrb, "o", &wanted);
  tbl = RHASH_TBL(hash);

  if (!tbl) {
    return mrb_false_value();
  }

  for (it = kh_begin(tbl); it != kh_end(tbl); it++) {
    if (kh_exist(tbl, it) && mrb_equal(mrb, kh_value(tbl, it).v, wanted)) {
      return mrb_true_value();
    }
  }
  return mrb_false_value();
}
Exemple #19
0
/*
 * Creates a new hash object with its own table and copies every entry of
 * `hash` into it. Each copied entry keeps the source's `.v` and gets `.n`
 * set to the destination's size minus one at the time of insertion.
 * The default / proc-default flags and a non-nil "ifnone" value are carried
 * over to the copy.
 */
static mrb_value
mrb_hash_dup(mrb_state *mrb, mrb_value hash)
{
  struct RHash* copy;
  khash_t(ht) *src, *dst;
  khiter_t si, di;
  mrb_value ifnone, result;

  src = RHASH_TBL(hash);
  copy = (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class);
  copy->ht = kh_init(ht, mrb);

  if (src && kh_size(src) > 0) {
    dst = copy->ht;

    for (si = kh_begin(src); si != kh_end(src); si++) {
      int arena;

      if (!kh_exist(src, si)) continue;

      /* the arena is saved/restored around the insertion only */
      arena = mrb_gc_arena_save(mrb);
      di = kh_put(ht, mrb, dst, KEY(kh_key(src, si)));
      mrb_gc_arena_restore(mrb, arena);

      kh_val(dst, di).v = kh_val(src, si).v;
      kh_val(dst, di).n = kh_size(dst)-1;
    }
  }

  if (MRB_RHASH_DEFAULT_P(hash)) {
    copy->flags |= MRB_HASH_DEFAULT;
  }
  if (MRB_RHASH_PROCDEFAULT_P(hash)) {
    copy->flags |= MRB_HASH_PROC_DEFAULT;
  }

  result = mrb_obj_value(copy);
  ifnone = RHASH_IFNONE(hash);
  if (!mrb_nil_p(ifnone)) {
      mrb_iv_set(mrb, result, mrb_intern_lit(mrb, "ifnone"), ifnone);
  }
  return result;
}
Exemple #20
0
/*
 * Determines the symbol naming class `c`.
 *
 * If `c` has a non-nil "__classid__" instance variable, that value is
 * converted with SYM2ID and returned. Otherwise the iv table of `outer`
 * (or of mrb->object_class when `outer` is NULL) is searched for an entry
 * whose value is `c` itself, and that entry's key is returned. When nothing
 * matches, SYM2ID is applied to the (nil) "__classid__" value.
 */
static mrb_sym
class_sym(mrb_state *mrb, struct RClass *c, struct RClass *outer)
{
  mrb_value name;
  khash_t(iv)* tbl;
  khiter_t it;

  name = mrb_obj_iv_get(mrb, (struct RObject*)c, mrb_intern(mrb, "__classid__"));
  if (!mrb_nil_p(name)) {
    return SYM2ID(name);
  }

  if (!outer) outer = mrb->object_class;
  tbl = outer->iv;

  for (it = kh_begin(tbl); it != kh_end(tbl); it++) {
    mrb_value candidate;

    if (!kh_exist(tbl,it)) continue;

    candidate = kh_value(tbl,it);
    if (mrb_type(candidate) == c->tt && mrb_class_ptr(candidate) == c) {
      return kh_key(tbl,it);
    }
  }
  return SYM2ID(name);
}
Exemple #21
0
/*
 * Appends the instance variables of `obj` to the inspect string `str` as
 * "name=inspect(value)" items and closes the string with ">".
 *
 * When `recur` is set, " ..." is appended instead of the variables.
 * While listing, a leading '-' in `str` marks "no variable written yet":
 * the first item turns it into '#' and is preceded by a space, later items
 * are preceded by ", ". The first character is forced to '#' before return.
 */
static mrb_value
inspect_obj(mrb_state *mrb, mrb_value obj, mrb_value str, int recur)
{
  if (recur) {
    mrb_str_cat2(mrb, str, " ...");
  }
  else {
    kh_iv_t *tbl = RCLASS_IV_TBL(obj);
    khiter_t it;

    if (tbl) {
      it = kh_begin(tbl);
      while (it != kh_end(tbl)) {
        if (kh_exist(tbl, it)){
          mrb_sym name = kh_key(tbl, it);
          mrb_value held = kh_value(tbl, it);

          /* need not to show internal data */
          if (RSTRING_PTR(str)[0] != '-') {
            mrb_str_cat2(mrb, str, ", ");
          }
          else { /* first element */
            RSTRING_PTR(str)[0] = '#';
            mrb_str_cat2(mrb, str, " ");
          }
          mrb_str_cat2(mrb, str, mrb_sym2name(mrb, name));
          mrb_str_cat2(mrb, str, "=");
          mrb_str_append(mrb, str, mrb_inspect(mrb, held));
        }
        it++;
      }
    }
  }
  mrb_str_cat2(mrb, str, ">");
  RSTRING_PTR(str)[0] = '#';

  return str;
}
Exemple #22
0
/*
 * Returns a new hash object whose table contains a copy of every key/value
 * pair of `hash`.
 *
 * The source table pointer may be NULL (a hash that has no table yet); in
 * that case the result is simply a fresh, empty hash.
 */
mrb_value
mrb_hash_dup(mrb_state *mrb, mrb_value hash)
{
  struct RHash* ret;
  khash_t(ht) *h, *ret_h;
  khiter_t k, ret_k;

  h = RHASH_TBL(hash);
  ret = (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class);
  ret->ht = kh_init(ht, mrb);

  /* Guard against a table-less source before reading its size. */
  if (h && kh_size(h) > 0) {
    ret_h = ret->ht;

    for (k = kh_begin(h); k != kh_end(h); k++) {
      if (kh_exist(h,k)) {
        ret_k = kh_put(ht, ret_h, KEY(kh_key(h,k)));
        kh_val(ret_h, ret_k) = kh_val(h,k);
      }
    }
  }

  return mrb_obj_value(ret);
}
Exemple #23
0
/*
 * Copies every entry of s2's environment into s1's environment.
 *
 * A missing environment on either state is created on demand.
 *
 * Returns STRM_OK on success. Returns STRM_NG when an environment cannot be
 * allocated, when a key of s2 is already present in s1, or when an insertion
 * fails; entries copied before the failure stay in s1.
 */
int
strm_env_copy(strm_state* s1, strm_state* s2)
{
  strm_env *e1 = s1->env;
  strm_env *e2 = s2->env;
  khiter_t k, kk;
  int r;

  if (!e1) {
    e1 = s1->env = kh_init(env);
    if (!e1) return STRM_NG;
  }
  if (!e2) {
    /* Was `s1->env = ...`, which replaced (and leaked) s1's table and left
     * s2 without an environment. */
    e2 = s2->env = kh_init(env);
    if (!e2) return STRM_NG;
  }
  for (k = kh_begin(e2); k != kh_end(e2); k++) {
    if (kh_exist(e2, k)) {
      kk = kh_put(env, e1, kh_key(e2, k), &r);
      if (r <= 0) return STRM_NG; /* r=0  key is present in the hash table */
                                  /* r=-1 operation failed */
      kh_value(e1, kk) = kh_value(e2, k);
    }
  }
  return STRM_OK;
}
Exemple #24
0
/*
 * Python-callable: takes one int (an epoll fd), looks up its pyep_data,
 * closes the event data with ev_close(), frees every ev_header stored in the
 * entry's handler table, destroys that table, removes the fd from pyep_ht
 * and frees the entry.
 *
 * Returns None on success. Returns NULL with an exception set when the
 * argument cannot be parsed, the fd is unknown (KeyError) or ev_close()
 * reports failure (SystemError).
 */
static PyObject* pyext_epoll_free(PyObject *self,PyObject *args){
    struct pyep_data *entry;
    khiter_t it;
    int epfd;

    if(!PyArg_ParseTuple(args,"i",&epfd)){
        PyErr_BadArgument();
        return NULL;
    }

    entry = pyep_getby_epfd(epfd);
    if(entry == NULL){
        PyErr_SetString(PyExc_KeyError,"epoll file descriptor not found");
        return NULL;
    }

    if(ev_close(&entry->evdata)){
        PyErr_SetString(PyExc_SystemError,"epoll free failed");
        return NULL;
    }

    /* release the per-handler headers before the table that holds them */
    it = kh_begin(entry->evhdr_ht);
    while(it != kh_end(entry->evhdr_ht)){
        if(kh_exist(entry->evhdr_ht,it)){
            free((struct ev_header*)kh_value(entry->evhdr_ht,it));
        }
        it++;
    }
    kh_destroy(ptr,entry->evhdr_ht);

    /* unregister the fd from the global table */
    it = kh_get(ptr,pyep_ht,epfd);
    kh_del(ptr,pyep_ht,it);

    free(entry);

    Py_INCREF(Py_None);
    return Py_None;
}
/*
 * Prints every entry of seg_counter as "<key as 4 hex digits> <count, width
 * 6> |". The item is followed by a newline when its bucket index is a
 * multiple of 16, and by a single space otherwise.
 */
void printSegCounter() {
    khint_t it = kh_begin(seg_counter);

    while (it != kh_end(seg_counter)) {
        if (kh_exist(seg_counter, it)) {   /* bucket holds data */
            printf("%04X %*d |%s", kh_key(seg_counter, it), 6, kh_value(seg_counter, it),
                   (it % 16 == 0) ? "\n" : " ");
        }
        ++it;
    }
}
Exemple #26
0
/*
 * Scans every BAM file registered in bamNFOp for soft-clipped reads and
 * writes clusters of them to ./test.txt.
 *
 * Per file: open it, read the header, build ChrIsHum[] (the isHum flag of
 * each target, looked up by name in chrNFOp), load the index, then read all
 * records. A non-secondary read with a soft clip of at least
 * myConfig.minGrepSlen on a target whose ChrIsHum entry is non-zero, and
 * whose mate can be fetched with getPairedSam(), is pushed into the current
 * pierCluster. When sam_plp_push() returns non-zero the current cluster is
 * printed and a new one is started.
 *
 * Returns exit_code (set to 1 when sam_format1 fails), EXIT_FAILURE when a
 * file or header cannot be read, or 1 when the index cannot be loaded.
 *
 * NOTE(review): the early returns leave fs, in, h, ChrIsHum, b/d/d2 and
 * bR1..bR3 unreleased, and the results of fopen() and malloc() are not
 * checked.
 * NOTE(review): the cluster still open when a file ends is destroyed
 * without being printed -- confirm this is intended.
 */
int do_grep() {
#ifdef DEBUGa
	printf("[!]do_grep\n");
#endif
	BamInfo_t *pbam;
	kh_cstr_t BamID;
	khiter_t ki, bami;
	kstring_t ks1 = { 0, 0, NULL };
	kstring_t ks2 = { 0, 0, NULL };
	kstring_t ks3 = { 0, 0, NULL };

	samFile *in;
	bam_hdr_t *h;
	hts_idx_t *idx;
	bam1_t *b, *d, *d2, *bR1, *bR2, *bR3;
	/* bR1..bR3 are allocated here and destroyed at the end; nothing else in this function uses them. */
	bR1 = bam_init1(); bR2 = bam_init1(); bR3 = bam_init1();
	//htsFile *out;
	//hts_opt *in_opts = NULL, *out_opts = NULL;
	int r = 0, exit_code = 0;

	kvec_t(bam1_t) R1, R2, RV;
	pierCluster_t *pierCluster;
	//samdat_t tmp_samdat;
	/* Output file for the printed clusters (path is fixed). */
	FILE *fs = fopen("./test.txt","w");

	/* One pass per registered BAM file. */
	for (bami = kh_begin(bamNFOp); bami != kh_end(bamNFOp); ++bami) {
		//printf(">[%d]:\n",bami);
		if (kh_exist(bamNFOp, bami)) {
			kv_init(R1); kv_init(R2); kv_init(RV);
			//tmp_samdat = (const samdat_t){ 0 };
			//memset(&tmp_samdat,0,sizeof(samdat_t));
			//printf("-[%d]:\n",bami);
			BamID = kh_key(bamNFOp, bami);
			pbam = &kh_value(bamNFOp, bami);
			fprintf(stderr, "%u [%s]=%s\t%u %u\n",bami,BamID,pbam->fileName,pbam->insertSize,pbam->SD);

			in = sam_open(pbam->fileName, "r");
			if (in == NULL) {
				fprintf(stderr, "[x]Error opening \"%s\"\n", pbam->fileName);
				return EXIT_FAILURE;
			}
			h = sam_hdr_read(in);
/*			out = hts_open("-", "w");
			if (out == NULL) {
				fprintf(stderr, "[x]Error opening standard output\n");
				return EXIT_FAILURE;
			}
			if (sam_hdr_write(out, h) < 0) {
				fprintf(stderr, "[!]Error writing output header.\n");
				exit_code = 1;
			}
*/
			/* ChrIsHum[tid] caches the isHum flag of each target in this file's header. */
			int8_t *ChrIsHum;
			if (h == NULL) {
				fprintf(stderr, "[x]Couldn't read header for \"%s\"\n", pbam->fileName);
				return EXIT_FAILURE;
			} else {
				ChrIsHum = malloc(h->n_targets * sizeof(int8_t));
				for (int32_t i=0; i < h->n_targets; ++i) {
					//ChrIsHum[i] = -1;
					ki = kh_get(chrNFO, chrNFOp, h->target_name[i]);
					if (ki == kh_end(chrNFOp)) {
						/* A target name missing from chrNFOp is fatal (errx exits with status 4). */
						errx(4,"[x]Cannot find ChrID for [%s] !",h->target_name[i]);
					} else {
						ChrInfo_t * tmp = &kh_value(chrNFOp, ki);
						ChrIsHum[i] = tmp->isHum;
						//printf(">>> %d Chr:%s %d\n",i,h->target_name[i],ChrIsHum[i]);
					}
				}
			}
			h->ignore_sam_err = 0;
			b = bam_init1();
			d = bam_init1();
			d2 = bam_init1();
			if ((idx = sam_index_load(in, pbam->fileName)) == 0) {
				fprintf(stderr, "[E::%s] fail to load the BAM index\n", __func__);
				return 1;
			}
			pierCluster = sam_plp_init();
			/* Main scan: b is the current record, d receives its mate. */
			while ((r = sam_read1(in, h, b)) >= 0) {
				/* First used as "has a long enough soft clip"; reused below as a bit mask. */
				int8_t flag = false;
				const bam1_core_t *c = &b->core;
				if (c->flag & BAM_FSECONDARY) continue;
				if (c->n_cigar) {
					uint32_t *cigar = bam_get_cigar(b);
					for (int i = 0; i < c->n_cigar; ++i) {
						if (bam_cigar_opchr(cigar[i])=='S') {	// soft clipping
							if ( bam_cigar_oplen(cigar[i]) >= myConfig.minGrepSlen ) {
								flag = true;
							}
						}
					}
				}
				if (flag && ChrIsHum[c->tid]) {	// Now, skip Virus items.
					//bam_copy1(bR1, b);
					/* From here on: bit 0 = read accepted, bit 1 = mate fetched. */
					flag = 0;	// recycle
					//int enoughMapQ = 0;
					//kstring_t ks = { 0, 0, NULL };
					/*if (sam_format1(h, b, &ks1) < 0) {
						fprintf(stderr, "Error writing output.\n");
						exit_code = 1;
						break;
					} else*/ if ((c->mtid == c->tid && ChrIsHum[c->tid]) || (ChrIsHum[c->tid] ^ ChrIsHum[c->mtid])) {	// Only grep those mapped on same Human ChrID, or on different species (the case where one side maps to the virus).
						//printf(">[%s]\n",ks_str(&ks1));
						flag |= 1;
						//tmp_samdat.b = bam_dup1(b);
						//kv_push(samdat_t,R1,tmp_samdat);
						/*if (checkMapQ(ChrIsHum, b, true)) {
							++enoughMapQ;
						}*/
					}
					/* Without a retrievable mate the read is skipped entirely. */
					if (getPairedSam(in, idx, b, d) != 0) {
						flag &= ~1;
						continue;
					} else {
						flag |= 2;
						/*if (checkMapQ(ChrIsHum, d, false)) {
							++enoughMapQ;
						}*/
						/*if (c->flag & BAM_FSECONDARY) {
							if (getPairedSam(in, idx, d, d2) == 0) {
								//sam_format1(h, d2, &ks3);
								flag |= 4;
								if (checkMapQ(ChrIsHum, d2, false)) {
									++enoughMapQ;
								}
							}
						}*/
					}
/*
For a read flagged BAM_FSECONDARY(256), following the mate link twice gives the same result as reading the SA tag.
>[sf95_Ref_48245009_48245108_48245208_Vir_-_2000_2044_R_100_90	353	chr2	13996555	0	50S40M	chr18	48245109	0ACACAACAATGTTCCGGAGACTCTAAGGCCTCCCGATACAGAGCAGAGGCCACACACACACACACCATGGAATACTATTCAGCCAAAAAA	CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC	NM:i:0	MD:Z:40	AS:i:40	XS:i:40	RG:Z:Fsimout_mB	SA:Z:rgi|59585|emb|X04615.1|,2000,-,40S46M4S,60,0;	YC:Z:CT	YD:Z:f]
-[sf95_Ref_48245009_48245108_48245208_Vir_-_2000_2044_R_100_90	177	chr18	48245109	9	40S50M	gi|59585|emb|X04615.1|2000	0	GTTCCGGAGACTCTAAGGCCTCCCGATACAGAGCAGAGGCCACACACACACACACCATGGAATACTATTCAGCCAAAAAAAGGAATTCAA	CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC	NM:i:0	MD:Z:50	AS:i:50	XS:i:46	RG:Z:Fsimout_mB	SA:Z:rgi|59585|emb|X04615.1|,2000,+,50S40M,9,0;	YC:Z:GA	YD:Z:f]
+[sf95_Ref_48245009_48245108_48245208_Vir_-_2000_2044_R_100_90	113	gi|59585|emb|X04615.1|	2000	60	40S46M4S	chr18	48245109	0	TTTTTTGGCTGAATAGTATTCCATGGTGTGTGTGTGTGTGGCCTCTGCTCTGTATCGGGAGGCCTTAGAGTCTCCGGAACATTGTTGTGT	CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC	NM:i:0	MD:Z:46	AS:i:46	XS:i:27	RG:Z:Fsimout_mB	SA:Z:fchr2,13996555,+,50S40M,0,0;	YC:Z:CT	YD:Z:r]
*/
					/*if (sam_format1(h, d, &ks2) < 0) {
						fprintf(stderr, "Error writing output.\n");
						exit_code = 1;
						break;
					}*/
					/* Both bits set: the read was accepted and its mate was fetched. */
					if (((flag & 3) == 3) /*&& enoughMapQ >= myConfig.samples*/) {
						/*printf(">%d[%s]\n",checkMapQ(ChrIsHum, b, true),ks_str(&ks1));
						printf("-%d[%s]\n",checkMapQ(ChrIsHum, d, false),ks_str(&ks2));
						if (flag & 4) {
							printf("+%d[%s]\n",checkMapQ(ChrIsHum, d2, false),ks_str(&ks3));
						}
						printf("<--%d\n",enoughMapQ);*/
						if (sam_plp_push(ChrIsHum, pierCluster, b) == 0) {
							//printf("--HumRange=%s:%d-%d\n", h->target_name[(pierCluster->HumanRange).tid], (pierCluster->HumanRange).pos, (pierCluster->HumanRange).endpos);
							/* The mate is added too when it lies on a target whose ChrIsHum entry is zero. */
							if ((!ChrIsHum[(d->core).tid]) && (flag & 2)) sam_plp_push(ChrIsHum, pierCluster, d);
							//if ((!ChrIsHum[(d2->core).tid]) && (flag & 4)) sam_plp_push(ChrIsHum, pierCluster, d2);
						} else {
							/* Push refused: print the finished cluster (ranges, then every read) and start a new one. */
							fprintf(fs,"[%s]\nHumRange=%s:%d-%d\n", BamID, h->target_name[(pierCluster->HumanRange).tid], (pierCluster->HumanRange).pos, (pierCluster->HumanRange).endpos);
							fprintf(fs,"VirRange=%s:%d-%d\n", h->target_name[(pierCluster->VirusRange).tid], (pierCluster->VirusRange).pos, (pierCluster->VirusRange).endpos);
							for (size_t i=0; i<kv_size(pierCluster->Reads);++i) {
								bam1_t *bi = kv_A(pierCluster->Reads, i);
								if (sam_format1(h, bi, &ks1) < 0) {
									fprintf(stderr, "Error writing output.\n");
									exit_code = 1;
									break;
								} else {
									fprintf(fs,"%s\n",ks1.s);
								}
							}
							fprintf(fs,"\n");
							//printf("HumRange=%s:%d-%d\n", h->target_name[(pierCluster->HumanRange).tid], (pierCluster->HumanRange).pos, (pierCluster->HumanRange).endpos);
							//fflush(fs);
							/* NOTE(review): the read b that triggered this branch is not pushed into the new cluster here -- confirm whether sam_plp_push already handled it. */
							sam_plp_dectroy(pierCluster);
							pierCluster = sam_plp_init();
						}
					}
				}
				/*char *qname = bam_get_qname(b);
				if (sam_write1(out, h, b) < 0) {
					fprintf(stderr, "[x]Error writing output.\n");
					exit_code = 1;
					break;
				}*/
			}
/*			r = sam_close(out);   // stdout can only be closed once
			if (r < 0) {
				fprintf(stderr, "Error closing output.\n");
				exit_code = 1;
			}
*/
			/* Per-file cleanup. */
			hts_idx_destroy(idx);
			bam_destroy1(b);
			bam_destroy1(d);
			bam_destroy1(d2);
			bam_hdr_destroy(h);
			r = sam_close(in);
			free(ChrIsHum);
#ifdef DEBUGa
			fflush(NULL);
			//pressAnyKey();
#endif
			sam_plp_dectroy(pierCluster);
			//printf("<[%d]:\n",bami);
		}
	}
	fclose(fs);
	/* Called with all-NULL arguments after the scan; per the original note this closes the helper's own file handle (fp2) -- defined elsewhere. */
	getPairedSam(NULL, NULL, NULL, NULL);	// sam_close(fp2);
	//printf("---[%d]---\n",exit_code);
	bam_destroy1(bR1); bam_destroy1(bR2); bam_destroy1(bR3);
	ks_release(&ks1);
	ks_release(&ks2);
	ks_release(&ks3);
	return exit_code;
}
Exemple #27
0
/*
 * Prints the k-mer frequency distribution of a set of reads to stdout.
 *
 * Every substring of length KMerSize of each read's Part.ReadSequence is
 * counted in a hash table keyed by the k-mer string. Afterwards, for every
 * occurrence count c that at least one distinct k-mer has, one line
 * "c, <number of distinct k-mers with count c>, <that number as a percentage
 * of all k-mers processed>" is written.
 *
 * Options is not used in this function.
 *
 * Returns ERR_SUCCESS, the error from utils_calloc / utils_copy_string, or
 * ERR_OUT_OF_MEMORY when kh_put reports a status outside 0..2.
 *
 * NOTE(review): the result of kh_init() is not checked.
 * NOTE(review): "%Iu" is the MSVC-specific size_t format; other C libraries
 * expect "%zu" -- confirm the target toolchain.
 */
ERR_VALUE kmer_freq_distribution(const PROGRAM_OPTIONS *Options, const uint32_t KMerSize, const ONE_READ *Reads, const size_t ReadCount)
{
	int err;
	size_t maxValue = 0;
	khiter_t it;
	size_t kmerCount = 0;
	char *kmerString = NULL;
	khash_t(kc) *table = kh_init(kc);
	ERR_VALUE ret = ERR_INTERNAL_ERROR;

	/* Scratch buffer that holds the current k-mer as a NUL-terminated string. */
	ret = utils_calloc(KMerSize + 1, sizeof(char), &kmerString);
	if (ret == ERR_SUCCESS) {
		const ONE_READ *r = Reads;
		
		kmerString[KMerSize] = '\0';
		for (size_t i = 0; i < ReadCount; ++i) {
			const READ_PART *p = &r->Part;
			
				read_split(r);
				if (p->ReadSequenceLength >= KMerSize) {
					/* Slide a window of KMerSize characters over the read. */
					for (size_t j = 0; j < p->ReadSequenceLength - KMerSize + 1; ++j) {
						char *s = NULL;

						memcpy(kmerString, p->ReadSequence + j, KMerSize*sizeof(char));
						/* The table keeps key pointers, so each candidate key needs its own copy. */
						ret = utils_copy_string(kmerString, &s);
						if (ret == ERR_SUCCESS) {
							it = kh_put(kc, table, s, &err);
							switch (err) {
							case 0:
								/* Key already present: bump its count and drop the unused copy. */
								kh_value(table, it) += 1;
								if (kh_value(table, it) > maxValue)
									maxValue = kh_value(table, it);

								utils_free(s);
								break;
							case 1:
							case 2:
								/* New key: the table now owns s. */
								kh_value(table, it) = 1;
								break;
							default:
								/* Insertion failed; s is released just below because ret is now an error. */
								ret = ERR_OUT_OF_MEMORY;
								break;
							}

							++kmerCount;
							if (ret != ERR_SUCCESS)
								utils_free(s);
						}

						if (ret != ERR_SUCCESS)
							break;
					}
				}

			if (ret != ERR_SUCCESS)
				break;

			++r;
		}

		if (ret == ERR_SUCCESS) {
			size_t *freqArray = NULL;

			/* freqArray[c] = number of distinct k-mers seen exactly c times; indices run 0..maxValue. */
			++maxValue;
			ret = utils_calloc(maxValue, sizeof(size_t), &freqArray);
			if (ret == ERR_SUCCESS) {
				memset(freqArray, 0, maxValue*sizeof(size_t));
				for (it = kh_begin(table); it != kh_end(table); ++it) {
					if (kh_exist(table, it))
						++freqArray[kh_value(table, it)];
				}

				for (size_t i = 0; i < maxValue; ++i) {
					if (freqArray[i] > 0)
						fprintf(stdout, "%Iu, %Iu, %lf\n", i, freqArray[i], (double)freqArray[i]*100/ (double)kmerCount);
				}

				utils_free(freqArray);
			}
		}

		utils_free(kmerString);
	}

	/* Release the key strings owned by the table, then the table itself. */
	for (size_t i = kh_begin(table); i < kh_end(table); ++i) {
		if (kh_exist(table, i))
			utils_free(kh_key(table, i));
	}

	kh_destroy(kc, table);

	return ret;
}
Exemple #28
0
/*
 * Computes entropy from integer frequencies for various encoding methods and
 * picks the best encoding.
 *
 * FIXME: we could reuse some of the code here for the actual encoding
 * parameters too. Eg the best 'k' for SUBEXP or the code lengths for huffman.
 *
 * Returns the best codec to use.
 */
enum cram_encoding cram_stats_encoding(cram_fd *fd, cram_stats *st) {
    enum cram_encoding best_encoding = E_NULL;
    int best_size = INT_MAX, bits;
    int nvals, i, ntot = 0, max_val = 0, min_val = INT_MAX, k;
    int *vals = NULL, *freqs = NULL, vals_alloc = 0, *codes;

    //cram_stats_dump(st);

    /* Count number of unique symbols */
    for (nvals = i = 0; i < MAX_STAT_VAL; i++) {
	if (!st->freqs[i])
	    continue;
	if (nvals >= vals_alloc) {
	    vals_alloc = vals_alloc ? vals_alloc*2 : 1024;
	    vals  = realloc(vals,  vals_alloc * sizeof(int));
	    freqs = realloc(freqs, vals_alloc * sizeof(int));
	    if (!vals || !freqs) {
		if (vals)  free(vals);
		if (freqs) free(freqs);
		return E_HUFFMAN; // Cannot do much else atm
	    }
	}
	vals[nvals] = i;
	freqs[nvals] = st->freqs[i];
	ntot += freqs[nvals];
	if (max_val < i) max_val = i;
	if (min_val > i) min_val = i;
	nvals++;
    }
    if (st->h) {
	khint_t k;
	int i;

	for (k = kh_begin(st->h); k != kh_end(st->h); k++) {
	    if (!kh_exist(st->h, k))
		continue;

	    if (nvals >= vals_alloc) {
		vals_alloc = vals_alloc ? vals_alloc*2 : 1024;
		vals  = realloc(vals,  vals_alloc * sizeof(int));
		freqs = realloc(freqs, vals_alloc * sizeof(int));
		if (!vals || !freqs)
		    return E_HUFFMAN; // Cannot do much else atm
	    }
	    i = kh_key(st->h, k);
	    vals[nvals]=i;
	    freqs[nvals] = kh_val(st->h, k);
	    ntot += freqs[nvals];
	    if (max_val < i) max_val = i;
	    if (min_val > i) min_val = i;
	    nvals++;
	}
    }

    st->nvals = nvals;
    assert(ntot == st->nsamp);

    if (nvals <= 1) {
	free(vals);
	free(freqs);
	return E_HUFFMAN;
    }

    if (fd->verbose > 1)
	fprintf(stderr, "Range = %d..%d, nvals=%d, ntot=%d\n",
		min_val, max_val, nvals, ntot);

    /* Theoretical entropy */
//    if (fd->verbose > 1) {
//	double dbits = 0;
//	for (i = 0; i < nvals; i++) {
//	    dbits += freqs[i] * log((double)freqs[i]/ntot);
//	}
//	dbits /= -log(2);
//	if (fd->verbose > 1)
//	    fprintf(stderr, "Entropy = %f\n", dbits);
//    }

    if (nvals > 1 && ntot > 256) {
#if 0
	/*
	 * CRUDE huffman estimator. Round to closest and round up from 0
	 * to 1 bit.
	 *
	 * With and without ITF8 incase we have a few discrete values but with
	 * large magnitude.
	 *
	 * Note rans0/arith0 and Z_HUFFMAN_ONLY vs internal huffman can be
	 * compared in this way, but order-1 (eg rans1) or maybe LZ77 modes
	 * may detect the correlation of high bytes to low bytes in multi-
	 * byte values. So this predictor breaks down.
	 */
	double dbits = 0;  // entropy + ~huffman
	double dbitsH = 0;
	double dbitsE = 0; // external entropy + ~huffman
	double dbitsEH = 0;
	int F[256] = {0}, n = 0;
	double e = 0; // accumulated error bits
	for (i = 0; i < nvals; i++) {
	    double x; int X;
	    unsigned int v = vals[i];

	    //Better encoding would cope with sign.
	    //v = ABS(vals[i])*2+(vals[i]<0);

	    if (!(v & ~0x7f)) {
		F[v]             += freqs[i], n+=freqs[i];
	    } else if (!(v & ~0x3fff)) {
		F[(v>>8) |0x80] += freqs[i];
		F[ v     &0xff] += freqs[i], n+=2*freqs[i];
	    } else if (!(v & ~0x1fffff)) {
Exemple #29
0
/*
 * Returns the starting iterator for walking the srcsessmap hash table
 * (the value of kh_begin on it).
 */
static cache_iter_t
cachessess_begin_cb(void)
{
	return kh_begin(srcsessmap);
}
Exemple #30
0
/*
 * Returns the starting iterator for walking the certmap hash table
 * (the value of kh_begin on it).
 */
static cache_iter_t
cachefkcrt_begin_cb(void)
{
	return kh_begin(certmap);
}