Exemple #1
0
/*
 * Return a new array containing the keys of `hash`.
 *
 * Every table entry carries an index `n` next to its value; each key is
 * stored at position `n` of the result, so the array follows those indices
 * rather than the bucket order of the table.  An empty (or missing) table
 * yields a fresh empty array.
 */
mrb_value
mrb_hash_keys(mrb_state *mrb, mrb_value hash)
{
    khash_t(ht) *tbl = RHASH_TBL(hash);
    mrb_value keys;
    mrb_value *slots;
    khiter_t it;

    if (!tbl || kh_size(tbl) == 0) {
        return mrb_ary_new(mrb);
    }

    keys = mrb_ary_new_capa(mrb, kh_size(tbl));
    /* Writing nil to the last index presumably extends the array to its
     * final length before the slots are filled directly -- TODO confirm. */
    mrb_ary_set(mrb, keys, kh_size(tbl)-1, mrb_nil_value());
    slots = RARRAY_PTR(keys);

    for (it = kh_begin(tbl); it != kh_end(tbl); it++) {
        if (!kh_exist(tbl, it)) continue;
        slots[kh_value(tbl, it).n] = kh_key(tbl, it);
    }
    return keys;
}
Exemple #2
0
/*
 * Compare two hashes entry by entry.
 *
 * Walks every live entry of `hash`, looks the same key up in `dt` and
 * compares the two values with mrb_equal().  Returns the false value as soon
 * as a key is missing from `dt` or a value differs, the true value otherwise.
 * Entries that exist only in `dt` are not inspected here.  `recur` is unused.
 */
static mrb_value
recursive_eql(mrb_state *mrb, mrb_value hash, mrb_value dt, int recur)
{
  khash_t(ht) *lhs = RHASH_TBL(hash);
  khash_t(ht) *rhs = RHASH_TBL(dt);
  khiter_t li;

  for (li = kh_begin(lhs); li != kh_end(lhs); li++) {
    khiter_t ri;

    if (!kh_exist(lhs, li)) continue;

    ri = kh_get(ht, rhs, kh_key(lhs, li));
    if (ri == kh_end(rhs)) {
      return mrb_false_value();      /* key missing on the other side */
    }
    if (!mrb_equal(mrb, kh_value(lhs, li), kh_value(rhs, ri))) {
      return mrb_false_value();      /* same key, different value */
    }
  }
  return mrb_true_value();
}
Exemple #3
0
/*
 * Shared implementation of hash equality.
 *
 * mrb   - interpreter state
 * hash1 - receiver (a hash)
 * hash2 - object compared against
 * eql   - non-zero for the strict (eql?) comparison, zero for ==
 *
 * Returns the true or false value.
 *
 * When hash2 is not a hash but responds to `to_hash`, the comparison is
 * delegated to hash2.  The delegated result is wrapped with mrb_bool_value():
 * wrapping it as a Fixnum would turn "not equal" into 0, which is truthy in
 * Ruby.  Values are compared with mrb_eql() when `eql` is set so that the
 * strict comparison is strict for values as well.
 */
static mrb_value
hash_equal(mrb_state *mrb, mrb_value hash1, mrb_value hash2, int eql)
{
  khash_t(ht) *h1, *h2;
  khiter_t k1, k2;

  if (mrb_obj_equal(mrb, hash1, hash2)) return mrb_true_value();
  if (!mrb_hash_p(hash2)) {
    if (!mrb_respond_to(mrb, hash2, mrb_intern_lit(mrb, "to_hash"))) {
      return mrb_false_value();
    }
    if (eql)
      return mrb_bool_value(mrb_eql(mrb, hash2, hash1));
    else
      return mrb_bool_value(mrb_equal(mrb, hash2, hash1));
  }
  h1 = RHASH_TBL(hash1);
  h2 = RHASH_TBL(hash2);
  if (!h1) {
    /* a hash without a table equals only another hash without a table */
    return mrb_bool_value(!h2);
  }
  if (!h2) return mrb_false_value();
  if (kh_size(h1) != kh_size(h2)) return mrb_false_value();

  /* Same size: every entry of h1 must have an equal counterpart in h2. */
  for (k1 = kh_begin(h1); k1 != kh_end(h1); k1++) {
    int same;

    if (!kh_exist(h1, k1)) continue;
    k2 = kh_get(ht, mrb, h2, kh_key(h1,k1));
    if (k2 == kh_end(h2)) return mrb_false_value();

    if (eql)
      same = mrb_eql(mrb, kh_value(h1,k1), kh_value(h2,k2));
    else
      same = mrb_equal(mrb, kh_value(h1,k1), kh_value(h2,k2));
    if (!same) return mrb_false_value();
  }
  return mrb_true_value();
}
Exemple #4
0
/*
 *  call-seq:
 *     obj.instance_variables    -> array
 *
 *  Collects the names of the receiver's instance variables.  Only table
 *  entries whose name starts with '@' and whose value is not of type
 *  MRB_TT_UNDEF are reported.  Defining an accessor alone does not create
 *  the corresponding instance variable.
 *
 *     class Fred
 *       attr_accessor :a1
 *       def initialize
 *         @iv = 3
 *       end
 *     end
 *     Fred.new.instance_variables   # one entry, for @iv
 *
 *  Note: each name is pushed as a String built from the symbol's name.
 */
mrb_value
mrb_obj_instance_variables(mrb_state *mrb, mrb_value self)
{
    kh_iv_t *tbl = RCLASS_IV_TBL(self);
    mrb_value names = mrb_ary_new(mrb);
    int slot;

    if (!tbl) return names;

    for (slot = 0; slot < kh_end(tbl); slot++) {
        const char *name;

        if (!kh_exist(tbl, slot)) continue;

        name = mrb_sym2name(mrb, kh_key(tbl, slot));
        if (*name != '@') continue;                 /* not an instance variable */
        if (mrb_type(kh_value(tbl, slot)) == MRB_TT_UNDEF) continue;

        mrb_ary_push(mrb, names, mrb_str_new_cstr(mrb, name));
    }
    return names;
}
Exemple #5
0
/*
 * Create a copy of `hash`.
 *
 * A new hash object with its own table is allocated; every live entry of the
 * source table is re-inserted, copying the stored value and assigning the
 * entry index `n` from the size of the destination table after the insert.
 * The default / proc-default flags and a non-nil "ifnone" value are carried
 * over as well.  Keys and values themselves are not duplicated.
 */
static mrb_value
mrb_hash_dup(mrb_state *mrb, mrb_value hash)
{
  khash_t(ht) *src = RHASH_TBL(hash);
  struct RHash *copy;
  mrb_value copy_val, dflt;

  copy = (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class);
  copy->ht = kh_init(ht, mrb);

  if (src && kh_size(src) > 0) {
    khash_t(ht) *dst = copy->ht;
    khiter_t si;

    for (si = kh_begin(src); si != kh_end(src); si++) {
      khiter_t di;
      int ai;

      if (!kh_exist(src, si)) continue;

      /* the arena is saved and restored around the insert only */
      ai = mrb_gc_arena_save(mrb);
      di = kh_put(ht, mrb, dst, KEY(kh_key(src, si)));
      mrb_gc_arena_restore(mrb, ai);

      kh_val(dst, di).v = kh_val(src, si).v;
      kh_val(dst, di).n = kh_size(dst)-1;
    }
  }

  if (MRB_RHASH_DEFAULT_P(hash)) {
    copy->flags |= MRB_HASH_DEFAULT;
  }
  if (MRB_RHASH_PROCDEFAULT_P(hash)) {
    copy->flags |= MRB_HASH_PROC_DEFAULT;
  }

  copy_val = mrb_obj_value(copy);
  dflt = RHASH_IFNONE(hash);
  if (!mrb_nil_p(dflt)) {
      mrb_iv_set(mrb, copy_val, mrb_intern_lit(mrb, "ifnone"), dflt);
  }
  return copy_val;
}
Exemple #6
0
/*
 * Compute an occurrence threshold from the index.
 *
 * One count is collected per hash entry across all buckets (1 when the low
 * bit of the key is set, otherwise the stored value truncated to 32 bits).
 * The result is the element of rank (1 - f) * n among those counts, plus 1.
 *
 * mi - index to inspect
 * f  - fraction used to pick the rank; f <= 0 disables the threshold
 *
 * Returns UINT32_MAX when f <= 0, when the index holds no entries, or when
 * the scratch buffer cannot be allocated.
 * NOTE(review): "no threshold" on allocation failure is a best-effort choice;
 * confirm callers are happy with it.
 */
uint32_t mm_idx_cal_max_occ(const mm_idx_t *mi, float f)
{
	int i;
	size_t n = 0, rank;
	double pos;
	uint32_t thres;
	khint_t *a, k;
	if (f <= 0.) return UINT32_MAX;
	for (i = 0; i < 1<<mi->b; ++i)
		if (mi->B[i].h) n += kh_size((idxhash_t*)mi->B[i].h);
	/* nothing to rank: selecting from an empty buffer would read out of bounds */
	if (n == 0) return UINT32_MAX;
	a = (uint32_t*)malloc(n * sizeof(*a));
	if (a == NULL) return UINT32_MAX;
	for (i = n = 0; i < 1<<mi->b; ++i) {
		idxhash_t *h = (idxhash_t*)mi->B[i].h;
		if (h == 0) continue;
		for (k = 0; k < kh_end(h); ++k) {
			if (!kh_exist(h, k)) continue;
			a[n++] = kh_key(h, k)&1? 1 : (uint32_t)kh_val(h, k);
		}
	}
	/* Clamp the rank into [0, n-1]: f > 1 (or NaN) would make it negative,
	 * and a vanishingly small f can round (1 - f) * n up to n. */
	pos = (1. - f) * n;
	if (!(pos > 0.)) rank = 0;
	else {
		rank = (size_t)pos;
		if (rank >= n) rank = n - 1;
	}
	thres = ks_ksmall_uint32_t(n, a, rank) + 1;
	free(a);
	return thres;
}
Exemple #7
0
/*
 * Find the symbol under which class `c` is known.
 *
 * The "__classid__" instance variable of `c` is used when it is set.
 * Otherwise the variable table of `outer` (the object class when `outer` is
 * NULL) is searched for an entry whose value is `c`, and that entry's key is
 * returned.
 *
 * Returns 0 when no name can be determined.  The lookup used to fall through
 * to SYM2ID() on a nil value in that case, and dereferenced the table without
 * checking that it exists.
 * NOTE(review): 0 is assumed to be the "no symbol" id -- confirm.
 */
static mrb_sym
class_sym(mrb_state *mrb, struct RClass *c, struct RClass *outer)
{
  mrb_value name;
  khash_t(iv)* h;
  khiter_t k;
  mrb_value v;

  name = mrb_obj_iv_get(mrb, (struct RObject*)c, mrb_intern(mrb, "__classid__"));
  if (!mrb_nil_p(name)) {
    return SYM2ID(name);
  }

  if (!outer) outer = mrb->object_class;
  h = outer->iv;
  if (!h) return 0;                 /* the outer scope has no variables at all */

  for (k = kh_begin(h); k != kh_end(h); k++) {
    if (!kh_exist(h,k)) continue;
    v = kh_value(h,k);
    if (mrb_type(v) == c->tt && mrb_class_ptr(v) == c) {
      return kh_key(h,k);
    }
  }
  return 0;                         /* not registered under any name */
}
Exemple #8
0
/*
 * Call `func` for every live entry of the variable table `t`.
 *
 * The callback receives the key, the value and the opaque pointer `p`.
 * Its return value steers the walk:
 *   > 0  stop immediately; iv_foreach returns FALSE
 *   < 0  delete the current entry and keep going
 *   = 0  keep going
 * Returns TRUE when the table is NULL or the walk ran to the end.
 */
static mrb_bool
iv_foreach(mrb_state *mrb, iv_tbl *t, iv_foreach_func *func, void *p)
{
  khash_t(iv) *tbl;
  khiter_t it;

  if (t == NULL) return TRUE;

  tbl = &t->h;
  for (it = kh_begin(tbl); it != kh_end(tbl); it++) {
    int verdict;

    if (!kh_exist(tbl, it)) continue;

    verdict = (*func)(mrb, kh_key(tbl, it), kh_value(tbl, it), p);
    if (verdict > 0) return FALSE;
    if (verdict < 0) {
      kh_del(iv, mrb, tbl, it);
    }
  }
  return TRUE;
}
Exemple #9
0
/*
 * Create a copy of `hash`.
 *
 * Allocates a new hash object with its own table and re-inserts every live
 * entry of the source table (keys and values are copied by value, not
 * duplicated).  A source hash without a table yields an empty copy; the
 * table pointer used to be dereferenced unconditionally.
 */
mrb_value
mrb_hash_dup(mrb_state *mrb, mrb_value hash)
{
  struct RHash* ret;
  khash_t(ht) *h, *ret_h;
  khiter_t k, ret_k;

  h = RHASH_TBL(hash);
  ret = (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class);
  ret->ht = kh_init(ht, mrb);

  if (h && kh_size(h) > 0) {
    ret_h = ret->ht;

    for (k = kh_begin(h); k != kh_end(h); k++) {
      if (kh_exist(h,k)) {
        ret_k = kh_put(ht, ret_h, KEY(kh_key(h,k)));
        kh_val(ret_h, ret_k) = kh_val(h,k);
      }
    }
  }

  return mrb_obj_value(ret);
}
Exemple #10
0
/*
 * Python entry point: release the epoll instance identified by a descriptor.
 *
 * args: a single integer, the epoll file descriptor.
 *
 * Looks up the per-epoll record, closes its event data, frees every handler
 * stored in the record's hash table, destroys that table, removes the record
 * from the global table and frees it.  Returns None on success; on failure an
 * exception is set and NULL is returned (KeyError for an unknown descriptor,
 * SystemError when closing fails).
 */
static PyObject* pyext_epoll_free(PyObject *self,PyObject *args){
    int epfd;
    struct pyep_data *pyep;
    khiter_t it;

    if(!PyArg_ParseTuple(args,"i",&epfd)){
        PyErr_BadArgument();
        return NULL;
    }

    pyep = pyep_getby_epfd(epfd);
    if(pyep == NULL){
        PyErr_SetString(PyExc_KeyError,"epoll file descriptor not found");
        return NULL;
    }

    if(ev_close(&pyep->evdata)){
        PyErr_SetString(PyExc_SystemError,"epoll free failed");
        return NULL;
    }

    /* release every handler record before the table itself goes away */
    it = kh_begin(pyep->evhdr_ht);
    while(it != kh_end(pyep->evhdr_ht)){
        if(kh_exist(pyep->evhdr_ht,it)){
            free((struct ev_header*)kh_value(pyep->evhdr_ht,it));
        }
        it++;
    }
    kh_destroy(ptr,pyep->evhdr_ht);

    /* drop the record from the global descriptor table */
    it = kh_get(ptr,pyep_ht,epfd);
    kh_del(ptr,pyep_ht,it);

    free(pyep);

    Py_RETURN_NONE;
}
Exemple #11
0
/*
 * Append the instance variables of `obj` to the inspect string `str`.
 *
 * When `recur` is set only " ..." is appended.  Otherwise each live entry of
 * the variable table is appended as "name=<inspect of value>"; the first one
 * is detected by a leading '-' in `str`, which is replaced by '#' and
 * followed by a blank, later ones are separated by ", ".  In every case ">"
 * is appended at the end and the first character is forced to '#'.
 * Returns `str`.
 */
static mrb_value
inspect_obj(mrb_state *mrb, mrb_value obj, mrb_value str, int recur)
{
  kh_iv_t *tbl = NULL;
  khiter_t it;

  if (recur) {
    mrb_str_cat2(mrb, str, " ...");
  }
  else {
    tbl = RCLASS_IV_TBL(obj);
  }

  if (tbl) {
    for (it = kh_begin(tbl); it != kh_end(tbl); it++) {
      mrb_sym id;
      mrb_value value;

      if (!kh_exist(tbl, it)) continue;

      id = kh_key(tbl, it);
      value = kh_value(tbl, it);

      /* need not to show internal data */
      if (RSTRING_PTR(str)[0] == '-') { /* first element */
        RSTRING_PTR(str)[0] = '#';
        mrb_str_cat2(mrb, str, " ");
      }
      else {
        mrb_str_cat2(mrb, str, ", ");
      }
      mrb_str_cat2(mrb, str, mrb_sym2name(mrb, id));
      mrb_str_cat2(mrb, str, "=");
      mrb_str_append(mrb, str, mrb_inspect(mrb, value));
    }
  }

  mrb_str_cat2(mrb, str, ">");
  RSTRING_PTR(str)[0] = '#';

  return str;
}
Exemple #12
0
/*
 * Copy every entry of s2's environment into s1's environment.
 *
 * Missing environment tables are created on demand, each on its own state.
 * The table created for a missing s2 environment used to be stored in
 * s1->env, which replaced the destination table while the copy still wrote
 * into the old one.
 *
 * Returns STRM_OK on success.  Returns STRM_NG when a table cannot be
 * created, when an insert fails, or when a key of s2 is already present in
 * s1 (entries copied before that point stay in s1).
 */
int
strm_env_copy(strm_state* s1, strm_state* s2)
{
  strm_env *e1 = s1->env;
  strm_env *e2 = s2->env;
  khiter_t k, kk;
  int r;

  if (!e1) {
    e1 = s1->env = kh_init(env);
    if (!e1) return STRM_NG;
  }
  if (!e2) {
    e2 = s2->env = kh_init(env);
    if (!e2) return STRM_NG;
  }
  for (k = kh_begin(e2); k != kh_end(e2); k++) {
    if (kh_exist(e2, k)) {
      kk = kh_put(env, e1, kh_key(e2, k), &r);
      if (r <= 0) return STRM_NG; /* r=0  key is present in the hash table */
                                  /* r=-1 operation failed */
      kh_value(e1, kk) = kh_value(e2, k);
    }
  }
  return STRM_OK;
}
Exemple #13
0
/*
 * Scan every BAM file registered in bamNFOp for soft-clipped reads and write
 * clusters of such reads to "./test.txt".
 *
 * For each BAM file: the header is read, every reference name is looked up in
 * chrNFOp to build a per-target "is human" table, and the file is read
 * record by record.  A non-secondary read with a soft clip of at least
 * myConfig.minGrepSlen on a human target is paired with its mate through
 * getPairedSam() and pushed into the current cluster; when sam_plp_push()
 * reports non-zero, the cluster is printed and a new one is started.
 *
 * Returns 0 on success and a non-zero code on failure (EXIT_FAILURE when a
 * file or header cannot be opened/read or memory runs out, 1 when the index
 * cannot be loaded or a record cannot be formatted).  A reference name that
 * is missing from chrNFOp terminates the process through errx().
 *
 * Every failure path releases what has been acquired so far: the output
 * file, the BAM records, header, index and cluster.  Reads whose target id
 * (or mate target id) is outside the header's target range are skipped
 * instead of indexing the "is human" table out of bounds.
 *
 * NOTE(review): the cluster still being filled when a BAM file ends is
 * destroyed without being printed -- confirm that this is intended.
 */
int do_grep() {
#ifdef DEBUGa
	printf("[!]do_grep\n");
#endif
	BamInfo_t *pbam;
	kh_cstr_t BamID;
	khiter_t ki, bami;
	kstring_t ks1 = { 0, 0, NULL };
	kstring_t ks2 = { 0, 0, NULL };
	kstring_t ks3 = { 0, 0, NULL };

	samFile *in = NULL;
	bam_hdr_t *h = NULL;
	hts_idx_t *idx = NULL;
	bam1_t *b = NULL, *d = NULL, *d2 = NULL, *bR1, *bR2, *bR3;
	int8_t *ChrIsHum = NULL;
	pierCluster_t *pierCluster = NULL;
	int r = 0, exit_code = 0;

	kvec_t(bam1_t) R1, R2, RV;
	FILE *fs;

	bR1 = bam_init1(); bR2 = bam_init1(); bR3 = bam_init1();

	fs = fopen("./test.txt","w");
	if (fs == NULL) {
		fprintf(stderr, "[x]Error opening \"%s\"\n", "./test.txt");
		bam_destroy1(bR1); bam_destroy1(bR2); bam_destroy1(bR3);
		return EXIT_FAILURE;
	}

	for (bami = kh_begin(bamNFOp); bami != kh_end(bamNFOp); ++bami) {
		int fail_code = 0;	/* non-zero: stop after cleaning up this file */

		if (!kh_exist(bamNFOp, bami)) continue;

		kv_init(R1); kv_init(R2); kv_init(RV);
		BamID = kh_key(bamNFOp, bami);
		pbam = &kh_value(bamNFOp, bami);
		fprintf(stderr, "%u [%s]=%s\t%u %u\n",bami,BamID,pbam->fileName,pbam->insertSize,pbam->SD);

		in = sam_open(pbam->fileName, "r");
		if (in == NULL) {
			fprintf(stderr, "[x]Error opening \"%s\"\n", pbam->fileName);
			fail_code = EXIT_FAILURE;
			goto bam_cleanup;
		}
		h = sam_hdr_read(in);
		if (h == NULL) {
			fprintf(stderr, "[x]Couldn't read header for \"%s\"\n", pbam->fileName);
			fail_code = EXIT_FAILURE;
			goto bam_cleanup;
		}

		/* ChrIsHum[tid] is non-zero when target `tid` is a human chromosome */
		ChrIsHum = malloc(h->n_targets * sizeof(int8_t));
		if (ChrIsHum == NULL && h->n_targets > 0) {
			fprintf(stderr, "[x]Out of memory while reading \"%s\"\n", pbam->fileName);
			fail_code = EXIT_FAILURE;
			goto bam_cleanup;
		}
		for (int32_t i=0; i < h->n_targets; ++i) {
			ki = kh_get(chrNFO, chrNFOp, h->target_name[i]);
			if (ki == kh_end(chrNFOp)) {
				errx(4,"[x]Cannot find ChrID for [%s] !",h->target_name[i]);
			} else {
				ChrInfo_t * tmp = &kh_value(chrNFOp, ki);
				ChrIsHum[i] = tmp->isHum;
			}
		}

		h->ignore_sam_err = 0;
		b = bam_init1();
		d = bam_init1();
		d2 = bam_init1();
		if ((idx = sam_index_load(in, pbam->fileName)) == 0) {
			fprintf(stderr, "[E::%s] fail to load the BAM index\n", __func__);
			fail_code = 1;
			goto bam_cleanup;
		}
		pierCluster = sam_plp_init();

		while ((r = sam_read1(in, h, b)) >= 0) {
			int8_t flag = false;
			const bam1_core_t *c = &b->core;
			if (c->flag & BAM_FSECONDARY) continue;
			if (c->n_cigar) {
				uint32_t *cigar = bam_get_cigar(b);
				for (int i = 0; i < c->n_cigar; ++i) {
					if (bam_cigar_opchr(cigar[i])=='S') {	// soft clipping
						if ( bam_cigar_oplen(cigar[i]) >= myConfig.minGrepSlen ) {
							flag = true;
						}
					}
				}
			}
			/* Only reads placed on a known human target are considered;
			 * unplaced reads (tid < 0) must not index ChrIsHum. */
			if (!flag || c->tid < 0 || c->tid >= h->n_targets || !ChrIsHum[c->tid]) continue;

			flag = 0;	// recycle: bit 0 = read accepted, bit 1 = mate found
			/* Keep reads whose mate maps to the same human chromosome, or
			 * whose mate is on the other species (one side on the virus). */
			if (c->mtid >= 0 && c->mtid < h->n_targets &&
			    ((c->mtid == c->tid && ChrIsHum[c->tid]) || (ChrIsHum[c->tid] ^ ChrIsHum[c->mtid]))) {
				flag |= 1;
			}
			if (getPairedSam(in, idx, b, d) != 0) {
				continue;
			}
			flag |= 2;
			/* For a BAM_FSECONDARY (256) read, following the mate link twice
			 * gives the same result as reading the SA tag. */
			if ((flag & 3) != 3) continue;

			if (sam_plp_push(ChrIsHum, pierCluster, b) == 0) {
				/* the mate joins the cluster only when it lies on a non-human target */
				int32_t dtid = (d->core).tid;
				if (dtid >= 0 && dtid < h->n_targets && (!ChrIsHum[dtid]) && (flag & 2)) sam_plp_push(ChrIsHum, pierCluster, d);
			} else {
				/* the read was not accepted: print the cluster and start a new one */
				fprintf(fs,"[%s]\nHumRange=%s:%d-%d\n", BamID, h->target_name[(pierCluster->HumanRange).tid], (pierCluster->HumanRange).pos, (pierCluster->HumanRange).endpos);
				fprintf(fs,"VirRange=%s:%d-%d\n", h->target_name[(pierCluster->VirusRange).tid], (pierCluster->VirusRange).pos, (pierCluster->VirusRange).endpos);
				for (size_t i=0; i<kv_size(pierCluster->Reads);++i) {
					bam1_t *bi = kv_A(pierCluster->Reads, i);
					if (sam_format1(h, bi, &ks1) < 0) {
						fprintf(stderr, "Error writing output.\n");
						exit_code = 1;
						break;
					} else {
						fprintf(fs,"%s\n",ks1.s);
					}
				}
				fprintf(fs,"\n");
				sam_plp_dectroy(pierCluster);
				pierCluster = sam_plp_init();
			}
		}

bam_cleanup:
		/* Shared by the normal and the failure path; anything not acquired
		 * yet is still NULL, and every pointer is reset for the next file. */
		if (idx) { hts_idx_destroy(idx); idx = NULL; }
		if (b) { bam_destroy1(b); b = NULL; }
		if (d) { bam_destroy1(d); d = NULL; }
		if (d2) { bam_destroy1(d2); d2 = NULL; }
		if (h) { bam_hdr_destroy(h); h = NULL; }
		if (in) { r = sam_close(in); in = NULL; }
		free(ChrIsHum);
		ChrIsHum = NULL;
#ifdef DEBUGa
		fflush(NULL);
		//pressAnyKey();
#endif
		if (pierCluster) { sam_plp_dectroy(pierCluster); pierCluster = NULL; }

		if (fail_code) {
			exit_code = fail_code;
			break;
		}
	}
	fclose(fs);
	getPairedSam(NULL, NULL, NULL, NULL);	// sam_close(fp2);
	bam_destroy1(bR1); bam_destroy1(bR2); bam_destroy1(bR3);
	/* ks_release() only hands the buffer over; it has to be freed here */
	free(ks_release(&ks1));
	free(ks_release(&ks2));
	free(ks_release(&ks3));
	return exit_code;
}
Exemple #14
0
/*
 * Computes entropy from integer frequencies for various encoding methods and
 * picks the best encoding.
 *
 * FIXME: we could reuse some of the code here for the actual encoding
 * parameters too. Eg the best 'k' for SUBEXP or the code lengths for huffman.
 *
 * Returns the best codec to use.
 */
enum cram_encoding cram_stats_encoding(cram_fd *fd, cram_stats *st) {
    enum cram_encoding best_encoding = E_NULL;
    int best_size = INT_MAX, bits;
    int nvals, i, ntot = 0, max_val = 0, min_val = INT_MAX, k;
    int *vals = NULL, *freqs = NULL, vals_alloc = 0, *codes;

    //cram_stats_dump(st);

    /* Count number of unique symbols */
    for (nvals = i = 0; i < MAX_STAT_VAL; i++) {
	if (!st->freqs[i])
	    continue;
	if (nvals >= vals_alloc) {
	    vals_alloc = vals_alloc ? vals_alloc*2 : 1024;
	    vals  = realloc(vals,  vals_alloc * sizeof(int));
	    freqs = realloc(freqs, vals_alloc * sizeof(int));
	    if (!vals || !freqs) {
		if (vals)  free(vals);
		if (freqs) free(freqs);
		return E_HUFFMAN; // Cannot do much else atm
	    }
	}
	vals[nvals] = i;
	freqs[nvals] = st->freqs[i];
	ntot += freqs[nvals];
	if (max_val < i) max_val = i;
	if (min_val > i) min_val = i;
	nvals++;
    }
    if (st->h) {
	khint_t k;
	int i;

	for (k = kh_begin(st->h); k != kh_end(st->h); k++) {
	    if (!kh_exist(st->h, k))
		continue;

	    if (nvals >= vals_alloc) {
		vals_alloc = vals_alloc ? vals_alloc*2 : 1024;
		vals  = realloc(vals,  vals_alloc * sizeof(int));
		freqs = realloc(freqs, vals_alloc * sizeof(int));
		if (!vals || !freqs)
		    return E_HUFFMAN; // Cannot do much else atm
	    }
	    i = kh_key(st->h, k);
	    vals[nvals]=i;
	    freqs[nvals] = kh_val(st->h, k);
	    ntot += freqs[nvals];
	    if (max_val < i) max_val = i;
	    if (min_val > i) min_val = i;
	    nvals++;
	}
    }

    st->nvals = nvals;
    assert(ntot == st->nsamp);

    if (nvals <= 1) {
	free(vals);
	free(freqs);
	return E_HUFFMAN;
    }

    if (fd->verbose > 1)
	fprintf(stderr, "Range = %d..%d, nvals=%d, ntot=%d\n",
		min_val, max_val, nvals, ntot);

    /* Theoretical entropy */
//    if (fd->verbose > 1) {
//	double dbits = 0;
//	for (i = 0; i < nvals; i++) {
//	    dbits += freqs[i] * log((double)freqs[i]/ntot);
//	}
//	dbits /= -log(2);
//	if (fd->verbose > 1)
//	    fprintf(stderr, "Entropy = %f\n", dbits);
//    }

    if (nvals > 1 && ntot > 256) {
#if 0
	/*
	 * CRUDE huffman estimator. Round to closest and round up from 0
	 * to 1 bit.
	 *
	 * With and without ITF8 incase we have a few discrete values but with
	 * large magnitude.
	 *
	 * Note rans0/arith0 and Z_HUFFMAN_ONLY vs internal huffman can be
	 * compared in this way, but order-1 (eg rans1) or maybe LZ77 modes
	 * may detect the correlation of high bytes to low bytes in multi-
	 * byte values. So this predictor breaks down.
	 */
	double dbits = 0;  // entropy + ~huffman
	double dbitsH = 0;
	double dbitsE = 0; // external entropy + ~huffman
	double dbitsEH = 0;
	int F[256] = {0}, n = 0;
	double e = 0; // accumulated error bits
	for (i = 0; i < nvals; i++) {
	    double x; int X;
	    unsigned int v = vals[i];

	    //Better encoding would cope with sign.
	    //v = ABS(vals[i])*2+(vals[i]<0);

	    if (!(v & ~0x7f)) {
		F[v]             += freqs[i], n+=freqs[i];
	    } else if (!(v & ~0x3fff)) {
		F[(v>>8) |0x80] += freqs[i];
		F[ v     &0xff] += freqs[i], n+=2*freqs[i];
	    } else if (!(v & ~0x1fffff)) {
Exemple #15
0
/*
 * Iterator callback: report whether slot `it` of `certmap` holds a live
 * entry.  Returns non-zero for an occupied slot and zero otherwise.
 */
static int
cachefkcrt_exist_cb(cache_iter_t it)
{
	const int occupied = kh_exist(certmap, it);

	return occupied;
}
Exemple #16
0
/*
 * Turn the raw fragment-length hash of `pHist` into its final form.
 *
 * Steps, in order:
 *   1. size the fragLen/freq arrays (both are reallocated at twice the
 *      number of distinct lengths when the current capacity is too small);
 *   2. copy all distinct fragment lengths out of the hash and sort them with
 *      CompareFragLenBin;
 *   3. walk the sorted lengths, copying each frequency into freq[],
 *      recording the median (first length whose cumulative fraction reaches
 *      0.5) and replacing the hash value by a score derived from
 *      -10 * log10 of the smaller tail fraction;
 *   4. compute the mean and, unless the total frequency is 1, the sample
 *      standard deviation.
 *
 * The total frequency is taken from pHist->modeCount[0].
 * NOTE(review): at the last bin the tail fraction can reach 0, which makes
 * log10() return -inf; a total frequency of 0 divides by zero -- confirm
 * that callers rule both out.
 */
static void TGM_FragLenHistToMature(TGM_FragLenHist* pHist)
{
    khash_t(fragLen)* raw = pHist->rawHist;
    unsigned int nKeys = 0;
    unsigned int bin;
    khiter_t it;

    pHist->size = kh_size(raw);

    if (pHist->size > pHist->capacity)
    {
        /* old contents are not needed: both arrays are refilled below */
        free(pHist->fragLen);
        free(pHist->freq);

        pHist->capacity = 2 * pHist->size;

        pHist->fragLen = (uint32_t*) malloc(pHist->capacity * sizeof(uint32_t));
        if(pHist->fragLen == NULL)
            TGM_ErrQuit("ERROR: Not enough memory for the storage of the fragment length array in the fragment length histogram object.\n");

        pHist->freq = (uint64_t*) malloc(pHist->capacity * sizeof(uint64_t));
        if(pHist->freq == NULL)
            TGM_ErrQuit("ERROR: Not enough memory for the storage of the frequency array in the fragment length histogram object.\n");
    }

    /* collect the distinct fragment lengths, then order them */
    for (it = kh_begin(raw); it != kh_end(raw); ++it)
    {
        if (!kh_exist(raw, it))
            continue;

        pHist->fragLen[nKeys++] = kh_key(raw, it);
    }

    qsort(pHist->fragLen, pHist->size, sizeof(uint32_t), CompareFragLenBin);

    uint64_t totalFreq = pHist->modeCount[0];
    double cumFreq = 0.0;
    double totalFragLen = 0.0;
    TGM_Bool foundMedian = FALSE;

    for (bin = 0; bin != pHist->size; ++bin)
    {
        double cdf;
        uint32_t fragLenQual;
        khiter_t slot = kh_get(fragLen, raw, pHist->fragLen[bin]);

        if (slot == kh_end(raw))
            TGM_ErrQuit("ERROR: Cannot find the fragment length frequency from the hash table.\n");

        pHist->freq[bin] = kh_value(raw, slot);

        totalFragLen += pHist->fragLen[bin] * pHist->freq[bin];
        cumFreq += pHist->freq[bin];
        cdf = cumFreq / totalFreq;

        if (!foundMedian && cdf >= 0.5)
        {
            pHist->median = pHist->fragLen[bin];
            foundMedian = TRUE;
        }

        /* use the smaller of the two tails */
        if (cdf > 0.5)
            cdf = 1.0 - cdf;

        /* the hash value changes meaning here: frequency -> score */
        fragLenQual = DoubleRoundToInt(-10.0 * log10(cdf));
        kh_value(raw, slot) = fragLenQual;
    }

    pHist->mean = totalFragLen / totalFreq;

    pHist->stdev = 0.0;
    for (bin = 0; bin != pHist->size; ++bin)
    {
        pHist->stdev += (double) pHist->freq[bin] * pow(pHist->mean - pHist->fragLen[bin], 2);
    }

    if (totalFreq != 1)
        pHist->stdev = sqrt(pHist->stdev / (double) (totalFreq - 1));
}
static int output_stats_and_reset(struct bgpcorsaro_pfxmonitor_state_t *state,
                                  uint32_t interval_start)
{
  khiter_t k;
  khiter_t p;
  khiter_t a;
  int khret;
  uint8_t pfx_visible;
  uint32_t unique_pfxs = 0;
  khash_t(peer_asn_map) * pam;
  /* origin_asn -> num peer ASns*/
  khash_t(asn_count_map) *asn_np = NULL;

  if ((asn_np = kh_init(asn_count_map)) == NULL) {
    return -1;
  }

  /* for each prefix go through all peers */
  for (k = kh_begin(state->pfx_info); k != kh_end(state->pfx_info); ++k) {
    if (kh_exist(state->pfx_info, k) == 0) {
      continue;
    }
    /* reset counters */
    kh_clear(asn_count_map, asn_np);

    /* get peer-asn map for this prefix */
    pam = kh_value(state->pfx_info, k);

    /* save the origin asn visibility (i.e. how many peers' ASns
     * observe such information */

    /* for each peer, go through all origins */
    for (p = kh_begin(pam); p != kh_end(pam); ++p) {
      if (kh_exist(pam, p) == 0) {
        continue;
      }
      /* increment the counter for this ASN */
      if ((a = kh_get(asn_count_map, asn_np, kh_value(pam, p))) ==
          kh_end(asn_np)) {
        a = kh_put(asn_count_map, asn_np, kh_value(pam, p), &khret);
        kh_value(asn_np, a) = 1;
      } else {
        kh_value(asn_np, a)++;
      }
    }

    /* now asn_np has a complete count of the number of peers' ASns that
       observed
       each origin ASN */

    /* count the prefix and origins if their visibility
     * is above the threshold */
    pfx_visible = 0;
    for (a = kh_begin(asn_np); a != kh_end(asn_np); ++a) {
      if (kh_exist(asn_np, a) == 0) {
        continue;
      }
      /* the information is accounted only if it is
       * consistent on at least threshold peers' ASns */
      if (kh_value(asn_np, a) >= state->peer_asns_th) {
        pfx_visible = 1;
        bgpstream_id_set_insert(state->unique_origins, kh_key(asn_np, a));
      }
    }

    /* updating counters */
    unique_pfxs += pfx_visible;
  }

  DUMP_METRIC(unique_pfxs, state->interval_start, "%s.%s.%s.%s",
              state->metric_prefix, PLUGIN_NAME, state->ip_space_name,
              "prefixes_cnt");

  DUMP_METRIC(bgpstream_id_set_size(state->unique_origins),
              state->interval_start, "%s.%s.%s.%s", state->metric_prefix,
              PLUGIN_NAME, state->ip_space_name, "origin_ASns_cnt");

  bgpstream_id_set_clear(state->unique_origins);
  kh_destroy(asn_count_map, asn_np);

  return 0;
}
Exemple #18
0
/*
 * Rebuild the sort state `srt` for the records located at (chr, min_pos).
 *
 * Outline of what the body does, in order:
 *   1. On the first call (no group hash yet) set up pairing logic, scores
 *      and the two string->int hashes.
 *   2. Free the keys of both hashes, clear them and reset the group,
 *      variant and variant-set counters.
 *   3. For every active reader, walk its buffered records at this position,
 *      build a "REF>ALT" style string per record, register each string as a
 *      variant and collect the reader's variants into a group; readers that
 *      produce the same group key share one group.
 *   4. Build, per variant, a bitmask of the groups that contain it.
 *   5. Create one initial variant set per variant and translate the VCF_*
 *      variant type into SR_* flags.
 *   6. Size and zero the pairing matrix and the per-set counters.
 *   7. Repeatedly take the variant set with the highest count, merge it with
 *      the best-scoring compatible set, or push it out when none qualifies.
 *   8. Remember chr and min_pos in srt.
 */
static void bcf_sr_sort_set(bcf_srs_t *readers, sr_sort_t *srt, const char *chr, int min_pos)
{
    if ( !srt->grp_str2int )
    {
        // first time here, initialize
        if ( !srt->pair )
        {
            if ( readers->collapse==COLLAPSE_NONE ) readers->collapse = BCF_SR_PAIR_EXACT;
            bcf_sr_set_opt(readers, BCF_SR_PAIR_LOGIC, readers->collapse);
        }
        bcf_sr_init_scores(srt);
        srt->grp_str2int = khash_str2int_init();
        srt->var_str2int = khash_str2int_init();
    }
    // the hash keys are heap strings (see strdup / grp_key below): free them
    // before the tables are cleared for reuse
    int k;
    khash_t(str2int) *hash;
    hash = srt->grp_str2int;
    for (k=0; k < kh_end(hash); k++)
        if ( kh_exist(hash,k) ) free((char*)kh_key(hash,k));
    hash = srt->var_str2int;
    for (k=0; k < kh_end(hash); k++)
        if ( kh_exist(hash,k) ) free((char*)kh_key(hash,k));
    kh_clear(str2int, srt->grp_str2int);
    kh_clear(str2int, srt->var_str2int);
    srt->ngrp = srt->nvar = srt->nvset = 0;

    // scratch group, filled per reader and handed over to srt->grp when its key is new
    grp_t grp;
    memset(&grp,0,sizeof(grp_t));

    // group VCFs into groups, each with a unique combination of variants in the duplicate lines
    int ireader,ivar,irec,igrp,ivset,iact;
    for (ireader=0; ireader<readers->nreaders; ireader++) srt->vcf_buf[ireader].nrec = 0;
    for (iact=0; iact<srt->nactive; iact++)
    {
        ireader = srt->active[iact];
        bcf_sr_t *reader = &readers->readers[ireader];
        int rid   = bcf_hdr_name2id(reader->header, chr);
        grp.nvar  = 0;
        hts_expand(int,reader->nbuffer,srt->moff,srt->off);
        srt->noff  = 0;
        srt->str.l = 0;
        // buffer indices 1..nbuffer are examined; index 0 is not touched here
        for (irec=1; irec<=reader->nbuffer; irec++)
        {
            bcf1_t *line = reader->buffer[irec];
            if ( line->rid!=rid || line->pos!=min_pos ) break;

            // srt->str accumulates the records' variant strings separated by ';',
            // srt->off remembers where each record's part starts
            if ( srt->str.l ) kputc(';',&srt->str);
            srt->off[srt->noff++] = srt->str.l;
            size_t beg = srt->str.l;
            for (ivar=1; ivar<line->n_allele; ivar++)
            {
                if ( ivar>1 ) kputc(',',&srt->str);
                kputs(line->d.allele[0],&srt->str);
                kputc('>',&srt->str);
                kputs(line->d.allele[ivar],&srt->str);
            }
            if ( line->n_allele==1 )
            {
                kputs(line->d.allele[0],&srt->str);
                kputsn(">.",2,&srt->str);
            }

            // Create new variant or attach to existing one. But careful, there can be duplicate
            // records with the same POS,REF,ALT (e.g. in dbSNP-b142)
            char *var_str = beg + srt->str.s;
            int ret, var_idx = 0, var_end = srt->str.l;
            while ( 1 )
            {
                ret = khash_str2int_get(srt->var_str2int, var_str, &ivar);
                if ( ret==-1 ) break;

                var_t *var = &srt->var[ivar];
                if ( var->vcf[var->nvcf-1] != ireader ) break;

                // the same reader already owns this variant string: append a
                // running number to the string and look it up again
                srt->str.l = var_end;
                kputw(var_idx, &srt->str);
                var_str = beg + srt->str.s;
                var_idx++;
            }
            if ( ret==-1 )
            {
                ivar = srt->nvar++;
                hts_expand0(var_t,srt->nvar,srt->mvar,srt->var);
                srt->var[ivar].nvcf = 0;
                khash_str2int_set(srt->var_str2int, strdup(var_str), ivar);
                free(srt->var[ivar].str);   // possible left-over from the previous position
            }
            var_t *var = &srt->var[ivar];
            var->nalt = line->n_allele - 1;
            var->type = bcf_get_variant_types(line);
            // cut the string back to the plain variant (drops any appended number)
            srt->str.s[var_end] = 0;
            if ( ret==-1 )
                var->str = strdup(var_str);

            // record which reader and which line carry this variant;
            // var->rec is grown whenever var->vcf was grown
            int mvcf = var->mvcf;
            var->nvcf++;
            hts_expand0(int*, var->nvcf, var->mvcf, var->vcf);
            if ( mvcf != var->mvcf ) var->rec = (bcf1_t **) realloc(var->rec,sizeof(bcf1_t*)*var->mvcf);
            var->vcf[var->nvcf-1] = ireader;
            var->rec[var->nvcf-1] = line;

            grp.nvar++;
            hts_expand(var_t,grp.nvar,grp.mvar,grp.var);
            grp.var[grp.nvar-1] = ivar;
        }
        char *grp_key = grp_create_key(srt);
        int ret = khash_str2int_get(srt->grp_str2int, grp_key, &igrp);
        if ( ret==-1 )
        {
            // new group: srt->grp takes over the scratch group's contents and the key
            igrp = srt->ngrp++;
            hts_expand0(grp_t, srt->ngrp, srt->mgrp, srt->grp);
            free(srt->grp[igrp].var);
            srt->grp[igrp] = grp;
            srt->grp[igrp].key = grp_key;
            khash_str2int_set(srt->grp_str2int, grp_key, igrp);
            memset(&grp,0,sizeof(grp_t));
        }
        else
            free(grp_key);
        srt->grp[igrp].nvcf++;
    }
    free(grp.var);

    // initialize bitmask - which groups is the variant present in
    for (ivar=0; ivar<srt->nvar; ivar++)
    {
        srt->var[ivar].mask = kbs_resize(srt->var[ivar].mask, srt->ngrp);
        kbs_clear(srt->var[ivar].mask);
    }
    for (igrp=0; igrp<srt->ngrp; igrp++)
    {
        for (ivar=0; ivar<srt->grp[igrp].nvar; ivar++)
        {
            int i = srt->grp[igrp].var[ivar];
            kbs_insert(srt->var[i].mask, igrp);
        }
    }

    // create the initial list of variant sets
    for (ivar=0; ivar<srt->nvar; ivar++)
    {
        ivset = srt->nvset++;
        hts_expand0(varset_t, srt->nvset, srt->mvset, srt->vset);

        varset_t *vset = &srt->vset[ivset];
        vset->nvar = 1;
        hts_expand0(var_t, vset->nvar, vset->mvar, vset->var);
        vset->var[vset->nvar-1] = ivar;
        var_t *var  = &srt->var[ivar];
        vset->cnt   = var->nvcf;
        vset->mask  = kbs_resize(vset->mask, srt->ngrp);
        kbs_clear(vset->mask);
        kbs_bitwise_or(vset->mask, var->mask);

        // from here on var->type holds SR_* flags instead of VCF_* flags;
        // MNPs are mapped onto SR_SNP
        int type = 0;
        if ( var->type==VCF_REF ) type |= SR_REF;
        else
        {
            if ( var->type & VCF_SNP ) type |= SR_SNP;
            if ( var->type & VCF_MNP ) type |= SR_SNP;
            if ( var->type & VCF_INDEL ) type |= SR_INDEL;
            if ( var->type & VCF_OTHER ) type |= SR_OTHER;
        }
        var->type = type;
    }
#if DEBUG_VSETS
    debug_vsets(srt);
#endif

    // initialize the pairing matrix
    hts_expand(int, srt->ngrp*srt->nvset, srt->mpmat, srt->pmat);
    hts_expand(int, srt->nvset, srt->mcnt, srt->cnt);
    memset(srt->pmat, 0, sizeof(*srt->pmat)*srt->ngrp*srt->nvset);
    for (ivset=0; ivset<srt->nvset; ivset++)
    {
        varset_t *vset = &srt->vset[ivset];
        for (igrp=0; igrp<srt->ngrp; igrp++) srt->pmat[ivset*srt->ngrp+igrp] = 0;
        srt->cnt[ivset] = vset->cnt;
    }

    // pair the lines
    // NOTE(review): the loop ends when srt->nvset reaches 0, so merge_vsets()
    // and push_vset() presumably each remove a variant set -- confirm
    while ( srt->nvset )
    {
#if DEBUG_VSETS
    fprintf(stderr,"\n");
    debug_vsets(srt);
#endif

        // pick the variant set with the highest count (first one wins on ties)
        int imax = 0;
        for (ivset=1; ivset<srt->nvset; ivset++)
            if ( srt->cnt[imax] < srt->cnt[ivset] ) imax = ivset;

        int ipair = -1;
        uint32_t max_score = 0;
        for (ivset=0; ivset<srt->nvset; ivset++)
        {
            if ( kbs_logical_and(srt->vset[imax].mask,srt->vset[ivset].mask) ) continue;   // cannot be merged
            uint32_t score = pairing_score(srt, imax, ivset);
            // fprintf(stderr,"score: %d %d, logic=%d \t..\t %u\n", imax,ivset,srt->pair,score);
            if ( max_score < score ) { max_score = score; ipair = ivset; }
        }

        // merge rows creating a new variant set this way
        if ( ipair!=-1 && ipair!=imax )
        {
            imax = merge_vsets(srt, imax, ipair);
            continue;
        }

        push_vset(srt, imax);
    }

    srt->chr = chr;
    srt->pos = min_pos;
}
Exemple #19
0
/*
 * Mark phase of the collector.
 *
 * Marks the objects referenced from the roots visible in this function (the
 * context chain, the arena, the globals, the dynamic environment, the halt
 * continuation and the feature list), then repeatedly scans the weak maps
 * until a complete pass marks nothing new.
 */
static void
gc_mark_phase(pic_state *pic)
{
  struct context *frame;
  size_t idx, marked;

  assert(pic->heap->weaks == NULL);

  /* context chain: frame pointer, stack pointer and irep of each context */
  for (frame = pic->cxt; frame != NULL; frame = frame->prev) {
    if (frame->fp) {
      gc_mark_object(pic, (struct object *)frame->fp);
    }
    if (frame->sp) {
      gc_mark_object(pic, (struct object *)frame->sp);
    }
    if (frame->irep) {
      gc_mark_object(pic, (struct object *)frame->irep);
    }
  }

  /* arena entries below the current arena index */
  for (idx = 0; idx < pic->ai; ++idx) {
    gc_mark_object(pic, (struct object *)pic->arena[idx]);
  }

  /* global variables, dynamic environment, top continuation, features */
  gc_mark(pic, pic->globals);
  gc_mark(pic, pic->dyn_env);
  gc_mark(pic, pic->halt);
  gc_mark(pic, pic->features);

  /*
   * weak maps: a value is marked only if its key is already alive and the
   * value itself is an object that is not alive yet.  The scan is repeated
   * for as long as the previous pass marked at least one value.
   */
  do {
    struct weak *w;

    marked = 0;

    for (w = pic->heap->weaks; w != NULL; w = w->prev) {
      khash_t(weak) *tbl = &w->hash;
      int slot;

      for (slot = kh_begin(tbl); slot != kh_end(tbl); ++slot) {
        struct object *key;
        pic_value val;

        if (! kh_exist(tbl, slot))
          continue;

        key = kh_key(tbl, slot);
        val = kh_val(tbl, slot);
        if (! is_alive(key))
          continue;

        if (obj_p(pic, val) && ! is_alive(obj_ptr(pic, val))) {
          gc_mark(pic, val);
          ++marked;
        }
      }
    }
  } while (marked > 0);
}
Exemple #20
0
/*
 * Remove duplicates from the leading part of `buf`.
 *
 * Elements with a non-negative score are grouped by a 32-bit key built from
 * either `rpos` (low bit set) or the alignment position (low bit clear).
 * Within each group the element with the highest positive score keeps its
 * BAM record; the records of all others are destroyed.  Every grouped
 * element ends up with score -1.
 */
static void rmdupse_buf(buffer_t *buf)
{
	khash_t(32) *h;
	khint_t k;
	int i, mpos, upper;

	/* position limit: taken from the last element only when x == n */
	mpos = (buf->x == buf->n)? buf->buf[buf->x-1].b->core.pos : 0x7fffffff;
	/* a negative x means "process the whole buffer" */
	upper = (buf->x < 0)? buf->n : buf->x;

	// group the element indices by key
	h = kh_init(32);
	for (i = 0; i < upper; ++i) {
		elem_t *e = buf->buf + i;
		listelem_t *lst;
		uint32_t key;
		int absent;

		if (e->score < 0) continue;
		if (e->rpos >= 0) {
			if (e->rpos > mpos) continue;
			key = (uint32_t)e->rpos<<1 | 1;
		} else {
			if (e->b->core.pos >= mpos) continue;
			key = (uint32_t)e->b->core.pos<<1;
		}
		k = kh_put(32, h, key, &absent);
		lst = &kh_val(h, k);
		if (absent != 0) { // first element with this key: start a new list
			lst->m = lst->n = 1;
			lst->a = (int*)calloc(lst->m, sizeof(int));
			lst->a[0] = i;
		} else { // key already present: append, doubling the capacity when full
			if (lst->n == lst->m) {
				lst->m <<= 1;
				lst->a = (int*)realloc(lst->a, lst->m * sizeof(int));
			}
			lst->a[lst->n++] = i;
		}
	}

	// resolve each group
	for (k = kh_begin(h); k < kh_end(h); ++k) {
		listelem_t *lst;
		int j, best, besti;

		if (!kh_exist(h, k)) continue;
		lst = &kh_val(h, k);

		// locate the highest score (strictly greater than 0)
		best = 0;
		besti = -1;
		for (j = 0; j < lst->n; ++j) {
			int score = buf->buf[lst->a[j]].score;
			if (score > best) {
				best = score;
				besti = j;
			}
		}

		// flag every member as processed and drop the records of the losers
		for (j = 0; j < lst->n; ++j) {
			elem_t *e = &buf->buf[lst->a[j]];
			e->score = -1;
			if (j == besti) continue;
			bam_destroy1(e->b);
			e->b = 0;
		}

		free(lst->a);
	}
	kh_destroy(32, h);
}
Exemple #21
0
/*
 * Report whether bucket `idx` of `map` currently holds an entry.
 * Returns the result of kh_exist() for that bucket.
 */
int git_strmap_has_data(git_strmap *map, size_t idx)
{
	const int occupied = kh_exist(map, idx);

	return occupied;
}
Exemple #22
0
/**
 * Tell whether `key` is present in `map`.
 *
 * @param map  map whose khash table `map->h` is searched
 * @param key  key to look up; its character data (bdata) is used as hash key
 * @return true if the key is in the table, false otherwise
 */
bool HashMap_exists(HashMap* map, bstring key) {

    khint_t k = kh_get(str, map->h, bdata(key));

    /* kh_get() returns kh_end() for a missing key.  That index is one past
       the last bucket, so it must not be passed to kh_exist(): doing so reads
       the flags array out of bounds (and dereferences NULL on an empty
       table).  Any other return value is an occupied bucket. */
    return k != kh_end(map->h);
}
Exemple #23
0
/*
 * Entry point of the "view" command.
 *
 * Parses the command line, opens the input (and, unless only a count is
 * requested, the output and optional "unselected reads" output), writes the
 * header when required and then either streams the whole input or iterates
 * over the regions given after the input file name.  Each alignment is passed
 * to process_aln(); accepted ones are written to the output and counted,
 * rejected ones go to the -U file if one was given.
 *
 * Returns 0 on success and non-zero on failure (or the value returned by
 * usage() when the help text is printed).
 */
int main_samview(int argc, char *argv[])
{
    int c, is_header = 0, is_header_only = 0, ret = 0, compress_level = -1, is_count = 0;
    int is_long_help = 0, n_threads = 0;
    int64_t count = 0;
    samFile *in = 0, *out = 0, *un_out=0;
    bam_hdr_t *header = NULL;
    char out_mode[5], out_un_mode[5], *out_format = "";
    char *fn_in = 0, *fn_out = 0, *fn_list = 0, *q, *fn_un_out = 0;
    sam_global_args ga = SAM_GLOBAL_ARGS_INIT;

    samview_settings_t settings = {
        .rghash = NULL,
        .min_mapQ = 0,
        .flag_on = 0,
        .flag_off = 0,
        .min_qlen = 0,
        .remove_B = 0,
        .subsam_seed = 0,
        .subsam_frac = -1.,
        .library = NULL,
        .bed = NULL,
    };

    static const struct option lopts[] = {
        SAM_OPT_GLOBAL_OPTIONS('-', 0, 'O', 0, 'T'),
        { "threads", required_argument, NULL, '@' },
        { NULL, 0, NULL, 0 }
    };

    /* parse command-line options */
    strcpy(out_mode, "w");
    strcpy(out_un_mode, "w");
    while ((c = getopt_long(argc, argv,
                            "SbBcCt:h1Ho:O:q:f:F:ul:r:?T:R:L:s:@:m:x:U:",
                            lopts, NULL)) >= 0) {
        switch (c) {
        case 's':
            /* integer part seeds the RNG, the remainder is the fraction */
            if ((settings.subsam_seed = strtol(optarg, &q, 10)) != 0) {
                srand(settings.subsam_seed);
                settings.subsam_seed = rand();
            }
            settings.subsam_frac = strtod(q, &q);
            break;
        case 'm': settings.min_qlen = atoi(optarg); break;
        case 'c': is_count = 1; break;
        case 'S': break;
        case 'b': out_format = "b"; break;
        case 'C': out_format = "c"; break;
        case 't': fn_list = strdup(optarg); break;
        case 'h': is_header = 1; break;
        case 'H': is_header_only = 1; break;
        case 'o': fn_out = strdup(optarg); break;
        case 'U': fn_un_out = strdup(optarg); break;
        case 'f': settings.flag_on |= strtol(optarg, 0, 0); break;
        case 'F': settings.flag_off |= strtol(optarg, 0, 0); break;
        case 'q': settings.min_mapQ = atoi(optarg); break;
        case 'u': compress_level = 0; break;
        case '1': compress_level = 1; break;
        case 'l': settings.library = strdup(optarg); break;
        case 'L':
            if ((settings.bed = bed_read(optarg)) == NULL) {
                print_error_errno("view", "Could not read file \"%s\"", optarg);
                ret = 1;
                goto view_end;
            }
            break;
        case 'r':
            if (add_read_group_single("view", &settings, optarg) != 0) {
                ret = 1;
                goto view_end;
            }
            break;
        case 'R':
            if (add_read_groups_file("view", &settings, optarg) != 0) {
                ret = 1;
                goto view_end;
            }
            break;
                /* REMOVED as htslib doesn't support this
        //case 'x': out_format = "x"; break;
        //case 'X': out_format = "X"; break;
                 */
        case '?': is_long_help = 1; break;
        case 'B': settings.remove_B = 1; break;
        case '@': n_threads = strtol(optarg, 0, 0); break;
        case 'x':
            {
                if (strlen(optarg) != 2) {
                    fprintf(stderr, "main_samview: Error parsing -x auxiliary tags should be exactly two characters long.\n");
                    return usage(stderr, EXIT_FAILURE, is_long_help);
                }
                settings.remove_aux = (char**)realloc(settings.remove_aux, sizeof(char*) * (++settings.remove_aux_len));
                settings.remove_aux[settings.remove_aux_len-1] = optarg;
            }
            break;

        default:
            if (parse_sam_global_opt(c, optarg, lopts, &ga) != 0)
                return usage(stderr, EXIT_FAILURE, is_long_help);
            break;
        }
    }
    if (compress_level >= 0 && !*out_format) out_format = "b";
    if (is_header_only) is_header = 1;
    // File format auto-detection first
    if (fn_out)    sam_open_mode(out_mode+1,    fn_out,    NULL);
    if (fn_un_out) sam_open_mode(out_un_mode+1, fn_un_out, NULL);
    // Overridden by manual -b, -C
    if (*out_format)
        out_mode[1] = out_un_mode[1] = *out_format;
    out_mode[2] = out_un_mode[2] = '\0';
    // out_(un_)mode now 1 or 2 bytes long, followed by nul.
    if (compress_level >= 0) {
        char tmp[2];
        tmp[0] = compress_level + '0'; tmp[1] = '\0';
        strcat(out_mode, tmp);
        strcat(out_un_mode, tmp);
    }
    if (argc == optind && isatty(STDIN_FILENO)) return usage(stdout, EXIT_SUCCESS, is_long_help); // potential memory leak...

    fn_in = (optind < argc)? argv[optind] : "-";
    // generate the fn_list if necessary
    if (fn_list == 0 && ga.reference) fn_list = samfaipath(ga.reference);
    // open file handlers
    if ((in = sam_open_format(fn_in, "r", &ga.in)) == 0) {
        print_error_errno("view", "failed to open \"%s\" for reading", fn_in);
        ret = 1;
        goto view_end;
    }

    if (fn_list) {
        if (hts_set_fai_filename(in, fn_list) != 0) {
            fprintf(stderr, "[main_samview] failed to use reference \"%s\".\n", fn_list);
            ret = 1;
            goto view_end;
        }
    }
    if ((header = sam_hdr_read(in)) == 0) {
        fprintf(stderr, "[main_samview] fail to read the header from \"%s\".\n", fn_in);
        ret = 1;
        goto view_end;
    }
    if (settings.rghash) { // FIXME: I do not know what "bam_header_t::n_text" is for...
        char *tmp;
        int l;
        tmp = drop_rg(header->text, settings.rghash, &l);
        free(header->text);
        header->text = tmp;
        header->l_text = l;
    }
    if (!is_count) {
        if ((out = sam_open_format(fn_out? fn_out : "-", out_mode, &ga.out)) == 0) {
            print_error_errno("view", "failed to open \"%s\" for writing", fn_out? fn_out : "standard output");
            ret = 1;
            goto view_end;
        }
        if (fn_list) {
            if (hts_set_fai_filename(out, fn_list) != 0) {
                fprintf(stderr, "[main_samview] failed to use reference \"%s\".\n", fn_list);
                ret = 1;
                goto view_end;
            }
        }
        if (*out_format || is_header ||
            out_mode[1] == 'b' || out_mode[1] == 'c' ||
            (ga.out.format != sam && ga.out.format != unknown_format))  {
            if (sam_hdr_write(out, header) != 0) {
                fprintf(stderr, "[main_samview] failed to write the SAM header\n");
                ret = 1;
                goto view_end;
            }
        }
        if (fn_un_out) {
            if ((un_out = sam_open_format(fn_un_out, out_un_mode, &ga.out)) == 0) {
                print_error_errno("view", "failed to open \"%s\" for writing", fn_un_out);
                ret = 1;
                goto view_end;
            }
            if (fn_list) {
                if (hts_set_fai_filename(un_out, fn_list) != 0) {
                    fprintf(stderr, "[main_samview] failed to use reference \"%s\".\n", fn_list);
                    ret = 1;
                    goto view_end;
                }
            }
            if (*out_format || is_header ||
                out_un_mode[1] == 'b' || out_un_mode[1] == 'c' ||
                (ga.out.format != sam && ga.out.format != unknown_format))  {
                if (sam_hdr_write(un_out, header) != 0) {
                    fprintf(stderr, "[main_samview] failed to write the SAM header\n");
                    ret = 1;
                    goto view_end;
                }
            }
        }
    }

    if (n_threads > 1) { if (out) hts_set_threads(out, n_threads); }
    if (is_header_only) goto view_end; // no need to print alignments

    if (optind + 1 >= argc) { // convert/print the entire file
        bam1_t *b = bam_init1();
        int r;
        while ((r = sam_read1(in, header, b)) >= 0) { // read one alignment from `in'
            if (!process_aln(header, b, &settings)) {
                if (!is_count) { if (check_sam_write1(out, header, b, fn_out, &ret) < 0) break; }
                count++;
            } else {
                if (un_out) { if (check_sam_write1(un_out, header, b, fn_un_out, &ret) < 0) break; }
            }
        }
        if (r < -1) {
            fprintf(stderr, "[main_samview] truncated file.\n");
            ret = 1;
        }
        bam_destroy1(b);
    } else { // retrieve alignments in specified regions
        int i;
        bam1_t *b;
        hts_idx_t *idx = sam_index_load(in, fn_in); // load index
        if (idx == 0) { // index is unavailable
            fprintf(stderr, "[main_samview] random alignment retrieval only works for indexed BAM or CRAM files.\n");
            ret = 1;
            goto view_end;
        }
        b = bam_init1();
        for (i = optind + 1; i < argc; ++i) {
            int result;
            hts_itr_t *iter = sam_itr_querys(idx, header, argv[i]); // parse a region in the format like `chr2:100-200'
            if (iter == NULL) { // region invalid or reference name not found
                int beg, end;
                if (hts_parse_reg(argv[i], &beg, &end))
                    fprintf(stderr, "[main_samview] region \"%s\" specifies an unknown reference name. Continue anyway.\n", argv[i]);
                else
                    fprintf(stderr, "[main_samview] region \"%s\" could not be parsed. Continue anyway.\n", argv[i]);
                continue;
            }
            // fetch alignments
            while ((result = sam_itr_next(in, iter, b)) >= 0) {
                if (!process_aln(header, b, &settings)) {
                    if (!is_count) { if (check_sam_write1(out, header, b, fn_out, &ret) < 0) break; }
                    count++;
                } else {
                    if (un_out) { if (check_sam_write1(un_out, header, b, fn_un_out, &ret) < 0) break; }
                }
            }
            hts_itr_destroy(iter);
            if (result < -1) {
                fprintf(stderr, "[main_samview] retrieval of region \"%s\" failed due to truncated file or corrupt BAM index file\n", argv[i]);
                ret = 1;
                break;
            }
        }
        bam_destroy1(b);
        hts_idx_destroy(idx); // destroy the BAM index
    }

view_end:
    if (is_count && ret == 0)
        printf("%" PRId64 "\n", count);

    // close files, free and return
    if (in) check_sam_close("view", in, fn_in, "standard input", &ret);
    if (out) check_sam_close("view", out, fn_out, "standard output", &ret);
    if (un_out) check_sam_close("view", un_out, fn_un_out, "file", &ret);

    free(fn_list); free(fn_out); free(settings.library);  free(fn_un_out);
    sam_global_args_free(&ga);
    if ( header ) bam_hdr_destroy(header);
    if (settings.bed) bed_destroy(settings.bed);
    if (settings.rghash) {
        // the read-group keys are freed here before the table itself
        khint_t k;
        for (k = 0; k < kh_end(settings.rghash); ++k)
            if (kh_exist(settings.rghash, k)) free((char*)kh_key(settings.rghash, k));
        kh_destroy(rg, settings.rghash);
    }
    if (settings.remove_aux_len) {
        free(settings.remove_aux);
    }
    return ret;
}

/*
 * Print the help text of "samtools view" to `fp`.
 *
 * The short option summary and the global option help are always printed;
 * the explanatory notes follow only when `is_long_help` is non-zero.
 * Returns `exit_status` unchanged so callers can write `return usage(...)`.
 */
static int usage(FILE *fp, int exit_status, int is_long_help)
{
    /* the help text contains no conversion specifiers, so it is written verbatim */
    fputs(
"\n"
"Usage: samtools view [options] <in.bam>|<in.sam>|<in.cram> [region ...]\n"
"\n"
"Options:\n"
// output options
"  -b       output BAM\n"
"  -C       output CRAM (requires -T)\n"
"  -1       use fast BAM compression (implies -b)\n"
"  -u       uncompressed BAM output (implies -b)\n"
"  -h       include header in SAM output\n"
"  -H       print SAM header only (no alignments)\n"
"  -c       print only the count of matching records\n"
"  -o FILE  output file name [stdout]\n"
"  -U FILE  output reads not selected by filters to FILE [null]\n"
// extra input
"  -t FILE  FILE listing reference names and lengths (see long help) [null]\n"
// read filters
"  -L FILE  only include reads overlapping this BED FILE [null]\n"
"  -r STR   only include reads in read group STR [null]\n"
"  -R FILE  only include reads with read group listed in FILE [null]\n"
"  -q INT   only include reads with mapping quality >= INT [0]\n"
"  -l STR   only include reads in library STR [null]\n"
"  -m INT   only include reads with number of CIGAR operations consuming\n"
"           query sequence >= INT [0]\n"
"  -f INT   only include reads with all bits set in INT set in FLAG [0]\n"
"  -F INT   only include reads with none of the bits set in INT set in FLAG [0]\n"
// read processing
"  -x STR   read tag to strip (repeatable) [null]\n"
"  -B       collapse the backward CIGAR operation\n"
"  -s FLOAT integer part sets seed of random number generator [0];\n"
"           rest sets fraction of templates to subsample [no subsampling]\n"
// general options
"  -@, --threads INT\n"
"           number of BAM/CRAM compression threads [0]\n"
"  -?       print long help, including note about region specification\n"
"  -S       ignored (input format is auto-detected)\n", fp);

    sam_global_opt_help(fp, "-.O.T");
    fputc('\n', fp);

    if (is_long_help) {
        fputs(
"Notes:\n"
"\n"
"1. This command now auto-detects the input format (BAM/CRAM/SAM).\n"
"   Further control over the CRAM format can be specified by using the\n"
"   --output-fmt-option, e.g. to specify the number of sequences per slice\n"
"   and to use avoid reference based compression:\n"
"\n"
"\tsamtools view -C --output-fmt-option seqs_per_slice=5000 \\\n"
"\t   --output-fmt-option no_ref -o out.cram in.bam\n"
"\n"
"   Options can also be specified as a comma separated list within the\n"
"   --output-fmt value too.  For example this is equivalent to the above\n"
"\n"
"\tsamtools view --output-fmt cram,seqs_per_slice=5000,no_ref \\\n"
"\t   -o out.cram in.bam\n"
"\n"
"2. The file supplied with `-t' is SPACE/TAB delimited with the first\n"
"   two fields of each line consisting of the reference name and the\n"
"   corresponding sequence length. The `.fai' file generated by \n"
"   `samtools faidx' is suitable for use as this file. This may be an\n"
"   empty file if reads are unaligned.\n"
"\n"
"3. SAM->BAM conversion:  samtools view -bT ref.fa in.sam.gz\n"
"\n"
"4. BAM->SAM conversion:  samtools view -h in.bam\n"
"\n"
"5. A region should be presented in one of the following formats:\n"
"   `chr1', `chr2:1,000' and `chr3:1000-2,000'. When a region is\n"
"   specified, the input alignment file must be a sorted and indexed\n"
"   alignment (BAM/CRAM) file.\n"
"\n"
"6. Option `-u' is preferred over `-b' when the output is piped to\n"
"   another samtools command.\n"
"\n", fp);
    }

    return exit_status;
}
Exemple #24
0
/** Implements the end_interval function of the plugin API
 *
 * Nothing is dumped until at least CORSARO_DOS_INTERVAL has elapsed since the
 * recorded first interval.  Once it has, every vector in the attack hash is
 * visited: expired vectors are removed from the hash and freed, vectors
 * classified as attacks are collected and dumped (ASCII or binary, depending
 * on the mode of the output file) and all remaining vectors are reset.
 *
 * @param corsaro   corsaro instance whose plugin state is used
 * @param int_end   the interval that has just ended
 * @return 0 on success (including when it is too early to dump), -1 on error
 */
int corsaro_dos_end_interval(corsaro_t *corsaro, corsaro_interval_t *int_end)
{
  int this_interval = int_end->time-STATE(corsaro)->first_interval;

  khiter_t i;
  attack_vector_t *vector;
  attack_vector_t **attack_arr = NULL;
  int attack_arr_cnt = 0;
  int rc = -1;

  uint8_t gbuf[12];
  uint8_t cntbuf[4];

  if(this_interval < CORSARO_DOS_INTERVAL)
    {
      /* we haven't run for long enough to dump */
      return 0;
    }

  /* we either have hit exactly the right amount of time,
     or we have gone for too long, dump now and reset the counter */
  STATE(corsaro)->first_interval = int_end->time;

  /* this is an interval we care about */

  /* malloc an array big enough to hold the entire hash even though we wont
     need it to be that big.  Nothing is allocated for an empty hash:
     malloc(0) may legitimately return NULL, which must not be reported as
     an out-of-memory error. */
  if(kh_size(STATE(corsaro)->attack_hash) > 0 &&
     (attack_arr =
      malloc(sizeof(attack_vector_t *)*
             kh_size(STATE(corsaro)->attack_hash))) == NULL)
    {
      corsaro_log(__func__, corsaro,
                  "could not malloc array for attack vectors");
      return -1;
    }

  /* classify the flows and dump the attack ones */

  for(i = kh_begin(STATE(corsaro)->attack_hash);
      i != kh_end(STATE(corsaro)->attack_hash); ++i)
    {
      if(kh_exist(STATE(corsaro)->attack_hash, i))
        {
          vector = kh_key(STATE(corsaro)->attack_hash, i);

          if(attack_vector_is_expired(vector, int_end->time) != 0)
            {
              kh_del(av, STATE(corsaro)->attack_hash, i);
              attack_vector_free(vector);
              vector = NULL;
            }
          else if(attack_vector_is_attack(corsaro, vector, int_end->time) != 0)
            {
              /* this is an attack */
              /* add it to the attack array so we can know how many
                 before we dump it */
              attack_arr[attack_arr_cnt] = vector;
              attack_arr_cnt++;
            }
          else
            {
              attack_vector_reset(vector);
            }
        }
    }

  corsaro_io_write_interval_start(corsaro, STATE(corsaro)->outfile,
                                  &corsaro->interval_start);
  if(corsaro->global_file != NULL)
    {
      corsaro_io_write_plugin_start(corsaro, corsaro->global_file,
                                    PLUGIN(corsaro));
    }
  if(CORSARO_FILE_MODE(STATE(corsaro)->outfile) == CORSARO_FILE_MODE_ASCII)
    {
      if(corsaro->global_file != NULL)
        {
          /* global stats */
          /* dump the number of mismatched packets and vectors */
          corsaro_file_printf(corsaro, corsaro->global_file,
                              "mismatch: %"PRIu32"\n"
                              "attack_vectors: %"PRIu32"\n"
                              "non-attack_vectors: %"PRIu32"\n",
                              STATE(corsaro)->number_mismatched_packets,
                              attack_arr_cnt,
                              kh_size(STATE(corsaro)->attack_hash)
                              -attack_arr_cnt);
        }

      /* dump the number of vectors */
      corsaro_file_printf(corsaro, STATE(corsaro)->outfile, "%"PRIu32"\n",
                          attack_arr_cnt);
      /* dump the vectors */
      for(i = 0; i < attack_arr_cnt; i++)
        {
          if(ascii_dump(corsaro, attack_arr[i]) != 0)
            {
              corsaro_log(__func__, corsaro, "could not dump hash");
              goto cleanup;
            }
          /* reset the interval stats */
          attack_vector_reset(attack_arr[i]);
        }
    }
  else if(CORSARO_FILE_MODE(STATE(corsaro)->outfile) == CORSARO_FILE_MODE_BINARY)
    {
      if(corsaro->global_file != NULL)
        {
          /* global stats */
          bytes_htonl(&gbuf[0], STATE(corsaro)->number_mismatched_packets);
          bytes_htonl(&gbuf[4], attack_arr_cnt);
          bytes_htonl(&gbuf[8],
                      kh_size(STATE(corsaro)->attack_hash)-attack_arr_cnt);
          if(corsaro_file_write(corsaro, corsaro->global_file,
                                &gbuf[0], 12) != 12)
            {
              corsaro_log(__func__, corsaro,
                          "could not dump global stats to file");
              goto cleanup;
            }
        }

      /* dump the number of vectors */
      bytes_htonl(&cntbuf[0], attack_arr_cnt);
      if(corsaro_file_write(corsaro, STATE(corsaro)->outfile,
                            &cntbuf[0], 4) != 4)
        {
          corsaro_log(__func__, corsaro,
                      "could not dump vector count to file");
          goto cleanup;
        }
      /* dump the vectors */
      for(i = 0; i < attack_arr_cnt; i++)
        {
          if(binary_dump(corsaro, attack_arr[i]) != 0)
            {
              corsaro_log(__func__, corsaro, "could not dump hash");
              goto cleanup;
            }
          attack_vector_reset(attack_arr[i]);
        }
    }
  else
    {
      corsaro_log(__func__, corsaro, "invalid mode");
      goto cleanup;
    }
  if(corsaro->global_file != NULL)
    {
      corsaro_io_write_plugin_end(corsaro, corsaro->global_file,
                                  PLUGIN(corsaro));
    }
  corsaro_io_write_interval_end(corsaro, STATE(corsaro)->outfile, int_end);

  STATE(corsaro)->number_mismatched_packets = 0;

  /* if we are rotating, now is when we should do it */
  if(corsaro_is_rotate_interval(corsaro))
    {
      /* close the current file */
      if(STATE(corsaro)->outfile != NULL)
        {
          corsaro_file_close(corsaro, STATE(corsaro)->outfile);
          STATE(corsaro)->outfile = NULL;
        }
    }

  rc = 0;

 cleanup:
  /* single exit so the temporary array is released on error paths too */
  free(attack_arr);
  return rc;
}
Exemple #25
0
/*
 * Run one garbage collection cycle (no-op when pic->gc_enable is false).
 *
 * Steps, in order:
 *   1. mark objects referenced from the context chain, the arena, the
 *      globals and the halt continuation;
 *   2. scan the attribute (weak) tables repeatedly, marking values whose key
 *      is alive, until a pass marks nothing new;
 *   3. delete attribute-table entries whose key is dead and unlink the
 *      tables from pic->gc_attrs;
 *   4. delete symbol-table entries whose symbol is dead;
 *   5. walk the object list: survivors are unmarked, dead objects are
 *      finalized, freed and unlinked.
 */
void
pic_gc(pic_state *pic)
{
  khash_t(oblist) *symtab = &pic->oblist;
  struct context *frame;
  struct object *cur, *before, *after;
  size_t n;
  int slot;

  assert(pic->gc_attrs == NULL);

  if (! pic->gc_enable) {
    return;
  }

  /* scan objects */

  for (frame = pic->cxt; frame != NULL; frame = frame->prev) {
    if (frame->fp) {
      gc_mark_object(pic, (struct object *)frame->fp);
    }
    if (frame->sp) {
      gc_mark_object(pic, (struct object *)frame->sp);
    }
    if (frame->irep) {
      gc_mark_object(pic, (struct object *)frame->irep);
    }
    gc_mark(pic, frame->conts);
  }

  for (n = 0; n < pic->ai; ++n) {
    gc_mark_object(pic, (struct object *)pic->arena[n]);
  }

  gc_mark(pic, pic->globals);
  gc_mark(pic, pic->halt);

  /* scan weak references: n counts the values marked in the current pass */

  do {
    struct attr *a;

    n = 0;

    for (a = pic->gc_attrs; a != NULL; a = a->prev) {
      khash_t(attr) *tbl = &a->hash;

      for (slot = kh_begin(tbl); slot != kh_end(tbl); ++slot) {
        struct object *key;
        pic_value val;

        if (! kh_exist(tbl, slot))
          continue;

        key = kh_key(tbl, slot);
        val = kh_val(tbl, slot);
        if (! is_alive(key))
          continue;

        if (pic_obj_p(pic, val) && ! is_alive((struct object *) pic_ptr(pic, val))) {
          gc_mark(pic, val);
          ++n;
        }
      }
    }
  } while (n > 0);

  /* reclaim dead weak references, consuming the gc_attrs list as we go */

  for (; pic->gc_attrs != NULL; pic->gc_attrs = pic->gc_attrs->prev) {
    khash_t(attr) *tbl = &pic->gc_attrs->hash;

    for (slot = kh_begin(tbl); slot != kh_end(tbl); ++slot) {
      if (! kh_exist(tbl, slot))
        continue;
      cur = kh_key(tbl, slot);
      if (! is_alive(cur)) {
        kh_del(attr, tbl, slot);
      }
    }
  }

  /* drop symbol-table entries whose symbol did not survive */

  for (slot = kh_begin(symtab); slot != kh_end(symtab); ++slot) {
    struct symbol *sym;

    if (! kh_exist(symtab, slot))
      continue;
    sym = kh_val(symtab, slot);
    if (sym && ! is_alive((struct object *)sym)) {
      kh_del(oblist, symtab, slot);
    }
  }

  /* reclaim dead objects */

  before = &pic->gc_head;
  cur = before->next;
  while (cur != &pic->gc_head) {
    after = cur->next;
    if (is_alive(cur)) {
      /* survivor: clear the mark and advance the trailing pointer */
      unmark(cur);
      before = cur;
    } else {
      /* dead: release it and splice it out; the trailing pointer stays put */
      gc_finalize_object(pic, cur);
      pic_free(pic, cur);
      before->next = after;
    }
    cur = after;
  }
}
Exemple #26
0
/*
 * Count how often each k-mer of length KMerSize occurs in the given reads and
 * print the frequency distribution to stdout, one line per occurring count:
 * "<count>, <number of distinct k-mers with that count>, <percentage>".
 *
 * Options    not used by this function
 * KMerSize   length of the k-mers
 * Reads      array of ReadCount reads; each read is passed to read_split()
 * ReadCount  number of elements in Reads
 *
 * Returns ERR_SUCCESS on success, ERR_OUT_OF_MEMORY when the hash table
 * cannot be created or grown, or the error reported by a utils_* helper.
 */
ERR_VALUE kmer_freq_distribution(const PROGRAM_OPTIONS *Options, const uint32_t KMerSize, const ONE_READ *Reads, const size_t ReadCount)
{
	int err;
	size_t maxValue = 0;
	khiter_t it;
	size_t kmerCount = 0;
	char *kmerString = NULL;
	khash_t(kc) *table = kh_init(kc);
	ERR_VALUE ret = ERR_INTERNAL_ERROR;

	if (table == NULL)
		return ERR_OUT_OF_MEMORY;

	ret = utils_calloc(KMerSize + 1, sizeof(char), &kmerString);
	if (ret == ERR_SUCCESS) {
		const ONE_READ *r = Reads;
		
		kmerString[KMerSize] = '\0';
		for (size_t i = 0; i < ReadCount; ++i) {
			const READ_PART *p = &r->Part;
			
			read_split(r);
			if (p->ReadSequenceLength >= KMerSize) {
				for (size_t j = 0; j < p->ReadSequenceLength - KMerSize + 1; ++j) {
					char *s = NULL;

					memcpy(kmerString, p->ReadSequence + j, KMerSize*sizeof(char));
					ret = utils_copy_string(kmerString, &s);
					if (ret == ERR_SUCCESS) {
						it = kh_put(kc, table, s, &err);
						switch (err) {
						case 0:
							/* key already present: bump its counter, the copy is not needed */
							kh_value(table, it) += 1;
							if (kh_value(table, it) > maxValue)
								maxValue = kh_value(table, it);

							utils_free(s);
							break;
						case 1:
						case 2:
							/* new key: the table now owns the copy */
							kh_value(table, it) = 1;
							/* a count of 1 must be reflected in maxValue as well,
							   otherwise freqArray below is too small when no
							   k-mer ever repeats */
							if (maxValue < 1)
								maxValue = 1;
							break;
						default:
							ret = ERR_OUT_OF_MEMORY;
							break;
						}

						++kmerCount;
						if (ret != ERR_SUCCESS)
							utils_free(s);
					}

					if (ret != ERR_SUCCESS)
						break;
				}
			}

			if (ret != ERR_SUCCESS)
				break;

			++r;
		}

		if (ret == ERR_SUCCESS) {
			size_t *freqArray = NULL;

			/* indices 0..maxValue are used, hence maxValue + 1 slots */
			++maxValue;
			ret = utils_calloc(maxValue, sizeof(size_t), &freqArray);
			if (ret == ERR_SUCCESS) {
				memset(freqArray, 0, maxValue*sizeof(size_t));
				for (it = kh_begin(table); it != kh_end(table); ++it) {
					if (kh_exist(table, it))
						++freqArray[kh_value(table, it)];
				}

				for (size_t i = 0; i < maxValue; ++i) {
					/* NOTE(review): "%Iu" is a Microsoft-specific size_t
					   conversion; "%zu" is the standard one - confirm the
					   target toolchains before changing it */
					if (freqArray[i] > 0)
						fprintf(stdout, "%Iu, %Iu, %lf\n", i, freqArray[i], (double)freqArray[i]*100/ (double)kmerCount);
				}

				utils_free(freqArray);
			}
		}

		utils_free(kmerString);
	}

	/* the keys are the string copies made above; release them with the table */
	for (size_t i = kh_begin(table); i < kh_end(table); ++i) {
		if (kh_exist(table, i))
			utils_free(kh_key(table, i));
	}

	kh_destroy(kc, table);

	return ret;
}
/*
 * Print every entry of seg_counter as "<key in hex> <value> |".
 * An entry is followed by a newline when its bucket index is a multiple of
 * 16, and by a single space otherwise.
 */
void printSegCounter() {
    khint_t k;

    for (k = kh_begin(seg_counter); k != kh_end(seg_counter); ++k) {
        const char *sep;

        if (!kh_exist(seg_counter, k))
            continue;   /* bucket holds no data */

        sep = (k % 16 == 0) ? "\n" : " ";
        printf("%04X %*d |%s", kh_key(seg_counter, k), 6, kh_value(seg_counter, k), sep);
    }
}
Exemple #28
0
int main(int argc, char *argv[])
{
    int c, i, n, ret, res;
    int tid, pos, *n_plp;
    cmdopt_t o;
    bam_mplp_t mplp;
    const bam_pileup1_t **plp;
    aux_t **data;
    bam_hdr_t *h = 0;
    sv_t sv1;
    qual_sum_t qual2;
    khiter_t k_iter;
    khash_t(sv_hash) *sv_h = kh_init(sv_hash);
    khash_t(sv_geno) *geno_h = kh_init(sv_geno);
    khash_t(colmap) *smp_cols;
    khash_t(ped) *ped_h = 0;
    mempool_t *mp;
    char **samples;
    
    o.min_q = 40; o.min_s = 80; o.min_len = 150; o.min_dp = 10; o.bed = 0, o.fnped = 0, o.mi_prob=0.005;
    while ((c = getopt(argc, argv, "hq:s:l:d:b:p:m:")) >= 0) {
        if (c == 'h') { usage(stderr, &o); return 0; }
        else if (c == 'q') o.min_q = atoi(optarg);
        else if (c == 's') o.min_s = atoi(optarg);
        else if (c == 'l') o.min_len = atoi(optarg);
        else if (c == 'd') o.min_dp = atoi(optarg);
        else if (c == 'p') o.fnped = optarg;
        else if (c == 'm') o.mi_prob = atof(optarg);
        else if (c == 'b') { 
            if ((o.bed = bed_read(optarg)) == NULL) {
                return -1;
            }
        }
    }
    if (o.mi_prob < 0.0000000000001 || o.mi_prob > 0.1) {
        fprintf(stderr, "Error. Probability of a mendelian inconsistency must be between 0.1 and 0.0000000000001.\n");
    }
    
    if (argc - optind < 1) {
        usage(stderr, &o);
        return 1;
    }

    // Open files and initalize aux data //
    n = argc - optind;
    data = calloc(n, sizeof(aux_t*));
    samples = (char**)malloc(n * sizeof(char*));
    for (i = 0; i < n; ++i) {
        data[i] = calloc(1, sizeof (aux_t));
        data[i]->fp = sam_open(argv[optind + i], "r");
        if (!data[i]->fp) {
            fprintf(stderr, "Input file \"%s\" could not be opened.\n", argv[optind + 1]);
            return 1;
        }
        data[i]->min_mapq = o.min_q;
        data[i]->min_as = o.min_s;
        data[i]->min_len = o.min_len;
        data[i]->hdr = sam_hdr_read(data[i]->fp);
        if (!data[i]->hdr) {
            fprintf(stderr, "Could not read the header for input file \"%s\".\n", argv[optind + 1]);
            return 1;
        }
        samples[i] = find_sample(data[i]->hdr, &res);
        if (!samples[i]) {
            fprintf(stderr, "Warning. No sample name detected for bam %s. Using filename\n", argv[optind + i]);
            samples[i] = argv[optind + i];
        }
    }
    h = data[0]->hdr;
    smp_cols = map_samples(samples, n);
    if (o.fnped) {
        if ((ped_h = read_ped(o.fnped, smp_cols)) == 0) { return -1; }
    }

    // The core data processing loop //
    mplp = bam_mplp_init(n, read_bam, (void**)data);
    n_plp = calloc(n, sizeof(int)); // n_plp[i] is the number of covering reads from the i-th BAM
    plp = calloc(n, sizeof(bam_pileup1_t*)); // plp[i] points to the array of covering reads in mplp
    //quals = (qual_vec_t*)calloc(n, sizeof(qual_vec_t));
    mp = mp_init();
    while ((ret = bam_mplp_auto(mplp, &tid, &pos, n_plp, plp)) > 0) { // iterate of positions with coverage
        int n_sv;
        if (o.bed && tid >= 0 && !bed_overlap(o.bed, h->target_name[tid], pos, pos+1)) continue;
        n_sv = plp2sv(h, tid, pos, n, n_plp, plp, sv_h);
        if (n_sv > 1) { fprintf(stderr, "Warning: more than two alleles detected at %s:%d\n", h->target_name[tid], pos); }
        if (n_sv) {
            fprintf(stderr, "SV detected at %d:%d\n", tid, pos);
            for (k_iter = kh_begin(sv_h); k_iter != kh_end(sv_h); ++k_iter) {
                if (kh_exist(sv_h, k_iter)) {
                    sv1 = kh_value(sv_h, k_iter);
                    fprintf(stderr, "SV tid1=%d, tid2=%d, pos1=%d, pos2=%d, ori1=%d, ori2=%d, allele=%d\n", sv1.tid1, sv1.tid2, sv1.pos1, sv1.pos2, sv1.ori1, sv1.ori2, sv1.allele);
                }
            }
            res = get_qual_data(h, tid, pos, n, n_plp, plp, n_sv + 1, sv_h, geno_h, mp);
            if (res < 0) {
                fprintf(stderr, "Error collecting quality data from reads\n");
                return -1;
            }
            kh_clear(sv_hash, sv_h);
        }
    }

    print_header(h, optind, n, argv);
    genotype_sv(h, n, geno_h, o.min_dp, ped_h, o.mi_prob);

    free(n_plp);
    free(plp);
    bam_mplp_destroy(mplp);
    mp_destroy(mp);
    if (o.bed) bed_destroy(o.bed);
    for (i = 0; i < n; ++i) { 
        bam_hdr_destroy(data[i]->hdr);
        sam_close(data[i]->fp);
        free(data[i]);
        free(samples[i]);
    }
    free(data);
    free(samples);
    kh_destroy(sv_hash, sv_h);
    kh_destroy(sv_geno, geno_h);
    kh_destroy(colmap, smp_cols);
    kh_destroy(ped, ped_h);
    return 0;
}
Exemple #29
0
/*
 * Existence callback for the source-session map: evaluates kh_exist() on
 * srcsessmap for iterator position `it` and returns that result unchanged.
 */
static int
cachessess_exist_cb(cache_iter_t it)
{
	const int occupied = kh_exist(srcsessmap, it);

	return occupied;
}
Exemple #30
0
size_t
sen_render_flush(int clear_buff)
{
//  gl_check_error();
  //_logfi("1");
  blend_group_t* bg;
  khint_t i,k,j;
  size_t total = 0;
  khash_t(hmsp)*  tgs;
  camera_t* cam = sen_camera();
  tex_group_t* tg;
  khash_t(hmsp)* sgs;
  shader_group_t* sg;

  vector_clear(zsorter);
  for (k = kh_begin(g_bgs); k != kh_end(g_bgs); ++k)
  {
    if (!kh_exist(g_bgs,k)) continue;
    bg = kh_val(g_bgs, k);

    if (bg->num == 0) {
      kh_del(hmip,g_bgs,k);
      continue;
    }
    tgs = bg->tgs;

   // set_blending( (blend_func) (kh_key(g_bgs, k))  );

    for (i = kh_begin(tgs); i != kh_end(tgs); ++i)
    {
      if (!kh_exist(tgs,i)) continue;
      tg = kh_val(tgs, i);
      if (tg->num == 0) {
        kh_del(hmsp,tgs,i);
        continue;
      }

      /*
      if (tg->tex)
        sen_texture_bind(tg->tex);
      else if (tg->font)
        sen_font_bind(tg->font);
        */

      sgs = tg->sgs;

      for (j = kh_begin(sgs); j != kh_end(sgs); ++j)
      {
        if (!kh_exist(sgs,j)) continue;
        sg = kh_val(sgs, j);
        if (sg->num == 0 || !sg->buff) {
          kh_del(hmsp,sgs,j);
          continue;
        }
        if (sg->buff) {
          /*
          sen_shader_use(sg->program);
          {
            if (tg->tex || tg->font)
              sen_uniform_1iN(sg->program, "u_tex0", 0);
            sen_uniform_m4fN(sg->program, "u_mvp",  cam->view_proj.data);
            vertex_buffer_render( sg->buff, GL_TRIANGLES);
            total+=vertex_buffer_size(sg->buff);
            if (clear_buff)
              vertex_buffer_clear( sg->buff );
            //sen_shader_use(NULL);
          }*/
          vector_push_back( zsorter, &sg );
        }
        sg->num = 0;
      }
      tg->num = 0;
    }
    bg->num = 0;
  }
  if (zsorter->size > 0)
    vector_sort(zsorter, zcmp);

  for (j = 0; j < zsorter->size; j++) {
    shader_group_t* sg = *(shader_group_t**)vector_get(zsorter, j);
   // _logfi("%s %d",sg->name, sg->z);


    set_blending( (blend_func) (sg->bg->key)  );


    if (sg->tg->tex)
      sen_texture_bind(sg->tg->tex);
    else if (sg->tg->font)
      sen_font_bind(sg->tg->font);

    sen_shader_use(sg->program);
    {

      if (sg->tg->tex || sg->tg->font)
        sen_uniform_1iN(sg->program, "u_tex0", 0);
      sen_uniform_m4fN(sg->program, "u_mvp", sg->z > 9500 ? cam->proj.data  : cam->view_proj.data);
      vertex_buffer_render( sg->buff, GL_TRIANGLES);
      total+=vertex_buffer_size(sg->buff);
      if (clear_buff)
        vertex_buffer_clear( sg->buff );

      //sen_shader_use(NULL);
    }
  }

 // _logfi("-------------------------------------------------");
  return total;
}