/*
 * Command-line driver for the ks_* credential checks.
 *
 * Usage, as parsed below:
 *   prog -u USER -p PASS -t TENANT [-r REFRESH] URL validate_login NAME PASS
 *   prog -u USER -p PASS -t TENANT [-r REFRESH] URL validate_token TOKEN
 *
 * -u, -p and -t are mandatory; -r defaults to 30 and must be positive.
 * When the library returns a name for the credentials it is printed on its
 * own line.
 *
 * Returns 1 if ks_init() fails, the result of usage() for a malformed
 * command line, and 0 otherwise (also when validation yields no name).
 */
int
main(int argc, char **argv) {
  struct ks_config config;
  memset(&config, 0, sizeof(struct ks_config));
  config.revoke_refresh = 30;

  int c;
  while((c = getopt(argc, argv, "u:p:t:r:")) != -1)
    switch(c) {
    case 'u':
      config.username = optarg;
      break;
    case 'p':
      config.password = optarg;
      break;
    case 't':
      config.tenant = optarg;
      break;
    case 'r':
      config.revoke_refresh = atoi(optarg);
      break;
    default:
      return usage();
    }

  /* atoi() maps garbage to 0 and lets negative numbers through, so anything
   * that is not strictly positive is treated as a usage error. */
  if(optind + 2 > argc || !config.username || !config.password ||
     !config.tenant || config.revoke_refresh <= 0)
    return usage();

  /* First positional argument is the service URL; the sub-command follows. */
  config.url = argv[optind++];

  if(argc - optind == 3 && !strcasecmp(argv[optind], "validate_login")) {
    if(ks_init(&config))
      return 1;

    char *username = ks_validate_login(argv[optind + 1], argv[optind + 2]);
    if(username) {
      printf("%s\n", username);
      free(username);
    }
    ks_deinit();

  } else if(argc - optind == 2 && !strcasecmp(argv[optind], "validate_token")) {
    if(ks_init(&config))
      return 1;

    char *username = ks_validate_token(argv[optind + 1]);
    if(username) {
      printf("%s\n", username);
      free(username);
    }
    ks_deinit();

  } else
    return usage();

  return 0;
}
Example #2
0
File: vcf.c Project: 9beckert/TIR
/*
 * Read sequence names from the text file `fn` and register them with a VCF
 * handle.
 *
 * For every line, the first whitespace-delimited token is added to the
 * handle's refhash and appended (NUL-terminated) to the header's name buffer;
 * the rest of the line is discarded. The header is re-synchronised afterwards.
 *
 * Returns -1 if `bp` is NULL or the file cannot be opened, 0 otherwise
 * (including when `bp` is not a VCF handle, in which case nothing is done).
 */
int vcf_dictread(bcf_t *bp, bcf_hdr_t *h, const char *fn)
{
	vcf_t *v;
	gzFile fp;
	kstream_t *ks;
	kstring_t s, rn;
	int dret;
	if (bp == 0) return -1;
	if (!bp->is_vcf) return 0;
	if (fn == 0) return -1;
	fp = gzopen(fn, "r");
	if (fp == 0) return -1; // nothing has been modified yet, so failing here is side-effect free
	s.l = s.m = 0; s.s = 0;
	// rn aliases the header's name buffer so that new names are appended in place
	rn.m = rn.l = h->l_nm; rn.s = h->name;
	v = (vcf_t*)bp->v;
	ks = ks_init(fp);
	while (ks_getuntil(ks, 0, &s, &dret) >= 0) {
		bcf_str2id_add(v->refhash, strdup(s.s));
		kputs(s.s, &rn); kputc('\0', &rn);
		if (dret != '\n') ks_getuntil(ks, '\n', &s, &dret); // skip the remaining columns
	}
	ks_destroy(ks);
	gzclose(fp);
	// kputs() may have reallocated the buffer: hand the (possibly new) pointer back
	h->l_nm = rn.l; h->name = rn.s;
	bcf_hdr_sync(h);
	free(s.s);
	return 0;
}
Example #3
0
/*
 * Build a BAM header from a two-column list (sequence name, length).
 *
 * `fn` is a file name, or "-" for standard input. Each line contributes one
 * entry to a name -> (length<<32 | insertion index) table, which is then
 * converted with hash2header(). Columns after the second are ignored.
 * The number of loaded sequences is reported on stderr.
 */
bam_header_t *sam_header_read2(const char *fn)
{
	bam_header_t *header;
	int c, dret, ret;
	gzFile fp;
	kstream_t *ks;
	kstring_t *str;
	kh_ref_t *hash;
	khiter_t k;
	hash = kh_init(ref);
	fp = (strcmp(fn, "-") == 0)? gzdopen(fileno(stdin), "r") : gzopen(fn, "r");
	assert(fp);
	ks = ks_init(fp);
	str = (kstring_t*)calloc(1, sizeof(kstring_t));
	while (ks_getuntil(ks, 0, str, &dret) > 0) {
		char *s = strdup(str->s);
		int len, i;
		i = kh_size(hash); // index this sequence gets if it is new
		ks_getuntil(ks, 0, str, &dret);
		len = atoi(str->s);
		k = kh_put(ref, hash, s, &ret);
		// ret == 0 means the name was already present: the table keeps its
		// existing key, so this copy is unreferenced and must be released
		if (ret == 0) free(s);
		kh_value(hash, k) = (uint64_t)len<<32 | i;
		if (dret != '\n')
			while ((c = ks_getc(ks)) != '\n' && c != -1); // skip the rest of the line
	}
	ks_destroy(ks);
	gzclose(fp);
	free(str->s); free(str);
	fprintf(stderr, "[sam_header_read2] %d sequences loaded.\n", kh_size(hash));
	header = hash2header(hash);
	kh_destroy(ref, hash);
	return header;
}
/*
 * Build a header from a two-column list (sequence name, length).
 *
 * `fn` is a file name, or "-" for standard input. Every line is turned into
 * an "@SQ\tSN:<name>\tLN:<length>" record; the accumulated text is handed to
 * sam_hdr_parse(). Columns after the second are ignored.
 *
 * Returns NULL when `fn` is NULL or the input cannot be opened, otherwise
 * whatever sam_hdr_parse() returns.
 */
bam_header_t *sam_header_read2(const char *fn)
{
    kstring_t sam_text = { 0, 0, NULL };
    kstring_t *field;
    kstream_t *stream;
    bam_header_t *hdr;
    gzFile in;
    int ch, delim, n_seq = 0;

    if (fn == 0) return 0;
    if (strcmp(fn, "-") == 0) in = gzdopen(fileno(stdin), "r");
    else in = gzopen(fn, "r");
    if (in == 0) return 0;

    stream = ks_init(in);
    field = (kstring_t*)calloc(1, sizeof(kstring_t));
    for (;;) {
        /* first column: sequence name; stop on EOF or an empty token */
        if (ks_getuntil(stream, 0, field, &delim) <= 0) break;
        ksprintf(&sam_text, "@SQ\tSN:%s", field->s);
        /* second column: sequence length */
        ks_getuntil(stream, 0, field, &delim);
        ksprintf(&sam_text, "\tLN:%d\n", atoi(field->s));
        ++n_seq;
        if (delim == '\n') continue;
        /* discard whatever else is on this line */
        do {
            ch = ks_getc(stream);
        } while (ch != '\n' && ch != -1);
    }
    ks_destroy(stream);
    gzclose(in);
    free(field->s);
    free(field);

    /* an empty input leaves sam_text.s NULL; parse an empty string instead */
    hdr = sam_hdr_parse(sam_text.l, sam_text.s ? sam_text.s : "");
    free(sam_text.s);
    fprintf(pysamerr, "[sam_header_read2] %d sequences loaded.\n", n_seq);
    return hdr;
}
Example #5
0
/*
 * Open a PAF input for reading.
 *
 * `fn` is a file name; NULL or "-" selects standard input. Returns a newly
 * allocated paf_file_t whose `fp` member is the kstream wrapping the file,
 * or NULL if the file cannot be opened or memory is exhausted.
 */
paf_file_t *paf_open(const char *fn)
{
	kstream_t *ks;
	gzFile fp;
	paf_file_t *pf;
	fp = fn && strcmp(fn, "-")? gzopen(fn, "r") : gzdopen(fileno(stdin), "r");
	if (fp == 0) return 0;
	ks = ks_init(fp);
	pf = (paf_file_t*)calloc(1, sizeof(paf_file_t));
	if (pf == 0) { // out of memory: release the stream and the file instead of dereferencing NULL
		ks_destroy(ks);
		gzclose(fp);
		return 0;
	}
	pf->fp = ks;
	return pf;
}
Example #6
0
/*
 * HAL entry point: runs the boot-time initialisation calls in a fixed order,
 * hands the driver package (if it was the last boot module) to
 * ke_startup_driver_process(), and then loops forever scheduling.
 * Never returns.
 */
void __init __noreturn hal_main()
{
	/* "When chaos first parted, who was the seed of love?" -- the very beginning of everything */
	km_cluster_init();
	build_ram_list();
	
	/* Go back to ARCH, we have inited the basic paging allocator */
	hal_arch_init(HAL_ARCH_INIT_PHASE_EARLY);
	
	/* KERNEL */
	kc_init();
	kp_init();
	ks_init();
	
	hal_malloc_init();
	hal_dpc_init();
	hal_time_init();

	hal_arch_init(HAL_ARCH_INIT_PHASE_MIDDLE);

	/* Console comes up here, so this is the first point where printk output is issued */
	hal_console_init();
	printk("GridOS 启动中...\n");
	ke_module_entry();
	
	local_irq_enable();
	
	/* Driver package loading, and it must be the last file */
	hal_boot_module_loop(start_driver_ctx);
	if (last_package_id == driver_package_id && driver_pakcage)
	{
		ke_startup_driver_process(driver_pakcage, driver_size);
	}
	else
	{
		/* A non-zero size means a driver package was seen but was not the last module */
		if (driver_size)
			printk("Driver package is not the last one, BSS in it may overlay the useful file data after it...");
		else
			printk("No driver package was loaded...");
	}
	printk("Hal startup ok.\n");
	
	kernel_test();
	/* Scheduling loop: this function never leaves it */
	while (1) 
	{
		kt_schedule_driver();
		/* If have no process, sleep */
		if (!kt_schedule_running_count())
			dumy_idle_ops(0);
		else
			kt_schedule();
	}
}
Example #7
0
/*
 * uniq-dist <in.mask.fa> <in-sorted.list>
 *
 * Loads a per-sequence bit mask, then walks a list of (name, 1-based
 * position) lines. For consecutive positions on the same sequence it prints
 * "name<TAB>prev<TAB>cur<TAB>count", where count is the number of set mask
 * bits in [prev, cur). Positions that go backwards are reported on stderr.
 *
 * Returns 1 on a usage error or if the list cannot be opened, 0 otherwise.
 */
int main(int argc, char *argv[])
{
	gzFile fp;
	kstream_t *ks;
	khash_t(s) *hash;
	mask32_t *q = 0; // mask of the sequence seen on the previous accepted line
	kstring_t *str;
	int i, dret, c, last = 0;

	while ((c = getopt(argc, argv, "")) >= 0) {
	}
	if (argc <= optind + 1) {
		fprintf(stderr, "Usage: uniq-dist <in.mask.fa> <in-sorted.list>\n");
		return 1;
	}

	str = (kstring_t*)calloc(1, sizeof(kstring_t));
	fprintf(stderr, "[uniq-dist] loading mask...\n");
	hash = load_mask(argv[optind]);
	fp = gzopen(argv[optind+1], "r");
	if (fp == 0) { // do not hand a NULL handle to the stream reader
		fprintf(stderr, "[uniq-dist] fail to open '%s'\n", argv[optind+1]);
		free(str);
		return 1;
	}
	ks = ks_init(fp);
	fprintf(stderr, "[uniq-dist] calculating unique distance...\n");
	while (ks_getuntil(ks, 0, str, &dret) >= 0) {
		khint_t k;
		mask32_t *p;
		int pos;
		k = kh_get(s, hash, str->s);
		p = (k != kh_end(hash))? &kh_val(hash, k) : 0;
		ks_getuntil(ks, 0, str, &dret);
		pos = atoi(str->s) - 1; // to 0-based
		if (p && pos >= 0 && pos < p->ori_len) {
			if (p != q) q = p; // change of reference
			else {
				if (last >= pos) {
					fprintf(stderr, "[uniq-dist] out of order: %s:%d <= %d\n", kh_key(hash, k), pos+1, last+1);
				} else {
					// count set mask bits between the previous and the current position
					for (i = last, c = 0; i < pos; ++i)
						if (p->mask[i/32] & 1u<<i%32) ++c;
					if (last > 0) printf("%s\t%d\t%d\t%d\n", kh_key(hash, k), last, pos, c);
				}
			}
			last = pos;
		}
		if (dret != '\n')
			while ((c = ks_getc(ks)) != -1 && c != '\n'); // skip the rest of the line
	}
	ks_destroy(ks);
	gzclose(fp);
	free(str->s); free(str);
	// hash table is not freed...
	return 0;
}
Example #8
0
/*
 * apply_mask_l [-c] <in.mask.fa> <in.list>
 *
 * Loads a per-sequence bit mask and filters a list of (name, 1-based
 * position, ...) lines. A line is echoed when the mask bit at its position
 * is set; with -c it is echoed when the bit is clear. Lines whose sequence
 * is unknown or whose position lies outside the sequence are dropped.
 *
 * Returns 1 on a usage error or if the list cannot be opened, 0 otherwise.
 */
int main(int argc, char *argv[])
{
	gzFile fp;
	kstream_t *ks;
	khash_t(s) *hash;
	kstring_t *str;
	int dret, c, complement = 0;

	while ((c = getopt(argc, argv, "c")) >= 0) {
		switch (c) {
			case 'c': complement = 1; break;
		}
	}
	if (argc <= optind + 1) {
		fprintf(stderr, "Usage: apply_mask_l [-c] <in.mask.fa> <in.list>\n");
		return 1;
	}

	str = (kstring_t*)calloc(1, sizeof(kstring_t));
	fprintf(stderr, "[apply_mask_l] loading mask...\n");
	hash = load_mask(argv[optind]);
	fp = gzopen(argv[optind+1], "r");
	if (fp == 0) { // do not hand a NULL handle to the stream reader
		fprintf(stderr, "[apply_mask_l] fail to open '%s'\n", argv[optind+1]);
		free(str);
		return 1;
	}
	ks = ks_init(fp);
	fprintf(stderr, "[apply_mask_l] filtering list...\n");
	while (ks_getuntil(ks, 0, str, &dret) >= 0) {
		khint_t k;
		mask32_t *p;
		int pos, do_print = 0;
		k = kh_get(s, hash, str->s);
		p = (k != kh_end(hash))? &kh_val(hash, k) : 0;
		ks_getuntil(ks, 0, str, &dret);
		pos = atoi(str->s) - 1; // to 0-based; -1 for a missing/zero/non-numeric column
		// pos must be non-negative: pos%32 would otherwise be a negative shift count
		if (p && pos >= 0 && pos < p->ori_len) {
			int bit_set = (p->mask[pos/32]&1u<<pos%32) != 0;
			if (complement == 0) do_print = bit_set;
			else do_print = !bit_set;
		}
		if (do_print) printf("%s\t%d", kh_key(hash, k), pos + 1);
		if (dret != '\n') {
			// copy the remaining columns verbatim when the line is kept, skip them otherwise
			if (do_print) putchar('\t');
			while ((c = ks_getc(ks)) != -1 && c != '\n')
				if (do_print) putchar(c);
		}
		if (do_print) putchar('\n');
	}
	ks_destroy(ks);
	gzclose(fp);
	free(str->s); free(str);
	// hash table is not freed...
	return 0;
}
Example #9
0
/*
 * Read a region list into a hash keyed by sequence name.
 *
 * `fn` is a file name, or "-" for standard input. Each line is
 * "name [beg [end]]":
 *   - name only        -> the whole sequence, stored as [0, INT_MAX)
 *   - name and one col -> the single position, stored as [beg-1, beg)
 *   - name, beg, end   -> stored as given
 * Intervals are packed as (uint64_t)beg<<32 | end and appended to the
 * per-sequence array. Keys are strdup'ed copies of the names.
 *
 * Returns the table, or NULL if the input cannot be opened.
 */
reghash_t *stk_reg_read(const char *fn)
{
	reghash_t *h = kh_init(reg);
	gzFile fp;
	kstream_t *ks;
	int dret;
	kstring_t *str;
	// read the list
	str = calloc(1, sizeof(kstring_t));
	fp = strcmp(fn, "-")? gzopen(fn, "r") : gzdopen(fileno(stdin), "r");
	if (fp == 0) { // unreadable input: release what was allocated and report failure
		free(str);
		kh_destroy(reg, h);
		return 0;
	}
	ks = ks_init(fp);
	while (ks_getuntil(ks, 0, str, &dret) >= 0) {
		int beg = -1, end = -1;
		reglist_t *p;
		khint_t k = kh_get(reg, h, str->s);
		if (k == kh_end(h)) { // first time this name is seen: insert an empty list
			int ret;
			char *s = strdup(str->s);
			k = kh_put(reg, h, s, &ret);
			memset(&kh_val(h, k), 0, sizeof(reglist_t));
		}
		p = &kh_val(h, k);
		if (dret != '\n') {
			if (ks_getuntil(ks, 0, str, &dret) > 0 && isdigit(str->s[0])) {
				beg = atoi(str->s);
				if (dret != '\n') {
					if (ks_getuntil(ks, 0, str, &dret) > 0 && isdigit(str->s[0])) {
						end = atoi(str->s);
						if (end < 0) end = -1;
					}
				}
			}
		}
		// skip the rest of the line
		if (dret != '\n') while ((dret = ks_getc(ks)) > 0 && dret != '\n');
		if (end < 0 && beg > 0) end = beg, beg = beg - 1; // if there is only one column
		if (beg < 0) beg = 0, end = INT_MAX;
		if (p->n == p->m) { // grow the interval array geometrically
			p->m = p->m? p->m<<1 : 4;
			p->a = realloc(p->a, p->m * 8);
		}
		p->a[p->n++] = (uint64_t)beg<<32 | end;
	}
	ks_destroy(ks);
	gzclose(fp);
	free(str->s); free(str);
	return h;
}
Example #10
0
File: vcf.c Project: 9beckert/TIR
/*
 * Open a VCF (text) or BCF (binary) file.
 *
 * A 'b' in `mode` delegates to bcf_open(). Otherwise a text handle is
 * created: 'r' opens `fn` (or standard input for "-") through a kstream
 * reader, 'w' opens `fn` (or standard output for "-") for writing. If `mode`
 * contains neither, the handle is returned with no stream attached.
 */
bcf_t *vcf_open(const char *fn, const char *mode)
{
	bcf_t *handle;
	vcf_t *text;

	if (strchr(mode, 'b') != 0)
		return bcf_open(fn, mode);

	handle = calloc(1, sizeof(bcf_t));
	text = calloc(1, sizeof(vcf_t));
	handle->is_vcf = 1;
	handle->v = text;
	text->refhash = bcf_str2id_init();

	/* reading takes precedence when both 'r' and 'w' are present */
	if (strchr(mode, 'r') != 0) {
		if (strcmp(fn, "-") == 0)
			text->fp = gzdopen(fileno(stdin), "r");
		else
			text->fp = gzopen(fn, "r");
		text->ks = ks_init(text->fp);
		return handle;
	}

	if (strchr(mode, 'w') != 0) {
		if (strcmp(fn, "-") == 0)
			text->fpout = stdout;
		else
			text->fpout = fopen(fn, "w");
	}
	return handle;
}
Example #11
0
/*
 * Build a BAM header from a two-column list (sequence name, length).
 *
 * Each line contributes one entry to a name -> (length<<32 | insertion
 * index) table, which is then converted with hash2header(). Duplicate names
 * are reported and make the whole call fail.
 *
 * Returns NULL when `fn` is NULL, the file cannot be opened, or a duplicate
 * sequence name was found; otherwise the header built by hash2header().
 */
bam_header_t *sam_header_read2(const char *fn)
{
	bam_header_t *header;
	int c, dret, ret, error = 0;
	gzFile fp;
	kstream_t *ks;
	kstring_t *str;
	kh_ref_t *hash;
	khiter_t k;
	if (fn == 0) return 0;
	fp = gzopen(fn, "r");
	if (fp == 0) return 0;
	hash = kh_init(ref);
	ks = ks_init(fp);
	str = (kstring_t*)calloc(1, sizeof(kstring_t));
	while (ks_getuntil(ks, 0, str, &dret) > 0) {
		char *s = malloc(strlen(str->s) + 1);
		strcpy(s,str->s);

		int len, i;
		i = kh_size(hash); // index this sequence gets if it is new
		ks_getuntil(ks, 0, str, &dret);
		len = atoi(str->s);
		k = kh_put(ref, hash, s, &ret);
		if (ret == 0) {
			Rprintf("[sam_header_read2] duplicated sequence name: %s\n", s);
			error = 1;
			// the table keeps its existing key, so this copy is unreferenced
			free(s);
		}
		kh_value(hash, k) = (uint64_t)len<<32 | i;
		if (dret != '\n')
			while ((c = ks_getc(ks)) != '\n' && c != -1); // skip the rest of the line
	}
	ks_destroy(ks);
	gzclose(fp);
	free(str->s); free(str);
	Rprintf("[sam_header_read2] %d sequences loaded.\n", kh_size(hash));
	if (error) {
		// hash2header() is not reached on this path, so the table still holds
		// every key allocated above: release them together with the table
		for (k = kh_begin(hash); k != kh_end(hash); ++k)
			if (kh_exist(hash, k)) free((char*)kh_key(hash, k));
		kh_destroy(ref, hash);
		return 0;
	}
	header = hash2header(hash);
	kh_destroy(ref, hash);
	return header;
}
Example #12
0
File: prob1.c Project: NCIP/alview
/*
 * Load an allele-frequency-spectrum prior from `fn` ("-" for stdin).
 *
 * Every line starting with "[afs] " is expected to hold M+1 "index:value"
 * style pairs; value k is accumulated into ma->phi[M - k]. After reading,
 * phi is normalised to sum to one, a summary (prior, heterozygosity, theta)
 * is written to stderr and the indel prior is initialised.
 *
 * Returns 0 on success, -1 if the file cannot be opened or a number fails to
 * parse.
 */
int bcf_p1_read_prior(bcf_p1aux_t *ma, const char *fn)
{
	gzFile fp;
	kstring_t s;
	kstream_t *ks;
	long double sum;
	int dret, k, bad = 0;
	memset(&s, 0, sizeof(kstring_t));
	fp = strcmp(fn, "-")? gzopen(fn, "r") : gzdopen(fileno(stdin), "r");
	if (fp == 0) return -1;
	ks = ks_init(fp);
	memset(ma->phi, 0, sizeof(double) * (ma->M + 1));
	while (!bad && ks_getuntil(ks, '\n', &s, &dret) >= 0) {
		if (strstr(s.s, "[afs] ") == s.s) {
			char *p = s.s + 6;
			for (k = 0; k <= ma->M; ++k) {
				int x;
				double y;
				// strtol/strtod only ever set errno, so clear it first or a
				// stale value from an earlier call is mistaken for a failure
				errno = 0;
				x = strtol(p, &p, 10);
				if (x != k && (errno == EINVAL || errno == ERANGE)) { bad = 1; break; }
				if (*p) ++p; // step over the separator, but never past the end of the line
				errno = 0;
				y = strtod(p, &p);
				if (y == 0. && (errno == EINVAL || errno == ERANGE)) { bad = 1; break; }
				ma->phi[ma->M - k] += y;
			}
		}
	}
	// single exit for the reader so that parse failures do not leak it
	ks_destroy(ks);
	gzclose(fp);
	free(s.s);
	if (bad) return -1;
	for (sum = 0., k = 0; k <= ma->M; ++k) sum += ma->phi[k];
	fprintf(stderr, "[prior]");
	for (k = 0; k <= ma->M; ++k) ma->phi[k] /= sum;
	for (k = 0; k <= ma->M; ++k) fprintf(stderr, " %d:%.3lg", k, ma->phi[ma->M - k]);
	fputc('\n', stderr);
	for (sum = 0., k = 1; k < ma->M; ++k) sum += ma->phi[ma->M - k] * (2.* k * (ma->M - k) / ma->M / (ma->M - 1));
	fprintf(stderr, "heterozygosity=%lf, ",  (double)sum);
	for (sum = 0., k = 1; k <= ma->M; ++k) sum += k * ma->phi[ma->M - k] / ma->M;
	fprintf(stderr, "theta=%lf\n", (double)sum);
	bcf_p1_indel_prior(ma, MC_DEF_INDEL);
	return 0;
}
Example #13
0
File: aux.c Project: dpryan79/bison
/*
 * Run `cmd` through popen() and wrap its standard output as a SAM-format
 * htsFile.
 *
 * The pipe itself is kept in the global `popen_fd`; the reader works on a
 * dup() of its descriptor so the stream layer can close its copy without
 * breaking a later pclose().
 *
 * Returns the new handle, or NULL on failure. On failure the pipe is closed,
 * `popen_fd` is reset to NULL and nothing is leaked.
 */
htsFile * sam_popen(char *cmd) {
    htsFile *fp = (htsFile*)calloc(1, sizeof(htsFile));
    int fid, fid2;
    assert(fp);
    if(fp == NULL) return 0; //only reachable when assert() is compiled out

    popen_fd = popen(cmd, "r"); //Global
    if(popen_fd == NULL) { //must be tested before fileno(), which needs a valid stream
        free(fp);
        return 0;
    }

    fid = fileno(popen_fd);
    fid2 = dup(fid); //otherwise, the file descriptor is closed by zlib and pclose() won't work!!
    if(fid2 < 0) {
        pclose(popen_fd);
        popen_fd = NULL;
        free(fp);
        return 0;
    }

    hFILE *hfile = hdopen(fid2, "r"); //does this exist?
    if(hfile == NULL) {
        close(fid2);
        pclose(popen_fd);
        popen_fd = NULL;
        free(fp);
        return 0;
    }

    fp->is_be = ed_is_big();
    BGZF *gzfp = bgzf_hopen(hfile, "r");
    fp->fp.voidp = ks_init(gzfp);
    fp->format.format = sam;

    return(fp);
}
Example #14
0
File: vcf.c Project: goshng/cocoa
/*
 * Read the header of a VCF/BCF file.
 *
 * Binary input is delegated to bcf_hdr_read(). For text input, header lines
 * are collected up to and including the sample line (the first line starting
 * with a single '#'). If fp->fn_aux is set, a "##contig" line is generated
 * from each (name, length) row of that file and inserted just before the
 * sample line. After parsing, contigs that appear in the tabix index of
 * fp->fn but not in the header are added with length -1.
 *
 * Returns the header, or NULL if a non-header line is met before the sample
 * line or the auxiliary file cannot be opened.
 */
bcf_hdr_t *vcf_hdr_read(htsFile *fp)
{
	if (!fp->is_bin) {
		kstring_t txt, *s = &fp->line;
		bcf_hdr_t *h;
		h = bcf_hdr_init();
		txt.l = txt.m = 0; txt.s = 0;
		while (hts_getline(fp, KS_SEP_LINE, s) >= 0) {
			if (s->l == 0) continue;
			if (s->s[0] != '#') {
				if (hts_verbose >= 2)
					fprintf(stderr, "[E::%s] no sample line\n", __func__);
				free(txt.s);
				bcf_hdr_destroy(h);
				return 0;
			}
			if (s->s[1] != '#' && fp->fn_aux) { // insert contigs here
				int dret;
				gzFile f;
				kstream_t *ks;
				kstring_t tmp;
				tmp.l = tmp.m = 0; tmp.s = 0;
				f = gzopen(fp->fn_aux, "r");
				if (f == 0) { // same clean-up as the "no sample line" failure above
					if (hts_verbose >= 2)
						fprintf(stderr, "[E::%s] fail to open '%s'\n", __func__, fp->fn_aux);
					free(txt.s);
					bcf_hdr_destroy(h);
					return 0;
				}
				ks = ks_init(f);
				while (ks_getuntil(ks, 0, &tmp, &dret) >= 0) {
					int c;
					kputs("##contig=<ID=", &txt); kputs(tmp.s, &txt);
					ks_getuntil(ks, 0, &tmp, &dret);
					kputs(",length=", &txt); kputw(atol(tmp.s), &txt);
					kputsn(">\n", 2, &txt);
					if (dret != '\n')
						while ((c = ks_getc(ks)) != '\n' && c != -1); // skip the rest of the line
				}
				free(tmp.s);
				ks_destroy(ks);
				gzclose(f);
			}
			kputsn(s->s, s->l, &txt);
			if (s->s[1] != '#') break; // the sample line ends the header
			kputc('\n', &txt);
		}
		h->l_text = txt.l + 1; // including NULL
		h->text = txt.s;
		bcf_hdr_parse(h);
		// check tabix index, are all contigs listed in the header? add the missing ones
		tbx_t *idx = tbx_index_load(fp->fn);
		if ( idx )
		{
			int i, n, need_sync = 0;
			const char **names = tbx_seqnames(idx, &n);
			for (i=0; i<n; i++)
			{
				bcf_hrec_t *hrec = bcf_hdr_get_hrec(h, BCF_DT_CTG, (char*) names[i]);
				if ( hrec ) continue;
				hrec = (bcf_hrec_t*) calloc(1,sizeof(bcf_hrec_t));
				hrec->key = strdup("contig");
				bcf_hrec_add_key(hrec, "ID", strlen("ID"));
				bcf_hrec_set_val(hrec, hrec->nkeys-1, (char*) names[i], strlen(names[i]), 0);
				bcf_hrec_add_key(hrec, "length", strlen("length"));
				bcf_hrec_set_val(hrec, hrec->nkeys-1, "-1", strlen("-1"), 0);   // what is a good default value?
				bcf_hdr_add_hrec(h, hrec);
				need_sync = 1;
			}
			free(names);
			tbx_destroy(idx);
			if ( need_sync )
			{
				bcf_hdr_sync(h);
				bcf_hdr_fmt_text(h);
			}
		}
		return h;
	} else return bcf_hdr_read((BGZF*)fp->fp);
}
Example #15
0
int stk_mutfa(int argc, char *argv[])
{
	khash_t(reg) *h = kh_init(reg);
	gzFile fp;
	kseq_t *seq;
	kstream_t *ks;
	int l, i, dret;
	kstring_t *str;
	khint_t k;
	if (argc < 3) {
		fprintf(stderr, "Usage: seqtk mutfa <in.fa> <in.snp>\n\n");
		fprintf(stderr, "Note: <in.snp> contains at least four columns per line which are:\n");
		fprintf(stderr, "      'chr  1-based-pos  any  base-changed-to'.\n");
		return 1;
	}
	// read the list
	str = calloc(1, sizeof(kstring_t));
	fp = strcmp(argv[2], "-")? gzopen(argv[2], "r") : gzdopen(fileno(stdin), "r");
	ks = ks_init(fp);
	while (ks_getuntil(ks, 0, str, &dret) >= 0) {
		char *s = strdup(str->s);
		int beg = 0, ret;
		reglist_t *p;
		k = kh_get(reg, h, s);
		if (k == kh_end(h)) {
			k = kh_put(reg, h, s, &ret);
			memset(&kh_val(h, k), 0, sizeof(reglist_t));
		}
		p = &kh_val(h, k);
		if (ks_getuntil(ks, 0, str, &dret) > 0) beg = atol(str->s) - 1; // 2nd col
		ks_getuntil(ks, 0, str, &dret); // 3rd col
		ks_getuntil(ks, 0, str, &dret); // 4th col
		// skip the rest of the line
		if (dret != '\n') while ((dret = ks_getc(ks)) > 0 && dret != '\n');
		if (isalpha(str->s[0]) && str->l == 1) {
			if (p->n == p->m) {
				p->m = p->m? p->m<<1 : 4;
				p->a = realloc(p->a, p->m * 8);
			}
			p->a[p->n++] = (uint64_t)beg<<32 | str->s[0];
		}
	}
	ks_destroy(ks);
	gzclose(fp);
	free(str->s); free(str);
	// mutfa
	fp = strcmp(argv[1], "-")? gzopen(argv[1], "r") : gzdopen(fileno(stdin), "r");
	seq = kseq_init(fp);
	while ((l = kseq_read(seq)) >= 0) {
		reglist_t *p;
		k = kh_get(reg, h, seq->name.s);
		if (k != kh_end(h)) {
			p = &kh_val(h, k);
			for (i = 0; i < p->n; ++i) {
				int beg = p->a[i]>>32;
				if (beg < seq->seq.l)
					seq->seq.s[beg] = (int)p->a[i];
			}
		}
		printf(">%s", seq->name.s);
		for (i = 0; i < l; ++i) {
			if (i%60 == 0) putchar('\n');
			putchar(seq->seq.s[i]);
		}
		putchar('\n');
	}
Example #16
0
File: arg.c Project: lh3/fastARG
/*
 * Parse an ARG description from an open gzFile.
 *
 * Records are whitespace-separated and start with a one-character tag:
 *   C / R  edge:   n1 n2 beg end n_mut mut...   (n1 must be greater than n2)
 *   N      sizes:  n m
 *   S      root:   root-id root-sequence (length m, '1' marks a set bit)
 * Any other tag causes the rest of the line to be skipped.
 *
 * Malformed input terminates the process with exit(1) after a message on
 * stderr. The stream wrapper is destroyed before returning; `fp` itself is
 * not closed here.
 */
arg_t *arg_load(gzFile fp)
{
	kstream_t *ks;
	kstring_t *str;
	int dret, lineno = 0;
	arg_t *a;
	a = arg_init();
	ks = ks_init(fp);
	str = (kstring_t*)calloc(1, sizeof(kstring_t));
	while (ks_getuntil(ks, 0, str, &dret) >= 0) {
		int n1, n2, i;
		++lineno;
		if (str->l != 1) {
			fprintf(stderr, "[arg_load] invalid initial character at line %d\n", lineno);
			exit(1);
		}
		if (str->s[0] == 'C' || str->s[0] == 'R') {
			argnode_t *an1, *an2;
			int beg, end, n_mut;
			// read
			ks_getuntil(ks, 0, str, &dret); n1 = atoi(str->s);
			ks_getuntil(ks, 0, str, &dret); n2 = atoi(str->s);
			// n2 indexes the node array below, so it must not be negative either
			if (n1 <= n2 || n2 < 0) {
				fprintf(stderr, "[arg_load] invalid edge (%d)\n", lineno);
				exit(1);
			}
			// add to the node array
			if (n1 + 1 > a->max_size) {
				int old_size = a->max_size;
				a->max_size = n1 + 1;
				kroundup32(a->max_size);
				a->node = (argnode_t*)realloc(a->node, sizeof(argnode_t) * a->max_size);
				memset(a->node + old_size, 0, (a->max_size - old_size) * sizeof(argnode_t));
			}
			an1 = a->node + n1; an2 = a->node + n2;
			an1->nid = n1; an2->nid = n2;
			if (an1->n_nei == 3 || an2->n_nei == 3) {
				fprintf(stderr, "[arg_load] multifurcated node: %d or %d (%d)\n", n1, n2, lineno);
				exit(1);
			}
			an1->nei[an1->n_nei++] = n2; an2->nei[an2->n_nei++] = n1;
			// read intervals
			ks_getuntil(ks, 0, str, &dret); beg = atoi(str->s);
			ks_getuntil(ks, 0, str, &dret); end = atoi(str->s);
			if (an2->end) { // a recombination node
				if (an2->end == beg) an2->x = an2->end, an2->end = end;
				else if (an2->beg == end) { // and also swap
					int x = an2->nei[1]; an2->nei[1] = an2->nei[2]; an2->nei[2] = x;
					an2->x = an2->beg, an2->beg = beg;
				} else {
					fprintf(stderr, "[arg_load] inconsisten interval at node %d (%d)\n", n2, lineno);
					exit(1);
				}
			} else an2->beg = beg, an2->end = end;
			// read mutations
			ks_getuntil(ks, 0, str, &dret); n_mut = atoi(str->s);
			// a negative count would shrink the array under the entries already stored
			if (n_mut > 0) {
				an2->mut = (int*)realloc(an2->mut, sizeof(int) * (an2->n_mut + n_mut));
				for (i = 0; i < n_mut; ++i) {
					ks_getuntil(ks, 0, str, &dret);
					an2->mut[an2->n_mut++] = atoi(str->s);
				}
			}
			// save interval and swap if necessary
		} else if (str->s[0] == 'N') {
			ks_getuntil(ks, 0, str, &dret); a->n = atoi(str->s);
			ks_getuntil(ks, 0, str, &dret); a->m = atoi(str->s);
		} else if (str->s[0] == 'S') {
			ks_getuntil(ks, 0, str, &dret); a->root = atoi(str->s);
			ks_getuntil(ks, 0, str, &dret);
			if (str->l != a->m) {
				fprintf(stderr, "[arg_load] inconsistent root sequence (%d)\n", lineno);
				exit(1);
			}
			// pack the root sequence into 64-bit words
			a->rootseq = (uint64_t*)calloc((a->m + 63) / 64, 8);
			for (i = 0; i < a->m; ++i)
				if (str->s[i] == '1') arg_setseq1(a->rootseq, i);
		} else ks_getuntil(ks, '\n', str, &dret);
	}
	free(str->s); free(str);
	ks_destroy(ks);
	return a;
}
Example #17
0
int main(int argc, char *argv[])
{
    gzFile fp;
    kstream_t *ks;
    kstring_t s, t[N_TMPSTR];
    int dret, i, k, af_n, af_max, af_i, c, is_padded = 0, write_cns = 0, *p2u = 0;
    long m_cigar = 0, n_cigar = 0;
    unsigned *af, *cigar = 0;

    while ((c = getopt(argc, argv, "pc")) >= 0) {
        switch (c) {
            case 'p': is_padded = 1; break;
            case 'c': write_cns = 1; break;
        }
    }
    if (argc == optind) {
        fprintf(stderr, "\nUsage:   ace2sam [-pc] <in.ace>\n\n");
        fprintf(stderr, "Options: -p     output padded SAM\n");
        fprintf(stderr, "         -c     write the contig sequence in SAM\n\n");
        fprintf(stderr, "Notes: 1. Fields must appear in the following order: (CO->[BQ]->(AF)->(RD->QA))\n");
        fprintf(stderr, "       2. The order of reads in AF and in RD must be identical\n");
        fprintf(stderr, "       3. Except in BQ, words and numbers must be separated by a single SPACE or TAB\n");
        fprintf(stderr, "       4. This program writes the headerless SAM to stdout and header to stderr\n\n");
        return 1;
    }

    s.l = s.m = 0; s.s = 0;
    af_n = af_max = af_i = 0; af = 0;
    for (i = 0; i < N_TMPSTR; ++i) t[i].l = t[i].m = 0, t[i].s = 0;
    fp = strcmp(argv[1], "-")? gzopen(argv[optind], "r") : gzdopen(fileno(stdin), "r");
    ks = ks_init(fp);
    while (ks_getuntil(ks, 0, &s, &dret) >= 0) {
        if (strcmp(s.s, "CO") == 0) { // contig sequence
            kstring_t *cns;
            t[0].l = t[1].l = t[2].l = t[3].l = t[4].l = 0; // 0: name; 1: padded ctg; 2: unpadded ctg/padded read; 3: unpadded read; 4: SAM line
            af_n = af_i = 0; // reset the af array
            ks_getuntil(ks, 0, &s, &dret); kputs(s.s, &t[0]); // contig name
            ks_getuntil(ks, '\n', &s, &dret); // read the whole line
            while (ks_getuntil(ks, '\n', &s, &dret) >= 0 && s.l > 0) kputsn(s.s, s.l, &t[1]); // read the padded consensus sequence
            remove_pads(&t[1], &t[2]); // construct the unpadded sequence
            // compute the array for mapping padded positions to unpadded positions
            p2u = realloc(p2u, t[1].m * sizeof(int));
            for (i = k = 0; i < t[1].l; ++i) {
                p2u[i] = k;
                if (t[1].s[i] != '*') ++k;
            }
            // write out the SAM header and contig sequences
            fprintf(stderr, "H @SQ\tSN:%s\tLN:%llu\n", t[0].s, (unsigned long long)(t[is_padded?1:2].l)); // The SAM header line
            cns = &t[is_padded?1:2];
            fprintf(stderr, "S >%s\n", t[0].s);
            for (i = 0; i < cns->l; i += LINE_LEN) {
                fputs("S ", stderr);
                for (k = 0; k < LINE_LEN && i + k < cns->l; ++k)
                    fputc(cns->s[i + k], stderr);
                fputc('\n', stderr);
            }

#define __padded2cigar(sp) do { \
        int i, l_M = 0, l_D = 0; \
        for (i = 0; i < sp.l; ++i) { \
            if (sp.s[i] == '*') { \
                if (l_M) write_cigar(cigar, n_cigar, m_cigar, l_M<<4); \
                ++l_D; l_M = 0; \
            } else { \
                if (l_D) write_cigar(cigar, n_cigar, m_cigar, l_D<<4 | 2); \
                ++l_M; l_D = 0; \
            } \
        } \
        if (l_M) write_cigar(cigar, n_cigar, m_cigar, l_M<<4); \
        else write_cigar(cigar, n_cigar, m_cigar, l_D<<4 | 2); \
    } while (0)

            if (write_cns) { // write the consensus SAM line (dummy read)
                n_cigar = 0;
                if (is_padded) __padded2cigar(t[1]);
                else write_cigar(cigar, n_cigar, m_cigar, t[2].l<<4);
                kputsn(t[0].s, t[0].l, &t[4]); kputs("\t516\t", &t[4]); kputsn(t[0].s, t[0].l, &t[4]); kputs("\t1\t60\t", &t[4]);
                for (i = 0; i < n_cigar; ++i) {
                    kputw(cigar[i]>>4, &t[4]); kputc("MIDNSHP=X"[cigar[i]&0xf], &t[4]);
                }
                kputs("\t*\t0\t0\t", &t[4]); kputsn(t[2].s, t[2].l, &t[4]); kputs("\t*", &t[4]);
            }
        } else if (strcmp(s.s, "BQ") == 0) { // contig quality
Example #18
0
/*
 * samtools bedcov [options] <in.bed> <in1.bam> [...]
 *
 * For every BED line, sums the pileup depth over [beg, end) in each input
 * alignment file and prints the original line followed by one tab-separated
 * total per file. Malformed BED lines are reported on the error stream and
 * skipped.
 *
 * Options: -Q INT  minimum mapping quality passed to the per-file reader.
 *
 * Returns 0 on success, 1 on a usage error, 2 if an input cannot be opened
 * or its header cannot be read.
 */
int main_bedcov(int argc, char *argv[])
{
    gzFile fp;
    kstring_t str;
    kstream_t *ks;
    hts_idx_t **idx;
    aux_t **aux;
    int *n_plp, dret, i, n, c, min_mapQ = 0;
    int64_t *cnt;
    const bam_pileup1_t **plp;
    int usage = 0;

    sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
    static const struct option lopts[] = {
        SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', 0),
        { NULL, 0, NULL, 0 }
    };

    while ((c = getopt_long(argc, argv, "Q:", lopts, NULL)) >= 0) {
        switch (c) {
        case 'Q': min_mapQ = atoi(optarg); break;
        default:  if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
                  /* else fall-through */
        case '?': usage = 1; break;
        }
        if (usage) break;
    }
    if (usage || optind + 2 > argc) {
        fprintf(pysam_stderr, "Usage: samtools bedcov [options] <in.bed> <in1.bam> [...]\n\n");
        fprintf(pysam_stderr, "  -Q INT       Only count bases of at least INT quality [0]\n");
        sam_global_opt_help(pysam_stderr, "-.--.");
        return 1;
    }
    memset(&str, 0, sizeof(kstring_t));
    n = argc - optind - 1; // number of alignment files
    aux = calloc(n, sizeof(aux_t*));
    idx = calloc(n, sizeof(hts_idx_t*));
    for (i = 0; i < n; ++i) {
        aux[i] = calloc(1, sizeof(aux_t));
        aux[i]->min_mapQ = min_mapQ;
        aux[i]->fp = sam_open_format(argv[i+optind+1], "r", &ga.in);
        if (aux[i]->fp)
            idx[i] = sam_index_load(aux[i]->fp, argv[i+optind+1]);
        if (aux[i]->fp == 0 || idx[i] == 0) {
            fprintf(pysam_stderr, "ERROR: fail to open index BAM file '%s'\n", argv[i+optind+1]);
            return 2;
        }
        // TODO bgzf_set_cache_size(aux[i]->fp, 20);
        aux[i]->header = sam_hdr_read(aux[i]->fp);
        if (aux[i]->header == NULL) {
            fprintf(pysam_stderr, "ERROR: failed to read header for '%s'\n",
                    argv[i+optind+1]);
            return 2;
        }
    }
    cnt = calloc(n, 8);

    fp = gzopen(argv[optind], "rb");
    if (fp == 0) { // do not hand a NULL handle to the stream reader
        fprintf(pysam_stderr, "ERROR: fail to open BED file '%s'\n", argv[optind]);
        return 2;
    }
    ks = ks_init(fp);
    n_plp = calloc(n, sizeof(int));
    plp = calloc(n, sizeof(bam_pileup1_t*));
    while (ks_getuntil(ks, KS_SEP_LINE, &str, &dret) >= 0) {
        char *p, *q;
        int tid, beg, end, pos;
        bam_mplp_t mplp;

        // column 1: reference name, resolved against the first file's header
        for (p = q = str.s; *p && *p != '\t'; ++p);
        if (*p != '\t') goto bed_error;
        *p = 0; tid = bam_name2id(aux[0]->header, q); *p = '\t';
        if (tid < 0) goto bed_error;
        // column 2: start
        for (q = p = p + 1; isdigit(*p); ++p);
        if (*p != '\t') goto bed_error;
        *p = 0; beg = atoi(q); *p = '\t';
        // column 3: end (may be the last column)
        for (q = p = p + 1; isdigit(*p); ++p);
        if (*p == '\t' || *p == 0) {
            int c = *p;
            *p = 0; end = atoi(q); *p = c;
        } else goto bed_error;

        for (i = 0; i < n; ++i) {
            if (aux[i]->iter) hts_itr_destroy(aux[i]->iter);
            aux[i]->iter = sam_itr_queryi(idx[i], tid, beg, end);
        }
        mplp = bam_mplp_init(n, read_bam, (void**)aux);
        bam_mplp_set_maxcnt(mplp, 64000);
        memset(cnt, 0, 8 * n);
        while (bam_mplp_auto(mplp, &tid, &pos, n_plp, plp) > 0)
            if (pos >= beg && pos < end)
                for (i = 0; i < n; ++i) cnt[i] += n_plp[i];
        for (i = 0; i < n; ++i) {
            kputc('\t', &str);
            kputl(cnt[i], &str);
        }
        // two statements: the operands of '&' may be evaluated in either
        // order, which could emit the newline before the line itself
        fputs(str.s, pysam_stdout);
        fputc('\n', pysam_stdout);
        bam_mplp_destroy(mplp);
        continue;

bed_error:
        fprintf(pysam_stderr, "Errors in BED line '%s'\n", str.s);
    }
    free(n_plp); free(plp);
    ks_destroy(ks);
    gzclose(fp);

    free(cnt);
    for (i = 0; i < n; ++i) {
        if (aux[i]->iter) hts_itr_destroy(aux[i]->iter);
        hts_idx_destroy(idx[i]);
        bam_hdr_destroy(aux[i]->header);
        sam_close(aux[i]->fp);
        free(aux[i]);
    }
    free(aux); free(idx);
    free(str.s);
    sam_global_args_free(&ga);
    return 0;
}
Example #19
0
int ingest1(const char *input,const char *output,char *ref,bool exit_on_mismatch=true) {
  cerr << "Input: " << input << "\tOutput: "<<output<<endl;

  kstream_t *ks;
  kstring_t str = {0,0,0};    
  gzFile fp = gzopen(input, "r");
  VarBuffer vbuf(1000);
  int prev_rid = -1;
  if(fp==NULL) {
    fprintf(stderr,"problem opening %s\n",input);
    exit(1);
  }

  char *out_fname = (char *)malloc(strlen(output)+5);
  strcpy(out_fname,output);
  strcat(out_fname,".tmp");
  if(fileexists(out_fname)) {
    fprintf(stderr,"%s file already exists. will not overwrite\n",out_fname);
    exit(1);
  }
  printf("depth: %s\n",out_fname);
  gzFile depth_fp = gzopen(out_fname, "wb1");
  strcpy(out_fname,output);
  strcat(out_fname,".bcf");
  if(fileexists(out_fname)) {
    fprintf(stderr,"%s file already exists. will not overwrite\n",out_fname);
    exit(1);
  }
  printf("variants: %s\n",out_fname);
  htsFile *variant_fp=hts_open(out_fname,"wb1");
  if(variant_fp==NULL) {
    fprintf(stderr,"problem opening %s\n",input);
    exit(1);    
  }

  ks = ks_init(fp);
  htsFile *hfp=hts_open(input, "r");
  bcf_hdr_t *hdr_in =  bcf_hdr_read(hfp);
  hts_close(hfp);
  //this is a hack to fix gvcfs where AD is incorrectly defined in the header. (vcf4.2 does not technically allow Number=R)
  bcf_hdr_remove(hdr_in,BCF_HL_FMT,"AD");
  assert(  bcf_hdr_append(hdr_in,"##FORMAT=<ID=AD,Number=R,Type=Integer,Description=\"Allelic depths for the ref and alt alleles in the order listed. For indels this value only includes reads which confidently support each allele (posterior prob 0.999 or higher that read contains indicated allele vs all other intersecting indel alleles)\">") == 0);

  //this is a hack to fix broken gvcfs where GQ is incorrectly labelled as float (v4.3 spec says it should be integer)
  bcf_hdr_remove(hdr_in,BCF_HL_FMT,"GQ");
  assert(  bcf_hdr_append(hdr_in,"##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">") == 0);


  //  bcf_hdr_t  *hdr_out=hdr_in;
  bcf_hdr_t *hdr_out =  bcf_hdr_dup(hdr_in);
  remove_hdr_lines(hdr_out,BCF_HL_INFO);
  remove_hdr_lines(hdr_out,BCF_HL_FLT);
  bcf_hdr_sync(hdr_out);

  //here we add FORMAT/PF. which is the pass filter flag for alts.
  assert(  bcf_hdr_append(hdr_out,"##FORMAT=<ID=PF,Number=A,Type=Integer,Description=\"variant was PASS filter in original sample gvcf\">") == 0);

  args_t *norm_args = init_vcfnorm(hdr_out,ref);
  norm_args->check_ref |= CHECK_REF_WARN;
  bcf1_t *bcf_rec = bcf_init();
  bcf_hdr_write(variant_fp, hdr_out);
  kstring_t work1 = {0,0,0};            
  int buf[5];
  ks_tokaux_t aux;
  int ndec=0;
  int ref_len,alt_len;
  while(    ks_getuntil(ks, '\n', &str, 0) >=0) {
    //    fprintf(stderr,"%s\n",str.s);
    if(str.s[0]!='#')  {
      char *ptr = kstrtok(str.s,"\t",&aux);//chrom
      ptr = kstrtok(NULL,NULL,&aux);//pos
      work1.l=0;
      kputsn(str.s,ptr-str.s-1, &work1);   
      buf[0] =  bcf_hdr_name2id(hdr_in, work1.s);
      assert(      buf[0]>=0);
      buf[1]=atoi(ptr)-1;
      ptr = kstrtok(NULL,NULL,&aux);//ID
      ptr = kstrtok(NULL,NULL,&aux);//REF

      ref_len=0;
      while(ptr[ref_len]!='\t') ref_len++;

      ptr = kstrtok(NULL,NULL,&aux);//ALT

      bool is_variant=false;
      alt_len=0;
      while(ptr[alt_len]!='\t') alt_len++;
      if(ptr[0]!='.') 
	is_variant=true;

      char * QUAL_ptr = kstrtok(NULL, NULL, &aux);
      assert (QUAL_ptr != NULL);
      
      for(int i=0;i<2;i++)  ptr = kstrtok(NULL,NULL,&aux);// gets us to INFO

      //find END if it is there
      char *end_ptr=strstr(ptr,"END=") ;
      if(end_ptr!=NULL) 
	buf[2]=atoi(end_ptr+4)-1;
      else
	buf[2]=buf[1]+alt_len-1;

      ptr  = kstrtok(NULL,NULL,&aux);//FORMAT
      //find index of DP (if present)
      //if not present, dont output anything (indels ignored)

      char *DP_ptr = find_format(ptr,"DP");
      int GQX = 0;
      int QUAL = 0;

      // AH: change code to use the minimum of GQ and QUAL fields if
      // GQX is not defined. See here:
      // https://support.basespace.illumina.com/knowledgebase/articles/144844-vcf-file
      // "GQXGenotype quality. GQX is the minimum of the GQ value
      // and the QUAL column. In general, these are similar values;
      // taking the minimum makes GQX the more conservative measure of
      // genotype quality."
      if(DP_ptr!=NULL) {
	buf[3]=atoi(DP_ptr);
	char *GQX_ptr = find_format(ptr,"GQX");
	if (GQX_ptr == NULL) 
	  {
	    GQX_ptr = find_format(ptr,"GQ");
	    GQX = atoi(GQX_ptr);
	    if (QUAL_ptr[0] != '.') 
	      {
		QUAL = atoi(QUAL_ptr);
		if (QUAL < GQX)
		  GQX = QUAL;
	      }
	  }
	else
	  {
	    GQX = atoi(GQX_ptr);
	  }
	
	//trying to reduce entropy on GQ to get better compression performance.
	//1. rounds down to nearest 10. 
	//2. sets gq to min(gq,100). 
	buf[4]=GQX/10;
	buf[4]*=10;
	if(buf[4]>100) buf[4]=100;

	//	printf("%d\t%d\t%d\t%d\t%d\n",buf[0],buf[1],buf[2],buf[3],buf[4]);
	if(gzwrite(depth_fp,buf,5*sizeof(int))!=(5*sizeof(int)))
	  die("ERROR: problem writing "+(string)out_fname+".tmp");
      }
      if(is_variant) {//wass this a variant? if so write it out to the bcf
	norm_args->ntotal++;
	vcf_parse(&str,hdr_in,bcf_rec);
	//	cerr<<bcf_rec->rid<<":"<<bcf_rec->pos<<endl;
	if(prev_rid!=bcf_rec->rid) 
	  vbuf.flush(variant_fp,hdr_out);
	else
	  vbuf.flush(bcf_rec->pos,variant_fp,hdr_out);
	prev_rid=bcf_rec->rid;
	int32_t pass = bcf_has_filter(hdr_in, bcf_rec, ".");
	bcf_update_format_int32(hdr_out,bcf_rec,"PF",&pass,1);
	bcf_update_filter(hdr_out,bcf_rec,NULL,0);
	if(bcf_rec->n_allele>2) {//split multi-allelics (using vcfnorm.c from bcftools1.3
	  norm_args->nsplit++;
	  split_multiallelic_to_biallelics(norm_args,bcf_rec );
	  for(int i=0;i<norm_args->ntmp_lines;i++){
	    remove_info(norm_args->tmp_lines[i]);
	    if(realign(norm_args,norm_args->tmp_lines[i]) != ERR_REF_MISMATCH)
	      ndec+=decompose(norm_args->tmp_lines[i],hdr_out,vbuf);
	    else
	      if(exit_on_mismatch)
		die("vcf did not match the reference");
	      else
		norm_args->nskipped++;
	  }
	}
	else {
	  remove_info(bcf_rec);
	  if( realign(norm_args,bcf_rec) !=  ERR_REF_MISMATCH)
	    ndec+=decompose(bcf_rec,hdr_out,vbuf);
	  else
	    if(exit_on_mismatch)
	      die("vcf did not match the reference");
	    else
	      norm_args->nskipped++;
	}
	vbuf.flush(bcf_rec->pos,variant_fp,hdr_out);
      }
    }
  }
  vbuf.flush(variant_fp,hdr_out);
  bcf_hdr_destroy(hdr_in);
  bcf_hdr_destroy(hdr_out);
  bcf_destroy1(bcf_rec);
  ks_destroy(ks);
  gzclose(fp);
  gzclose(depth_fp);  
  free(str.s);
  free(work1.s);
  hts_close(variant_fp);
  destroy_data(norm_args);
  fprintf(stderr,"Variant lines   total/split/realigned/skipped:\t%d/%d/%d/%d\n", norm_args->ntotal,norm_args->nsplit,norm_args->nchanged,norm_args->nskipped);
  fprintf(stderr,"Decomposed %d MNPs\n", ndec);


  fprintf(stderr,"Indexing %s\n",out_fname);
  bcf_index_build(out_fname, BCF_LIDX_SHIFT);
  free(out_fname);
  return 0;
}
Example #20
0
/*
 * "bedcov" sub-command.
 *
 * Usage: samtools bedcov [-Q min_mapQ] <in.bed> <in1.bam> [...]
 *
 * For every line of the BED file, prints the line followed by one extra
 * tab-separated column per BAM file.  Each column is the sum of n_plp[] (the
 * per-position pileup counts reported by bam_mplp_auto) over the positions
 * with beg <= pos < end.  Lines that cannot be parsed, or that name a
 * reference unknown to the first BAM header, are reported on stderr and
 * skipped.
 *
 * The -Q value is stored in each file's aux_t (min_mapQ) for use by the
 * read_bam callback.
 *
 * Returns 0 on success, 1 on a usage error, and 2 when an input file, its
 * index or its header cannot be read.
 */
int main_bedcov(int argc, char *argv[])
{
    gzFile fp;
    kstring_t str;
    kstream_t *ks;
    hts_idx_t **idx;
    aux_t **aux;
    int *n_plp, dret, i, n, c, min_mapQ = 0;
    int64_t *cnt;
    const bam_pileup1_t **plp;

    while ((c = getopt(argc, argv, "Q:")) >= 0) {
        switch (c) {
        case 'Q': min_mapQ = atoi(optarg); break;
        }
    }
    if (optind + 2 > argc) {
        fprintf(stderr, "Usage: samtools bedcov <in.bed> <in1.bam> [...]\n");
        return 1;
    }
    memset(&str, 0, sizeof(kstring_t));
    n = argc - optind - 1; /* number of BAM files; at least 1 after the check above */
    aux = calloc(n, sizeof(aux_t*));
    idx = calloc(n, sizeof(hts_idx_t*));
    for (i = 0; i < n; ++i) {
        aux[i] = calloc(1, sizeof(aux_t));
        aux[i]->min_mapQ = min_mapQ;
        aux[i]->fp = sam_open(argv[i+optind+1], "r");
        /* Only ask for the index once the file itself is open, so that
         * sam_index_load() is never handed a NULL handle.  idx[i] is still
         * 0 from calloc() when the open failed. */
        if (aux[i]->fp != 0)
            idx[i] = sam_index_load(aux[i]->fp, argv[i+optind+1]);
        if (aux[i]->fp == 0 || idx[i] == 0) {
            fprintf(stderr, "ERROR: fail to open index BAM file '%s'\n", argv[i+optind+1]);
            return 2;
        }
        // TODO bgzf_set_cache_size(aux[i]->fp, 20);
        aux[i]->header = sam_hdr_read(aux[i]->fp);
        /* The first header is needed below to resolve reference names. */
        if (aux[i]->header == 0) {
            fprintf(stderr, "ERROR: fail to read the header of '%s'\n", argv[i+optind+1]);
            return 2;
        }
    }
    cnt = calloc(n, sizeof(*cnt));

    fp = gzopen(argv[optind], "rb");
    if (fp == 0) {
        fprintf(stderr, "ERROR: fail to open BED file '%s'\n", argv[optind]);
        return 2;
    }
    ks = ks_init(fp);
    n_plp = calloc(n, sizeof(int));
    plp = calloc(n, sizeof(bam_pileup1_t*));
    while (ks_getuntil(ks, KS_SEP_LINE, &str, &dret) >= 0) {
        char *p, *q;
        int tid, beg, end, pos;
        bam_mplp_t mplp;

        /* Column 1: reference name, looked up in the first BAM's header.
         * The tab is temporarily replaced by NUL and then restored so the
         * line can still be printed in full afterwards. */
        for (p = q = str.s; *p && *p != '\t'; ++p);
        if (*p != '\t') goto bed_error;
        *p = 0; tid = bam_name2id(aux[0]->header, q); *p = '\t';
        if (tid < 0) goto bed_error;

        /* Column 2: start.  The cast keeps isdigit() defined for bytes
         * outside the 7-bit range. */
        for (q = p = p + 1; isdigit((unsigned char)*p); ++p);
        if (*p != '\t') goto bed_error;
        *p = 0; beg = atoi(q); *p = '\t';

        /* Column 3: end; may be followed by further columns or end the line. */
        for (q = p = p + 1; isdigit((unsigned char)*p); ++p);
        if (*p == '\t' || *p == 0) {
            int saved = *p;
            *p = 0; end = atoi(q); *p = saved;
        } else goto bed_error;

        for (i = 0; i < n; ++i) {
            if (aux[i]->iter) hts_itr_destroy(aux[i]->iter);
            aux[i]->iter = sam_itr_queryi(idx[i], tid, beg, end);
        }
        mplp = bam_mplp_init(n, read_bam, (void**)aux);
        bam_mplp_set_maxcnt(mplp, 64000);
        memset(cnt, 0, sizeof(*cnt) * n);
        while (bam_mplp_auto(mplp, &tid, &pos, n_plp, plp) > 0)
            if (pos >= beg && pos < end)
                for (i = 0; i < n; ++i) cnt[i] += n_plp[i];
        for (i = 0; i < n; ++i) {
            kputc('\t', &str);
            kputl(cnt[i], &str);
        }
        puts(str.s);
        bam_mplp_destroy(mplp);
        continue;

bed_error:
        fprintf(stderr, "Errors in BED line '%s'\n", str.s);
    }
    free(n_plp); free(plp);
    ks_destroy(ks);
    gzclose(fp);

    free(cnt);
    for (i = 0; i < n; ++i) {
        if (aux[i]->iter) hts_itr_destroy(aux[i]->iter);
        hts_idx_destroy(idx[i]);
        bam_hdr_destroy(aux[i]->header);
        sam_close(aux[i]->fp);
        free(aux[i]);
    }
    free(aux); free(idx);
    free(str.s);
    return 0;
}
Example #21
0
/*
 * Entry point with two modes, selected by the integer in argv[1]:
 *
 *   non-zero: build and write the index
 *     <prog> <mode> <kmerPath> <refPath> <taxonomyNodesPath> <giTaxidPath> <dirPath> <kmer>
 *   zero: classify reads, single-end
 *     <prog> 0 <readPath> <taxonomyNodesPath> <dirPath> <kmer> <interval>
 *   zero: classify reads, paired-end (selected when argc > 7)
 *     <prog> 0 <readPath> <readPath_s> <taxonomyNodesPath> <dirPath> <kmer> <interval>
 *
 * Returns NORMAL_EXIT on success, FILE_OPEN_ERROR when a read file cannot be
 * opened, MEM_ALLOCATE_ERROR when a buffer cannot be allocated, and 1 when
 * the command line is too short for the selected mode.
 */
int main(int argc, char *argv[])
{
	static const char usage[] =
		"usage:\n"
		"  build index:     <prog> <non-zero> <kmerPath> <refPath> <taxonomyNodesPath> <giTaxidPath> <dirPath> <kmer>\n"
		"  classify single: <prog> 0 <readPath> <taxonomyNodesPath> <dirPath> <kmer> <interval>\n"
		"  classify paired: <prog> 0 <readPath> <readPath_s> <taxonomyNodesPath> <dirPath> <kmer> <interval>\n";

	// argv[1] selects the mode, so it must exist before anything is parsed.
	if (argc < 2) {
		fprintf(stderr, "%s", usage);
		return 1;
	}

	int choice = atoi(argv[1]);
	bool isPairEnd = false;

	if (choice) {
		// Index construction reads argv[2] .. argv[7].
		if (argc < 8) {
			fprintf(stderr, "%s", usage);
			return 1;
		}

		char *kmerPath = argv[2];
		char *refPath = argv[3];
		char *taxonomyNodesPath = argv[4];
		char *giTaxidPath = argv[5];
		char *dirPath = argv[6];

		_kmer = (uint8_t) atoi(argv[7]);
		string  bwt_s;
		vector<uint32_t> nKmerTaxonID;

		fprintf(stderr,"start preprocessing......\n");

		uint64_t hash_index_size = (uint64_t)1 <<((PREINDEXLEN<<1) + 1);

		// NOTE(review): hash_index is not released in this function; whether
		// bwt takes ownership of it is not visible from here -- confirm.
		uint64_t *hash_index = new uint64_t[hash_index_size]();

		preprocess(refPath, kmerPath, taxonomyNodesPath, giTaxidPath,  bwt_s, nKmerTaxonID, hash_index);

		fprintf(stderr,"writing index....\n");

		bwt bt(bwt_s.c_str(), bwt_s.length(),hash_index);

		bt.bwt_init();

		bt.write_info(dirPath, nKmerTaxonID);
	} else {
		// Classification reads argv[2] .. argv[6] (single-end) or
		// argv[2] .. argv[7] (paired-end, chosen below when argc > 7).
		if (argc < 7) {
			fprintf(stderr, "%s", usage);
			return 1;
		}

		char *readPath;
		char *readPath_s = NULL;
		char *taxonomyNodesPath;
		char *dirPath;

		fprintf(stderr,"%d", argc);
		if (argc > 7 ) {
			isPairEnd = true;

			readPath = argv[2];
			readPath_s = argv[3];
			taxonomyNodesPath = argv[4];
			dirPath = argv[5];
			_kmer = (uint8_t) atoi(argv[6]);
			_interval = atoi(argv[7]);
		} else {
			readPath = argv[2];
			taxonomyNodesPath = argv[3];
			dirPath = argv[4];
			_kmer = (uint8_t) atoi(argv[5]);
			_interval = atoi(argv[6]);
		}

		// The k-mer length from the command line is decremented before it
		// is handed to bwt.
		--_kmer;

		bwt bt(_kmer);

		fprintf(stderr,"loading index\n");

		bt.load_info(dirPath);

		taxonTree(taxonomyNodesPath);

		fprintf(stderr,"classifying...\n");

		// NOTE(review): the gz handles and kstreams opened below are never
		// closed in this function; add the matching teardown once the
		// project's stream API has been confirmed.
		gzFile fp = gzopen(readPath, "r");

		if (!fp) return FILE_OPEN_ERROR;

		kstream_t *_fp = ks_init(fp);

		kseq_t *seqs = (kseq_t *) calloc(N_NEEDED, sizeof(kseq_t));

		if (!seqs) return MEM_ALLOCATE_ERROR;

		// Second-mate inputs; they stay NULL for single-end runs.
		gzFile fp_s = NULL;
		kstream_t *_fp_s = NULL;
		kseq_t *seqs_s = NULL;

		if (isPairEnd) {
			fp_s = gzopen(readPath_s, "r");

			if (!fp_s) return FILE_OPEN_ERROR;

			_fp_s  = ks_init(fp_s);

			seqs_s = (kseq_t *) calloc(N_NEEDED, sizeof(kseq_t));

			if (!seqs_s) return MEM_ALLOCATE_ERROR;
		}

		// Two result slots per read so both mates of a pair fit.
		cly_r *results = (cly_r *)calloc(2 * N_NEEDED, sizeof(cly_r));

		if (!results) return MEM_ALLOCATE_ERROR;

		struct timeval tv1, tv2;

		int n_seqs;
		total_sequences = 0;
		gettimeofday(&tv1,NULL);
		if (isPairEnd) {
			// Both files are read in lock-step; the batch size of the first
			// file is the one used for both classification calls.
			while ((n_seqs = read_reads(_fp, seqs, N_NEEDED) )> 0 && read_reads(_fp_s, seqs_s, N_NEEDED)) {
				classify_seq(seqs, n_seqs , bt, results, 0);
				classify_seq(seqs_s, n_seqs, bt, results, 1);
				output_results(results, n_seqs, isPairEnd);
				total_sequences += n_seqs;
			}
		} else {
			while ((n_seqs = read_reads(_fp, seqs, N_NEEDED) )> 0) {
				classify_seq(seqs, n_seqs , bt, results, 0);
				output_results(results, n_seqs, isPairEnd);
				total_sequences += n_seqs;
			}
		}
		gettimeofday(&tv2,NULL);
		report_stats(tv1,tv2);

		free(results);
		free(seqs);
		free(seqs_s);
	}

	return NORMAL_EXIT;
}
Example #22
0
/*
 * naivepca [-m min_maf] <in.txt>
 *
 * Input ("-" means stdin): one sample per line, consisting of a name, a run
 * of tabs/spaces, and then one character per site.  A digit '0'..'9' is the
 * genotype at that site; any other character counts as missing.  Lines with
 * a single field, or whose site string differs in length from the first
 * accepted line, are skipped with a warning.
 *
 * Processing: genotypes are centred by the per-site mean and scaled by
 * sqrt(p * (1 - p)), where p = mean / max_hap (max_hap being the largest
 * genotype seen).  Missing genotypes and sites with p or 1 - p below min_maf
 * contribute 0.  The n_rows x n_rows matrix of row dot products divided by
 * n_cols is passed to n_eigen_symm(), and one line per sample is printed:
 * the name followed by n_rows values, each an entry of X (as left by
 * n_eigen_symm) multiplied by a value from ev, in the order produced by
 * ks_introsort.
 *
 * Returns 0 on success, 1 on a usage error, 2 if the input cannot be opened.
 */
int main(int argc, char *argv[])
{
	int c, dret, lineno = 0, n_rows = 0, m_rows = 0, n_cols = 0, max_hap = 0;
	int64_t n_missing = 0, n_tot = 0;
	gzFile fp;
	kstream_t *ks;
	kstring_t str = {0,0,0};
	int8_t **C = 0;
	double **M, *X, min_maf = 0.0;
	char **names = 0;

//	_MM_SET_EXCEPTION_MASK(_MM_GET_EXCEPTION_MASK() & ~(_MM_MASK_INVALID | _MM_MASK_OVERFLOW | _MM_MASK_UNDERFLOW | _MM_MASK_DIV_ZERO));
	_MM_SET_EXCEPTION_MASK(_MM_GET_EXCEPTION_MASK() & ~(_MM_MASK_INVALID | _MM_MASK_DIV_ZERO));
	while ((c = getopt(argc, argv, "m:")) >= 0) {
		if (c == 'm') min_maf = atof(optarg);
	}
	if (argc - optind == 0) {
		fprintf(stderr, "Usage: naivepca [-m min_maf] <in.txt>\n");
		return 1;
	}

	fp = strcmp(argv[optind], "-")? gzopen(argv[optind], "r") : gzdopen(fileno(stdin), "r");
	if (fp == 0) {
		fprintf(stderr, "[E::%s] failed to open file '%s'. Abort.\n", __func__, argv[optind]);
		return 2;
	}
	ks = ks_init(fp);

	// read the matrix into C
	while (ks_getuntil(ks, KS_SEP_LINE, &str, &dret) >= 0) {
		int8_t *q;
		char *p, *name = str.s;
		int i;
		++lineno;
		// terminate the name and advance p to the first genotype character
		for (p = str.s; *p && *p != '\t' && *p != ' '; ++p);
		if (*p) {
			*p++ = 0;
			for (; *p && (*p == '\t' || *p == ' '); ++p);
		}
		if (*p == 0) {
			fprintf(stderr, "[W::%s] line %d has one field; skipped.\n", __func__, lineno);
			continue;
		}
		// the first accepted line fixes the number of sites
		if (n_cols != 0) {
			if (n_cols != str.s + str.l - p) {
				fprintf(stderr, "[W::%s] line %d has a different number of columns; skipped.\n", __func__, lineno);
				continue;
			}
		} else n_cols = str.s + str.l - p;
		if (n_rows == m_rows) {
			m_rows = m_rows? m_rows<<1 : 16;
			C = (int8_t**)realloc(C, m_rows * sizeof(int8_t*));
			names = (char**)realloc(names, m_rows * sizeof(char*));
		}
		names[n_rows] = strdup(name);
		// one int8_t per site (the row was previously sized with sizeof(double))
		q = C[n_rows++] = (int8_t*)calloc(n_cols, sizeof(int8_t));
		for (i = 0; i < n_cols; ++i) {
			if (p[i] >= '0' && p[i] <= '9') q[i] = p[i] - '0';
			else q[i] = -1, ++n_missing;
			max_hap = max_hap > q[i]? max_hap : q[i];
		}
		n_tot += n_cols;
	}
	free(str.s);
	fprintf(stderr, "[M::%s] read %d samples and %d sites; ploidy is %d\n", __func__, n_rows, n_cols, max_hap);
	// the format prints a percent sign, so scale the fraction; report 0 when nothing was read
	fprintf(stderr, "[M::%s] %.3f%% of genotypes are missing\n", __func__, n_tot > 0? 100.0 * (double)n_missing / n_tot : 0.0);

	{ // normalize the matrix into M
		int i, j, *sum, *cnt, n_dropped = 0;
		double *mu, *pp;
		sum = (int*)calloc(n_cols, sizeof(int));
		cnt = (int*)calloc(n_cols, sizeof(int));
		mu = (double*)calloc(n_cols, sizeof(double));
		pp = (double*)calloc(n_cols, sizeof(double));
		for (i = 0; i < n_rows; ++i) {
			int8_t *q = C[i];
			for (j = 0; j < n_cols; ++j)
				if (q[j] >= 0) sum[j] += q[j], ++cnt[j];
		}
		for (j = 0; j < n_cols; ++j) {
			if (cnt[j] > 0) {
				mu[j] = (double)sum[j] / cnt[j];
				// max_hap is 0 when every observed genotype is 0; treat the
				// frequency as 0 instead of computing 0/0
				pp[j] = max_hap > 0? mu[j] / max_hap : 0.;
				if (pp[j] < min_maf || 1. - pp[j] < min_maf) ++n_dropped;
			} else ++n_dropped;
		}
		fprintf(stderr, "[M::%s] %d rare sites are dropped\n", __func__, n_dropped);
		M = (double**)calloc(n_rows, sizeof(double*));
		for (i = 0; i < n_rows; ++i) {
			int8_t *q = C[i];
			double *r;
			r = M[i] = (double*)calloc(n_cols, sizeof(double));
			// missing, filtered and monomorphic entries contribute 0
			for (j = 0; j < n_cols; ++j)
				r[j] = q[j] < 0 || pp[j] < min_maf || 1. - pp[j] < min_maf || pp[j] == 0. || 1 - pp[j] == 0. ? 0. : (q[j] - mu[j]) / sqrt(pp[j] * (1. - pp[j]));
		}
		free(sum); free(cnt); free(mu); free(pp);
		for (i = 0; i < n_rows; ++i) free(C[i]);
		free(C);
	}

	{ // multiplication
		int i, j, k;
		X = (double*)calloc(n_rows * n_rows, sizeof(double));
		for (i = 0; i < n_rows; ++i) {
			double *zi = M[i];
			// only the lower triangle is computed; the result is mirrored
			for (j = 0; j <= i; ++j) {
				double t = 0., *zj = M[j];
				for (k = 0; k < n_cols; ++k)
					t += zi[k] * zj[k];
				X[i*n_rows + j] = X[j*n_rows + i] = t / n_cols;
			}
		}
		for (i = 0; i < n_rows; ++i) free(M[i]);
		free(M);
	}

	{ // print eigen vectors
		double *ev;
		int i, j;
		evsrt_t *evsrt;
		ev = (double*)calloc(n_rows, sizeof(double));
		evsrt = (evsrt_t*)calloc(n_rows, sizeof(evsrt_t));
		n_eigen_symm(X, n_rows, ev);
		// pair every value in ev with its original column index before sorting
		for (i = 0; i < n_rows; ++i)
			evsrt[i].ev = ev[i], evsrt[i].i = i;
		ks_introsort(ev, n_rows, evsrt);
		for (i = 0; i < n_rows; ++i) {
			printf("%s", names[i]);
			for (j = 0; j < n_rows; ++j)
				printf("\t%.6f", X[i*n_rows + evsrt[j].i] * evsrt[j].ev);
			putchar('\n');
			free(names[i]);
		}
		free(ev); free(evsrt);
		free(X); free(names);
	}
	
	ks_destroy(ks);
	gzclose(fp);
	return 0;
}