Esempio n. 1
0
File: dbask.c Progetto: sergiy8/fsha
/*
 * Look up the result stored for the key `busy` in the table described by `h`.
 *
 * The header flags select how the table is read:
 *   - h->wbit:   `busy` is located with search() in blist[h->rank]
 *                (cnk[h->rank] entries) and the resulting index is handed to
 *                twobit_get() on h->data.  The index is not checked before use.
 *   - h->sorted: h->data is treated as h->count0 keys followed by h->count1
 *                keys; each range is probed with search(), where a result of
 *                -1 is treated as "not found".
 *   - otherwise: the same two key ranges are scanned linearly.
 *
 * A hit in the first range returns r0[h->value], a hit in the second range
 * returns r1[h->value].  If the key is in neither range and h->full is set,
 * h->value itself is returned.  Otherwise error("Not in db") is called.
 *
 * NOTE(review): nothing is returned after error(); presumably error() does
 * not return -- confirm.
 */
static unsigned dbget_raw(struct fheader * h, unsigned busy){
	uint32_t pos;

	if (h->wbit) {
		pos = search(busy, blist[h->rank], cnk[h->rank]);
		return twobit_get(h->data, pos);
	}

	if (!h->sorted) {
		/* Unsorted table: walk the first key range ... */
		pos = 0;
		while (pos < h->count0) {
			if (((uint32_t *)(h->data))[pos] == busy)
				return r0[h->value];
			pos++;
		}
		/* ... then carry on, from where the first scan stopped, through the second. */
		while (pos < h->count0 + h->count1) {
			if (((uint32_t *)(h->data))[pos] == busy)
				return r1[h->value];
			pos++;
		}
	} else {
		/* Sorted table: probe each key range separately. */
		pos = search(busy, (uint32_t *)(h->data), h->count0);
		if (pos != -1)
			return r0[h->value];

		pos = search(busy, (uint32_t *)(h->data) + h->count0, h->count1);
		if (pos != -1)
			return r1[h->value];
	}

	/* Key is in neither range. */
	if (h->full)
		return h->value;

	error("Not in db");
}
Esempio n. 2
0
/*
 * Encode a read as an alignment against the supercontig motif held by E.
 *
 * Emitted in this order:
 *   1. the record type SEQENC_TYPE_ALIGNMENT,
 *   2. one N-mask bit per read position (1 where query_str holds 'N'),
 *   3. the strand flag,
 *   4. the offset `spos`,
 *   5. every non-N nucleotide, conditioned on its position in the motif.
 *
 * When `strand` is non-zero each nucleotide is passed through kmer_comp1()
 * and the motif position is counted back from the end of the supercontig
 * (slen - (spos + i) - 1); otherwise the position is simply spos + i.
 *
 * E          encoder state
 * spos       offset into the supercontig; asserted to be below its length
 * strand     non-zero selects the complemented, mirrored coding described above
 * query_str  read as characters, only consulted for 'N' positions
 * query      read in two-bit form
 */
void seqenc_encode_alignment(
        seqenc_t* E,
        uint32_t spos, uint8_t strand,
        const unsigned char* query_str,
        const twobit_t* query)
{
    const size_t read_len   = twobit_len(query);
    const size_t contig_len = E->supercontig_motif.n;
    size_t k;

    assert(spos < contig_len);

    dist2_encode(E->ac, &E->d_type, SEQENC_TYPE_ALIGNMENT);

    /* N mask: one binary symbol per read position. */
    reserve_nmask(E, read_len);
    k = 0;
    while (k < read_len) {
        dist2_encode(E->ac, &E->d_nmask[k], query_str[k] == 'N');
        ++k;
    }

    dist2_encode(E->ac, &E->d_aln_strand, strand);
    uint32_enc_encode(E->ac, &E->d_contig_off, spos);

    /* Nucleotides: positions flagged as N carry no further information. */
    for (k = 0; k < read_len; ++k) {
        kmer_t nuc;
        size_t motif_pos;

        if (query_str[k] == 'N') continue;

        if (strand) {
            nuc       = kmer_comp1(twobit_get(query, k));
            motif_pos = contig_len - (spos + k) - 1;
        }
        else {
            nuc       = twobit_get(query, k);
            motif_pos = spos + k;
        }

        cond_dist4_encode(E->ac, &E->supercontig_motif, motif_pos, nuc);
    }
}
Esempio n. 3
0
/*
 * Initialise the encoder's supercontig motif from `supercontig`.
 *
 * The motif is (re)initialised with one slot per nucleotide and given the
 * update rate `motif_update_rate`.  For every position, the count of the
 * nucleotide actually present in the supercontig is set to
 * `contig_motif_prior`, after which dist4_update() is called on that slot.
 *
 * An empty supercontig leaves E untouched.
 */
void seqenc_set_supercontig(seqenc_t* E, const twobit_t* supercontig)
{
    const size_t n = twobit_len(supercontig);
    size_t pos;

    if (n == 0) return;

    cond_dist4_init(&E->supercontig_motif, n);
    cond_dist4_set_update_rate(&E->supercontig_motif, motif_update_rate);

    pos = 0;
    while (pos < n) {
        const kmer_t nuc = twobit_get(supercontig, pos);

        /* Seed the slot with the observed nucleotide. */
        E->supercontig_motif.xss[pos].xs[nuc].count = contig_motif_prior;
        dist4_update(&E->supercontig_motif.xss[pos]);
        ++pos;
    }
}
Esempio n. 4
0
/*
 * Encode a read directly (no alignment).
 *
 * The record type SEQENC_TYPE_SEQUENCE is always emitted.  An empty read
 * emits nothing else.
 *
 * Nucleotides are coded two at a time as one 4-bit symbol.  The context is
 * built from the preceding pairs, shifted in 4 bits at a time and masked with
 * E->ctx_mask.  Pairs whose pair index is below `prefix_len` use the
 * per-position models E->cs0[]; all later pairs use the shared model E->cs.
 * A trailing unpaired nucleotide (odd read length) is coded on its own with
 * E->cs.  Finally one N-mask bit per position is emitted (1 where x_str
 * holds 'N').
 *
 * E      encoder state
 * x_str  read as characters, only consulted for 'N' positions
 * x      read in two-bit form
 */
void seqenc_encode_twobit_seq(seqenc_t* E, const unsigned char* x_str, const twobit_t* x)
{
    dist2_encode(E->ac, &E->d_type, SEQENC_TYPE_SEQUENCE);

    const size_t len = twobit_len(x);
    if (len == 0) return;

    const size_t last = len - 1;   /* safe: len >= 1 here */
    uint32_t context = 0;
    kmer_t pair;
    size_t pos = 0;

    /* Leading pairs: position-specific models. */
    while (pos < last && pos / 2 < prefix_len) {
        pair = (twobit_get(x, pos) << 2) | twobit_get(x, pos + 1);
        cond_dist16_encode(E->ac, &E->cs0[pos / 2], context, pair);
        context = ((context << 4) | pair) & E->ctx_mask;
        pos += 2;
    }

    /* Remaining pairs: shared model. */
    while (pos < last) {
        pair = (twobit_get(x, pos) << 2) | twobit_get(x, pos + 1);
        cond_dist16_encode(E->ac, &E->cs, context, pair);
        context = ((context << 4) | pair) & E->ctx_mask;
        pos += 2;
    }

    /* Odd read length: one nucleotide is left over. */
    if (pos < len) {
        pair = twobit_get(x, pos);
        cond_dist16_encode(E->ac, &E->cs, context, pair);
    }

    /* N mask: one binary symbol per read position. */
    reserve_nmask(E, len);
    for (pos = 0; pos < len; ++pos) {
        dist2_encode(E->ac, &E->d_nmask[pos], x_str[pos] == 'N');
    }
}
Esempio n. 5
0
File: ask.c Progetto: sergiy8/fsha
/*
 * Command-line lookup of a single entry in a rank-specific data file.
 *
 * Expects exactly three arguments, read through getarg() as b, w and d.
 * The rank is the population count of b.  The data file for that rank is
 * opened read-only via malloc_file().
 *
 * The index of b is found by stepping _permut() from ALLONE(rank) until b is
 * reached; it is cross-checked against blist_get(b).  The two-bit value at
 * that index, inside the slab selected by ((w<<rank)|d), is then printed.
 *
 * NOTE(review): (w<<rank) is evaluated in 32 bits before the uint64_t cast --
 * confirm that this cannot overflow for the ranks in use.
 */
int main(int argc, char ** argv){
	uint32_t perm, b, w, d;
	unsigned char * array;
	int rank;
	int idx;
	int res;

	if (argc != 4)
		panic();

	b = getarg(1);
	w = getarg(2);
	d = getarg(3);
	rank = _popc(b);
	array = malloc_file(ARRAY_SIZE_S(rank),FMODE_RO,DATA_FORMAT,rank);

	/* Count how many permutation steps separate ALLONE(rank) from b. */
	idx = 0;
	perm = ALLONE(rank);
	while (perm != b) {
		idx++;
		perm = _permut(perm);
	}

	/* The lookup table must agree with the index found by enumeration. */
	if ( blist_get(b) != idx )
		error("What's f***a with blist\n");

	res = twobit_get(array+(uint64_t)((w<<rank)|d)*cnk(32,rank)/4,idx);
	printf("%08X %X %X = %d\n",b,w,d,res);
	return 0;
}
Esempio n. 6
0
/*
 * Decode the nucleotide sequence of a read that was coded relative to a
 * reference sequence.
 *
 * The reference is looked up by r->seqname.  r->cigar and r->pos are read as
 * given and drive the decoding.  The result is written to r->seq as a
 * NUL-terminated string of length `seqlen`.
 *
 * Steps:
 *   1. Decode the N mask: positions flagged as N are set to 'N' and skipped
 *      by every later step.
 *   2. Check that the cigar describes exactly `seqlen` read bases.  This is
 *      done BEFORE the sequence is filled in, because the cigar lengths are
 *      the only bound on the read position in the decoding loop; a cigar that
 *      is too long would otherwise read and write past the end of r->seq.s.
 *   3. Walk the cigar:
 *        =, X, M   per base, a match flag; on match the base is copied from
 *                  the reference, otherwise a nucleotide is decoded.
 *        I, S      per base, a nucleotide is decoded.
 *        D, N, H   advance the reference position only.
 *        P         reported as unsupported.
 *   4. If r->strand is set, reverse-complement the decoded sequence.
 *
 * NOTE(review): hard clips (H) advance the reference position here, which the
 * SAM specification says they should not.  This is left unchanged on the
 * assumption that it must match whatever the corresponding encoder does --
 * confirm.
 *
 * E       decoder state
 * r       read whose seq field receives the decoded sequence
 * seqlen  number of bases in the read
 */
static void seqenc_decode_reference_alignment(seqenc_t* E, short_read_t* r, size_t seqlen)
{
    const twobit_t* refseq = seqmap_get(E->ref, (const char*) r->seqname.s);

    if (refseq == NULL) {
        quip_error(
            "A read was aligned to sequence %s, which was not found in the reference.",
            r->seqname.s);
    }

    str_reserve(&r->seq, seqlen + 1);
    r->seq.n = 0;

    /* decode N mask */
    size_t i;
    reserve_nmask(E, seqlen);
    memset(r->seq.s, '\0', seqlen + 1);
    for (i = 0; i < seqlen; ++i) {
        if (dist2_decode(E->ac, &E->d_nmask[i])) r->seq.s[i] = 'N';
    }

    /* Validate the cigar up front: the number of read bases it consumes must
     * equal seqlen, otherwise the loop below would index outside r->seq.s.
     * A 64-bit sum is used so that the individual lengths cannot wrap it. */
    uint64_t query_len = 0;
    for (i = 0; i < r->cigar.n; ++i) {
        switch (r->cigar.ops[i]) {
            case BAM_CEQUAL:
            case BAM_CDIFF:
            case BAM_CMATCH:
            case BAM_CINS:
            case BAM_CSOFT_CLIP:
                query_len += r->cigar.lens[i];
                break;

            default:
                break;
        }
    }

    if (query_len != seqlen) {
        quip_error("Cigar operations do not account for full read length.");
        /* quip_error is presumably fatal; if it ever returns, stop here
         * rather than decode out of bounds. */
        return;
    }

    uint32_t ref_pos   = r->pos;
    uint32_t read_pos  = 0;

    size_t j; /* position within the cigar op */

    kmer_t y; /* reference nucleotide */

    for (i = 0; i < r->cigar.n; ++i) {
        switch (r->cigar.ops[i]) {
            case BAM_CEQUAL:
            case BAM_CDIFF:
            case BAM_CMATCH:
                for (j = 0; j < r->cigar.lens[i]; ++j, ++read_pos, ++ref_pos) {
                    if (r->seq.s[read_pos] == 'N') continue;

                    if (dist2_decode(E->ac, &E->d_ref_match) == SEQENC_REF_MATCH) {
                        y = twobit_get(refseq, ref_pos);
                        r->seq.s[read_pos] = kmertochar[y];
                    }
                    else {
                        r->seq.s[read_pos] = kmertochar[dist4_decode(E->ac, &E->d_ref_ins_nuc)];
                    }
                }
                break;

            case BAM_CINS:
                for (j = 0; j < r->cigar.lens[i]; ++j, ++read_pos) {
                    if (r->seq.s[read_pos] == 'N') continue;
                    r->seq.s[read_pos] = kmertochar[dist4_decode(E->ac, &E->d_ref_ins_nuc)];
                }
                break;

            case BAM_CDEL:
                ref_pos += r->cigar.lens[i];
                break;

            case BAM_CREF_SKIP:
                ref_pos += r->cigar.lens[i];
                break;

            case BAM_CSOFT_CLIP:
                for (j = 0; j < r->cigar.lens[i]; ++j, ++read_pos) {
                    if (r->seq.s[read_pos] == 'N') continue;
                    r->seq.s[read_pos] = kmertochar[dist4_decode(E->ac, &E->d_ref_ins_nuc)];
                }
                break;

            case BAM_CHARD_CLIP:
                ref_pos += r->cigar.lens[i];
                break;

            case BAM_CPAD:
                quip_error("Unsupported cigar operation.");
                break;
        }
    }

    /* read_pos == seqlen here: the loop above consumes exactly the read
     * bases that were summed during validation. */
    r->seq.s[seqlen] = '\0';
    r->seq.n = seqlen;

    if (r->strand) str_revcomp(r->seq.s, r->seq.n);
}
Esempio n. 7
0
/*
 * Encode the nucleotide sequence of a read relative to the reference
 * sequence it was aligned to.
 *
 * The reference is looked up by r->seqname.  The read is copied into
 * E->tmpseq and, if r->strand is set, reverse-complemented there; r itself is
 * not modified.
 *
 * Steps:
 *   1. Check that the cigar describes exactly r->seq.n read bases.  This is
 *      done BEFORE anything is encoded, because the cigar lengths are the
 *      only bound on the read position in the encoding loop; a cigar that is
 *      too long would otherwise index past the end of E->tmpseq.s.
 *   2. Encode the N mask (1 where the working copy holds 'N').  N positions
 *      are skipped by every later step.
 *   3. Walk the cigar:
 *        =, X, M   per base, a match/mismatch flag against the reference,
 *                  followed by the nucleotide itself on mismatch.
 *        I, S      per base, the nucleotide itself.
 *        D, N, H   advance the reference position only.
 *        P         reported as unsupported.
 *
 * NOTE(review): hard clips (H) advance the reference position here, which the
 * SAM specification says they should not.  This is left unchanged on the
 * assumption that it must match whatever the corresponding decoder does --
 * confirm.
 *
 * E  encoder state (E->tmpseq is used as scratch space)
 * r  read to encode
 */
void seqenc_encode_reference_alignment(
        seqenc_t* E, const short_read_t* r)
{
    const twobit_t* refseq = seqmap_get(E->ref, (const char*) r->seqname.s);

    if (refseq == NULL) {
        quip_error(
            "A read was aligned to sequence %s, which was not found in the reference.\n",
            r->seqname.s);
    }

    size_t i;

    /* Validate the cigar up front: the number of read bases it consumes must
     * equal the read length, otherwise the loop below would index outside
     * the sequence buffer.  A 64-bit sum is used so that the individual
     * lengths cannot wrap it. */
    uint64_t query_len = 0;
    for (i = 0; i < r->cigar.n; ++i) {
        switch (r->cigar.ops[i]) {
            case BAM_CEQUAL:
            case BAM_CDIFF:
            case BAM_CMATCH:
            case BAM_CINS:
            case BAM_CSOFT_CLIP:
                query_len += r->cigar.lens[i];
                break;

            default:
                break;
        }
    }

    if (query_len != r->seq.n) {
        quip_error("Cigar operations do not account for full read length.");
        /* quip_error is presumably fatal; if it ever returns, stop here
         * rather than encode out of bounds. */
        return;
    }

    str_copy(&E->tmpseq, &r->seq);
    if (r->strand) {
        str_revcomp(E->tmpseq.s, E->tmpseq.n);
    }

    /* encode N mask */
    reserve_nmask(E, r->seq.n);
    for (i = 0; i < r->seq.n; ++i) {
        dist2_encode(E->ac, &E->d_nmask[i],
            E->tmpseq.s[i] == 'N' ? 1 : 0);
    }

    uint32_t ref_pos   = r->pos;
    uint32_t read_pos  = 0;

    size_t j; /* position within the cigar op */

    kmer_t x; /* read nucleotide */
    kmer_t y; /* reference nucleotide */

    for (i = 0; i < r->cigar.n; ++i) {
        switch (r->cigar.ops[i]) {
            case BAM_CEQUAL:
            case BAM_CDIFF:
            case BAM_CMATCH:
                for (j = 0; j < r->cigar.lens[i]; ++j, ++read_pos, ++ref_pos) {
                    if (E->tmpseq.s[read_pos] == 'N') continue;

                    x = chartokmer[E->tmpseq.s[read_pos]];
                    y = twobit_get(refseq, ref_pos);

                    if (x == y) {
                        dist2_encode(E->ac, &E->d_ref_match, SEQENC_REF_MATCH);
                    }
                    else {
                        dist2_encode(E->ac, &E->d_ref_match, SEQENC_REF_MISMATCH);
                        dist4_encode(E->ac, &E->d_ref_ins_nuc, x);
                    }
                }
                break;

            case BAM_CINS:
                for (j = 0; j < r->cigar.lens[i]; ++j, ++read_pos) {
                    if (E->tmpseq.s[read_pos] == 'N') continue;
                    dist4_encode(E->ac, &E->d_ref_ins_nuc, chartokmer[E->tmpseq.s[read_pos]]);
                }
                break;

            case BAM_CDEL:
                ref_pos += r->cigar.lens[i];
                break;

            case BAM_CREF_SKIP:
                ref_pos += r->cigar.lens[i];
                break;

            case BAM_CSOFT_CLIP:
                for (j = 0; j < r->cigar.lens[i]; ++j, ++read_pos) {
                    if (E->tmpseq.s[read_pos] == 'N') continue;
                    dist4_encode(E->ac, &E->d_ref_ins_nuc, chartokmer[E->tmpseq.s[read_pos]]);
                }
                break;

            case BAM_CHARD_CLIP:
                ref_pos += r->cigar.lens[i];
                break;

            case BAM_CPAD:
                quip_error("Cigar PAD operation is unsupported.");
                break;
        }
    }

    /* read_pos == r->seq.n here: the loop above consumes exactly the read
     * bases that were summed during validation. */
}