// reproject shapes shapes_v shape_proj( const shapes_v* shapes, const char* from, const char* to){ projPJ old_prj = pj_init_plus(from); projPJ new_prj = pj_init_plus(to); shapes_v shapes_prj; kv_init(shapes_prj); shapes_prj.min = (point_t){ INFINITY, INFINITY}; shapes_prj.max = (point_t){-INFINITY,-INFINITY}; double k = 0.0; for(uint32_t s=0; s<shapes->n; s++) { shape_v* shape = &shapes->a[s]; shape_v shape_prj; kv_init(shape_prj); for(uint32_t p=0; p<shape->n; p++) { point_t pnt = shape->a[p]; pnt.x *= DEG_TO_RAD; pnt.y *= DEG_TO_RAD; int32_t err = pj_transform(old_prj, new_prj, 1, 0, &pnt.x, &pnt.y, NULL); if (err) { fprintf(stderr, "ERR%d %s\n", err, pj_strerrno(err)); continue; } // cumulitive average for center if(k>=1.0) { shapes_prj.center.x = (k-1.0)/k*shapes_prj.center.x + pnt.x/k; shapes_prj.center.y = (k-1.0)/k*shapes_prj.center.y + pnt.y/k; }else { shapes_prj.center.x = pnt.x; shapes_prj.center.y = pnt.y; } k+=1.0; // new bounds if(pnt.x>shapes_prj.max.x) shapes_prj.max.x = pnt.x; if(pnt.y>shapes_prj.max.y) shapes_prj.max.y = pnt.y; if(pnt.x<shapes_prj.min.x) shapes_prj.min.x = pnt.x; if(pnt.y<shapes_prj.min.y) shapes_prj.min.y = pnt.y; kv_push(point_t, shape_prj, pnt); } kv_push(shape_v, shapes_prj, shape_prj); } pj_free(old_prj); pj_free(new_prj); return shapes_prj; }
shapes_v shape_load_globe(const char* filename) { shapes_v globe; kv_init(globe); double adfMinBound[4], adfMaxBound[4]; // Read file SHPHandle hSHP = SHPOpen( filename, "rb" ); if(hSHP == NULL) goto end_loading; // Print shape bounds int country_count, shapes_vype; SHPGetInfo( hSHP, &country_count, &shapes_vype, adfMinBound, adfMaxBound ); fprintf(stderr, "Load %d countries\n", country_count); // Iterate through countries for(int i = 0; i < country_count; i++ ) { SHPObject *shp = SHPReadObject(hSHP, i); if(shp == NULL) goto end_loading; if(shp->nParts == 0) continue; // first part starts at point 0 if(shp->panPartStart[0] != 0) goto end_loading; // collect parts of country uint32_t parts = shp->nParts; for (uint32_t j=0; j<parts; j++) { // start index uint32_t s = shp->panPartStart[j]; // end index - start of next minus one, or end uint32_t e = (j+1 < parts) ? shp->panPartStart[j+1]: shp->nVertices; shape_v shape; kv_init(shape); // collect points of part for(uint32_t i=s; i<e; i++){ point_t p = (point_t){shp->padfX[i], shp->padfY[i]}; kv_push(point_t, shape, p); } kv_push(shape_v, globe, shape); } SHPDestroyObject( shp ); } SHPClose( hSHP ); end_loading: return globe; }
/*
 * Append the three corner points a, b, c (in that order) to the point list
 * `p` and return a triangle made of the three indices they were stored at.
 */
triangle_t triangle_new(points_v* p, point_t* a, point_t* b, point_t* c) {
    const uint32_t base = p->n; /* index the first corner will occupy */
    point_t* corners[3] = {a, b, c};

    for (int i = 0; i < 3; i++) {
        kv_push(point_t, *p, *corners[i]);
    }
    return (triangle_t){base, base + 1, base + 2};
}
void mem_pestat(const mem_opt_t *opt, int64_t l_pac, int n, const mem_alnreg_v *regs, mem_pestat_t pes[4]) { int i, d, max; uint64_v isize[4]; memset(pes, 0, 4 * sizeof(mem_pestat_t)); memset(isize, 0, sizeof(kvec_t(int)) * 4); /* infer based on the first reg from the two reads */ for (i = 0; i < n>>1; ++i) { int dir; int64_t is; mem_alnreg_v *r[2]; r[0] = (mem_alnreg_v*)®s[i<<1|0]; r[1] = (mem_alnreg_v*)®s[i<<1|1]; if (r[0]->n == 0 || r[1]->n == 0) continue; if (cal_sub(opt, r[0]) > MIN_RATIO * r[0]->a[0].score) continue; if (cal_sub(opt, r[1]) > MIN_RATIO * r[1]->a[0].score) continue; if (r[0]->a[0].rid != r[1]->a[0].rid) continue; // not on the same chr if (r[0]->a[0].bss != r[1]->a[0].bss) continue; /* not on the same bisulfite strand */ dir = mem_infer_dir(l_pac, r[0]->a[0].rb, r[1]->a[0].rb, &is); if (is && is <= opt->max_ins) kv_push(uint64_t, isize[dir], is); } if (bwa_verbose >= 3) fprintf(stderr, "[M::%s] # candidate unique pairs for (FF, FR, RF, RR): (%ld, %ld, %ld, %ld)\n", __func__, isize[0].n, isize[1].n, isize[2].n, isize[3].n); for (d = 0; d < 4; ++d) { // TODO: this block is nearly identical to the one in bwtsw2_pair.c. It would be better to merge these two. mem_pestat_t *r = &pes[d]; uint64_v *q = &isize[d]; int p25, p50, p75, x; if (q->n < MIN_DIR_CNT) { fprintf(stderr, "[M::%s] skip orientation %c%c as there are not enough pairs\n", __func__, "FR"[d>>1&1], "FR"[d&1]); r->failed = 1; free(q->a); continue; } else fprintf(stderr, "[M::%s] analyzing insert size distribution for orientation %c%c...\n", __func__, "FR"[d>>1&1], "FR"[d&1]);
static void mem_collect_intv(const SalmonOpts& sopt, const mem_opt_t *opt, SalmonIndex* sidx, int len, const uint8_t *seq, smem_aux_t *a) { const bwt_t* bwt = sidx->bwaIndex()->bwt; int i, k, x = 0, old_n; int start_width = (opt->flag & MEM_F_SELF_OVLP)? 2 : 1; int split_len = (int)(opt->min_seed_len * opt->split_factor + .499); a->mem.n = 0; // first pass: find all SMEMs if (sidx->hasAuxKmerIndex()) { KmerIntervalMap& auxIdx = sidx->auxIndex(); uint32_t klen = auxIdx.k(); while (x < len) { if (seq[x] < 4) { // Make sure there are at least k bases left if (len - x < klen) { x = len; continue; } // search for this key in the auxiliary index KmerKey kmer(const_cast<uint8_t*>(&(seq[x])), klen); auto it = auxIdx.find(kmer); // if we can't find it, move to the next key if (it == auxIdx.end()) { ++x; continue; } // otherwise, start the search using the initial interval @it->second from the hash int xb = x; x = bwautils::bwt_smem1_with_kmer(bwt, len, seq, x, start_width, it->second, &a->mem1, a->tmpv); for (i = 0; i < a->mem1.n; ++i) { bwtintv_t *p = &a->mem1.a[i]; int slen = (uint32_t)p->info - (p->info>>32); // seed length if (slen >= opt->min_seed_len) kv_push(bwtintv_t, a->mem, *p); } } else ++x; }
/*
 * Split `v` into its decimal digits, least-significant digit first.
 * 0 yields the single digit 0. The sign is ignored: a negative value yields
 * the digits of its magnitude.
 *
 * The vector's storage is grown by kv_push; presumably the caller releases
 * it with kv_destroy.
 */
vec_u8_t digify(int v) {
    vec_u8_t ret;
    kv_init(ret);

    /* Work on the magnitude as unsigned: `%` on a negative int gives a
     * negative remainder (which is not a digit), and negating INT_MIN as a
     * signed int would overflow. */
    unsigned int rest = (v < 0) ? 0u - (unsigned int)v : (unsigned int)v;

    do {
        kv_push(uint8_t, ret, (uint8_t)(rest % 10u));
        rest /= 10u;
    } while (rest != 0u);

    return ret;
}
/* Append a TValue to the block's locals.
 * Returns the index of the new local, or -1
 * (adding nothing) if an equal value is already
 * stored there. */
int BijouBlock_push_local(BijouBlock *b, TValue v)
{
    size_t idx = 0;

    while (idx < kv_size(b->locals)) {
        if (TValue_equal(kv_A(b->locals, idx), v)) {
            return -1;
        }
        idx++;
    }

    kv_push(TValue, b->locals, v);
    return kv_size(b->locals) - 1;
}
/* Append `poly` to the shape's polygon list and return the index it got. */
uint32_t shape_add_poly(shape_t* shape, poly_t poly) {
    assert(shape != NULL);

    /* the new element lands at the current size */
    const uint32_t slot = kv_size(shape->polys);
    kv_push(poly_t, shape->polys, poly);
    return slot;
}
/* Append `point` to the shape's point list and return the index it got. */
uint32_t shape_add_point(shape_t* shape, point_t point) {
    assert(shape != NULL);

    /* the new element lands at the current size */
    const uint32_t slot = kv_size(shape->points);
    kv_push(point_t, shape->points, point);
    return slot;
}
/* Append a TValue to the block's constant table.
 * Returns the index of the new constant, or -1
 * (adding nothing) when an equal value is already
 * in the table. */
int BijouBlock_push_const(BijouBlock *b, TValue v)
{
    const size_t count = kv_size(b->k);
    size_t pos;

    for (pos = 0; pos != count; pos++) {
        if (TValue_equal(kv_A(b->k, pos), v)) {
            return -1;
        }
    }

    kv_push(TValue, b->k, v);
    return kv_size(b->k) - 1;
}
mesh_t* mesh_create(int x, int y) { shape_t* s = shape_new(); loop_t l; kv_init(l); uint32_t steps = 50; for(uint32_t i=0; i<steps; i++) { double dr = 150.0 * cos(2*3.1415926*i/steps*5); point_t p = (point_t) { x/2.0 + (200-dr) * cos(dr/250.0 + 2.0*3.1415926*i/steps), y/2.0 + (200-dr) * sin(dr/250.0 + 2.0*3.1415926*i/steps)}; uint32_t pid = shape_add_point(s, p); kv_push(uint32_t, l, pid); } kv_push(loop_t, s->loops, l); return shape_triangulate(s); }
/*
 * Read records from `seq` into a new vector, copying each one with
 * kseq_copy. Stops after `n_wanted` records, or when kseq_read returns a
 * value <= 0. An `n_wanted` of 0 means "no limit".
 */
static kseq_v read_seqs(kseq_t *seq, size_t n_wanted)
{
    kseq_v result;
    kv_init(result);

    const int unlimited = (n_wanted == 0);
    size_t n_read = 0;

    while (unlimited || n_read < n_wanted) {
        if (kseq_read(seq) <= 0) {
            break;
        }
        kseq_t copy;
        kseq_copy(&copy, seq);
        kv_push(kseq_t, result, copy);
        n_read++;
    }

    return result;
}
static void mem_collect_intv(const mem_opt_t *opt, const bwt_t *bwt, int len, const uint8_t *seq, smem_aux_t *a) { int i, k, x = 0, old_n; int start_width = (opt->flag & MEM_F_SELF_OVLP)? 2 : 1; int split_len = (int)(opt->min_seed_len * opt->split_factor + .499); a->mem.n = 0; // first pass: find all SMEMs while (x < len) { if (seq[x] < 4) { x = bwt_smem1(bwt, len, seq, x, start_width, &a->mem1, a->tmpv); for (i = 0; i < a->mem1.n; ++i) { bwtintv_t *p = &a->mem1.a[i]; int slen = (uint32_t)p->info - (p->info>>32); // seed length if (slen >= opt->min_seed_len) kv_push(bwtintv_t, a->mem, *p); } } else ++x;
static void sa_gen1(const rld_t *e, fmsa_t *sa, int64_t k, uint64_v *buf) { int c, mask = (1<<sa->ss) - 1; uint64_t ok[e->asize1], k0 = k, l = 0; size_t i; buf->n = 0; do { ++l; c = rld_rank1a(e, k + 1, ok); k = e->cnt[c] + ok[c] - 1; if (c) { if (((k - e->mcnt[1]) & mask) == 0) { int64_t x = (k - e->mcnt[1]) >> sa->ss; sa->ssa[x] = l; kv_push(uint64_t, *buf, x); } } else sa->r2i[k] = k0; } while (c);
/*
 * Pick alignment regions of `a` based on a pairwise F-measure.
 *
 * For every pair (i < j) of regions the two query intervals [qb, qe) are
 * ordered by start, and TP/FN/FP/TN counts are derived from how they tile
 * or overlap a read of length `l_seq`. Pairs where one interval contains
 * the other are skipped. The harmonic mean of sensitivity and specificity
 * is stored as FMEAS, and pairs with FMEAS > 0.95 become candidates, which
 * are then sorted with the ff_mem_flt ordering (defined elsewhere; the code
 * below treats element 0 as the best candidate).
 *
 * The result always starts with a.a[0]. After that:
 *   mode != 0: every candidate pair that involves region 0 contributes its
 *              partner (preceded by another copy of a.a[0] for all but the
 *              first such pair); then the pairs tied for the top FMEAS that
 *              do not involve region 0 contribute both members.
 *   mode == 0: only pairs tied for the top FMEAS are used; pairs involving
 *              region 0 contribute the partner, other pairs both members.
 *
 * Returns an empty vector when `n` is 0 or `a` holds no regions.
 */
mem_alnreg_v mem_fmeas_fliter_se(mem_alnreg_v a, int n, int l_seq, int mode)
{
    mem_alnreg_v aa;
    int i, j;
    kvec_t(FF_t) k_ff_t;
    kv_init(k_ff_t);
    kv_init(aa);

    /* a.a[0] is read unconditionally further down, so an empty input has
     * to stop here as well */
    if (n == 0 || a.n == 0) return aa;

    /* calculate FMEAS value for every pair */
    for (i = 0; i < a.n; i++) {
        mem_alnreg_t *p_ar = a.a + i;
        for (j = i + 1; j < a.n; j++) {
            FF_t tmp;
            mem_alnreg_t *q_ar = a.a + j;
            double sens, spec;
            int FN = 0, TP = 0, TN = 0, FP = 0;
            int A, B, C, D;

            /* [A,B] is the interval that starts first (ties: the longer
             * one), [C,D] the other; both ends inclusive */
            if (p_ar->qb < q_ar->qb || (p_ar->qb == q_ar->qb && p_ar->qe >= q_ar->qe)) {
                A = p_ar->qb; B = p_ar->qe - 1;
                C = q_ar->qb; D = q_ar->qe - 1;
            } else {
                A = q_ar->qb; B = q_ar->qe - 1;
                C = p_ar->qb; D = p_ar->qe - 1;
            }

            if (B < C) {
                /* disjoint intervals */
                TP = B - A + D - C + 2;
                FN = l_seq - D - 1 + A + C - B - 1;
                TN = l_seq;
                FP = 0;
            } else if (D <= B) {
                /* contain */
                continue;
            } else {
                /* partial overlap */
                TP = D - A + 1;
                FN = l_seq - D - 1 + A;
                FP = B - C + 1;
                TN = l_seq - FP;
            }

            sens = (double)TP/(double)(TP+FN);
            spec = (double)TN/(double)(TN+FP);
            tmp.FMEAS = (2*spec*sens)/(spec+sens);
            tmp.score = p_ar->score + q_ar->score;
            tmp.x = i, tmp.y = j;
            if (tmp.FMEAS > 0.95) kv_push(FF_t, k_ff_t, tmp);
        }
    }

    ks_introsort(ff_mem_flt, k_ff_t.n, k_ff_t.a);
    kv_push(mem_alnreg_t, aa, a.a[0]);

    if (k_ff_t.n == 0) {
        kv_destroy(k_ff_t);
        return aa;
    }

    double max_feas = k_ff_t.a[0].FMEAS;

    if (mode) {
        int cnt = 0;
        for (i = 0; i < kv_size(k_ff_t); i++) {
            FF_t p = kv_A(k_ff_t, i);
            if (p.x == 0 && cnt == 0) {
                kv_push(mem_alnreg_t, aa, a.a[p.y]);
                cnt = 1;
            } else if (p.x == 0) {
                kv_push(mem_alnreg_t, aa, a.a[0]);
                kv_push(mem_alnreg_t, aa, a.a[p.y]);
            }
        }
        for (i = 0; i < kv_size(k_ff_t); i++) {
            FF_t p = kv_A(k_ff_t, i);
            if (max_feas != p.FMEAS) break;
            if (p.x == 0) continue;
            kv_push(mem_alnreg_t, aa, a.a[p.x]);
            kv_push(mem_alnreg_t, aa, a.a[p.y]);
        }
    } else {
        /* NOTE(review): cnt is never set in this branch, so the
         * `else if (p.x == 0)` arm below cannot run. The mode != 0 branch
         * sets cnt = 1 after the first hit; confirm whether the same was
         * intended here. Behaviour is left as it was. */
        int cnt = 0;
        for (i = 0; i < kv_size(k_ff_t); i++) {
            FF_t p = kv_A(k_ff_t, i);
            if (max_feas != p.FMEAS) break;
            if (p.x == 0 && cnt == 0) {
                kv_push(mem_alnreg_t, aa, a.a[p.y]);
                continue;
            } else if (p.x == 0) {
                kv_push(mem_alnreg_t, aa, a.a[0]);
                kv_push(mem_alnreg_t, aa, a.a[p.y]);
                continue;
            }
            kv_push(mem_alnreg_t, aa, a.a[p.x]);
            kv_push(mem_alnreg_t, aa, a.a[p.y]);
        }
    }

    kv_destroy(k_ff_t);
    return aa;
}
static aln_v align_read(const kseq_t *read, const kseq_v targets, const align_config_t *conf) { kseq_t *r; const int32_t read_len = read->seq.l; aln_v result; kv_init(result); kv_resize(aln_t, result, kv_size(targets)); uint8_t *read_num = calloc(read_len, sizeof(uint8_t)); for(size_t k = 0; k < read_len; ++k) read_num[k] = conf->table[(int)read->seq.s[k]]; // Align to each target kswq_t *qry = NULL; for(size_t j = 0; j < kv_size(targets); j++) { // Encode target r = &kv_A(targets, j); uint8_t *ref_num = calloc(r->seq.l, sizeof(uint8_t)); for(size_t k = 0; k < r->seq.l; ++k) ref_num[k] = conf->table[(int)r->seq.s[k]]; aln_t aln; aln.target_idx = j; aln.loc = ksw_align(read_len, read_num, r->seq.l, ref_num, conf->m, conf->mat, conf->gap_o, conf->gap_e, KSW_XSTART, &qry); ksw_global(aln.loc.qe - aln.loc.qb + 1, &read_num[aln.loc.qb], aln.loc.te - aln.loc.tb + 1, &ref_num[aln.loc.tb], conf->m, conf->mat, conf->gap_o, conf->gap_e, 50, /* TODO: Magic number - band width */ &aln.n_cigar, &aln.cigar); aln.nm = 0; size_t qi = aln.loc.qb, ri = aln.loc.tb; for(size_t k = 0; k < aln.n_cigar; k++) { const int32_t oplen = bam_cigar_oplen(aln.cigar[k]), optype = bam_cigar_type(aln.cigar[k]); if(optype & 3) { // consumes both - check for mismatches for(size_t j = 0; j < oplen; j++) { if(UNLIKELY(read_num[qi + j] != ref_num[ri + j])) aln.nm++; } } else { aln.nm += oplen; } if(optype & 1) qi += oplen; if(optype & 2) ri += oplen; } kv_push(aln_t, result, aln); free(ref_num); } free(qry); free(read_num); ks_introsort(dec_score, kv_size(result), result.a); return result; }
countries_v shape_load_countries(const char* filename) { countries_v countries; kv_init(countries); double adfMinBound[4], adfMaxBound[4]; // Read file SHPHandle hSHP = SHPOpen( filename, "rb" ); if(hSHP == NULL) goto end_loading; // Print shape bounds int country_count, shapes_vype; SHPGetInfo( hSHP, &country_count, &shapes_vype, adfMinBound, adfMaxBound ); fprintf(stderr, "Load %d countries\n", country_count); // Iterate through countries for(int i = 0; i < country_count; i++ ) { SHPObject *shp = SHPReadObject(hSHP, i); if(shp == NULL) goto end_loading; if(shp->nParts == 0) continue; // first part starts at point 0 if(shp->panPartStart[0] != 0) goto end_loading; // collect parts of country shapes_v shapes; kv_init(shapes); shapes.min = (point_t){shp->dfXMin, shp->dfYMin}; shapes.max = (point_t){shp->dfXMax, shp->dfYMax}; uint32_t parts = shp->nParts; double k = 0.0; for (uint32_t j=0; j<parts; j++) { // start index uint32_t s = shp->panPartStart[j]; // end index - start of next minus one, or end uint32_t e = (j+1 < parts) ? shp->panPartStart[j+1]: shp->nVertices; shape_v shape; kv_init(shape); // collect points of part for(uint32_t i=s; i<e; i++){ point_t p = (point_t){shp->padfX[i], shp->padfY[i]}; kv_push(point_t, shape, p); // cumulitive average for center if(k>=1.0) { shapes.center.x = (k-1.0)/k*shapes.center.x + p.x/k; shapes.center.y = (k-1.0)/k*shapes.center.y + p.y/k; }else { shapes.center.x = p.x; shapes.center.y = p.y; } k+=1.0; } kv_push(shape_v, shapes, shape); } SHPDestroyObject( shp ); kv_push(shapes_v, countries, shapes); } SHPClose( hSHP ); end_loading: return countries; }
static aln_v align_read(const kseq_t *read, const kseq_v targets, const size_t n_extra_targets, const kseq_v *extra_targets, const align_config_t *conf) { kseq_t *r; const int32_t read_len = read->seq.l; aln_v result; kv_init(result); kv_resize(aln_t, result, kv_size(targets)); uint8_t *read_num = calloc(read_len, sizeof(uint8_t)); for (int k = 0; k < read_len; ++k) read_num[k] = conf->table[(int)read->seq.s[k]]; // Align to each target kswq_t *qry = NULL; int min_score = -1000; int max_score = 0; for (size_t j = 0; j < kv_size(targets); j++) { // Encode target r = &kv_A(targets, j); aln_t aln = align_read_against_one(r, read_len, read_num, &qry, conf, min_score); if (aln.cigar != NULL) { max_score = aln.loc.score > max_score ? aln.loc.score : max_score; min_score = (aln.loc.score - conf->max_drop) > min_score ? (aln.loc.score - conf->max_drop) : min_score; kv_push(aln_t, result, aln); } } /* If no alignments to the first set of targets reached the minimum score, * abort. */ if (max_score < conf->min_score) { // kv_size returns the n field of a kvec_t, which is a size_t. for (size_t i = 0; i < kv_size(result); i++) free(kv_A(result, i).cigar); kv_size(result) = 0; free(qry); free(read_num); return result; } drop_low_scores(&result, 0, conf->max_drop); // Extra references - qe points to the exact end of the sequence int qend = kv_A(result, 0).loc.qe + 1; int read_len_trunc = read_len - qend; uint8_t *read_num_trunc = read_num + qend; free(qry); qry = NULL; if (read_len_trunc > 2) { for (size_t i = 0; i < n_extra_targets; i++) { const size_t idx = n_extra_targets - i - 1; min_score = -1000; const size_t init_count = kv_size(result); for (size_t j = 0; j < kv_size(extra_targets[idx]); j++) { r = &kv_A(extra_targets[idx], j); aln_t aln = align_read_against_one(r, read_len_trunc, read_num_trunc, &qry, conf, min_score); if (aln.cigar != NULL) { min_score = (aln.loc.score - conf->max_drop) > min_score ? 
(aln.loc.score - conf->max_drop) : min_score; aln.loc.qb += qend; aln.loc.qe += qend; kv_push(aln_t, result, aln); } } drop_low_scores(&result, init_count, conf->max_drop); /* Truncate */ const int alen = kv_A(result, init_count).loc.qe - kv_A(result, init_count).loc.qb; read_len_trunc = read_len_trunc - alen; free(qry); qry = NULL; } } free(qry); free(read_num); return result; }
/* Append an instruction to the block's code.
 * Returns the index the instruction was stored at. */
int BijouBlock_push_instruction(BijouBlock *b, bInst inst)
{
    /* the new instruction lands at the current size */
    const int slot = kv_size(b->code);

    kv_push(bInst, b->code, inst);
    return slot;
}