Пример #1
0
pool *pet_cvg(const char *pet_fn, const ass_opt *opt) {
	bwa_seq_t *pets, *query, *p, *p2;
	int i = 0, j = 0, k = 0;
	index64 mate_i = 0;
	pool *good_pets = new_pool(), *repeat_pets = new_pool();
	alignarray *align, *align_2;
	alg *a;
	hash_table *ht;

	ht = pe_load_hash(pet_fn);
	pets = ht->seqs;

	fprintf(stderr, "[pe_cvg] Converging RNA-PETs... \n");
	// for (i = n_pets - 1; i >= 0; i -= 2) {
	for (i = 0; i < ht->n_seqs; i += 2) {
		p = &pets[i];
		p2 = &pets[i + 1];
		if (binary_exists(repeat_pets->reads, p) || binary_exists(good_pets->reads, p))
			continue;

		for (k = p->len - opt->ol; k >= 0; k--) {
			query = new_seq(p, opt->ol, k);
			// p_query(query);
			pe_aln_query(query, query->seq, ht, opt->nm + 2, opt->ol, 0, align);
			pool_sort_ins(good_pets, p);
			// p_align(align);

			query = new_seq(p2, opt->ol, k);
			// p_query(query);
			pe_aln_query(query, query->seq, ht, opt->nm + 2, opt->ol, 0, align_2);
			pool_sort_ins(good_pets, p2);
			// p_align(align_2);
			for (j = 0; j < align->len; j++) {
				a = g_ptr_array_index(align, j);
				// The aligned seq is the query itself
				if (a->r_id == atoll(p->name))
					continue;
				mate_i = get_mate_index(a->r_id);
				// If the right mate is also aligned
				if (!aligned(align_2, mate_i))
					continue;
				pool_sort_ins(repeat_pets, &pets[a->r_id]);
				pool_sort_ins(repeat_pets, &pets[mate_i]);
			}
		}
		// p_pool("Good Pets: ", good_pets);
		// p_pool("Repeat Pets: ", repeat_pets);
	}

	fprintf(stderr, "[pet_cvg] Converged to %zd RNA-PETs... \n", (good_pets->n));
	fprintf(stderr, "[pet_cvg] ------------------------------ \n");
	//	p_pool("Good Pets: ", good_pets);
	return good_pets;
}
Пример #2
0
/**
 * Thread entry point: try to correct the low-kmer reads in the index range
 * [d->start, d->end) of d->low_kmer_reads.
 *
 * A read flagged by is_repetitive_q() is marked USED and skipped. A read
 * whose status is not FRESH is skipped. Every remaining read is aligned in
 * both orientations (seq and rseq) against d->ht; if at least 4 alignments
 * are collected, correct_bases() is called on it. The read is then marked
 * TRIED.
 *
 * @param data  pointer to a correct_aux_t describing this thread's work
 * @return always NULL; the function only has side effects on the reads.
 */
static void *correct_thread(void *data) {
	correct_aux_t *d = (correct_aux_t*) data;
	int i = 0;
	bwa_seq_t *s = NULL, *query = NULL, *seqs = d->ht->seqs;
	readarray *low_kmer_reads = d->low_kmer_reads;
	alignarray *aligns = NULL;

	// One alignment array is reused for every read handled by this thread.
	aligns = g_ptr_array_sized_new(N_DEFAULT_ALIGNS);
	for (i = d->start; i < d->end; i++) {
		if (i % 10000 == 0)
			show_msg(__func__,
					"Thread %d correction progress: [%d,%d,%d]... \n", d->tid,
					d->start, i, d->end);
		s = g_ptr_array_index(low_kmer_reads, i);
		if (is_repetitive_q(s)) {
			s->status = USED;
			continue;
		}
		// Only reads still marked FRESH are attempted; anything else
		// (including reads already TRIED) is skipped.
		if (s->status != FRESH)
			continue;
		// NOTE(review): `query` is created and freed but never passed to the
		// aligner below (the full read `s` is used instead) -- confirm whether
		// this allocation can be dropped.
		query = new_seq(s, s->len - 8, 0);
		pe_aln_query(s, s->seq, d->ht, MISMATCHES, s->len, 0, aligns);
		pe_aln_query(s, s->rseq, d->ht, MISMATCHES, s->len, 1, aligns);
		if (aligns->len >= 4)
			correct_bases(seqs, s, aligns, d->tid);
		s->status = TRIED;
		reset_alg(aligns);
		bwa_free_read_seq(1, query);
	}
	free_alg(aligns);
	show_msg(__func__, "Thread %d finished. \n", d->tid);
	// A void * function must return a value; callers reading the thread
	// result would otherwise get an indeterminate pointer.
	return NULL;
}
Пример #3
0
// Reads the file named by `filename` and returns its content as an object.
//
// If file_read() reports a size of -1, the `symb_idx_nothing` symbol is
// returned. Otherwise the result is a sequence object tagged with
// `symb_idx_just`, holding one integer per byte (each byte taken as uint8);
// a size of 0 yields the empty sequence.
OBJ FileRead_P(OBJ filename, generated::ENV &) {
  int size;
  char *path = obj_to_str(filename);
  char *bytes = file_read(path, size);

  // The path string is only needed for the read itself.
  delete_byte_array(path, strlen(path)+1);

  if (size == -1)
    return make_symb(symb_idx_nothing);

  OBJ seq_obj = make_empty_seq();

  // Nothing to copy: hand back the empty sequence.
  if (size <= 0)
    return make_tag_obj(symb_idx_just, seq_obj);

  SEQ_OBJ *seq = new_seq(size);
  const uint32 count = (uint32) size;
  for (uint32 idx = 0 ; idx != count ; ++idx)
    seq->buffer[idx] = make_int((uint8) bytes[idx]);

  // The raw buffer is released once every byte has been converted.
  delete_byte_array(bytes, size);
  seq_obj = make_seq(seq, size);

  return make_tag_obj(symb_idx_just, seq_obj);
}
Пример #4
0
void Caller::create_node_calls(const NodePileup& np) {
    
    int n = _node->sequence().length();
    const string& seq = _node->sequence();
    int cur = 0;
    int cat = call_cat(_node_calls[cur]);
    NodePair prev_nodes(-1, -1);

    // scan contiguous chunks of a node with same call
    // (note: snps will always be 1-base -- never merged)
    for (int next = 1; next <= n; ++next) {
        int next_cat = next == n ? -1 : call_cat(_node_calls[next]);
        if (cat == 2 || cat != next_cat) {
            NodePair new_nodes(-1, -1);
            bool secondary_snp = false;

            // process first genotype if it's not missing
            if (_node_calls[cur].first == '.') {
                // add single node for stretch of reference node
                string new_seq = seq.substr(cur, next - cur);
                new_nodes.first = ++_max_id;
                _call_graph.create_node(new_seq, new_nodes.first);
            } else if (_node_calls[cur].first != '-') {
                // add snp node
                assert(next - cur == 1);
                string new_seq(1, _node_calls[cur].first);
                new_nodes.first = ++_max_id;
                _call_graph.create_node(new_seq, new_nodes.first);
                create_snp_path(new_nodes.first, secondary_snp);
                secondary_snp = true;
            }

            // process second genotype if difference from first
            if (_node_calls[cur].second != _node_calls[cur].first) {
                if (_node_calls[cur].second == '.') {
                    // add single node for stretch of reference node
                    string new_seq = seq.substr(cur, next - cur);
                    new_nodes.second = ++_max_id;
                    _call_graph.create_node(new_seq, new_nodes.second);
                } else if (_node_calls[cur].second != '-') {
                    // add snp node
                    assert(next - cur == 1);
                    string new_seq(1, _node_calls[cur].second);
                    new_nodes.second = ++_max_id;
                    _call_graph.create_node(new_seq, new_nodes.second);
                    create_snp_path(new_nodes.second, secondary_snp);
                }
            }
            
            // update maps if new node abuts end of original node
            // so that edges can be updated later on:
            if (new_nodes.first != -1 || new_nodes.second != -1) {
                if (cur == 0) {
                    _start_node_map[_node->id()] = new_nodes;
                }
                if (next == n) {
                    _end_node_map[_node->id()] = new_nodes;
                }
            }

            // add edges
            if (prev_nodes.first != -1 && new_nodes.first != -1) {
                _call_graph.create_edge(prev_nodes.first, new_nodes.first);
            }
            if (prev_nodes.first != -1 && new_nodes.second != -1) {
                _call_graph.create_edge(prev_nodes.first, new_nodes.second);
            }
            if (prev_nodes.second != -1 && new_nodes.first != -1) {
                _call_graph.create_edge(prev_nodes.second, new_nodes.first);
            }
            if (prev_nodes.second != -1 && new_nodes.second != -1) {
                _call_graph.create_edge(prev_nodes.second, new_nodes.second);
            }

            // shift right
            cur = next;
            cat = next_cat;
            prev_nodes = new_nodes;
        }
    }
}