Example #1
0
/**
 * Align one read and return its best alignment.
 *
 * @param bases        the read's bases
 * @param read_length  number of bases in the read
 * @return a heap-allocated Alignment (created with `new`; the caller owns it),
 *         or NULL when the search reports no alignments.
 */
Alignment* BWA::generate_single_alignment(const char* bases, const unsigned read_length) {
  bwa_seq_t* read = create_sequence(bases,read_length);
  Alignment* best = NULL;

  // Search for candidate paths; results land in read->aln / read->n_aln.
  bwa_cal_sa_reg_gap(0,bwts,1,read,&options);

  if(read->n_aln != 0) {
    // The search clobbers the bases / read length, so restore them first.
    copy_bases_into_sequence(read,bases,read_length);

    // Select the best path and push its details into the read record.
    bwa_aln2seq(read->n_aln,read->aln,read);

    // Turn the annotated read record into the final alignment object.
    best = new Alignment;
    *best = generate_final_alignment_from_sequence(read);
  }

  // The temporary read record is released on every path.
  bwa_free_read_seq(1,read);

  return best;
}
Example #2
0
/*
 * Load the reads of a library, compute its "solid" reads and write their
 * names, one per line, to "<opt->lib_name>.solid".
 *
 * fa_fn: path of the read library handed to load_reads().
 * opt:   cleaning options; lib_name and stop_thre are read here, and the
 *        whole structure is forwarded to calc_solid_reads().
 *
 * The output file name is built with a bounded snprintf() and each read name
 * is written straight to the file, so neither a long library name nor a long
 * read name can overrun a fixed-size buffer.
 */
void pe_clean_core(char *fa_fn, clean_opt *opt) {
	bwa_seq_t *seqs = NULL, *s = NULL;
	int n_seqs = 0, name_len = 0;
	unsigned int i = 0;
	char *solid = NULL;
	FILE *solid_file = NULL;
	clock_t t = clock();
	GPtrArray *solid_reads = NULL;

	show_debug_msg(__func__, "Loading library %s...\n", fa_fn);
	seqs = load_reads(fa_fn, &n_seqs);

	show_debug_msg(__func__, "Saving k-mer frequencies: %.2f sec...\n",
			(float) (clock() - t) / CLOCKS_PER_SEC);

	solid = (char*) malloc(BUFSIZE);
	if (solid == NULL) {
		show_debug_msg(__func__, "Out of memory building the output file name.\n");
		bwa_free_read_seq(n_seqs, seqs);
		return;
	}
	// snprintf reports the length it wanted to write; anything >= BUFSIZE
	// means the name was truncated and must not be used.
	name_len = snprintf(solid, BUFSIZE, "%s.solid", opt->lib_name);
	if (name_len < 0 || name_len >= BUFSIZE) {
		show_debug_msg(__func__, "Output file name for library %s is too long.\n",
				opt->lib_name);
		free(solid);
		bwa_free_read_seq(n_seqs, seqs);
		return;
	}

	solid_file = xopen(solid, "w");
	solid_reads = calc_solid_reads(seqs, n_seqs, opt, n_seqs * opt->stop_thre,
			0, 1);
	for (i = 0; i < solid_reads->len; i++) {
		s = g_ptr_array_index(solid_reads, i);
		fprintf(solid_file, "%s\n", s->name);
	}

	free(solid);
	g_ptr_array_free(solid_reads, TRUE);
	bwa_free_read_seq(n_seqs, seqs);
	fclose(solid_file);
}
Example #3
0
/*
 * Thread entry point: tries to correct the low-k-mer reads with indices
 * [d->start, d->end) of d->low_kmer_reads.
 *
 * data: pointer to a correct_aux_t describing this thread's slice.
 * Returns NULL always. The original fell off the end of a function declared
 * to return void *, which is undefined behaviour if the result is read.
 */
static void *correct_thread(void *data) {
	correct_aux_t *d = (correct_aux_t*) data;
	int i = 0;
	bwa_seq_t *s = NULL, *query = NULL, *seqs = d->ht->seqs;
	readarray *low_kmer_reads = d->low_kmer_reads;
	alignarray *aligns = NULL;

	aligns = g_ptr_array_sized_new(N_DEFAULT_ALIGNS);
	for (i = d->start; i < d->end; i++) {
		if (i % 10000 == 0)
			show_msg(__func__,
					"Thread %d correction progress: [%d,%d,%d]... \n", d->tid,
					d->start, i, d->end);
		s = g_ptr_array_index(low_kmer_reads, i);
		// Repetitive reads are marked USED and never corrected.
		if (is_repetitive_q(s)) {
			s->status = USED;
			continue;
		}
		// Only reads still marked FRESH are processed; everything else is skipped.
		if (s->status != FRESH)
			continue;
		// NOTE(review): query is created and freed but never passed to anything
		// below; kept in case new_seq() has side effects -- confirm and drop.
		query = new_seq(s, s->len - 8, 0);
		// Collect hits for the read (s->seq) and for s->rseq into the same array.
		pe_aln_query(s, s->seq, d->ht, MISMATCHES, s->len, 0, aligns);
		pe_aln_query(s, s->rseq, d->ht, MISMATCHES, s->len, 1, aligns);
		// Correction is attempted only when at least 4 hits were collected.
		if (aligns->len >= 4)
			correct_bases(seqs, s, aligns, d->tid);
		s->status = TRIED;
		reset_alg(aligns);
		bwa_free_read_seq(1, query);
	}
	free_alg(aligns);
	show_msg(__func__, "Thread %d finished. \n", d->tid);
	return NULL;
}
Example #4
0
/*
 * Run the gapped SA-interval search on a single read and return a private
 * copy of the resulting alignment intervals.
 *
 * seqs:         the read to align; it is always released with
 *               bwa_free_read_seq() before this function returns.
 * opt:          alignment options (n_threads selects the threaded path when
 *               HAVE_PTHREAD is defined).
 * bwt_0, bwt_1: the two BWT indexes handed to the search.
 *
 * Returns a malloc'ed bwa_alg whose algs array holds n_algs entries; algs is
 * NULL when n_algs is 0 (the original left it uninitialised in that case).
 * Returns NULL if memory for the result cannot be allocated.
 */
bwa_alg *pe_aln_core(bwa_seq_t *seqs, const gap_opt_t *opt,
		const bwt_t *bwt_0, const bwt_t *bwt_1) {
	bwt_t *bwt[2];
	int n_seqs = 1;
	bwa_seq_t *p = seqs;
	bwa_alg *align = (bwa_alg*) malloc(sizeof(bwa_alg));

	if (align == NULL) {
		bwa_free_read_seq(n_seqs, seqs);
		return NULL;
	}
	align->n_algs = 0;
	align->algs = NULL;

	// The search takes non-const index pointers; make the const drop explicit.
	bwt[0] = (bwt_t*) bwt_0;
	bwt[1] = (bwt_t*) bwt_1;

#ifdef HAVE_PTHREAD
	if (opt->n_threads <= 1) { // no multi-threading at all
		bwa_cal_sa_reg_gap(0, bwt, n_seqs, seqs, opt);
	} else {
		pthread_t *tid;
		pthread_attr_t attr;
		thread_aux_t *data;
		int j;
		pthread_attr_init(&attr);
		pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
		data = (thread_aux_t*) calloc(opt->n_threads, sizeof(thread_aux_t));
		tid = (pthread_t*) calloc(opt->n_threads, sizeof(pthread_t));
		for (j = 0; j < opt->n_threads; ++j) {
			data[j].tid = j;
			data[j].bwt[0] = bwt[0];
			data[j].bwt[1] = bwt[1];
			data[j].n_seqs = n_seqs;
			data[j].seqs = seqs;
			data[j].opt = opt;
			pthread_create(&tid[j], &attr, worker, data + j);
		}
		for (j = 0; j < opt->n_threads; ++j)
			pthread_join(tid[j], 0);
		// An initialised attribute object must be destroyed once it is no longer needed.
		pthread_attr_destroy(&attr);
		free(data);
		free(tid);
	}
#else
	bwa_cal_sa_reg_gap(0, bwt, n_seqs, seqs, opt);
#endif

	// Copy the intervals out of the read record, which is freed below.
	if (p->n_aln) {
		align->algs = (bwt_aln1_t*) calloc(p->n_aln, sizeof(bwt_aln1_t));
		if (align->algs == NULL) {
			free(align);
			bwa_free_read_seq(n_seqs, seqs);
			return NULL;
		}
		memcpy(align->algs, p->aln, sizeof(bwt_aln1_t) * p->n_aln);
	}
	align->n_algs = p->n_aln;

	bwa_free_read_seq(n_seqs, seqs);
	return align;
}
Example #5
0
void BWA::find_paths(const char* bases, const unsigned read_length, bwt_aln1_t*& paths, unsigned& num_paths, unsigned& best_path_count, unsigned& second_best_path_count) 
{
  bwa_seq_t* sequence = create_sequence(bases, read_length);

  // Calculate the suffix array interval for each sequence, storing the result in sequence->aln (and sequence->n_aln).
  // This method will destroy the contents of seq and rseq.
  bwa_cal_sa_reg_gap(0,bwts,1,sequence,&options);

  paths = new bwt_aln1_t[sequence->n_aln];
  memcpy(paths,sequence->aln,sequence->n_aln*sizeof(bwt_aln1_t));
  num_paths = sequence->n_aln;

  // Call aln2seq to initialize the type of match present.
  bwa_aln2seq(sequence->n_aln,sequence->aln,sequence);
  best_path_count = sequence->c1;
  second_best_path_count = sequence->c2;

  bwa_free_read_seq(1,sequence);
}
Example #6
0
File: bwtaln.c Project: lukk01/obwa
/*
 * Align every read of fn_fa against the BWT stored in "<prefix>.bwt" and
 * stream the results to stdout in binary form: first the gap_opt_t options,
 * then for each read its interval count followed by the intervals.
 * Progress and timing messages go to stderr.
 *
 * prefix: index prefix; ".bwt" is appended to find the BWT file.
 * fn_fa:  read file passed to bwa_open_reads().
 * opt:    alignment options; n_threads > 1 selects the pthread path when
 *         HAVE_PTHREAD is defined.
 */
void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt)
{
	int i, n_seqs, tot_seqs = 0;
	bwa_seq_t *seqs;
	bwa_seqio_t *ks;
	clock_t t;
	bwt_t *bwt;

	// initialization
	ks = bwa_open_reads(opt->mode, fn_fa);

	{ // load BWT
		char *str = (char*)calloc(strlen(prefix) + 10, 1);
		strcpy(str, prefix); strcat(str, ".bwt");  bwt = bwt_restore_bwt(str);
		free(str);
	}

	// core loop: reads are processed in batches of up to 0x40000
	err_fwrite(opt, sizeof(gap_opt_t), 1, stdout);
	while ((seqs = bwa_read_seq(ks, 0x40000, &n_seqs, opt->mode, opt->trim_qual)) != 0) {
		tot_seqs += n_seqs;
		t = clock();

		fprintf(stderr, "[bwa_aln_core] calculate SA coordinate... ");

#ifdef HAVE_PTHREAD
		if (opt->n_threads <= 1) { // no multi-threading at all
			bwa_cal_sa_reg_gap(0, bwt, n_seqs, seqs, opt);
		} else {
			pthread_t *tid;
			pthread_attr_t attr;
			thread_aux_t *data;
			int j;
			pthread_attr_init(&attr);
			pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
			data = (thread_aux_t*)calloc(opt->n_threads, sizeof(thread_aux_t));
			tid = (pthread_t*)calloc(opt->n_threads, sizeof(pthread_t));
			for (j = 0; j < opt->n_threads; ++j) {
				data[j].tid = j; data[j].bwt = bwt;
				data[j].n_seqs = n_seqs; data[j].seqs = seqs; data[j].opt = opt;
				pthread_create(&tid[j], &attr, worker, data + j);
			}
			for (j = 0; j < opt->n_threads; ++j) pthread_join(tid[j], 0);
			// The attribute object is re-initialised for every batch, so it
			// must be destroyed for every batch as well.
			pthread_attr_destroy(&attr);
			free(data); free(tid);
		}
#else
		bwa_cal_sa_reg_gap(0, bwt, n_seqs, seqs, opt);
#endif

		fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);

		t = clock();
		fprintf(stderr, "[bwa_aln_core] write to the disk... ");
		for (i = 0; i < n_seqs; ++i) {
			bwa_seq_t *p = seqs + i;
			// record layout: 4-byte interval count, then the intervals themselves
			err_fwrite(&p->n_aln, 4, 1, stdout);
			if (p->n_aln) err_fwrite(p->aln, sizeof(bwt_aln1_t), p->n_aln, stdout);
		}
		fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);

		bwa_free_read_seq(n_seqs, seqs);
		fprintf(stderr, "[bwa_aln_core] %d sequences have been processed.\n", tot_seqs);
	}

	// destroy
	bwt_destroy(bwt);
	bwa_seq_close(ks);
}
Example #7
0
void BWA::generate_alignments_from_paths(const char* bases, 
                                         const unsigned read_length, 
                                         bwt_aln1_t* paths, 
                                         const unsigned num_paths, 
                                         const unsigned best_count,
                                         const unsigned second_best_count,
                                         Alignment*& alignments, 
                                         unsigned& num_alignments) 
{
  bwa_seq_t* sequence = create_sequence(bases,read_length);

  sequence->aln = paths;
  sequence->n_aln = num_paths;

  // (Ab)use bwa_aln2seq to propagate values stored in the path out into the sequence itself.
  bwa_aln2seq(sequence->n_aln,sequence->aln,sequence);

  // But overwrite key parts of the sequence in case the user passed back only a smaller subset
  // of the paths.
  sequence->c1 = best_count;
  sequence->c2 = second_best_count;
  sequence->type = sequence->c1 > 1 ? BWA_TYPE_REPEAT : BWA_TYPE_UNIQUE;

  num_alignments = 0;
  for(unsigned i = 0; i < (unsigned)sequence->n_aln; i++)
    num_alignments += (sequence->aln + i)->l - (sequence->aln + i)->k + 1;

  alignments = new Alignment[num_alignments];
  unsigned alignment_idx = 0;

  for(unsigned path_idx = 0; path_idx < (unsigned)num_paths; path_idx++) {
    // Stub in a 'working' path, so that only the desired alignment is local-aligned.
    const bwt_aln1_t* path = paths + path_idx;
    bwt_aln1_t working_path = *path;

    // Loop through all alignments, aligning each one individually.
    for(unsigned sa_idx = path->k; sa_idx <= path->l; sa_idx++) {
      working_path.k = working_path.l = sa_idx;
      sequence->aln = &working_path;
      sequence->n_aln = 1;

      sequence->sa = sa_idx;
      sequence->strand = path->a;
      sequence->score = path->score;

      // Each time through bwa_refine_gapped, seq gets reversed.  Revert the reverse.
      // TODO: Fix the interface to bwa_refine_gapped so its easier to work with.
      if(alignment_idx > 0)
        seq_reverse(sequence->len, sequence->seq, 0);

      // Copy the local alignment data into the alignment object.
      *(alignments + alignment_idx) = generate_final_alignment_from_sequence(sequence);

      alignment_idx++;
    }
  }

  sequence->aln = NULL;
  sequence->n_aln = 0;

  bwa_free_read_seq(1,sequence);
}
Example #8
0
File: bwtaln.c Project: xied75/bwa
/*
 * Align every read of fn_fa against the BWT stored in "<prefix>.bwt" and
 * stream the results to stdout in binary form: first the gap_opt_t options,
 * then for each read its interval count followed by the intervals.
 * Progress, timing and error messages go to stderr so they cannot corrupt
 * the binary stream on stdout.
 *
 * prefix: index prefix; ".bwt" is appended to find the BWT file.
 * fn_fa:  read file passed to bwa_open_reads().
 * opt:    alignment options; n_threads > 1 selects the Win32 thread path
 *         when THREAD is defined.
 *
 * If a worker thread cannot be created, the threads already started are
 * awaited, all resources are released and the function returns early.
 */
void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt)
{
    int i, n_seqs, tot_seqs = 0;
    bwa_seq_t *seqs;
    bwa_seqio_t *ks;
    clock_t t;
    bwt_t *bwt;

    // initialization
    ks = bwa_open_reads(opt->mode, fn_fa);

    { // load BWT
        char *str = (char*)calloc(strlen(prefix) + 10, 1);
        strcpy(str, prefix); strcat(str, ".bwt");  bwt = bwt_restore_bwt(str);
        free(str);
    }

    // core loop: reads are processed in batches of up to 0x40000
    err_fwrite(opt, sizeof(gap_opt_t), 1, stdout);
    while ((seqs = bwa_read_seq(ks, 0x40000, &n_seqs, opt->mode, opt->trim_qual)) != 0) {
        tot_seqs += n_seqs;
        t = clock();

        fprintf(stderr, "[bwa_aln_core] calculate SA coordinate... ");

#ifdef THREAD
        if (opt->n_threads <= 1) { // no multi-threading at all
            bwa_cal_sa_reg_gap(0, bwt, n_seqs, seqs, opt);
        } else {
            DWORD ThreadID;
            HANDLE *tid;
            thread_aux_t *data;
            int j, k;
            data = (thread_aux_t*)calloc(opt->n_threads, sizeof(thread_aux_t));
            tid = (HANDLE*)calloc(opt->n_threads, sizeof(HANDLE));
            for (j = 0; j < opt->n_threads; ++j) {
                data[j].tid = j; data[j].bwt = bwt;
                data[j].n_seqs = n_seqs; data[j].seqs = seqs; data[j].opt = opt;

                //create threads
                tid[j] = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) worker, data + j, 0, &ThreadID);
                if (tid[j] == NULL)
                {
                    // GetLastError() returns a DWORD, hence %lu.
                    fprintf(stderr, "CreateThread error: %lu\n", (unsigned long) GetLastError());
                    // Workers already running still use data/seqs/bwt: wait
                    // for them before anything is released.
                    if (j > 0) WaitForMultipleObjects(j, &tid[0], TRUE, INFINITE);
                    for (k = 0; k < j; ++k) CloseHandle(tid[k]);
                    free(data); free(tid);
                    bwa_free_read_seq(n_seqs, seqs);
                    bwt_destroy(bwt);
                    bwa_seq_close(ks);
                    return;
                }
            }
            // Wait for all threads to terminate
            // NOTE(review): WaitForMultipleObjects accepts at most
            // MAXIMUM_WAIT_OBJECTS handles; confirm n_threads is capped.
            WaitForMultipleObjects(opt->n_threads, &tid[0], TRUE, INFINITE);
            // A finished thread's handle stays open until it is closed explicitly.
            for (k = 0; k < opt->n_threads; ++k) CloseHandle(tid[k]);
            free(data); free(tid);
        }
#else
        bwa_cal_sa_reg_gap(0, bwt, n_seqs, seqs, opt);
#endif

        fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);

        t = clock();
        fprintf(stderr, "[bwa_aln_core] write to the disk... ");
        for (i = 0; i < n_seqs; ++i) {
            bwa_seq_t *p = seqs + i;
            // record layout: 4-byte interval count, then the intervals themselves
            err_fwrite(&p->n_aln, 4, 1, stdout);
            if (p->n_aln) err_fwrite(p->aln, sizeof(bwt_aln1_t), p->n_aln, stdout);
        }
        fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);

        bwa_free_read_seq(n_seqs, seqs);
        fprintf(stderr, "[bwa_aln_core] %d sequences have been processed.\n", tot_seqs);
    }

    // destroy
    bwt_destroy(bwt);
    bwa_seq_close(ks);
}
Example #9
0
/*
 * Read this thread's share of sequences from bs into the reusable array
 * *_seqs and return how many were stored.
 *
 * bs:                input handle; BAM input is not supported and ends the process.
 * iter, tid, thrds:  forwarded unchanged to kseq_read1().
 * _seqs, n_avail:    the record array and its capacity. When *n_avail is 0 a
 *                    new array of READ_SEQ_SIZE zeroed records is allocated
 *                    and both are updated.
 * mode:              BWA_MODE_* flags; the top 8 bits give the barcode length.
 * trim_qual:         quality-trimming threshold; trimming runs when >= 1 and
 *                    the read carries qualities.
 *
 * Returns the number of records filled, or 0 if the barcode length exceeds
 * BWA_MAX_BCLEN. Aborts if more than READ_SEQ_SIZE reads would be stored.
 *
 * Records keep their buffers between calls; the seq/rseq pair is regrown
 * only when a read is longer than the record's current capacity (llen).
 */
int bwa_read_seq1(bwa_seqio_t *bs, int iter, int tid, int thrds, bwa_seq_t **_seqs, int *n_avail, int mode, int trim_qual)
{
	bwa_seq_t *p;
	bwa_seq_t *seqs = *_seqs;
	kseq_t *seq = bs->ks;
	int n_seqs, l, i, is_comp = mode&BWA_MODE_COMPREAD, is_64 = mode&BWA_MODE_IL13, l_bc = mode>>24;
	long n_trimmed = 0, n_tot = 0;
	bool first;

	if (l_bc > BWA_MAX_BCLEN) {
		fprintf(stderr, "[%s] the maximum barcode length is %d.\n", __func__, BWA_MAX_BCLEN);
		return 0;
	}
	if (bs->is_bam) {
		fprintf (stderr, "IS BAM! --- Port bwa_read_bam function\n");
		bwa_free_read_seq(*n_avail, seqs);
		// TODO: port bwa_read_bam(bs, n_needed, n, is_comp, trim_qual); l_bc has no effect for BAM input
		// NOTE(review): exits with status 0 although the input was not processed.
		exit(0);
	}
	if (*n_avail == 0) {
		// Nothing allocated yet, so there is nothing to free first.
		seqs = (bwa_seq_t*)calloc(READ_SEQ_SIZE, sizeof(bwa_seq_t));
		*_seqs = seqs;
		*n_avail = READ_SEQ_SIZE;
	}
	n_seqs = 0;
	first = true;
	while ((l = kseq_read1(seq, iter, tid, thrds, &first)) >= 0) {
		if ((mode & BWA_MODE_CFY) && (seq->comment.l != 0)) {
			// skip reads that are marked to be filtered by Casava
			char *s = strchr(seq->comment.s, ':');
			if (s && *(++s) == 'Y') {
				continue;
			}
		}
		if (is_64 && seq->qual.l)
			for (i = 0; i < seq->qual.l; ++i) seq->qual.s[i] -= 31;
		if (seq->seq.l <= l_bc) continue; // sequence length equals or smaller than the barcode length
		// Refuse to hand out a slot beyond the array before touching it.
		if (n_seqs >= READ_SEQ_SIZE) {
			fprintf (stderr, "READ_SEQ_SIZE not big enough\n");
			abort();
		}
		p = &seqs[n_seqs++];
		init_bwa_seq_t(p);
		if (l_bc) { // then trim barcode
			// Low-quality barcode bases are stored lower-case, the rest upper-case.
			// <ctype.h> functions need an unsigned char value.
			for (i = 0; i < l_bc; ++i)
				p->bc[i] = (seq->qual.l && seq->qual.s[i]-33 < BARCODE_LOW_QUAL)? tolower((unsigned char)seq->seq.s[i]) : toupper((unsigned char)seq->seq.s[i]);
			p->bc[i] = 0;
			for (; i < seq->seq.l; ++i)
				seq->seq.s[i - l_bc] = seq->seq.s[i];
			seq->seq.l -= l_bc; seq->seq.s[seq->seq.l] = 0;
			if (seq->qual.l) {
				for (i = l_bc; i < seq->qual.l; ++i)
					seq->qual.s[i - l_bc] = seq->qual.s[i];
				seq->qual.l -= l_bc; seq->qual.s[seq->qual.l] = 0;
			}
			l = seq->seq.l;
		} else p->bc[0] = 0;
		p->tid = -1; // no assigned to a thread
		p->full_len = p->clip_len = p->len = l;
		n_tot += p->full_len;
		if (p->llen < p->len) {
			// Grow both buffers together; the old rseq used to be leaked here.
			free(p->seq);
			free(p->rseq);
			p->llen = p->len;
			p->seq = (ubyte_t*)calloc(p->len, 1);
			p->rseq = (ubyte_t*)calloc(p->full_len, 1);
		}
		// Index with an unsigned value so bytes >= 0x80 cannot become a
		// negative index (assumes a 256-entry table -- TODO confirm).
		for (i = 0; i != p->full_len; ++i)
			p->seq[i] = nst_nt4_table[(unsigned char)seq->seq.s[i]];
		if (seq->qual.l) { // copy quality
			free(p->qual);
			p->qual = (ubyte_t*)strdup((char*)seq->qual.s);
			if (trim_qual >= 1) n_trimmed += bwa_trim_read(trim_qual, p);
		}
		memcpy(p->rseq, p->seq, p->len);
		seq_reverse(p->len, p->seq, 0); // *IMPORTANT*: will be reversed back in bwa_refine_gapped()
		seq_reverse(p->len, p->rseq, is_comp);
		free(p->name);
		p->name = strdup((const char*)seq->name.s);
		{ // trim /[12]$
			int t = strlen(p->name);
			if (t > 2 && p->name[t-2] == '/' && (p->name[t-1] == '1' || p->name[t-1] == '2')) p->name[t-2] = '\0';
		}
		if(kseq_end(seq)) break;
	}
	if (n_seqs && trim_qual >= 1)
		fprintf(stderr, "[bwa_read_seq1] %.1f%% bases are trimmed.\n", 100.0f * n_trimmed/n_tot);
	return n_seqs;
}
Example #10
0
/* C-linkage wrapper: passes `seq` to bwa_free_read_seq() with a count of 1. */
extern "C" void bwa_seed2genome_cleanup_seq(bwa_seq_t *seq)
{
	const int n_records = 1;

	bwa_free_read_seq(n_records, seq);
}