Esempio n. 1
0
/*
 * Entry point of the fml-asm example: assemble the reads of one input file.
 *
 * Options are parsed with getopt() (see the usage text below for their
 * meaning), the sequences are loaded from the first positional argument with
 * bseq_read(), assembled with fml_assemble(), and the resulting unitigs are
 * printed either in the default format or, with -g, in GFA.
 *
 * Returns 0 on success, 1 when no input file is given or when no sequences
 * could be loaded from it.
 */
int main(int argc, char *argv[])
{
	fml_opt_t opt;
	int c, n_seqs, n_utg, gfa_out = 0;
	bseq1_t *seqs;
	fml_utg_t *utg;

	fml_opt_init(&opt);
	while ((c = getopt(argc, argv, "gOAe:l:r:t:c:d:v:")) >= 0) {
		switch (c) {
		case 'e': opt.ec_k = atoi(optarg); break;
		case 'l': opt.min_asm_ovlp = atoi(optarg); break;
		case 'r': opt.mag_opt.min_dratio1 = atof(optarg); break;
		case 'A': opt.mag_opt.flag |= MAG_F_AGGRESSIVE; break;
		case 'O': opt.mag_opt.flag &= ~MAG_F_POPOPEN; break;
		case 'd': opt.mag_opt.max_bdiff = atoi(optarg); break;
		case 't': opt.n_threads = atoi(optarg); break;
		case 'g': gfa_out = 1; break;
		case 'v': fm_verbose = atoi(optarg); break;
		case 'c': {
			/* "-c MIN[,MAX]": the upper bound is optional. */
			char *p;
			opt.min_cnt = strtol(optarg, &p, 10);
			if (*p == ',') opt.max_cnt = strtol(p + 1, &p, 10);
			break;
		}
		default:
			/* Unrecognized options are skipped; parsing continues. */
			break;
		}
	}
	if (argc == optind) {
		fprintf(stderr, "Usage: fml-asm [options] <in.fq>\n");
		fprintf(stderr, "Options:\n");
		fprintf(stderr, "  -e INT          k-mer length for error correction (0 for auto; -1 to disable) [%d]\n", opt.ec_k);
		fprintf(stderr, "  -c INT1[,INT2]  range of k-mer & read count thresholds for ec and graph cleaning [%d,%d]\n", opt.min_cnt, opt.max_cnt);
		fprintf(stderr, "  -l INT          min overlap length during initial assembly [%d]\n", opt.min_asm_ovlp);
		fprintf(stderr, "  -r FLOAT        drop an overlap if its length is below maxOvlpLen*FLOAT [%g]\n", opt.mag_opt.min_dratio1);
		fprintf(stderr, "  -t INT          number of threads (don't use multi-threading for small data sets) [%d]\n", opt.n_threads);
		fprintf(stderr, "  -d INT          retain a bubble if one side is longer than the other side by >INT-bp [%d]\n", opt.mag_opt.max_bdiff);
		fprintf(stderr, "  -A              discard heterozygotes (apply this to assemble bacterial genomes; override -O)\n");
		fprintf(stderr, "  -O              don't apply aggressive tip trimming\n");
		fprintf(stderr, "  -g              output the assembly graph in the GFA format\n");
		return 1;
	}
	seqs = bseq_read(argv[optind], &n_seqs);
	if (!seqs) {
		/* Do not hand a null read array to the assembler. */
		fprintf(stderr, "fml-asm: no sequences could be read from '%s'\n", argv[optind]);
		return 1;
	}
	/* NOTE(review): seqs is never freed here; presumably fml_assemble()
	 * takes ownership of it - confirm against fml.h. */
	utg = fml_assemble(&opt, n_seqs, seqs, &n_utg);
	if (!gfa_out) fml_utg_print(n_utg, utg);
	else fml_utg_print_gfa(n_utg, utg);
	fml_utg_destroy(n_utg, utg);
	return 0;
}
Esempio n. 2
0
/*
 * Entry point of the fml-asm example (reduced option set).
 *
 * Parses -e/-c/-l/-r/-t/-A with getopt() (their meaning is given in the usage
 * text below), loads the sequences from the first positional argument with
 * bseq_read(), assembles them with fml_assemble() and prints the unitigs.
 *
 * Returns 0 on success, 1 when no input file is given or when no sequences
 * could be loaded from it.
 */
int main(int argc, char *argv[])
{
	fml_opt_t opt;
	int c, n_seqs, n_utg;
	bseq1_t *seqs;
	fml_utg_t *utg;

	fml_opt_init(&opt);
	while ((c = getopt(argc, argv, "Ae:l:r:t:c:")) >= 0) {
		switch (c) {
		case 'e': opt.ec_k = atoi(optarg); break;
		case 'l': opt.min_asm_ovlp = atoi(optarg); break;
		case 'r': opt.mag_opt.min_dratio1 = atof(optarg); break;
		case 'A': opt.mag_opt.flag |= MAG_F_AGGRESSIVE; break;
		case 't': opt.n_threads = atoi(optarg); break;
		case 'c': {
			/* "-c MIN[,MAX]": the upper bound is optional. */
			char *p;
			opt.min_cnt = strtol(optarg, &p, 10);
			if (*p == ',') opt.max_cnt = strtol(p + 1, &p, 10);
			break;
		}
		default:
			/* Unrecognized options are skipped; parsing continues. */
			break;
		}
	}
	if (argc == optind) {
		fprintf(stderr, "Usage: fml-asm [options] <in.fq>\n");
		fprintf(stderr, "Options:\n");
		fprintf(stderr, "  -e INT          k-mer length for error correction (0 for auto; -1 to disable) [%d]\n", opt.ec_k);
		fprintf(stderr, "  -c INT1[,INT2]  range of k-mer & read count thresholds for ec and graph cleaning [%d,%d]\n", opt.min_cnt, opt.max_cnt);
		fprintf(stderr, "  -l INT          min overlap length during initial assembly [%d]\n", opt.min_asm_ovlp);
		fprintf(stderr, "  -r FLOAT        drop an overlap if its length is below maxOvlpLen*FLOAT [%g]\n", opt.mag_opt.min_dratio1);
		fprintf(stderr, "  -t INT          number of threads (don't use multi-threading for small data sets) [%d]\n", opt.n_threads);
		fprintf(stderr, "  -A              discard heterozygotes (apply this to assemble bacterial genomes)\n");
		return 1;
	}
	seqs = bseq_read(argv[optind], &n_seqs);
	if (!seqs) {
		/* Do not hand a null read array to the assembler. */
		fprintf(stderr, "fml-asm: no sequences could be read from '%s'\n", argv[optind]);
		return 1;
	}
	/* NOTE(review): seqs is never freed here; presumably fml_assemble()
	 * takes ownership of it - confirm against fml.h. */
	utg = fml_assemble(&opt, n_seqs, seqs, &n_utg);
	fml_utg_print(n_utg, utg);
	fml_utg_destroy(n_utg, utg);
	return 0;
}
Esempio n. 3
0
int main(int argc, char **argv)
{
	fml_opt_t options;
	bseq1_t *seqs = NULL;
	PONE_READ reads = NULL;
	size_t readCount = 0;
	ERR_VALUE ret = ERR_INTERNAL_ERROR;

	fml_opt_init(&options);
	options.n_threads = omp_get_num_procs();
	options.ec_k = 31;
	utils_allocator_init(options.n_threads);
	fprintf(stderr, "Loading reads from %s...\n", argv[1]);
	ret = input_get_reads(argv[1], "sam", &reads, &readCount);
	if (ret == ERR_SUCCESS) {
		fprintf(stderr, "Converting to fermi-lite format...\n");
		ret = utils_calloc(readCount, sizeof(bseq1_t), &seqs);
		if (ret == ERR_SUCCESS) {
			for (size_t i = 0; i < readCount; ++i) {
				memset(seqs + i, 0, sizeof(seqs[i]));
				seqs[i].l_seq = reads[i].ReadSequenceLen;
				read_quality_encode(reads + i);
				seqs[i].seq = _copy_string(reads[i].ReadSequence, reads[i].ReadSequenceLen);
				if (reads[i].Quality != NULL)
					seqs[i].qual = _copy_string(reads[i].Quality, reads[i].QualityLen);

				read_quality_decode(reads + i);
			}

			fml_opt_adjust(&options, readCount, seqs);
			fprintf(stderr, "Correcting...\n");
			fml_correct(&options, readCount, seqs);
			fprintf(stderr, "Fitting unique k-mers...\n");
			fml_fltuniq(&options, readCount, seqs);
			fprintf(stderr, "Converting back to our format...\n");
			for (size_t i = 0; i < readCount; ++i) {
				if (reads[i].ReadSequenceLen != seqs[i].l_seq) {
					utils_copy_string("*", &reads[i].CIGAR);
					reads[i].CIGARLen = 1;
				}

				reads[i].ReadSequenceLen = seqs[i].l_seq;
				reads[i].QualityLen = seqs[i].l_seq;
				ret = utils_copy_string(seqs[i].seq, &reads[i].ReadSequence);
				if (ret == ERR_SUCCESS)
					ret = utils_copy_string(seqs[i].qual, &reads[i].Quality);

				for (size_t j = 0; j < reads[i].ReadSequenceLen; ++j)
					reads[i].ReadSequence[j] = toupper(reads[i].ReadSequence[j]);

				if (reads[i].ReadSequenceLen > 0 && reads[i].QualityLen > 0)
					read_write_sam(stdout, reads + i);

				read_quality_decode(reads + i);
			}
				
			fprintf(stderr, "Freeing fermi-lite resources...\n");
			utils_free(seqs);
		}
		
		fprintf(stderr, "Freeing our reads...\n");
		read_set_destroy(reads, readCount);
	}

	return 0;
}