Beispiel #1
0
/**
 * Open a pair of sequence files by path and pass them to seq_parse_pe_sf().
 *
 * die() is called if either path cannot be opened. Both files are closed
 * before returning.
 */
void seq_parse_pe(const char *path1, const char *path2, uint8_t ascii_fq_offset,
                  read_t *r1, read_t *r2,
                  void (*read_func)(read_t *_r1, read_t *_r2,
                                    uint8_t _qoffset1, uint8_t _qoffset2,
                                    void *_ptr),
                  void *reader_ptr)
{
  seq_file_t *first = seq_open(path1);
  if(first == NULL) die("Cannot open: %s", path1);

  seq_file_t *second = seq_open(path2);
  if(second == NULL) die("Cannot open: %s", path2);

  seq_parse_pe_sf(first, second, ascii_fq_offset, r1, r2, read_func, reader_ptr);

  seq_close(first);
  seq_close(second);
}
Beispiel #2
0
/**
 * Load every sequence from a file into a newly allocated array of strings.
 *
 * @param path      file to read; die() is called if it cannot be opened
 * @param seqs_ptr  out: array of strdup()'d sequence strings
 * @param cap_ptr   out: number of slots allocated in *seqs_ptr
 * @return number of sequences stored in *seqs_ptr
 */
int load_seqs(const char *path, char ***seqs_ptr, int *cap_ptr)
{
  int cap = 1024;
  char **seqs = my_malloc(sizeof(char*) * cap,__FILE__,__LINE__);

  read_t read;
  seq_read_alloc(&read);

  seq_file_t *file = seq_open(path);
  if(file == NULL) die("Cannot open file: %s.", path);
  int num = 0;

  // Stop on end-of-file (0) and on error (negative), as other callers do
  while(seq_read(file, &read) > 0)
  {
    if(num == cap) {
      // Grow through a temporary so the old array is not lost on failure
      char **grown = realloc(seqs, sizeof(char*) * cap * 2);
      if(grown == NULL) die("Out of memory loading: %s", path);
      seqs = grown;
      cap *= 2;
    }
    char *copy = strdup(read.seq.b);
    if(copy == NULL) die("Out of memory loading: %s", path);
    seqs[num++] = copy;
  }

  seq_read_dealloc(&read);
  seq_close(file);

  *seqs_ptr = seqs;
  *cap_ptr = cap;

  return num;
}
Beispiel #3
0
/*
 * Reads "<pitch> [<length>]" lines from stdin and plays them.
 *
 * An optional first command-line argument is parsed with atoi() into
 * seq_client. For each line:
 *  - a missing or non-positive length plays a short (0.1) note if the pitch
 *    is in 1..127, otherwise nothing;
 *  - a positive length plays a note of that length if the pitch is in
 *    1..127, otherwise a rest of that length.
 * Lines with no parsable pitch are skipped.
 */
int main (int argc, char** argv) {
  char buf[16];
  seq_opened = 0;
  if (argc > 1) seq_client = atoi(argv[1]);
  seq_open();
  while (fgets(buf, sizeof buf, stdin) != NULL) {
    /* Reset per line so a missing field never reuses a stale value */
    int pitch = 0;
    float length = 0;

    /* sscanf returns EOF (non-zero) on blank input: require a real match */
    if (sscanf (buf, "%d %f", &pitch, &length) < 1)
      continue;

    if (length <= 0) {
      /* a single short note */
      if ((pitch > 0) && (pitch == (pitch % 128)))
        play_note (pitch, 0.1);
    }
    else {
      if ((pitch > 0) && (pitch == (pitch % 128)))
        play_note (pitch, length);
      else
        play_rest (length);
    }
  }
  seq_close();
  return 0;
}
Beispiel #4
0
/**
 * Release everything owned by a FileList: closes each sequence file, frees
 * the embedded read, then frees the files, fqoffsets and errors arrays.
 * The FileList struct itself is not freed.
 */
void filelist_dealloc(FileList *flist)
{
  size_t idx = 0;

  while(idx < flist->num_files) {
    seq_close(flist->files[idx]);
    idx++;
  }

  seq_read_dealloc(&flist->read);

  free(flist->files);
  free(flist->fqoffsets);
  free(flist->errors);
}
Beispiel #5
0
/*
 * Prints, for every entry in the sequence file named by argv[1], the entry
 * name followed by the `end` fields of its name, sequence and quality
 * buffers. Exits with EXIT_FAILURE on a wrong argument count or if the file
 * cannot be opened.
 */
int main(int argc, char **argv)
{
  if(argc != 2) exit(EXIT_FAILURE);

  seq_file_t *input = seq_open(argv[1]);
  read_t *entry = seq_read_alloc();
  if(input == NULL) exit(EXIT_FAILURE);

  for(;;) {
    if(seq_read(input, entry) <= 0) break;
    printf("%s\t[%lu,%lu,%lu]\n", entry->name.b,
           entry->name.end, entry->seq.end, entry->qual.end);
  }

  seq_close(input);
  seq_read_destroy(entry);
  return EXIT_SUCCESS;
}
Beispiel #6
0
/**
 * Open a single sequence file by path and pass it to seq_parse_se_sf().
 *
 * die() is called if the path cannot be opened. The file is closed before
 * returning.
 */
void seq_parse_se(const char *path, uint8_t ascii_fq_offset,
                  read_t *r1,
                  void (*read_func)(read_t *_r1, read_t *_r2,
                                    uint8_t _qoffset1, uint8_t _qoffset2,
                                    void *_ptr),
                  void *reader_ptr)
{
  seq_file_t *input = seq_open(path);
  if(input == NULL) die("Cannot open: %s", path);

  seq_parse_se_sf(input, ascii_fq_offset, r1, read_func, reader_ptr);

  seq_close(input);
}
/**
 * Performs simple smoke tests of the sequence and stack abstract types.
 *
 * Writes one character to a sequence prepared on "sequence.test", reads the
 * first character back and logs it, then pushes that character onto a stack
 * and pops it again, logging whether the stack is empty before the push,
 * after the push and after the pop.
 *
 * @return void
 */
static void test(void)
{
  char a = 'b'; /* overwritten below by the character read from the sequence */
  stack_ptr stack = NULL;
  seq_ptr seq = NULL;

  /* Sequence round trip: write 'o', then read the first character back */
  seq_prepare(&seq, "sequence.test");
  seq_write(&seq, 'o');
  /* NOTE(review): seq_init presumably switches the sequence to reading - confirm */
  seq_init(&seq);
  /* The assignment to `a` happens inside the log call's argument list */
  log_info("main", "Character written in sequence: %c", a = seq_read_first(&seq));
  seq_close(&seq);

  /* Stack round trip: log state, push `a`, log, pop, log */
  stack_create(&stack);
  log_info("main", "Stack is: %s", stack_empty(&stack) ? "Empty" : "Not empty");
  stack_push(&stack, a);
  log_info("main", "Stack is: %s", stack_empty(&stack) ? "Empty" : "Not empty");
  log_info("main", "First character in stack: %c", stack_pop(&stack));
  log_info("main", "Stack is: %s", stack_empty(&stack) ? "Empty" : "Not empty");
}
Beispiel #8
0
/**
 * Read the next entry from a list of sequence files, moving on to the next
 * file (and wrapping back to the first) whenever the current one yields
 * nothing.
 *
 * Once the list has wrapped, `filesready` is cleared and each file that is
 * switched to is closed and reopened from its stored path before reading.
 * die() is called if a file cannot be reopened, or if more than num_files
 * file changes happen without a successful read.
 *
 * @param flist  file list; its `curr`, `filesready` and `files` are updated
 * @return pointer to flist->read, holding the entry just read
 */
read_t* filelist_read(FileList *flist)
{
  read_t *r = &flist->read;
  size_t i; // i is number of file changes
  for(i = 0; seq_read(flist->files[flist->curr], r) <= 0 && i <= flist->num_files; i++)
  {
    flist->curr++;
    if(flist->curr == flist->num_files) { flist->curr = flist->filesready = 0; }
    if(!flist->filesready) {
      // Copy the path first: closing the file may release the original string
      char path[PATH_MAX+1];
      assert(strlen(flist->files[flist->curr]->path) <= PATH_MAX);
      strcpy(path, flist->files[flist->curr]->path);
      seq_close(flist->files[flist->curr]);
      flist->files[flist->curr] = seq_open(path);
      // A NULL handle would be dereferenced on the next pass of the loop
      if(flist->files[flist->curr] == NULL) die("Cannot open: %s", path);
    }
  }
  if(i > flist->num_files) die("All seq files empty");
  return r;
}
/**
 * Solves the exercise
 *
 * Exercise 10 from the 4th workbook
 *
 * Reads characters from the sequence until it ends or the stack is full.
 * A non-digit character is pushed onto the stack; a digit d pops up to d
 * elements (stopping early if the stack empties). The sequence is closed
 * before returning.
 *
 * @param seq Sequence pointer
 * @param stack Stack pointer
 * @return void
 */
static void solve_exercise(seq_ptr *seq, stack_ptr *stack)
{
  char c = '\0';
  int aux = 0;
  if( ! seq_end(seq)) c = seq_read_first(seq);

  while( ! seq_end(seq) && ! stack_full(stack)) {
    /* <ctype.h> functions require a value representable as unsigned char */
    if( ! isdigit((unsigned char) c)) {
      stack_push(stack, c);
    } else {
      /* c is a single char, not a NUL-terminated string: do not use atoi */
      aux = c - '0';
      while(aux > 0 && ! stack_empty(stack)) {
        stack_pop(stack);
        aux--;
      }
    }
    c = seq_read_next(seq);
  }

  seq_close(seq);
}
Beispiel #10
0
// Read every entry from each of the given files into rbuf, closing each
// seq_file once it has been read.
// Returns the number of reads added to rbuf by this call
size_t seq_load_all_reads(seq_file_t **seq_files, size_t num_files,
                          ReadBuffer *rbuf)
{
  status("Loading sequences...");

  const size_t reads_before = rbuf->len;
  read_t scratch;
  seq_read_alloc(&scratch);

  size_t f = 0;
  while(f < num_files) {
    seq_file_t *sf = seq_files[f];
    status("  file: %s", sf->path);

    for(;;) {
      if(seq_read_primary(sf, &scratch) <= 0) break;
      read_buf_push(rbuf, &scratch, 1); // buffer takes a copy of the read
      seq_read_alloc(&scratch);         // fresh read for the next entry
    }

    seq_close(sf);
    f++;
  }

  seq_read_dealloc(&scratch);

  return rbuf->len - reads_before;
}
Beispiel #11
0
// If path2 is NULL, read pairs of entries from the first file
// Otherwise read one entry from each file per alignment
//
// align() is called once for each pair read. If a file cannot be opened an
// error is printed to stderr and the function returns without aligning.
// A warning is printed if no pairs were aligned at all.
void align_from_file(const char *path1, const char *path2,
                     void (align)(read_t *r1, read_t *r2),
                     bool use_zlib)
{
  seq_file_t *sf1, *sf2;

  if((sf1 = open_seq_file(path1, use_zlib)) == NULL)
  {
    fprintf(stderr, "Alignment Error: couldn't open file %s\n", path1);
    fflush(stderr);
    return;
  }

  if(path2 == NULL)
  {
    // Both reads of a pair come from the same file
    sf2 = sf1;
  }
  else if((sf2 = open_seq_file(path2, use_zlib)) == NULL)
  {
    fprintf(stderr, "Alignment Error: couldn't open file %s\n", path2);
    fflush(stderr);
    seq_close(sf1); // do not leak the first file on this error path
    return;
  }

  read_t read1, read2;
  seq_read_alloc(&read1);
  seq_read_alloc(&read2);

  // Loop while we can read a sequence from the first file
  unsigned long alignments;

  for(alignments = 0; seq_read(sf1, &read1) > 0; alignments++)
  {
    if(seq_read(sf2, &read2) <= 0)
    {
      fprintf(stderr, "Alignment Error: Odd number of sequences - "
                      "I read in pairs!\n");
      fflush(stderr);
      break;
    }

    (align)(&read1, &read2);
  }

  // warn if no pairs were aligned
  if(alignments == 0)
  {
    fprintf(stderr, "Alignment Warning: empty input\n");
    fflush(stderr);
  }

  // Close files (sf2 aliases sf1 when path2 is NULL, so close it only once)
  seq_close(sf1);

  if(path2 != NULL)
    seq_close(sf2);

  // Free memory
  seq_read_dealloc(&read1);
  seq_read_dealloc(&read2);
}
Beispiel #12
0
// Entry point for the `contigs` command.
//
// Parses command-line options, opens the graph (.ctx) files given as
// positional arguments and any path files given with -p, works out memory
// requirements, builds a confidence table from the path files' contig
// histograms, loads the graph and paths, runs assemble_contigs() and then
// releases all resources.
//
// Returns EXIT_SUCCESS; errors are reported through die() /
// cmd_print_usage().
int ctx_contigs(int argc, char **argv)
{
  size_t nthreads = 0;
  struct MemArgs memargs = MEM_ARGS_INIT;
  const char *out_path = NULL;
  size_t i, contig_limit = 0, colour = 0;
  bool cmd_reseed = false, cmd_no_reseed = false; // -r, -R
  const char *conf_table_path = NULL; // save confidence table to here
  bool use_missing_info_check = true, seed_with_unused_paths = false;
  double min_step_confid = -1.0, min_cumul_confid = -1.0; // < 0 => no min

  // Read length and expected depth for calculating confidences
  size_t genome_size = 0;

  // Seed files passed with --seed are opened during arg parsing and
  // collected in seed_buf; they are closed at the end of this function
  seq_file_t *tmp_seed_file = NULL;
  SeqFilePtrBuffer seed_buf;
  seq_file_ptr_buf_alloc(&seed_buf, 16);

  // Path files passed with -p are opened during arg parsing
  GPathReader tmp_gpfile;
  GPathFileBuffer gpfiles;
  gpfile_buf_alloc(&gpfiles, 8);

  // Arg parsing
  char cmd[100], shortopts[300];
  cmd_long_opts_to_short(longopts, shortopts, sizeof(shortopts));
  int c;

  // silence error messages from getopt_long
  // opterr = 0;

  while((c = getopt_long_only(argc, argv, shortopts, longopts, NULL)) != -1) {
    cmd_get_longopt_str(longopts, c, cmd, sizeof(cmd));
    switch(c) {
      case 0: /* flag set */ break;
      case 'h': cmd_print_usage(NULL); break;
      case 'f': cmd_check(!futil_get_force(), cmd); futil_set_force(true); break;
      case 'o': cmd_check(!out_path,cmd); out_path = optarg; break;
      case 't': cmd_check(!nthreads,cmd); nthreads = cmd_uint32_nonzero(cmd, optarg); break;
      case 'm': cmd_mem_args_set_memory(&memargs, optarg); break;
      case 'n': cmd_mem_args_set_nkmers(&memargs, optarg); break;
      case 'p':
        memset(&tmp_gpfile, 0, sizeof(GPathReader));
        gpath_reader_open(&tmp_gpfile, optarg);
        gpfile_buf_push(&gpfiles, &tmp_gpfile, 1);
        break;
      case '1': // handled identically to 's'
      case 's': // --seed <in.fa>
        if((tmp_seed_file = seq_open(optarg)) == NULL)
          die("Cannot read --seed file: %s", optarg);
        seq_file_ptr_buf_add(&seed_buf, tmp_seed_file);
        break;
      case 'r': cmd_check(!cmd_reseed,cmd); cmd_reseed = true; break;
      case 'R': cmd_check(!cmd_no_reseed,cmd); cmd_no_reseed = true; break;
      case 'N':
        cmd_check(!contig_limit,cmd);
        contig_limit = cmd_uint32_nonzero(cmd, optarg);
        break;
      case 'c': cmd_check(!colour,cmd); colour = cmd_uint32(cmd, optarg); break;
      case 'G': cmd_check(!genome_size,cmd); genome_size = cmd_bases(cmd, optarg); break;
      case 'S': cmd_check(!conf_table_path,cmd); conf_table_path = optarg; break;
      case 'M': cmd_check(use_missing_info_check,cmd); use_missing_info_check = false; break;
      case 'P': cmd_check(!seed_with_unused_paths,cmd); seed_with_unused_paths = true; break;
      case 'C':
        cmd_check(min_cumul_confid < 0,cmd);
        min_cumul_confid = cmd_udouble(cmd,optarg);
        if(min_cumul_confid > 1) die("%s must be 0 <= x <= 1", cmd);
        break;
      case 'T':
        cmd_check(min_step_confid < 0,cmd);
        min_step_confid = cmd_udouble(cmd,optarg);
        if(min_step_confid > 1) die("%s must be 0 <= x <= 1", cmd);
        break;
      case ':': /* BADARG */
      case '?': /* BADCH getopt_long has already printed error */
        die("`"CMD" contigs -h` for help. Bad option: %s", argv[optind-1]);
      default: abort();
    }
  }

  if(cmd_no_reseed && cmd_reseed)
    cmd_print_usage("Cannot specify both -r and -R");

  if(contig_limit && seed_with_unused_paths)
    cmd_print_usage("Cannot combine --ncontigs with --use-seed-paths");

  // Only an explicit -r turns on sampling with replacement
  bool sample_with_replacement = cmd_reseed;

  // Defaults
  if(nthreads == 0) nthreads = DEFAULT_NTHREADS;

  // NOTE(review): presumably reseeding with no seed files and no contig
  // limit would give the run no stopping condition - confirm
  if(!seed_buf.len && !contig_limit && sample_with_replacement) {
    cmd_print_usage("Please specify one or more of: "
                    "--no-reseed | --ncontigs | --seed <in.fa>");
  }

  if(optind >= argc) cmd_print_usage("Require input graph files (.ctx)");

  //
  // Open graph files
  //
  // All remaining positional arguments are graph file paths
  const size_t num_gfiles = argc - optind;
  char **graph_paths = argv + optind;
  ctx_assert(num_gfiles > 0);

  GraphFileReader *gfiles = ctx_calloc(num_gfiles, sizeof(GraphFileReader));
  size_t ncols, ctx_max_kmers = 0, ctx_sum_kmers = 0;

  graph_files_open(graph_paths, gfiles, num_gfiles,
                   &ctx_max_kmers, &ctx_sum_kmers);

  // char *ctx_path = argv[optind];

  //
  // Open Graph file
  //
  // GraphFileReader gfile;
  // memset(&gfile, 0, sizeof(GraphFileReader));
  // graph_file_open(&gfile, ctx_path);

  // Update colours in graph file - sample in 0, all others in 1
  // never need more than two colours
  ncols = gpath_load_sample_pop(gfiles, num_gfiles,
                                gpfiles.b, gpfiles.len, colour);

  // Check for compatibility between graph files and path files
  // pop_colour is colour 1
  graphs_gpaths_compatible(gfiles, num_gfiles, gpfiles.b, gpfiles.len, 1);

  // Without --genome, fall back to the kmer count reported by the graph files
  if(!genome_size)
  {
    char nk_str[50];
    if(ctx_max_kmers <= 0) die("Please pass --genome <G> if streaming");
    genome_size = ctx_max_kmers;
    ulong_to_str(genome_size, nk_str);
    status("Taking number of kmers as genome size: %s", nk_str);
  }

  //
  // Decide on memory
  //
  size_t bits_per_kmer, kmers_in_hash, graph_mem, path_mem, total_mem;

  // 1 bit needed per kmer if we need to keep track of kmer usage
  bits_per_kmer = sizeof(BinaryKmer)*8 + sizeof(Edges)*8 + sizeof(GPath*)*8 +
                  ncols + !sample_with_replacement;

  kmers_in_hash = cmd_get_kmers_in_hash(memargs.mem_to_use,
                                        memargs.mem_to_use_set,
                                        memargs.num_kmers,
                                        memargs.num_kmers_set,
                                        bits_per_kmer,
                                        ctx_max_kmers, ctx_sum_kmers,
                                        false, &graph_mem);

  // Paths memory
  // MIN2 keeps the subtraction from wrapping when graph_mem > mem_to_use
  size_t rem_mem = memargs.mem_to_use - MIN2(memargs.mem_to_use, graph_mem);
  path_mem = gpath_reader_mem_req(gpfiles.b, gpfiles.len, ncols, rem_mem, false);

  // Shift path store memory from graphs->paths
  graph_mem -= sizeof(GPath*)*kmers_in_hash;
  path_mem  += sizeof(GPath*)*kmers_in_hash;
  cmd_print_mem(path_mem, "paths");

  // Total memory
  total_mem = graph_mem + path_mem;
  cmd_check_mem_limit(memargs.mem_to_use, total_mem);

  // Load contig hist distribution from ctp files
  ZeroSizeBuffer contig_hist;
  memset(&contig_hist, 0, sizeof(contig_hist));

  for(i = 0; i < gpfiles.len; i++) {
    gpath_reader_load_contig_hist(gpfiles.b[i].json,
                                  gpfiles.b[i].fltr.path.b,
                                  file_filter_fromcol(&gpfiles.b[i].fltr, 0),
                                  &contig_hist);
  }

  // Calculate confidences, only for one colour
  ContigConfidenceTable conf_table;
  conf_table_alloc(&conf_table, 1);
  conf_table_update_hist(&conf_table, 0, genome_size,
                         contig_hist.b, contig_hist.len);

  if(conf_table_path != NULL) {
    conf_table_save(&conf_table, conf_table_path);
  }

  zsize_buf_dealloc(&contig_hist);

  //
  // Output file if printing
  //
  FILE *fout = out_path ? futil_fopen_create(out_path, "w") : NULL;

  // Allocate
  dBGraph db_graph;
  db_graph_alloc(&db_graph, gfiles[0].hdr.kmer_size, ncols, 1, kmers_in_hash,
                 DBG_ALLOC_EDGES | DBG_ALLOC_NODE_IN_COL);

  // Paths
  gpath_reader_alloc_gpstore(gpfiles.b, gpfiles.len, path_mem,
                             false, &db_graph);

  // Bitset sized from the hash table capacity; only allocated when not
  // sampling with replacement (matches the extra bit counted above)
  uint8_t *visited = NULL;

  if(!sample_with_replacement)
    visited = ctx_calloc(roundup_bits2bytes(db_graph.ht.capacity), 1);

  // Load graph
  LoadingStats stats = LOAD_STATS_INIT_MACRO;

  GraphLoadingPrefs gprefs = {.db_graph = &db_graph,
                              .boolean_covgs = false,
                              .must_exist_in_graph = false,
                              .empty_colours = true};

  // empty_colours is true for the first file only; cleared after each load
  for(i = 0; i < num_gfiles; i++) {
    graph_load(&gfiles[i], gprefs, &stats);
    graph_file_close(&gfiles[i]);
    gprefs.empty_colours = false;
  }
  ctx_free(gfiles);

  hash_table_print_stats(&db_graph.ht);

  // Load path files
  for(i = 0; i < gpfiles.len; i++) {
    gpath_reader_load(&gpfiles.b[i], GPATH_DIE_MISSING_KMERS, &db_graph);
    gpath_reader_close(&gpfiles.b[i]);
  }
  gpfile_buf_dealloc(&gpfiles);

  AssembleContigStats assem_stats;
  assemble_contigs_stats_init(&assem_stats);

  assemble_contigs(nthreads, seed_buf.b, seed_buf.len,
                   contig_limit, visited,
                   use_missing_info_check, seed_with_unused_paths,
                   min_step_confid, min_cumul_confid,
                   fout, out_path, &assem_stats, &conf_table,
                   &db_graph, 0); // Sample always loaded into colour zero

  // Never close stdout, in case futil_fopen_create() returned it
  if(fout && fout != stdout) fclose(fout);

  assemble_contigs_stats_print(&assem_stats);
  assemble_contigs_stats_destroy(&assem_stats);

  conf_table_dealloc(&conf_table);

  // Close the seed files opened during argument parsing
  for(i = 0; i < seed_buf.len; i++)
    seq_close(seed_buf.b[i]);

  seq_file_ptr_buf_dealloc(&seed_buf);

  ctx_free(visited);
  db_graph_dealloc(&db_graph);

  return EXIT_SUCCESS;
}
Beispiel #13
0
int main(int argc, char **argv) {
	SEQ *sf;
	uchar *s;
	FILE *f;
	char chr_name[100], info[1000], dir;
	int i = 0, j = 0, k = 0, B = 0, E = 0;
	int max_len = 0;
	char *cur_seq;
	int seq_len = 0;
	bool is_correct_splicing = false;
	int num_genes = 0;
	int num_exons = 0;
	int num = 0;
	struct g_list *genes;
	struct exons_list *exons;
	bool no_branchpoint = false;

	if( argc == 5 ) {
		
	}	
	else if( argc == 4 ) {
		if( strcmp( argv[3], "NO_BRANCHPOINT") == 0 ) {
			no_branchpoint = true;
		}
		else {
			fatalf("args: fasta gff (NO_BRANCHPOINT)");
		}
	}
	else if (argc != 3)
		fatalf("args: fasta gff (NO_BRANCHPOINT)");

	if((f = ckopen(argv[2], "r")) == NULL )
	{
		fatalf("Cannot open file %s\n", argv[1]);
	}
	else {
		num_genes = count_genes_in_gff(f, &num_exons);
		if( num_genes > 0 ) {
			genes = (struct g_list *) ckalloc(num_genes * sizeof(struct g_list));	
			if( num_exons < num_genes ) num_exons = num_genes;
			exons = (struct exons_list *) ckalloc(num_exons * sizeof(struct exons_list));	
			initialize_genes(genes, num_genes);
			initialize_exons(exons, num_exons);
		}
	}
	fseek(f, 0, SEEK_SET);
	
	branchpoints = (char **) ckalloc(sizeof(char *) * NUM_BP_SEQ);
	for( i = 0; i < NUM_BP_SEQ; i++ ) {
		branchpoints[i] = (char *) ckalloc(sizeof(char) * 8);	
	}
	
	strcpy(branchpoints[0], "AACTAAC");
	strcpy(branchpoints[1], "AATTAAC");
	strcpy(branchpoints[2], "CACTAAC");
	strcpy(branchpoints[3], "GACTAAC");
	strcpy(branchpoints[4], "TACTAAC");
	strcpy(branchpoints[5], "TACTAAT");
	strcpy(branchpoints[6], "TATTAAC");
	strcpy(branchpoints[7], "TGCTAAC");
	strcpy(branchpoints[8], "GATTAAC");

	num = input_genes_in_gff(f, genes, exons);	
	if( num != num_genes ) {
		fatalf("gene counter error in %s\n", argv[1]);
	}

	if( num_genes > 0 ) {
		quick_sort_inc_genes(genes, 0, num_genes-1, POS_BASE);
	}

	i = 0;
	while( i < num_genes ) {
		j = 0;
    while( ((i+j) < num_genes) && (genes[i].txStart == genes[i+j].txStart )) j++;
    quick_sort_dec_genes(genes, i, i+j-1, LEN_BASE);
    i = i+j;
	}
	fclose(f);

	compl['a'] = compl['A'] = 'T';
	compl['c'] = compl['C'] = 'G';
	compl['g'] = compl['G'] = 'C';
	compl['t'] = compl['T'] = 'A';
	sf = seq_get(argv[1]);
	s = SEQ_CHARS(sf) - 1;
	seq_len = SEQ_LEN(sf);

	for( i = 0; i < num_genes; i++ ) {
		B = genes[i].txStart;
		E = genes[i].txEnd;
		if( E > seq_len ) {
			fatalf("gene boundary [%d,%d] over the sequence length %d\n", B, E, seq_len);
		}

		if( (E - B + 1) > max_len ) {
			max_len = E - B + 1;
		}
	}

	cur_seq = (char *) ckalloc(sizeof(char) * (max_len+1));
	for( i = 0; i < num_genes; i++ ) {
		if( genes[i].exonCount >= 2 ) {
			strcpy(chr_name, genes[i].sname);
			B = genes[i].txStart;
			E = genes[i].txEnd;
			dir = genes[i].strand;
			strcpy(info, genes[i].gname);	

			k = 0;
			if( dir == '+' ) {
				for (j = B; j <= E; j++) {
					cur_seq[k] = s[j];
					k++;
				}
				cur_seq[k] = '\0';
			}
			else {
				k = 0;
				for (j = E; j >= B; j--) {
					cur_seq[k] = compl[s[j]];
					k++;
				}
				cur_seq[k] = '\0';
			}

			is_correct_splicing = true;
			is_correct_splicing = check_introns(genes, i, exons, cur_seq, k);
			if( is_correct_splicing == false ) {
				if( no_branchpoint == false ) {
					genes[i].type = REDUN;
				}
			}
			else {
				if( no_branchpoint == true ) {
					genes[i].type = REDUN;
				}
			}
		}
	}

  num_genes = rm_redun_genes(genes, 0, num_genes-1);
  write_in_gff(genes, num_genes, exons, num_exons);
	
	free(cur_seq);
	for( i = 0; i < NUM_BP_SEQ; i++ ) free(branchpoints[i]);
	free(branchpoints);
	seq_close(sf);
	return EXIT_SUCCESS;
}
Beispiel #14
0
/*
 * Read simulator entry point.
 *
 * Depending on the options given this:
 *  - summarises the error profile of the -p files (no <out_base> given);
 *  - adds errors to existing reads (-1 / -2) using the -p profiles;
 *  - samples reads from a reference (-r), optionally with errors taken from
 *    -p profiles or a flat -e rate.
 * Output goes to <out_base>.fa.gz (single-ended) or <out_base>.1.fa.gz and
 * <out_base>.2.fa.gz (paired).
 */
int main(int argc, char **argv)
{
  if(argc < 3) print_usage(usage, NULL);

  // Sample reads from ref
  char *refpath = NULL;
  int insert = 250, rlen = 250, single_ended = 0;
  double depth = 1.0, insert_stddev_prop = 0.2; // stddev as proportion of insert
  int optr = 0, opti = 0, optv = 0, optl = 0, optd = 0; // keeps track of values
  uint64_t seed = generate_seed(); // default RNG seed

  char *in0path = NULL, *in1path = NULL;

  // At most argc-1 options can be given, so argc slots always suffice
  char *profile_paths[argc];
  size_t num_profile_paths = 0, i, total_seq = 0;
  float err_rate = -1; // < 0 => no flat error rate requested

  int c;
  while((c = getopt(argc, argv, "p:r:i:v:l:d:s1:2:e:g:")) >= 0) {
    switch (c) {
      case 'p': profile_paths[num_profile_paths++] = optarg; break;
      case 'r': refpath = optarg; optr++; break;
      case 'i': insert = atoi(optarg); opti++; break;
      case 'v': insert_stddev_prop = atof(optarg); optv++; break;
      case 'l': rlen = atoi(optarg); optl++; break;
      case 'd': depth = atof(optarg); optd++; break;
      case 's': single_ended = 1; break;
      case '1': in0path = optarg; break;
      case '2': in1path = optarg; break;
      case 'e': err_rate = atof(optarg); break;
      // The seed is 64-bit: atoi() would truncate it to int
      case 'g': seed = strtoull(optarg, NULL, 10); break;
      default: die("Unknown option: %c", c);
    }
  }

  // Set up
  seed_random(seed);
  init_qual_prob();

  char *outbase = NULL;

  // No positional argument is allowed: that selects profile-summary mode
  if(optind + 1 == argc) outbase = argv[optind];
  else if(optind + 1 < argc)
    print_usage(usage, "Too many args after %s", argv[optind]);

  if(depth <= 0) print_usage(usage, "Depth [-d] cannot be <= 0");

  if(insert_stddev_prop < 0)
    print_usage(usage, "Insert length standard deviation [-v] cannot be < 0");

  if((opti > 0 || optv > 0 || optl > 0 || optd > 0) && refpath == NULL)
    print_usage(usage, "Missing -r <in.fa>");

  if(optr > 1 || opti > 1 || optv > 1 || optl > 1 || optd > 1)
    print_usage(usage, "Duplicate args");

  if(in0path == NULL && in1path != NULL)
    print_usage(usage, "-2 <in> requires -1 <in>");

  if(in0path != NULL && in1path == NULL) {
    if(refpath == NULL) single_ended = 1;
    else if(!single_ended) print_usage(usage, "Missing -2 for paired-end output");
  }

  if(in0path != NULL && num_profile_paths == 0)
    print_usage(usage, "Need at least one -p <profile.fq.gz> to use -1 .. -2 ..");

  if(num_profile_paths == 0 && refpath == NULL)
    print_usage(usage, "Need one of -p or -r");

  if(num_profile_paths == 0 && outbase == NULL)
    print_usage(usage, "More options required");

  if(num_profile_paths > 0 && err_rate >= 0)
    print_usage(usage, "Cannot use both -p and -e");

  // Profile reads
  FileList fliststore, *flist = NULL;
  if(num_profile_paths > 0) {
    flist = &fliststore;
    filelist_alloc(flist, profile_paths, num_profile_paths);
  }

  if(outbase == NULL)
  {
    // Summarise error profile in input
    // (flist is non-NULL here: the checks above require -p without <out_base>)
    filelist_mean_err(flist);
  }
  else
  {
    size_t outlen = strlen(outbase), extlen = strlen(".1.fa.gz");
    char out0path[outlen+extlen+1], out1path[outlen+extlen+1];
    memcpy(out0path, outbase, outlen);
    memcpy(out1path, outbase, outlen);

    // out1path is only terminated (and only used) for paired output
    if(single_ended) strcpy(out0path+outlen, ".fa.gz");
    else {
      strcpy(out0path+outlen, ".1.fa.gz");
      strcpy(out1path+outlen, ".2.fa.gz");
    }

    gzFile gzout0 = NULL, gzout1 = NULL;
    seq_file_t *sf0 = NULL, *sf1 = NULL, *reffile = NULL;

    if(in0path != NULL && (sf0 = seq_open(in0path)) == NULL) die("Cannot read: %s", in0path);
    if(in1path != NULL && (sf1 = seq_open(in1path)) == NULL) die("Cannot read: %s", in1path);

    // NOTE(review): the output files are only opened when -r is given, yet
    // mutate_reads() below is handed gzout0/gzout1 whenever -1/-2 are given.
    // Confirm mutate_reads() accepts a NULL gzFile, or open them here.
    if(refpath != NULL)
    {
      if((reffile = seq_open(refpath)) == NULL) die("Cannot read: %s", refpath);
      if((gzout0 = gzopen(out0path, "w")) == NULL) die("Cannot open: %s", out0path);
      if(!single_ended && (gzout1 = gzopen(out1path, "w")) == NULL)
        die("Cannot open: %s", out1path);
    }

    if(sf0 != NULL) {
      printf("Adding error to input reads...\n");
      total_seq += mutate_reads(sf0, gzout0, flist, err_rate);
      seq_close(sf0);
    }
    if(sf1 != NULL) {
      total_seq += mutate_reads(sf1, single_ended ? gzout0 : gzout1, flist, err_rate);
      seq_close(sf1);
    }

    if(refpath != NULL)
    {
      printf("Sampling from %s\n", refpath);
      printf(" sequencing depth: %.2f\n", depth);
      printf(" read length: %i\n", rlen);
      printf(" read pairs: %s\n", single_ended ? "no" : "yes");
      if(!single_ended) {
        printf(" insert length: %i\n", insert);
        printf(" insert stddev: %.2f * insert = %.2f\n",
               insert_stddev_prop, insert_stddev_prop*insert);
      }
      if(num_profile_paths > 0) {
        printf(" seq error files: %s", flist->files[0]->path);
        for(i = 1; i < num_profile_paths; i++)
          printf(",%s", flist->files[i]->path);
        printf("\n");
      } else if(err_rate >= 0) {
        printf(" seq error rate: %.2f%%\n", err_rate * 100.0);
      } else {
        printf(" sequencing errors: no\n");
      }
      total_seq += sim_reads(reffile, gzout0, gzout1, flist, err_rate,
                             insert, insert_stddev_prop*insert, rlen, depth);
      seq_close(reffile);
    }

    if(gzout0 != NULL && gzout1 != NULL)
      printf("Wrote %zu bases to: %s and %s\n", total_seq, out0path, out1path);
    else if(gzout0 != NULL)
      printf("Wrote %zu bases to: %s\n", total_seq, out0path);

    if(gzout0 != NULL) gzclose(gzout0);
    if(gzout1 != NULL) gzclose(gzout1);
  }

  if(flist != NULL)
  {
    // Print error distribution
    size_t err_total = 0;
    for(i = 0; i < flist->errors_len; i++) err_total += flist->errors[i];
    printf("Errors: %zu / %zu (%.2f%%)\n", err_total, total_seq,
                                           (100.0*err_total) / total_seq);
    for(i = 0; i < flist->errors_len; i++) printf(" %zu", flist->errors[i]);
    printf("\n");

    filelist_dealloc(flist);
  }

  return EXIT_SUCCESS;
}