/*
 * Parse a pair of sequence files given by path.
 *
 * Opens path1 and path2 with seq_open(), passes both handles together with
 * the caller's read buffers and callback to seq_parse_pe_sf(), then closes
 * both files. Calls die() if either file cannot be opened.
 *
 * path1, path2      paths of the two input files
 * ascii_fq_offset   forwarded unchanged to seq_parse_pe_sf()
 * r1, r2            read buffers forwarded to seq_parse_pe_sf()
 * read_func         callback forwarded to seq_parse_pe_sf()
 * reader_ptr        opaque pointer forwarded to seq_parse_pe_sf()
 */
void seq_parse_pe(const char *path1, const char *path2, uint8_t ascii_fq_offset,
                  read_t *r1, read_t *r2,
                  void (*read_func)(read_t *_r1, read_t *_r2,
                                    uint8_t _qoffset1, uint8_t _qoffset2,
                                    void *_ptr),
                  void *reader_ptr)
{
  seq_file_t *first = seq_open(path1);
  if(first == NULL)
    die("Cannot open: %s", path1);

  seq_file_t *second = seq_open(path2);
  if(second == NULL)
    die("Cannot open: %s", path2);

  seq_parse_pe_sf(first, second, ascii_fq_offset, r1, r2,
                  read_func, reader_ptr);

  seq_close(first);
  seq_close(second);
}
/*
 * Load every sequence string from the file at `path` into a heap array.
 *
 * path      file to read
 * seqs_ptr  out: receives the array of strdup()'d sequence strings
 * cap_ptr   out: receives the allocated capacity of that array (in entries)
 *
 * Returns the number of sequences stored. The caller owns the array and
 * each string in it. Calls die() if the file cannot be opened or memory
 * runs out.
 */
int load_seqs(const char *path, char ***seqs_ptr, int *cap_ptr)
{
  int cap = 1024;
  char **seqs = my_malloc(sizeof(char*) * cap,__FILE__,__LINE__);

  read_t read;
  seq_read_alloc(&read);

  seq_file_t *file = seq_open(path);
  if(file == NULL) die("Cannot open file: %s.", path);

  int num = 0;

  // Only a positive return is treated as a successful read; the other
  // readers in this code base test seq_read() with `> 0` / `<= 0`.
  while(seq_read(file, &read) > 0)
  {
    if(num == cap) {
      cap *= 2;
      // Keep the old pointer until realloc() is known to have succeeded
      char **grown = realloc(seqs, sizeof(char*) * cap);
      if(grown == NULL) die("Out of memory loading file: %s.", path);
      seqs = grown;
    }

    char *copy = strdup(read.seq.b);
    if(copy == NULL) die("Out of memory loading file: %s.", path);
    seqs[num++] = copy;
  }

  seq_read_dealloc(&read);
  seq_close(file);

  *seqs_ptr = seqs;
  *cap_ptr = cap;
  return num;
}
/*
 * Read "<pitch> [<length>]" lines from stdin and play them.
 *
 * argv[1], when present, is parsed with atoi() and stored in seq_client.
 * seq_open() is called once before the loop and seq_close() once after it.
 *
 * For each input line:
 *   - a line with no parsable pitch is ignored;
 *   - a missing or non-positive length plays a single short note (0.1),
 *     provided the pitch is valid;
 *   - a positive length plays the note, or a rest when the pitch is
 *     outside 1..127.
 */
int main (int argc, char** argv)
{
  char buf[16];

  seq_opened = 0;
  if (argc > 1)
    seq_client = atoi(argv[1]);

  seq_open();

  while (fgets(buf, sizeof buf, stdin) != NULL) {
    /* Reset per line so a line carrying only a pitch does not inherit the
       previous line's length (or an uninitialised value on the first). */
    int pitch = 0;
    float length = 0;

    /* sscanf() returns EOF (non-zero) on an empty line, so a plain
       truthiness test is not enough: require at least the pitch. */
    if (sscanf(buf, "%d %f", &pitch, &length) < 1)
      continue;

    int pitch_ok = (pitch > 0) && (pitch == (pitch % 128));

    if (length <= 0) {
      /* a single short note */
      if (pitch_ok)
        play_note(pitch, 0.1);
    } else if (pitch_ok) {
      play_note(pitch, length);
    } else {
      play_rest(length);
    }
  }

  seq_close();
  return 0;
}
/*
 * Release everything held by a FileList: close each of its num_files
 * sequence files, release its read buffer, then free the files, fqoffsets
 * and errors arrays. The FileList struct itself is not freed.
 */
void filelist_dealloc(FileList *flist)
{
  size_t idx = 0;

  while(idx < flist->num_files) {
    seq_close(flist->files[idx]);
    idx++;
  }

  seq_read_dealloc(&flist->read);

  free(flist->files);
  free(flist->fqoffsets);
  free(flist->errors);
}
/*
 * Print, for every entry of the sequence file named by argv[1], its name
 * followed by the `end` fields of its name, sequence and quality buffers.
 *
 * Exits with EXIT_FAILURE when the argument count is wrong or the file
 * cannot be opened.
 */
int main(int argc, char **argv)
{
  if(argc != 2) {
    exit(EXIT_FAILURE);
  }

  seq_file_t *input = seq_open(argv[1]);
  read_t *entry = seq_read_alloc();

  if(input == NULL) {
    exit(EXIT_FAILURE);
  }

  for(;;) {
    if(seq_read(input, entry) <= 0) {
      break;
    }
    printf("%s\t[%lu,%lu,%lu]\n",
           entry->name.b, entry->name.end, entry->seq.end, entry->qual.end);
  }

  seq_close(input);
  seq_read_destroy(entry);

  return EXIT_SUCCESS;
}
/*
 * Parse a single sequence file given by path.
 *
 * Opens `path` with seq_open(), passes the handle together with the
 * caller's read buffer and callback to seq_parse_se_sf(), then closes the
 * file. Calls die() if the file cannot be opened.
 *
 * path              path of the input file
 * ascii_fq_offset   forwarded unchanged to seq_parse_se_sf()
 * r1                read buffer forwarded to seq_parse_se_sf()
 * read_func         callback forwarded to seq_parse_se_sf()
 * reader_ptr        opaque pointer forwarded to seq_parse_se_sf()
 */
void seq_parse_se(const char *path, uint8_t ascii_fq_offset, read_t *r1,
                  void (*read_func)(read_t *_r1, read_t *_r2,
                                    uint8_t _qoffset1, uint8_t _qoffset2,
                                    void *_ptr),
                  void *reader_ptr)
{
  seq_file_t *handle = seq_open(path);

  if(handle == NULL)
    die("Cannot open: %s", path);

  seq_parse_se_sf(handle, ascii_fq_offset, r1, read_func, reader_ptr);
  seq_close(handle);
}
/**
 * Smoke test for the sequence and stack abstract types.
 *
 * Writes one character to "sequence.test", reads it back, then pushes it
 * on a fresh stack and pops it again, logging the stack's emptiness
 * before the push, after the push and after the pop.
 *
 * @return void
 */
static void test(void)
{
    stack_ptr stk = NULL;
    seq_ptr sequence = NULL;
    char ch = 'b';

    /* Sequence round trip: write 'o', then read the first character back */
    seq_prepare(&sequence, "sequence.test");
    seq_write(&sequence, 'o');
    seq_init(&sequence);
    log_info("main", "Character written in sequence: %c",
             ch = seq_read_first(&sequence));
    seq_close(&sequence);

    /* Stack round trip with the character read above */
    stack_create(&stk);
    log_info("main", "Stack is: %s",
             stack_empty(&stk) ? "Empty" : "Not empty");

    stack_push(&stk, ch);
    log_info("main", "Stack is: %s",
             stack_empty(&stk) ? "Empty" : "Not empty");

    log_info("main", "First character in stack: %c", stack_pop(&stk));
    log_info("main", "Stack is: %s",
             stack_empty(&stk) ? "Empty" : "Not empty");
}
/*
 * Read the next entry from a FileList, moving on to the next file (and
 * wrapping round to the first) whenever the current file yields nothing.
 *
 * Once the list has wrapped, `filesready` is cleared and each file is
 * closed and re-opened from its stored path before being read again.
 *
 * Returns a pointer to the list's own read buffer (flist->read).
 * Calls die() if more than num_files consecutive file changes produce no
 * read, if a stored path is longer than PATH_MAX, or if a file cannot be
 * re-opened.
 */
read_t* filelist_read(FileList *flist)
{
  read_t *r = &flist->read;
  size_t i; // i is number of file changes

  for(i = 0;
      seq_read(flist->files[flist->curr], r) <= 0 && i <= flist->num_files;
      i++)
  {
    flist->curr++;
    if(flist->curr == flist->num_files) {
      flist->curr = flist->filesready = 0;
    }

    if(!flist->filesready) {
      // Copy the path first: it belongs to the handle about to be closed
      char path[PATH_MAX+1];
      const char *old_path = flist->files[flist->curr]->path;

      // Explicit check rather than assert(), which vanishes under NDEBUG
      if(strlen(old_path) > PATH_MAX) die("Path too long: %s", old_path);
      strcpy(path, old_path);

      seq_close(flist->files[flist->curr]);
      flist->files[flist->curr] = seq_open(path);

      // A failed re-open would be dereferenced by the next seq_read()
      if(flist->files[flist->curr] == NULL) die("Cannot re-open: %s", path);
    }
  }

  if(i > flist->num_files) die("All seq files empty");

  return r;
}
/**
 * Solves the exercise
 *
 * Exercise 10 from the 4th workbook
 *
 * Walks the sequence one character at a time until the sequence ends or
 * the stack is full. A non-digit character is pushed on the stack; a digit
 * d pops up to d elements (stopping early if the stack empties). The
 * sequence is closed before returning.
 *
 * @param seq Sequence pointer
 * @param stack Stack pointer
 * @return void
 */
static void solve_exercise(seq_ptr *seq, stack_ptr *stack)
{
    char c = '\0';
    int aux = 0;

    if( ! seq_end(seq))
        c = seq_read_first(seq);

    while( ! seq_end(seq) && ! stack_full(stack)) {
        /* <ctype.h> functions require a value representable as unsigned char */
        if( ! isdigit((unsigned char) c)) {
            stack_push(stack, c);
        } else {
            /* c is a lone char, not a NUL-terminated string, so atoi(&c)
               would read past it; convert the digit directly instead. */
            aux = c - '0';
            while(aux > 0 && ! stack_empty(stack)) {
                stack_pop(stack);
                aux--;
            }
        }

        c = seq_read_next(seq);
    }

    seq_close(seq);
}
// Load all reads from files into a read buffer and close the seq_files // Returns the number of reads loaded size_t seq_load_all_reads(seq_file_t **seq_files, size_t num_files, ReadBuffer *rbuf) { status("Loading sequences..."); size_t i, nreads = rbuf->len; read_t r; seq_read_alloc(&r); for(i = 0; i < num_files; i++) { status(" file: %s", seq_files[i]->path); while(seq_read_primary(seq_files[i], &r) > 0) { read_buf_push(rbuf, &r, 1); // copy read seq_read_alloc(&r); // allocate new read } seq_close(seq_files[i]); } seq_read_dealloc(&r); return rbuf->len - nreads; }
// If seq2 is NULL, read pair of entries from first file // Otherwise read an entry from each void align_from_file(const char *path1, const char *path2, void (align)(read_t *r1, read_t *r2), bool use_zlib) { seq_file_t *sf1, *sf2; if((sf1 = open_seq_file(path1, use_zlib)) == NULL) { fprintf(stderr, "Alignment Error: couldn't open file %s\n", path1); fflush(stderr); return; } if(path2 == NULL) { sf2 = sf1; } else if((sf2 = open_seq_file(path2, use_zlib)) == NULL) { fprintf(stderr, "Alignment Error: couldn't open file %s\n", path1); fflush(stderr); return; } // fprintf(stderr, "File buffer %zu zlib: %i\n", sf1->in.size, seq_use_gzip(sf1)); read_t read1, read2; seq_read_alloc(&read1); seq_read_alloc(&read2); // Loop while we can read a sequence from the first file unsigned long alignments; for(alignments = 0; seq_read(sf1, &read1) > 0; alignments++) { if(seq_read(sf2, &read2) <= 0) { fprintf(stderr, "Alignment Error: Odd number of sequences - " "I read in pairs!\n"); fflush(stderr); break; } (align)(&read1, &read2); } // warn if no bases read if(alignments == 0) { fprintf(stderr, "Alignment Warning: empty input\n"); fflush(stderr); } // Close files seq_close(sf1); if(path2 != NULL) seq_close(sf2); // Free memory seq_read_dealloc(&read1); seq_read_dealloc(&read2); }
int ctx_contigs(int argc, char **argv) { size_t nthreads = 0; struct MemArgs memargs = MEM_ARGS_INIT; const char *out_path = NULL; size_t i, contig_limit = 0, colour = 0; bool cmd_reseed = false, cmd_no_reseed = false; // -r, -R const char *conf_table_path = NULL; // save confidence table to here bool use_missing_info_check = true, seed_with_unused_paths = false; double min_step_confid = -1.0, min_cumul_confid = -1.0; // < 0 => no min // Read length and expected depth for calculating confidences size_t genome_size = 0; seq_file_t *tmp_seed_file = NULL; SeqFilePtrBuffer seed_buf; seq_file_ptr_buf_alloc(&seed_buf, 16); GPathReader tmp_gpfile; GPathFileBuffer gpfiles; gpfile_buf_alloc(&gpfiles, 8); // Arg parsing char cmd[100], shortopts[300]; cmd_long_opts_to_short(longopts, shortopts, sizeof(shortopts)); int c; // silence error messages from getopt_long // opterr = 0; while((c = getopt_long_only(argc, argv, shortopts, longopts, NULL)) != -1) { cmd_get_longopt_str(longopts, c, cmd, sizeof(cmd)); switch(c) { case 0: /* flag set */ break; case 'h': cmd_print_usage(NULL); break; case 'f': cmd_check(!futil_get_force(), cmd); futil_set_force(true); break; case 'o': cmd_check(!out_path,cmd); out_path = optarg; break; case 't': cmd_check(!nthreads,cmd); nthreads = cmd_uint32_nonzero(cmd, optarg); break; case 'm': cmd_mem_args_set_memory(&memargs, optarg); break; case 'n': cmd_mem_args_set_nkmers(&memargs, optarg); break; case 'p': memset(&tmp_gpfile, 0, sizeof(GPathReader)); gpath_reader_open(&tmp_gpfile, optarg); gpfile_buf_push(&gpfiles, &tmp_gpfile, 1); break; case '1': case 's': // --seed <in.fa> if((tmp_seed_file = seq_open(optarg)) == NULL) die("Cannot read --seed file: %s", optarg); seq_file_ptr_buf_add(&seed_buf, tmp_seed_file); break; case 'r': cmd_check(!cmd_reseed,cmd); cmd_reseed = true; break; case 'R': cmd_check(!cmd_no_reseed,cmd); cmd_no_reseed = true; break; case 'N': cmd_check(!contig_limit,cmd); contig_limit = cmd_uint32_nonzero(cmd, optarg); break; case 
'c': cmd_check(!colour,cmd); colour = cmd_uint32(cmd, optarg); break; case 'G': cmd_check(!genome_size,cmd); genome_size = cmd_bases(cmd, optarg); break; case 'S': cmd_check(!conf_table_path,cmd); conf_table_path = optarg; break; case 'M': cmd_check(use_missing_info_check,cmd); use_missing_info_check = false; break; case 'P': cmd_check(!seed_with_unused_paths,cmd); seed_with_unused_paths = true; break; case 'C': cmd_check(min_cumul_confid < 0,cmd); min_cumul_confid = cmd_udouble(cmd,optarg); if(min_cumul_confid > 1) die("%s must be 0 <= x <= 1", cmd); break; case 'T': cmd_check(min_step_confid < 0,cmd); min_step_confid = cmd_udouble(cmd,optarg); if(min_step_confid > 1) die("%s must be 0 <= x <= 1", cmd); break; case ':': /* BADARG */ case '?': /* BADCH getopt_long has already printed error */ die("`"CMD" contigs -h` for help. Bad option: %s", argv[optind-1]); default: abort(); } } if(cmd_no_reseed && cmd_reseed) cmd_print_usage("Cannot specify both -r and -R"); if(contig_limit && seed_with_unused_paths) cmd_print_usage("Cannot combine --ncontigs with --use-seed-paths"); bool sample_with_replacement = cmd_reseed; // Defaults if(nthreads == 0) nthreads = DEFAULT_NTHREADS; if(!seed_buf.len && !contig_limit && sample_with_replacement) { cmd_print_usage("Please specify one or more of: " "--no-reseed | --ncontigs | --seed <in.fa>"); } if(optind >= argc) cmd_print_usage("Require input graph files (.ctx)"); // // Open graph files // const size_t num_gfiles = argc - optind; char **graph_paths = argv + optind; ctx_assert(num_gfiles > 0); GraphFileReader *gfiles = ctx_calloc(num_gfiles, sizeof(GraphFileReader)); size_t ncols, ctx_max_kmers = 0, ctx_sum_kmers = 0; graph_files_open(graph_paths, gfiles, num_gfiles, &ctx_max_kmers, &ctx_sum_kmers); // char *ctx_path = argv[optind]; // // Open Graph file // // GraphFileReader gfile; // memset(&gfile, 0, sizeof(GraphFileReader)); // graph_file_open(&gfile, ctx_path); // Update colours in graph file - sample in 0, all others in 1 // 
never need more than two colours ncols = gpath_load_sample_pop(gfiles, num_gfiles, gpfiles.b, gpfiles.len, colour); // Check for compatibility between graph files and path files // pop_colour is colour 1 graphs_gpaths_compatible(gfiles, num_gfiles, gpfiles.b, gpfiles.len, 1); if(!genome_size) { char nk_str[50]; if(ctx_max_kmers <= 0) die("Please pass --genome <G> if streaming"); genome_size = ctx_max_kmers; ulong_to_str(genome_size, nk_str); status("Taking number of kmers as genome size: %s", nk_str); } // // Decide on memory // size_t bits_per_kmer, kmers_in_hash, graph_mem, path_mem, total_mem; // 1 bit needed per kmer if we need to keep track of kmer usage bits_per_kmer = sizeof(BinaryKmer)*8 + sizeof(Edges)*8 + sizeof(GPath*)*8 + ncols + !sample_with_replacement; kmers_in_hash = cmd_get_kmers_in_hash(memargs.mem_to_use, memargs.mem_to_use_set, memargs.num_kmers, memargs.num_kmers_set, bits_per_kmer, ctx_max_kmers, ctx_sum_kmers, false, &graph_mem); // Paths memory size_t rem_mem = memargs.mem_to_use - MIN2(memargs.mem_to_use, graph_mem); path_mem = gpath_reader_mem_req(gpfiles.b, gpfiles.len, ncols, rem_mem, false); // Shift path store memory from graphs->paths graph_mem -= sizeof(GPath*)*kmers_in_hash; path_mem += sizeof(GPath*)*kmers_in_hash; cmd_print_mem(path_mem, "paths"); // Total memory total_mem = graph_mem + path_mem; cmd_check_mem_limit(memargs.mem_to_use, total_mem); // Load contig hist distribution from ctp files ZeroSizeBuffer contig_hist; memset(&contig_hist, 0, sizeof(contig_hist)); for(i = 0; i < gpfiles.len; i++) { gpath_reader_load_contig_hist(gpfiles.b[i].json, gpfiles.b[i].fltr.path.b, file_filter_fromcol(&gpfiles.b[i].fltr, 0), &contig_hist); } // Calculate confidences, only for one colour ContigConfidenceTable conf_table; conf_table_alloc(&conf_table, 1); conf_table_update_hist(&conf_table, 0, genome_size, contig_hist.b, contig_hist.len); if(conf_table_path != NULL) { conf_table_save(&conf_table, conf_table_path); } 
zsize_buf_dealloc(&contig_hist); // // Output file if printing // FILE *fout = out_path ? futil_fopen_create(out_path, "w") : NULL; // Allocate dBGraph db_graph; db_graph_alloc(&db_graph, gfiles[0].hdr.kmer_size, ncols, 1, kmers_in_hash, DBG_ALLOC_EDGES | DBG_ALLOC_NODE_IN_COL); // Paths gpath_reader_alloc_gpstore(gpfiles.b, gpfiles.len, path_mem, false, &db_graph); uint8_t *visited = NULL; if(!sample_with_replacement) visited = ctx_calloc(roundup_bits2bytes(db_graph.ht.capacity), 1); // Load graph LoadingStats stats = LOAD_STATS_INIT_MACRO; GraphLoadingPrefs gprefs = {.db_graph = &db_graph, .boolean_covgs = false, .must_exist_in_graph = false, .empty_colours = true}; for(i = 0; i < num_gfiles; i++) { graph_load(&gfiles[i], gprefs, &stats); graph_file_close(&gfiles[i]); gprefs.empty_colours = false; } ctx_free(gfiles); hash_table_print_stats(&db_graph.ht); // Load path files for(i = 0; i < gpfiles.len; i++) { gpath_reader_load(&gpfiles.b[i], GPATH_DIE_MISSING_KMERS, &db_graph); gpath_reader_close(&gpfiles.b[i]); } gpfile_buf_dealloc(&gpfiles); AssembleContigStats assem_stats; assemble_contigs_stats_init(&assem_stats); assemble_contigs(nthreads, seed_buf.b, seed_buf.len, contig_limit, visited, use_missing_info_check, seed_with_unused_paths, min_step_confid, min_cumul_confid, fout, out_path, &assem_stats, &conf_table, &db_graph, 0); // Sample always loaded into colour zero if(fout && fout != stdout) fclose(fout); assemble_contigs_stats_print(&assem_stats); assemble_contigs_stats_destroy(&assem_stats); conf_table_dealloc(&conf_table); for(i = 0; i < seed_buf.len; i++) seq_close(seed_buf.b[i]); seq_file_ptr_buf_dealloc(&seed_buf); ctx_free(visited); db_graph_dealloc(&db_graph); return EXIT_SUCCESS; }
int main(int argc, char **argv) { SEQ *sf; uchar *s; FILE *f; char chr_name[100], info[1000], dir; int i = 0, j = 0, k = 0, B = 0, E = 0; int max_len = 0; char *cur_seq; int seq_len = 0; bool is_correct_splicing = false; int num_genes = 0; int num_exons = 0; int num = 0; struct g_list *genes; struct exons_list *exons; bool no_branchpoint = false; if( argc == 5 ) { } else if( argc == 4 ) { if( strcmp( argv[3], "NO_BRANCHPOINT") == 0 ) { no_branchpoint = true; } else { fatalf("args: fasta gff (NO_BRANCHPOINT)"); } } else if (argc != 3) fatalf("args: fasta gff (NO_BRANCHPOINT)"); if((f = ckopen(argv[2], "r")) == NULL ) { fatalf("Cannot open file %s\n", argv[1]); } else { num_genes = count_genes_in_gff(f, &num_exons); if( num_genes > 0 ) { genes = (struct g_list *) ckalloc(num_genes * sizeof(struct g_list)); if( num_exons < num_genes ) num_exons = num_genes; exons = (struct exons_list *) ckalloc(num_exons * sizeof(struct exons_list)); initialize_genes(genes, num_genes); initialize_exons(exons, num_exons); } } fseek(f, 0, SEEK_SET); branchpoints = (char **) ckalloc(sizeof(char *) * NUM_BP_SEQ); for( i = 0; i < NUM_BP_SEQ; i++ ) { branchpoints[i] = (char *) ckalloc(sizeof(char) * 8); } strcpy(branchpoints[0], "AACTAAC"); strcpy(branchpoints[1], "AATTAAC"); strcpy(branchpoints[2], "CACTAAC"); strcpy(branchpoints[3], "GACTAAC"); strcpy(branchpoints[4], "TACTAAC"); strcpy(branchpoints[5], "TACTAAT"); strcpy(branchpoints[6], "TATTAAC"); strcpy(branchpoints[7], "TGCTAAC"); strcpy(branchpoints[8], "GATTAAC"); num = input_genes_in_gff(f, genes, exons); if( num != num_genes ) { fatalf("gene counter error in %s\n", argv[1]); } if( num_genes > 0 ) { quick_sort_inc_genes(genes, 0, num_genes-1, POS_BASE); } i = 0; while( i < num_genes ) { j = 0; while( ((i+j) < num_genes) && (genes[i].txStart == genes[i+j].txStart )) j++; quick_sort_dec_genes(genes, i, i+j-1, LEN_BASE); i = i+j; } fclose(f); compl['a'] = compl['A'] = 'T'; compl['c'] = compl['C'] = 'G'; compl['g'] = compl['G'] = 'C'; 
compl['t'] = compl['T'] = 'A'; sf = seq_get(argv[1]); s = SEQ_CHARS(sf) - 1; seq_len = SEQ_LEN(sf); for( i = 0; i < num_genes; i++ ) { B = genes[i].txStart; E = genes[i].txEnd; if( E > seq_len ) { fatalf("gene boundary [%d,%d] over the sequence length %d\n", B, E, seq_len); } if( (E - B + 1) > max_len ) { max_len = E - B + 1; } } cur_seq = (char *) ckalloc(sizeof(char) * (max_len+1)); for( i = 0; i < num_genes; i++ ) { if( genes[i].exonCount >= 2 ) { strcpy(chr_name, genes[i].sname); B = genes[i].txStart; E = genes[i].txEnd; dir = genes[i].strand; strcpy(info, genes[i].gname); k = 0; if( dir == '+' ) { for (j = B; j <= E; j++) { cur_seq[k] = s[j]; k++; } cur_seq[k] = '\0'; } else { k = 0; for (j = E; j >= B; j--) { cur_seq[k] = compl[s[j]]; k++; } cur_seq[k] = '\0'; } is_correct_splicing = true; is_correct_splicing = check_introns(genes, i, exons, cur_seq, k); if( is_correct_splicing == false ) { if( no_branchpoint == false ) { genes[i].type = REDUN; } } else { if( no_branchpoint == true ) { genes[i].type = REDUN; } } } } num_genes = rm_redun_genes(genes, 0, num_genes-1); write_in_gff(genes, num_genes, exons, num_exons); free(cur_seq); for( i = 0; i < NUM_BP_SEQ; i++ ) free(branchpoints[i]); free(branchpoints); seq_close(sf); return EXIT_SUCCESS; }
int main(int argc, char **argv) { if(argc < 3) print_usage(usage, NULL); // Sample reads from ref char *refpath = NULL; // int optt = 0, tlen = 800; double tlen_stddev = 0.1; int insert = 250, rlen = 250, single_ended = 0; double depth = 1.0, insert_stddev_prop = 0.2; // stddev as proportion of insert int optr = 0, opti = 0, optv = 0, optl = 0, optd = 0; // keeps track of values uint64_t seed = generate_seed(); // default RNG seed char *in0path = NULL, *in1path = NULL; char *profile_paths[argc]; size_t num_profile_paths = 0, i, total_seq = 0; float err_rate = -1; int c; while((c = getopt(argc, argv, "p:r:i:v:l:d:s1:2:e:g:")) >= 0) { switch (c) { case 'p': profile_paths[num_profile_paths++] = optarg; break; case 'r': refpath = optarg; optr++; break; // case 't': tlen = atoi(optarg); optt++; break; // case 'v': tlen_stddev = atof(optarg); optv++; break; case 'i': insert = atoi(optarg); opti++; break; case 'v': insert_stddev_prop = atof(optarg); optv++; break; case 'l': rlen = atoi(optarg); optl++; break; case 'd': depth = atof(optarg); optd++; break; case 's': single_ended = 1; break; case '1': in0path = optarg; break; case '2': in1path = optarg; break; case 'e': err_rate = atof(optarg); break; case 'g': seed = atoi(optarg); break; default: die("Unknown option: %c", c); } } // Set up seed_random(seed); init_qual_prob(); char *outbase = NULL; if(optind == argc) {}//print_usage(usage, "Missing <out_base>"); else if(optind + 1 == argc) outbase = argv[optind]; else if(optind + 1 < argc) print_usage(usage, "Too many args after %s", outbase); if(depth <= 0) print_usage(usage, "Depth [-d] cannot be <= 0"); if(insert_stddev_prop < 0) print_usage(usage, "Insert length standard deviation [-v] cannot be < 0"); if((opti > 0 || optv > 0 || optl > 0 || optd > 0) && refpath == NULL) print_usage(usage, "Missing -r <in.fa>"); if(optr > 1 || opti > 1 || optv > 1 || optl > 1 || optd > 1) print_usage(usage, "Duplicate args"); if(in0path == NULL && in1path != NULL) print_usage(usage, "-2 
<in> requires -1 <in>"); if(in0path != NULL && in1path == NULL) { if(refpath == NULL) single_ended = 1; else if(!single_ended) print_usage(usage, "Missing -2 for paired-end output"); } if(in0path != NULL && num_profile_paths == 0) print_usage(usage, "Need at least one -p <profile.fq.gz> to use -1 .. -2 .."); if(num_profile_paths == 0 && refpath == NULL) print_usage(usage, "Need one of -p or -r"); if(num_profile_paths == 0 && outbase == NULL) print_usage(usage, "More options required"); if(num_profile_paths > 0 && err_rate >= 0) print_usage(usage, "Cannot use both -p and -E"); // Profile reads FileList fliststore, *flist = NULL; if(num_profile_paths > 0) { flist = &fliststore; filelist_alloc(flist, profile_paths, num_profile_paths); } if(outbase == NULL) { // Summarise error profile in input filelist_mean_err(flist); } else { size_t outlen = strlen(outbase), extlen = strlen(".1.fa.gz"); char out0path[outlen+extlen+1], out1path[outlen+extlen+1]; memcpy(out0path, outbase, outlen); memcpy(out1path, outbase, outlen); if(single_ended) strcpy(out0path+outlen, ".fa.gz"); else { strcpy(out0path+outlen, ".1.fa.gz"); strcpy(out1path+outlen, ".2.fa.gz"); } gzFile gzout0 = NULL, gzout1 = NULL; seq_file_t *sf0 = NULL, *sf1 = NULL, *reffile = NULL; if(in0path != NULL && (sf0 = seq_open(in0path)) == NULL) die("Cannot read: %s", in0path); if(in1path != NULL && (sf1 = seq_open(in1path)) == NULL) die("Cannot read: %s", in1path); if(refpath != NULL) { if((reffile = seq_open(refpath)) == NULL) die("Cannot read: %s", refpath); if((gzout0 = gzopen(out0path, "w")) == NULL) die("Cannot open: %s", out0path); if(!single_ended && (gzout1 = gzopen(out1path, "w")) == NULL) die("Cannot open: %s", out1path); } if(sf0 != NULL) { printf("Adding error to input reads...\n"); total_seq += mutate_reads(sf0, gzout0, flist, err_rate); seq_close(sf0); } if(sf1 != NULL) { total_seq += mutate_reads(sf1, single_ended ? 
gzout0 : gzout1, flist, err_rate); seq_close(sf1); } if(refpath != NULL) { printf("Sampling from %s\n", refpath); printf(" sequencing depth: %.2f\n", depth); printf(" read length: %i\n", rlen); printf(" read pairs: %s\n", single_ended ? "no" : "yes"); if(!single_ended) { printf(" insert length: %i\n", insert); printf(" insert stddev: %.2f * insert = %.2f\n", insert_stddev_prop, insert_stddev_prop*insert); } if(num_profile_paths > 0) { printf(" seq error files: %s", flist->files[0]->path); for(i = 1; i < num_profile_paths; i++) printf(",%s", flist->files[i]->path); printf("\n"); } else if(err_rate >= 0) { printf(" seq error rate: %.2f%%\n", err_rate * 100.0); } else { printf(" sequencing errors: no\n"); } total_seq += sim_reads(reffile, gzout0, gzout1, flist, err_rate, insert, insert_stddev_prop*insert, rlen, depth); seq_close(reffile); } if(gzout0 != NULL && gzout1 != NULL) printf("Wrote %zu bases to: %s and %s\n", total_seq, out0path, out1path); else if(gzout0 != NULL) printf("Wrote %zu bases to: %s\n", total_seq, out0path); if(gzout0 != NULL) gzclose(gzout0); if(gzout1 != NULL) gzclose(gzout1); } if(flist != NULL) { // Print error distribution size_t err_total = 0; for(i = 0; i < flist->errors_len; i++) err_total += flist->errors[i]; printf("Errors: %zu / %zu (%.2f%%)\n", err_total, total_seq, (100.0*err_total) / total_seq); for(i = 0; i < flist->errors_len; i++) printf(" %zu", flist->errors[i]); printf("\n"); filelist_dealloc(flist); } return EXIT_SUCCESS; }