/**
 * DESCRIPTION: same as seq_filter_eq, except that the
 *   criterion is "greater than x" instead of "equal to x".
 *
 *   Every element e of seq for which cmp(e, x) > 0 is handed
 *   to seq_add_back on a newly created sequence, which is
 *   returned. All other elements are handed to seq_add_back
 *   on a second new sequence, whose nodes then replace the
 *   nodes of seq.
 *
 * RUNTIME REQ: O(n)
 */
extern Seq seq_filter_gt(Seq seq, int (*cmp)(ETYPE a, ETYPE b), ETYPE x){
    Seq matched = seq_create();
    Seq kept = seq_create();
    Node *cur;

    /* one pass: route every element into one of the two new sequences */
    for( cur = seq->head; cur != NULL; cur = cur->fw){
        if( cmp( cur->a, x) > 0)
            seq_add_back( matched, cur->a);
        else
            seq_add_back( kept, cur->a);
    }

    /* release the original nodes of seq (the Seq "body" itself stays) */
    cur = seq->head;
    while( cur != NULL){
        Node *next = cur->fw;   /* remember the successor before freeing */
        free( cur);
        cur = next;
    }

    /* seq takes over the nodes collected in kept */
    seq->head = kept->head;
    seq->tail = kept->tail;
    seq->numN = kept->numN;

    /* only the "body" of kept is released; its nodes now belong to seq */
    free( kept);

    return matched;
}
/*
 * Run the compiled pattern `re` against every entry read from `fin`.
 *
 * The subject is the entry's id when id_flag is set, otherwise its sequence.
 * An entry is selected when the pattern matches (rc >= 0) or, with
 * invert_flag set, when pcre_exec reports PCRE_ERROR_NOMATCH. Selected
 * entries are either counted (count_flag) or passed to
 * fastq_print_maybe_trim on `fout`; all other entries are printed to
 * `mismatch_file` when one is given. With count_flag set, the final count
 * is written to `fout` once the input is exhausted.
 */
void fastq_grep(FILE* fin, FILE* fout, FILE* mismatch_file, pcre* re)
{
    int ovector[3];
    size_t n_selected = 0;

    fastq_t* reader = fastq_create(fin);
    seq_t* entry = seq_create();

    while (fastq_read(reader, entry)) {
        int rc = pcre_exec(re,                                    /* pattern */
                           NULL,                                  /* extra data */
                           id_flag ? entry->id1.s : entry->seq.s, /* subject */
                           id_flag ? entry->id1.n : entry->seq.n, /* subject length */
                           0,                                     /* subject offset */
                           0,                                     /* options */
                           ovector,                               /* output vector */
                           3);                                    /* output vector length */

        int selected = invert_flag ? (rc == PCRE_ERROR_NOMATCH) : (rc >= 0);

        if (!selected) {
            if (mismatch_file) {
                fastq_print(mismatch_file, entry);
            }
            continue;
        }

        if (count_flag) {
            n_selected++;
        } else {
            /* NOTE(review): on the inverted path there was no match, so the
             * offsets in ovector may not be meaningful -- confirm how
             * trimming is expected to behave together with invert_flag. */
            fastq_print_maybe_trim(fout, entry, ovector);
        }
    }

    seq_free(entry);
    fastq_free(reader);

    if (count_flag) {
        fprintf(fout, "%zu\n", n_selected);
    }
}
/*
 * Handle a fetch request against sequence hash `s`.
 *
 * The request `in` must carry "name" (string), "create" (bool),
 * "preincrement" and "postincrement" (int64). If any key is missing the
 * call fails with errno set to EPROTO.
 *
 * The named sequence is updated through seq_fetch_and_add(). If that fails
 * with ENOENT and "create" was requested, the sequence is created with
 * seq_create() and the same pre/post increments are applied by hand.
 *
 * On success a new JSON object holding "name", "value" and, when the
 * sequence was created by this call, "created": true is stored in *outp
 * and 0 is returned. On failure -1 is returned and *outp is not written.
 */
static int handle_seq_fetch (seqhash_t *s, JSON in, JSON *outp)
{
    const char *name;
    bool create = false;
    bool created = false;
    int64_t v, pre, post, *valp;

    if (!Jget_str (in, "name", &name)
        || !Jget_bool (in, "create", &create)
        || !Jget_int64 (in, "preincrement", &pre)
        || !Jget_int64 (in, "postincrement", &post)) {
        errno = EPROTO;
        return (-1);
    }

    if (seq_fetch_and_add (s, name, pre, post, &v) < 0) {
        if (!create || (errno != ENOENT))
            return (-1);

        /*  Create and initialize */
        valp = seq_create (s, name);
        if (valp == NULL) {
            /* Creation failed: report the error instead of dereferencing
             * a null pointer. Presumably seq_create() has set errno --
             * confirm against its implementation. */
            return (-1);
        }
        *valp += pre;
        v = *valp;
        *valp += post;
        created = true;
    }

    *outp = Jnew ();
    Jadd_str (*outp, "name", name);
    Jadd_int64 (*outp, "value", v);
    if (create && created)
        Jadd_bool (*outp, "created", true);
    return (0);
}
void* pique_thread(void* arg) { pique_ctx_t* ctx = arg; seq_t* seq = seq_create(); twobit_t* tb = twobit_alloc(); rng_t* rng = rng_alloc(1234); bool r; while (true) { pthread_mutex_lock(ctx->f_mutex); if (ctx->fmt == INPUT_FMT_FASTA) r = fasta_read(ctx->f, seq); else if (ctx->fmt == INPUT_FMT_FASTQ) r = fastq_read(ctx->f, seq); pthread_mutex_unlock(ctx->f_mutex); if (!r) break; /* TODO: remove sequences with Ns? */ twobit_copy_str_n(tb, seq->seq.s, seq->seq.n); dbg_add_twobit_seq(ctx->G, rng, tb); } rng_free(rng); seq_free(seq); return NULL; }
/*
 * Binding for seq_create(): checks the argument list with xllastarg(),
 * creates a new sequence, and returns it wrapped by cvseq().
 */
LVAL xlc_seq_create(void)
{
    xllastarg();

    seq_type created = seq_create();

    return cvseq(created);
}
/*
 * Count the entries in a fastq file by calling fastq_read on `fqf` until
 * it reports failure. The reader is not repositioned here, so a caller
 * that wants to read the entries again has to rewind it first.
 */
unsigned long count_entries(fastq_t* fqf)
{
    unsigned long total;
    seq_t* scratch = seq_create();

    for (total = 0; fastq_read(fqf, scratch); ++total) {
        /* nothing to do: the entry is only counted */
    }

    seq_free(scratch);
    return total;
}
/**
 * DESCRIPTION: returns a copy of the given sequence. A new
 *   sequence is created and the element stored in each node
 *   of seq, visited from head onward, is passed to
 *   seq_add_back on it.
 *   (Note: the elements themselves cannot be cloned in this
 *   framework; only their values are copied into the new
 *   sequence.)
 *
 * RUNTIME REQ: O(n)
 */
extern Seq seq_clone(Seq seq){
    Seq copy = seq_create();
    Node *cur;

    for( cur = seq->head; cur != NULL; cur = cur->fw)
        seq_add_back( copy, cur->a);    /* add this node's value to the copy */

    return copy;
}
/* Initializes clock 'c' to read from clock source 'id': zeroes the
 * structure, records 'id', initializes the mutex and the 'slow_path' flag,
 * and primes 'c->cache' with the current time from that source.
 *
 * Also makes sure the shared 'timewarp_seq' exists. */
static void
init_clock(struct clock *c, clockid_t id)
{
    memset(c, 0, sizeof *c);
    c->id = id;
    ovs_mutex_init(&c->mutex);
    atomic_init(&c->slow_path, false);
    xclock_gettime(c->id, &c->cache);

    /* This function runs once per clock, so creating the sequence
     * unconditionally would overwrite (and leak) the one set up by an
     * earlier call.  Create it only if nobody has done so yet.
     * NOTE(review): assumes 'timewarp_seq' is a pointer that starts out
     * null -- confirm against its definition. */
    if (!timewarp_seq) {
        timewarp_seq = seq_create();
    }
}
/* One-time setup of the time module: initializes coverage counters,
 * creates 'timewarp_seq', initializes the monotonic and wall clocks, and
 * records 'boot_time' from the monotonic clock's cached time.
 *
 * The monotonic clock uses CLOCK_MONOTONIC when clock_gettime() succeeds
 * for it and falls back to CLOCK_REALTIME otherwise. */
static void
do_init_time(void)
{
    struct timespec probe;
    int have_monotonic;

    coverage_init();

    timewarp_seq = seq_create();

    /* Probe whether CLOCK_MONOTONIC is usable; the value read is unused. */
    have_monotonic = (clock_gettime(CLOCK_MONOTONIC, &probe) == 0);

    init_clock(&monotonic_clock,
               have_monotonic ? CLOCK_MONOTONIC : CLOCK_REALTIME);
    init_clock(&wall_clock, CLOCK_REALTIME);

    boot_time = timespec_to_msec(&monotonic_clock.cache);
}
/* n-way merge sort to stdout */ void merge_sort(const seq_dumps_t* d, int (*cmp)(const void*, const void*)) { FILE** files = malloc_or_die(d->n * sizeof(FILE*)); size_t i; for (i = 0; i < d->n; ++i) { files[i] = fopen(d->fns[i], "rb"); if (files[i] == NULL) { fprintf(stderr, "Cannot open temporary file %s for reading.\n", d->fns[i]); exit(EXIT_FAILURE); } } fastq_t** fs = malloc_or_die(d->n * sizeof(fastq_t*)); seq_t** seqs = malloc_or_die(d->n * sizeof(seq_t*)); for (i = 0; i < d->n; ++i) { fs[i] = fastq_create(files[i]); seqs[i] = seq_create(); } /* A binary heap of indexes to fs. We use this to repeatedly pop the * smallest fastq entry. */ size_t* heap = malloc_or_die(d->n * sizeof(size_t)); /* heap size */ size_t m = 0; for (i = 0; i < d->n; ++i) { if (fastq_read(fs[i], seqs[i])) { heap_push(heap, d->n, &m, seqs, cmp, i); } } while (m > 0) { i = heap_pop(heap, &m, seqs, cmp); fastq_print(stdout, seqs[i]); if (fastq_read(fs[i], seqs[i])) { heap_push(heap, d->n, &m, seqs, cmp, i); } } for (i = 0; i < d->n; ++i) { seq_free(seqs[i]); fastq_free(fs[i]); fclose(files[i]); } free(files); free(fs); }
/**
 * DESCRIPTION: concatenates s1 and s2. The nodes of s1
 *   followed by the nodes of s2 are moved into a newly
 *   created sequence, which is returned.
 *
 * postconditions: s1 and s2 become empty sequences (their
 *   nodes now belong to the returned sequence).
 *
 * Notes: if the client passes the same sequence as
 *   both s1 and s2 (detected by both having the same head,
 *   which is also the case when both are empty), nothing is
 *   changed and NULL is returned.
 *
 * RUNTIME REQ: O(1)
 */
extern Seq seq_concat(Seq s1, Seq s2){
    if( s1->head == s2->head)
        return NULL;

    Seq nseq = seq_create();

    if( s1->head == NULL){
        /* only s2 has nodes: nothing to link */
        nseq->head = s2->head;
        nseq->tail = s2->tail;
    }
    else if( s2->head == NULL){
        /* only s1 has nodes: nothing to link */
        nseq->head = s1->head;
        nseq->tail = s1->tail;
    }
    else{
        /* splice the end of s1 onto the front of s2 */
        s1->tail->fw = s2->head;
        s2->head->bw = s1->tail;

        nseq->head = s1->head;
        nseq->tail = s2->tail;  /* the end of the result is the end of s2 */
    }
    nseq->numN = s1->numN + s2->numN;

    /* the inputs give up their nodes, as the postcondition requires */
    s1->head = NULL;
    s1->tail = NULL;
    s1->numN = 0;

    s2->head = NULL;
    s2->tail = NULL;
    s2->numN = 0;

    return nseq;
}
/*
 * Tally k-mers over every entry read from `fin`.
 *
 * For each start position of a length-k window in an entry's sequence,
 * packkmer() is asked to encode the window; whenever it reports success
 * the counter cs[kmer] is incremented. `cs` is indexed directly by the
 * packed k-mer value, so the caller must size it accordingly.
 */
void count_fastq_kmers(FILE* fin, uint32_t* cs)
{
    uint32_t packed;
    int pos;
    int n_windows;

    seq_t* entry = seq_create();
    fastq_t* reader = fastq_create(fin);

    while (fastq_read(reader, entry)) {
        /* number of length-k windows; not positive for short sequences */
        n_windows = (int)entry->seq.n - k + 1;

        for (pos = 0; pos < n_windows; pos++) {
            if (!packkmer(entry->seq.s + pos, &packed, k)) {
                continue;
            }
            cs[packed]++;
        }
    }

    seq_free(entry);
    fastq_free(reader);
}
/*
 * Print `seq` to `fout`, trimmed around a pattern match if requested.
 *
 * `ovector` holds the match start in ovector[0] and the match end in
 * ovector[1]. With neither trim flag set the entry is printed unchanged.
 * With trim_before_flag the printed range runs from the match to the end
 * of the sequence; otherwise (trim_after_flag) it runs from the start of
 * the sequence to the match. trim_match_flag decides whether the match
 * itself is cut off as well.
 */
void fastq_print_maybe_trim(FILE* fout, seq_t* seq, int* ovector)
{
    if (trim_before_flag || trim_after_flag) {
        seq_t* clipped = seq_create();

        int from = 0;
        int to = 0;

        int hit_begin = ovector[0];
        int hit_end = ovector[1];

        if (trim_before_flag) {
            /* keep the tail of the sequence */
            to = seq->seq.n;
            if (trim_match_flag) from = hit_end;
            else from = hit_begin;
        } else {
            /* trim_after_flag: keep the front of the sequence */
            from = 0;
            if (trim_match_flag) to = hit_begin;
            else to = hit_end;
        }

        seq_trim(seq, clipped, from, to);
        fastq_print(fout, clipped);
        seq_free(clipped);
    } else {
        /* no trimming requested */
        fastq_print(fout, seq);
    }
}
void fastq_sample(unsigned long rng_seed, const char* prefix, const char* cprefix, FILE* file1, FILE* file2, unsigned long k, double p) { /* * The basic idea is this: * * 1. Count the number of lines in the file, n. * * 2a. If sampling with replacement, generate k random integers in [0, n-1]. * * 2b. If sampling without replacement, generate a list of integers 0..(n-1), * shuffle with fisher-yates, then consider the first k. * * 3. Sort the integer list. * * 3. Read through the file again, when the number at the front of the integer * list matches the index of the fastq etry, print the entry, and pop the * number. */ unsigned long n, n2; fastq_t* f1 = fastq_create(file1); fastq_t* f2 = file2 == NULL ? NULL : fastq_create(file2); n = count_entries(f1); if (f2 != NULL) { n2 = count_entries(f2); if (n != n2) { fprintf(stderr, "Input files have differing numbers of entries (%lu != %lu).\n", n, n2); exit(1); } } fastq_rewind(f1); if (f2 != NULL) fastq_rewind(f2); if (p > 0.0) { k = (unsigned long) round(p * (double) n); if (!replacement_flag && k > n) k = n; } rng_t* rng = fastq_rng_alloc(); fastq_rng_seed(rng, rng_seed); unsigned long* xs; if (replacement_flag) xs = index_with_replacement(rng, n, k); else xs = index_without_replacement(rng, n); qsort(xs, k, sizeof(unsigned long), cmpul); /* open output */ FILE* fout1; FILE* fout2; char* output_name; size_t output_len; if (file2 == NULL) { output_len = strlen(prefix) + 7; output_name = malloc_or_die((output_len + 1) * sizeof(char)); snprintf(output_name, output_len, "%s.fastq", prefix); fout1 = open_without_clobber(output_name); if (fout1 == NULL) { fprintf(stderr, "Cannot open file %s for writing.\n", output_name); exit(1); } fout2 = NULL; free(output_name); } else { output_len = strlen(prefix) + 9; output_name = malloc_or_die((output_len + 1) * sizeof(char)); snprintf(output_name, output_len, "%s.1.fastq", prefix); fout1 = open_without_clobber(output_name); if (fout1 == NULL) { fprintf(stderr, "Cannot open file %s for 
writing.\n", output_name); exit(1); } snprintf(output_name, output_len, "%s.2.fastq", prefix); fout1 = open_without_clobber(output_name); if (fout1 == NULL) { fprintf(stderr, "Cannot open file %s for writing.\n", output_name); exit(1); } free(output_name); } /* open complement output */ FILE* cfout1 = NULL; FILE* cfout2 = NULL; if (cprefix != NULL && file2 == NULL) { output_len = strlen(cprefix) + 7; output_name = malloc_or_die((output_len + 1) * sizeof(char)); snprintf(output_name, output_len, "%s.fastq", cprefix); cfout1 = fopen(output_name, "wb"); if (cfout1 == NULL) { fprintf(stderr, "Cannot open file %s for writing.\n", output_name); exit(1); } cfout2 = NULL; free(output_name); } else if (cprefix != NULL) { output_len = strlen(cprefix) + 9; output_name = malloc_or_die((output_len + 1) * sizeof(char)); snprintf(output_name, output_len, "%s.1.fastq", cprefix); cfout1 = fopen(output_name, "wb"); if (cfout1 == NULL) { fprintf(stderr, "Cannot open file %s for writing.\n", output_name); exit(1); } snprintf(output_name, output_len, "%s.2.fastq", cprefix); cfout2 = fopen(output_name, "wb"); if (cfout1 == NULL) { fprintf(stderr, "Cannot open file %s for writing.\n", output_name); exit(1); } free(output_name); } unsigned long i = 0; // read number unsigned long j = 0; // index into xs int ret; seq_t* seq1 = seq_create(); seq_t* seq2 = seq_create(); while (j < k && fastq_read(f1, seq1)) { if (f2 != NULL) { ret = fastq_read(f2, seq2); if (ret == 0) { fputs("Input files have differing numbers of entries.\n", stderr); exit(1); } } if (xs[j] == i) { while (j < k && xs[j] == i) { fastq_print(fout1, seq1); if (f2 != NULL) fastq_print(fout2, seq2); ++j; } } else if (cfout1 != NULL) { fastq_print(cfout1, seq1); if (f2 != NULL) fastq_print(cfout2, seq2); } ++i; } seq_free(seq1); seq_free(seq2); fastq_free(f1); if (f2 != NULL) fastq_free(f2); fclose(fout1); if (fout2 != NULL) fclose(fout2); if (cfout1 != NULL) fclose(cfout1); if (cfout2 != NULL) fclose(cfout2); 
fastq_rng_free(rng); free(xs); }