Exemple #1
0
/**
* DESCRIPTION:  same idea as seq_filter_eq, but the
*   selection criterion is "greater than x": every
*   element e with cmp(e, x) > 0 is moved into a newly
*   created sequence, which is returned.  All other
*   elements remain in seq.  Both sequences are filled
*   with seq_add_back while walking seq front to back.
* RUNTIME REQ: O(n)
*/
extern Seq seq_filter_gt(Seq seq,
int (*cmp)(ETYPE a, ETYPE b),
ETYPE x){
    Seq matches = seq_create();
    Seq rest = seq_create();
    Node *cur;
    Node *next;

    // hand each element to exactly one of the two new sequences
    for( cur = seq->head; cur != NULL; cur = cur->fw){
        Seq dest = ( cmp( cur->a, x) > 0 ) ? matches : rest;
        seq_add_back( dest, cur->a);
    }

    // drop the old chain of Nodes; the Seq "body" itself is kept
    // so the caller's handle stays valid
    for( cur = seq->head; cur != NULL; cur = next){
        next = cur->fw; // remember the successor before freeing
        free( cur);
    }

    // seq takes over the Nodes collected in rest
    seq->head = rest->head;
    seq->tail = rest->tail;
    seq->numN = rest->numN;
    free( rest); // only the "body" of rest; its Nodes now belong to seq

    return matches;
}
Exemple #2
0
/*
 * Read fastq entries from `fin` and run the compiled pattern `re` against
 * each entry's sequence (or its first id field when id_flag is set).
 *
 * An entry is selected when the pattern matches, or, when invert_flag is
 * set, when pcre_exec reports PCRE_ERROR_NOMATCH. Selected entries are
 * counted (count_flag) or printed to `fout`; all other entries are printed
 * to `mismatch_file` when it is non-NULL. With count_flag set, the number of
 * selected entries is written to `fout` once the input is exhausted.
 */
void fastq_grep(FILE* fin, FILE* fout, FILE* mismatch_file, pcre* re)
{
    int rc;
    /* Zero-initialised so it never holds indeterminate values; pcre_exec
     * only fills it in when a match is found. */
    int ovector[3] = {0, 0, 0};
    size_t count = 0;

    fastq_t* fqf = fastq_create(fin);
    seq_t* seq = seq_create();

    while (fastq_read(fqf, seq)) {
        rc = pcre_exec(re,          /* pattern */
                       NULL,        /* extra data */
                       id_flag ? seq->id1.s : seq->seq.s,
                       id_flag ? seq->id1.n : seq->seq.n,
                       0,           /* subject offset */
                       0,           /* options */
                       ovector,     /* output vector */
                       3         ); /* output vector length */

        if ((invert_flag && rc == PCRE_ERROR_NOMATCH) || (!invert_flag && rc >= 0)) {
            if (count_flag) {
                count++;
            }
            else if (rc >= 0) {
                /* a real match: ovector holds the match offsets */
                fastq_print_maybe_trim(fout, seq, ovector);
            }
            else {
                /* selected because it did NOT match (invert mode): there
                 * are no match offsets to trim around, so print as is */
                fastq_print(fout, seq);
            }
        }
        else if (mismatch_file) {
            fastq_print(mismatch_file, seq);
        }
    }

    seq_free(seq);
    fastq_free(fqf);

    if (count_flag) fprintf(fout, "%zu\n", count);
}
Exemple #3
0
/*
 * Handle a "fetch" request on the sequence table `s`.
 *
 * The request `in` must carry "name" (string), "create" (bool),
 * "preincrement" and "postincrement" (int64); if any of them cannot be
 * read, the call fails with errno = EPROTO.
 *
 * The named sequence is updated through seq_fetch_and_add(). If that fails
 * with ENOENT and "create" is true, the sequence is created via
 * seq_create() and the same pre-increment / read / post-increment steps are
 * applied to the new value.
 *
 * On success a new JSON object holding "name", "value" and, when the
 * sequence was created by this call, "created": true is stored in *outp
 * and 0 is returned. On failure -1 is returned and *outp is not written.
 */
static int handle_seq_fetch (seqhash_t *s, JSON in, JSON *outp)
{
    const char *name;
    bool create = false;
    bool created = false;
    int64_t v, pre, post, *valp;

    if (!Jget_str (in, "name", &name)
        || !Jget_bool (in, "create", &create)
        || !Jget_int64 (in, "preincrement", &pre)
        || !Jget_int64 (in, "postincrement", &post)) {
        errno = EPROTO;
        return (-1);
    }
    if (seq_fetch_and_add (s, name, pre, post, &v) < 0) {
        if (!create || (errno != ENOENT))
            return (-1);
        /*  Create and initialize
         */
        valp = seq_create (s, name);
        /*  Do not dereference a failed creation.
         *  NOTE(review): presumably seq_create sets errno when it
         *  returns NULL -- confirm against its definition.
         */
        if (valp == NULL)
            return (-1);
        *valp += pre;
        v = *valp;
        *valp += post;
        created = true;
    }

    *outp = Jnew ();
    Jadd_str (*outp, "name", name);
    Jadd_int64 (*outp, "value", v);
    /*  'created' can only be true on the create path above */
    if (created)
        Jadd_bool (*outp, "created", true);
    return (0);
}
Exemple #4
0
/*
 * Worker thread entry point. `arg` is a pique_ctx_t*.
 *
 * Repeatedly reads one record from the shared input ctx->f (serialised by
 * ctx->f_mutex), copies its sequence into a twobit buffer and adds it to
 * ctx->G via dbg_add_twobit_seq(). The loop ends when a read reports no
 * more data, or immediately if ctx->fmt is not a recognised input format.
 *
 * Always returns NULL.
 */
void* pique_thread(void* arg)
{
    pique_ctx_t* ctx = arg;
    seq_t* seq = seq_create();
    twobit_t* tb = twobit_alloc();
    rng_t* rng = rng_alloc(1234);
    bool r;

    while (true) {
        pthread_mutex_lock(ctx->f_mutex);
        if (ctx->fmt == INPUT_FMT_FASTA)      r = fasta_read(ctx->f, seq);
        else if (ctx->fmt == INPUT_FMT_FASTQ) r = fastq_read(ctx->f, seq);
        else                                  r = false; /* unknown format: nothing to read */
        pthread_mutex_unlock(ctx->f_mutex);
        if (!r) break;

        /* TODO: remove sequences with Ns? */

        twobit_copy_str_n(tb, seq->seq.s, seq->seq.n);
        dbg_add_twobit_seq(ctx->G, rng, tb);
    }

    /* NOTE(review): `tb` from twobit_alloc() is never released here;
     * presumably a matching free routine exists -- confirm and call it. */
    rng_free(rng);
    seq_free(seq);
    return NULL;
}
Exemple #5
0
/* Interpreter-callable wrapper around seq_create(): takes no arguments
 * and returns the freshly created sequence converted with cvseq(). */
LVAL xlc_seq_create(void)
{
    seq_type created;

    /* NOTE(review): presumably verifies that no further arguments were
     * supplied -- confirm against the interpreter's definition. */
    xllastarg();

    created = seq_create();
    return cvseq(created);
}
Exemple #6
0
/* Count the entries that can still be read from `fqf`.
 *
 * Reads with fastq_read() until it reports failure, so the reader is left
 * at the point where reading stopped; the caller must rewind it before
 * reading the entries again. */
unsigned long count_entries(fastq_t* fqf)
{
    unsigned long total = 0;
    seq_t* scratch = seq_create();  /* read target; contents are discarded */

    for (;;) {
        if (!fastq_read(fqf, scratch)) {
            break;
        }
        total++;
    }

    seq_free(scratch);
    return total;
}
Exemple #7
0
/**
* DESCRIPTION:  builds and returns a new sequence holding
*   the same element values as seq, in the same order.
*   Every Node is new, but the element values themselves
*   are copied by plain assignment (the framework offers
*   no way to clone an element).
* RUNTIME REQ: O(n)
*/
extern Seq seq_clone(Seq seq){
    Seq copy = seq_create();
    Node *cur;

    // walk the original front to back, appending each value to the copy
    for( cur = seq->head; cur != NULL; cur = cur->fw){
        seq_add_back( copy, cur->a);
    }
    return copy;
}
Exemple #8
0
/* Initializes clock 'c' to read from the system clock 'id': zeroes the
 * structure, records 'id', sets up the mutex and the 'slow_path' flag, and
 * primes 'c->cache' with the current time.
 *
 * Also makes sure the global 'timewarp_seq' exists.  This function is
 * called once per clock, so the object is created only if it has not been
 * created yet; creating it unconditionally would replace (and leak) the
 * one made by an earlier call or by the caller. */
static void
init_clock(struct clock *c, clockid_t id)
{
    memset(c, 0, sizeof *c);
    c->id = id;
    ovs_mutex_init(&c->mutex);
    atomic_init(&c->slow_path, false);
    xclock_gettime(c->id, &c->cache);
    if (!timewarp_seq) {
        timewarp_seq = seq_create();
    }
}
Exemple #9
0
static void
do_init_time(void)
{
    struct timespec ts;

    coverage_init();

    timewarp_seq = seq_create();
    init_clock(&monotonic_clock, (!clock_gettime(CLOCK_MONOTONIC, &ts)
                                  ? CLOCK_MONOTONIC
                                  : CLOCK_REALTIME));
    init_clock(&wall_clock, CLOCK_REALTIME);
    boot_time = timespec_to_msec(&monotonic_clock.cache);
}
Exemple #10
0
/* n-way merge sort to stdout.
 *
 * Opens every temporary file listed in `d` (d->n names in d->fns), reads
 * them as fastq streams, and repeatedly prints the smallest pending entry
 * according to `cmp` until all inputs are exhausted. Exits the program if
 * a temporary file cannot be opened. All readers, files and working arrays
 * are released before returning. */
void merge_sort(const seq_dumps_t* d, int (*cmp)(const void*, const void*))
{
    FILE** files = malloc_or_die(d->n * sizeof(FILE*));
    size_t i;
    for (i = 0; i < d->n; ++i) {
        files[i] = fopen(d->fns[i], "rb");
        if (files[i] == NULL) {
            fprintf(stderr, "Cannot open temporary file %s for reading.\n",
                    d->fns[i]);
            exit(EXIT_FAILURE);
        }
    }

    /* one reader and one "current entry" buffer per input file */
    fastq_t** fs = malloc_or_die(d->n * sizeof(fastq_t*));
    seq_t** seqs = malloc_or_die(d->n * sizeof(seq_t*));
    for (i = 0; i < d->n; ++i) {
        fs[i] = fastq_create(files[i]);
        seqs[i] = seq_create();
    }

    /* A binary heap of indexes to fs. We use this to repeatedly pop the
     * smallest fastq entry. */
    size_t* heap = malloc_or_die(d->n * sizeof(size_t));

    /* heap size */
    size_t m = 0;

    /* prime the heap with the first entry of every non-empty input */
    for (i = 0; i < d->n; ++i) {
        if (fastq_read(fs[i], seqs[i])) {
            heap_push(heap, d->n, &m, seqs, cmp, i);
        }
    }

    /* emit the smallest entry, then refill from the input it came from */
    while (m > 0) {
        i = heap_pop(heap, &m, seqs, cmp);
        fastq_print(stdout, seqs[i]);
        if (fastq_read(fs[i], seqs[i])) {
            heap_push(heap, d->n, &m, seqs, cmp, i);
        }
    }

    for (i = 0; i < d->n; ++i) {
        seq_free(seqs[i]);
        fastq_free(fs[i]);
        fclose(files[i]);
    }

    /* release the working arrays themselves (seqs and heap used to leak) */
    free(heap);
    free(seqs);
    free(fs);
    free(files);
}
Exemple #11
0
/**
* DESCRIPTION:  returns a new sequence holding the elements
*    of s1 followed by the elements of s2.  The Nodes are
*    moved, not copied.
*
* postconditions:  s1 and s2 become empty sequences (their
*    "bodies" stay valid and are still owned by the caller).
*
* Notes:  if the client passes the same sequence as
*   both s1 and s2, nothing is changed and NULL is returned.
*   Either input may be empty.
*
* RUNTIME REQ: O(1)
*/
extern Seq seq_concat(Seq s1, Seq s2){
    // same sequence twice (or two handles sharing one chain):
    // concatenation would create a cycle
    if( s1 == s2 || (s1->head != NULL && s1->head == s2->head))
        return NULL;

    Seq nseq = seq_create();

    if( s1->head == NULL){
        // nothing in front: result is just s2's chain
        nseq->head = s2->head;
        nseq->tail = s2->tail;
    }
    else if( s2->head == NULL){
        // nothing behind: result is just s1's chain
        nseq->head = s1->head;
        nseq->tail = s1->tail;
    }
    else{
        // splice s2's chain after s1's last Node
        s1->tail->fw = s2->head;
        s2->head->bw = s1->tail;
        nseq->head = s1->head;
        nseq->tail = s2->tail; // last Node of the result is s2's last Node
    }
    nseq->numN = s1->numN + s2->numN;

    // the Nodes now belong to nseq only; leave the inputs empty so
    // no Node is reachable (and later freed) through two sequences
    s1->head = NULL;
    s1->tail = NULL;
    s1->numN = 0;
    s2->head = NULL;
    s2->tail = NULL;
    s2->numN = 0;

    return nseq;
}
Exemple #12
0
/* Tally k-mers over every entry of the fastq stream `fin`.
 *
 * For each start position that leaves room for a full window of length k
 * (the file-level k-mer size), packkmer() is asked to encode the window;
 * when it reports success, the counter cs[kmer] is incremented. `cs` must
 * be large enough to be indexed by any value packkmer() can produce. */
void count_fastq_kmers(FILE* fin, uint32_t* cs)
{
    seq_t* seq = seq_create();
    fastq_t* fqf = fastq_create(fin);
    uint32_t packed;

    while (fastq_read(fqf, seq)) {
        /* number of window start positions; <= 0 for reads shorter than k */
        const int windows = (int)seq->seq.n - k + 1;
        int pos = 0;

        while (pos < windows) {
            if (packkmer(seq->seq.s + pos, &packed, k)) {
                cs[packed]++;
            }
            pos++;
        }
    }

    seq_free(seq);
    fastq_free(fqf);
}
Exemple #13
0
/* Print `seq` to `fout`, trimmed around a regex match when a trim flag is
 * set.
 *
 * `ovector` holds the match start offset in [0] and the match end offset in
 * [1]. With neither trim_before_flag nor trim_after_flag set the entry is
 * printed unchanged. Otherwise a range is handed to seq_trim():
 *   - trim_before_flag: from the match (start, or end when trim_match_flag
 *     is set) up to the sequence length;
 *   - trim_after_flag:  from 0 up to the match (end, or start when
 *     trim_match_flag is set).
 * trim_before_flag takes precedence when both flags are set. */
void fastq_print_maybe_trim(FILE* fout, seq_t* seq, int* ovector) 
{
    if (trim_before_flag || trim_after_flag) {
        seq_t* out = seq_create();
        int from;
        int to;

        if (trim_before_flag) {
            from = trim_match_flag ? ovector[1] : ovector[0];
            to   = seq->seq.n;
        }
        else {
            /* only trim_after_flag can be set here */
            from = 0;
            to   = trim_match_flag ? ovector[0] : ovector[1];
        }

        seq_trim(seq, out, from, to);
        fastq_print(fout, out);
        seq_free(out);
    }
    else {
        fastq_print(fout, seq);
    }
}
Exemple #14
0
/* Open the output file named `prefix` followed by `suffix` for writing and
 * return it; prints an error and exits the program if it cannot be opened.
 * When `clobber` is zero the file is opened with open_without_clobber(),
 * otherwise with fopen(name, "wb"). */
static FILE* fastq_sample_open_output(const char* prefix, const char* suffix,
                                      int clobber)
{
    size_t name_size = strlen(prefix) + strlen(suffix) + 1;
    char* name = malloc_or_die(name_size * sizeof(char));
    FILE* f;

    snprintf(name, name_size, "%s%s", prefix, suffix);

    if (clobber) f = fopen(name, "wb");
    else         f = open_without_clobber(name);

    if (f == NULL) {
        fprintf(stderr, "Cannot open file %s for writing.\n", name);
        exit(1);
    }

    free(name);
    return f;
}


/* Randomly sample entries from one fastq file, or from two paired files.
 *
 *   rng_seed  seed for the random number generator
 *   prefix    output name prefix: "<prefix>.fastq" for a single input,
 *             "<prefix>.1.fastq" and "<prefix>.2.fastq" for paired input
 *   cprefix   if non-NULL, prefix (same naming scheme) of the complement
 *             output, which receives every entry that was not sampled
 *   file1     first input
 *   file2     second (paired) input, or NULL
 *   k         number of entries to sample; ignored when p > 0
 *   p         if > 0, sample round(p * n) entries, n being the entry count
 *
 * Sampling is with or without replacement according to replacement_flag;
 * without replacement k is limited to n. The inputs are read twice (count,
 * then extract). The program exits on any error: paired inputs of differing
 * length, or an output file that cannot be opened. */
void fastq_sample(unsigned long rng_seed,
                  const char* prefix, const char* cprefix,
                  FILE* file1, FILE* file2, unsigned long k, double p)
{
    /*
     * The basic idea is this:
     *
     * 1. Count the number of lines in the file, n.
     *
     * 2a. If sampling with replacement, generate k random integers in [0, n-1].
     *
     * 2b. If sampling without replacement, generate a list of integers 0..(n-1),
     *     shuffle with fisher-yates, then consider the first k.
     *
     * 3. Sort the integer list.
     *
     * 4. Read through the file again, when the number at the front of the integer
     *    list matches the index of the fastq entry, print the entry, and pop the
     *    number.
     */


    unsigned long n, n2;

    fastq_t* f1 = fastq_create(file1);
    fastq_t* f2 = file2 == NULL ? NULL : fastq_create(file2);

    n = count_entries(f1);
    if (f2 != NULL) {
        n2 = count_entries(f2);
        if (n != n2) {
            fprintf(stderr, "Input files have differing numbers of entries (%lu != %lu).\n", n, n2);
            exit(1);
        }
    }

    fastq_rewind(f1);
    if (f2 != NULL) fastq_rewind(f2);

    if (p > 0.0) {
        k = (unsigned long) round(p * (double) n);
    }

    /* Without replacement at most n entries can be drawn. This must hold
     * for a caller-supplied k as well: only the index list built from n
     * is available below, and the first k of its elements are sorted. */
    if (!replacement_flag && k > n) k = n;

    rng_t* rng = fastq_rng_alloc();
    fastq_rng_seed(rng, rng_seed);

    unsigned long* xs;
    if (replacement_flag) xs = index_with_replacement(rng, n, k);
    else                  xs = index_without_replacement(rng, n);

    qsort(xs, k, sizeof(unsigned long), cmpul);

    /* open output (never overwriting an existing file) */
    FILE* fout1;
    FILE* fout2 = NULL;

    if (file2 == NULL) {
        fout1 = fastq_sample_open_output(prefix, ".fastq", 0);
    }
    else {
        fout1 = fastq_sample_open_output(prefix, ".1.fastq", 0);
        fout2 = fastq_sample_open_output(prefix, ".2.fastq", 0);
    }

    /* open complement output */
    FILE* cfout1 = NULL;
    FILE* cfout2 = NULL;

    if (cprefix != NULL) {
        if (file2 == NULL) {
            cfout1 = fastq_sample_open_output(cprefix, ".fastq", 1);
        }
        else {
            cfout1 = fastq_sample_open_output(cprefix, ".1.fastq", 1);
            cfout2 = fastq_sample_open_output(cprefix, ".2.fastq", 1);
        }
    }

    unsigned long i = 0; // read number
    unsigned long j = 0; // index into xs

    seq_t* seq1 = seq_create();
    seq_t* seq2 = seq_create();

    /* Keep reading after the last sampled entry when a complement output
     * was requested, so that it receives the remaining entries too. */
    while ((j < k || cfout1 != NULL) && fastq_read(f1, seq1)) {
        if (f2 != NULL && !fastq_read(f2, seq2)) {
            fputs("Input files have differing numbers of entries.\n", stderr);
            exit(1);
        }

        if (j < k && xs[j] == i) {
            /* with replacement the same index may occur several times */
            while (j < k && xs[j] == i) {
                fastq_print(fout1, seq1);
                if (f2 != NULL) fastq_print(fout2, seq2);
                ++j;
            }
        }
        else if (cfout1 != NULL) {
            fastq_print(cfout1, seq1);
            if (f2 != NULL) fastq_print(cfout2, seq2);
        }

        ++i;
    }

    seq_free(seq1);
    seq_free(seq2);
    fastq_free(f1);
    if (f2 != NULL) fastq_free(f2);

    fclose(fout1);
    if (fout2 != NULL) fclose(fout2);

    if (cfout1 != NULL) fclose(cfout1);
    if (cfout2 != NULL) fclose(cfout2);

    fastq_rng_free(rng);
    free(xs);
}