/* Third pass: extract all sequences and their execution counts. We * walk the toplevel and then recur throughout each tree; if the count * at one level is less than the count at the previous level then one * sequence ended at the previous level but continues with the lower * count at this level. * * Discard sequences of length 1 (duh) and anything below the * user-settable cutoff. * * Every sequence that is a proper suffix of another sequence with the * same execution count is discarded too (subsumed by the longer * sequence). */ void extract_superwords() { int i, j, ki, kj; for ( i=0 ; i < INSTRCOUNT ; i++ ) descend(&toplevel[i], 0); qsort(seqs, nextseq, sizeof(seq_t), seq_cmp); for ( i=0 ; i < nextseq ; i++ ) { for ( j=i+1 ; j < nextseq && seqs[i].count == seqs[j].count ; j++ ) { if (seqs[j].length < seqs[i].length) { for ( ki=seqs[i].length-1, kj=seqs[j].length-1 ; kj >= 0 && seqs[i].opcode[ki] == seqs[j].opcode[kj] ; ki--, kj-- ) ; if (kj < 0) seqs[j].suffix = 1; } } } /* Remove suffix sequences */ j=0; for ( i=0 ; i < nextseq ; i++ ) if (!seqs[i].suffix) seqs[j++] = seqs[i]; nextseq = j; /* Flat */ if (flat) for ( i=0 ; i < nextseq ; i++ ) printseq(i, 0); /* Hierarchical. Every sequence that is a proper prefix of another sequence will necessarily have a higher execution count, and will be printed with the longer sequence as a child. Note this messes with the suffix flag. */ if (hierarchical) { if (flat) printf("\n\n----------\n\n"); for ( i=0 ; i < nextseq ; i++ ) { if (!seqs[i].suffix) { printseq(i, 0); printhseq(i, 1); } } } }
void main(void) { FILE *fin, *fout; int a, i, impossible; fin = fopen("lamps.in", "r"); fout = fopen("lamps.out", "w"); assert(fin != NULL && fout != NULL); fscanf(fin, "%d %d", &nlamp, &nswitch); for(;;) { fscanf(fin, "%d", &a); if(a == -1) break; a = MAXLAMP-1 - (a-1) % MAXLAMP; ison |= 1<<a; known |= 1<<a; } for(;;) { fscanf(fin, "%d", &a); if(a == -1) break; a = MAXLAMP-1 - (a-1) % MAXLAMP; assert((ison & (1<<a)) == 0); known |= 1<<a; } if(nswitch > 4) if(nswitch%2 == 0) nswitch = 4; else nswitch = 3; for(; nswitch >= 0; nswitch -= 2) search(LAMPMASK, 0, nswitch); impossible = 1; for(i=0; i<(1<<MAXLAMP); i++) { if(poss[i]) { printseq(fout, i); impossible = 0; } } if(impossible) fprintf(fout, "IMPOSSIBLE\n"); exit(0); }
/*
 * Print, at indent `level`, every sequence after index i that starts with
 * all of seqs[i]'s opcodes, and recurse so that each printed sequence gets
 * its own extensions listed one level deeper.
 *
 * The suffix flag is reused as an "already printed" marker: a sequence is
 * flagged as soon as it is printed, so it is not printed again under
 * another parent, and the caller can skip it as a root.
 */
void printhseq(int i, int level)
{
    int cand, pos, prefix_len, matches;

    prefix_len = seqs[i].length;

    for (cand = i + 1; cand < nextseq; cand++) {
        /* Already printed elsewhere, or too short to contain the prefix. */
        if (seqs[cand].suffix || seqs[cand].length < prefix_len) {
            continue;
        }

        /* Compare the first prefix_len opcodes. */
        matches = 1;
        for (pos = 0; pos < prefix_len; pos++) {
            if (seqs[i].opcode[pos] != seqs[cand].opcode[pos]) {
                matches = 0;
                break;
            }
        }
        if (!matches) {
            continue;
        }

        printseq(cand, level);
        seqs[cand].suffix = 1;
        printhseq(cand, level + 1);
    }
}
int pairs_join(int argc, char *argv[]) { gzFile fp[2]; kseq_t *ks[2]; int c, i, tag=0, strict=0, l[] = {0, 0}; while ((c = getopt(argc, argv, "ts")) >= 0) { switch (c) { case 't': tag = 1; break; case 's': strict = 1; break; default: return 1; } } if (optind == argc) return join_usage(); for (i = 0; i < 2; ++i) { fp[i] = gzopen(argv[optind + i], "r"); ks[i] = kseq_init(fp[i]); } for (;;) { for (i = 0; i < 2; ++i) l[i] = kseq_read(ks[i]); if (l[0] < 0 || l[1] < 0) break; if (!is_interleaved_pair(ks[0]->name.s, ks[1]->name.s)) { fprintf(stderr, "[%s] warning: different sequence names: %s != %s\n", __func__, ks[0]->name.s, ks[1]->name.s); if (strict) return 1; } for (i = 0; i < 2; ++i) printseq(stdout, ks[i], ks[i]->seq.l, tag ? i+1 : 0); } if (l[0] > 0 || l[1] > 0) { fprintf(stderr, "[%s] error: paired end files have differing numbers of reads.\n", __func__); exit(1); } for (i = 0; i < 2; ++i) { kseq_destroy(ks[i]); gzclose(fp[i]); } return 0; }
int pairs_split(int argc, char *argv[]) { kseq_t **seq, *tmp; FILE *fpout[] = {NULL, NULL, NULL}; gzFile fp; int c, l, i, strict=1, min_length=0; unsigned total[]={0, 0}, removed[]={0, 0}, both_removed = 0; unsigned is_empty[]={0, 0}; char *p, *tags[] = {"/1", "/2"}; while ((c = getopt(argc, argv, "1:2:u:n")) >= 0) { switch (c) { case '1': fpout[0] = fopen(optarg, "w"); break; case '2': fpout[1] = fopen(optarg, "w"); break; case 'u': fpout[2] = fopen(optarg, "w"); break; case 'm': min_length = atoi(optarg); if (min_length < 1) fprintf(stderr, "[%s] error: minimum length must be >= 1\n", __func__); return 1; break; case 'n': strict = 0; break; default: return 1; } } if (optind == argc) return split_usage(); for (i = 0; i < 3; ++i) { if (!fpout[i]) { fprintf(stderr, "[%s] error: arguments -1, -2, and -u are required.", __func__); return 1; } } fp = (strcmp(argv[optind], "-") == 0) ? gzdopen(fileno(stdin), "r") : gzopen(argv[optind], "r"); seq = calloc(2, sizeof(kseq_t*)); for (i = 0; i < 2; ++i) seq[i] = malloc(sizeof(kseq_t)); tmp = kseq_init(fp); while ((l=kseq_read(tmp)) >= 0) { /* always read in chunks of two FASTX entries */ cpy_kseq(seq[0], tmp); l = kseq_read(tmp); if (l < 0) break; cpy_kseq(seq[1], tmp); for (i = 0; i < 2; ++i) { /* remove /1 and /2 tags */ p = strstr(seq[i]->name.s, tags[i]); if (p) { strncpy(p, "\0", 1); } } if (strcmp(seq[0]->name.s, seq[1]->name.s) != 0) { fprintf(stderr, "[%s] warning: interleaved reads names differ '%s' != '%s'\n", __func__, seq[0]->name.s, seq[1]->name.s); if (strict) return 1; } /* deal with unpaired cases (either no seq or single 'N') */ for (i = 0; i < 2; ++i) { is_empty[i] = seq[i]->seq.l <= min_length || strcmp(seq[i]->seq.s, "N") == 0; removed[i] += is_empty[i]; total[i] += 1; } if (!is_empty[0] && !is_empty[1]) { for (i = 0; i < 2; i++) printseq(fpout[i], seq[i], seq[i]->seq.l, 0); } else if (is_empty[0] && is_empty[1]) { both_removed += 1; continue; } else { i = is_empty[0] ? 
1 : 0; printseq(fpout[2], seq[i], seq[i]->seq.l, 0); } } if (total[0] != total[1]) { fprintf(stderr, "[%s] error: mismatched totals of interleaved pairs! %u != %u\n", __func__, total[0], total[1]); return 1; } fprintf(stderr, "totals: %u %u\nremoved: %u %u\n", total[0], total[1], removed[0], removed[1]); return 0; }