Пример #1
0
/* Third pass: collect every sequence together with its execution
 * count, prune the redundant ones, and print what is left.
 *
 * Each toplevel tree is walked recursively.  When the count at one
 * level drops below the count of the level above, a sequence ends at
 * the upper level and a (less frequent) one carries on below.
 *
 * Sequences of length 1 and sequences under the user-settable cutoff
 * are not kept.
 *
 * A sequence that is a proper suffix of another sequence having the
 * same execution count is subsumed by the longer one and is dropped.
 *
 * Output is produced in flat form, hierarchical form, or both,
 * depending on the `flat' and `hierarchical' settings.
 */
void extract_superwords()
{
    int outer, inner, kept;

    /* Gather sequences from every toplevel entry. */
    for ( outer=0 ; outer < INSTRCOUNT ; outer++ )
        descend(&toplevel[outer], 0);

    qsort(seqs, nextseq, sizeof(seq_t), seq_cmp);

    /* Flag every shorter sequence whose opcodes match the tail of a
       longer sequence with the same execution count.  Only the run of
       entries following `outer' with an equal count is examined. */
    for ( outer=0 ; outer < nextseq ; outer++ ) {
        for ( inner=outer+1 ; inner < nextseq ; inner++ ) {
            int tail_long, tail_short;

            if (seqs[outer].count != seqs[inner].count)
                break;
            if (seqs[inner].length >= seqs[outer].length)
                continue;

            /* Compare backwards from the last opcode of each. */
            tail_long = seqs[outer].length - 1;
            tail_short = seqs[inner].length - 1;
            while (tail_short >= 0 &&
                   seqs[outer].opcode[tail_long] == seqs[inner].opcode[tail_short]) {
                tail_long--;
                tail_short--;
            }

            /* Ran off the front of the shorter one: it is a suffix. */
            if (tail_short < 0)
                seqs[inner].suffix = 1;
        }
    }

    /* Compact the array in place, dropping flagged suffix sequences. */
    kept = 0;
    for ( outer=0 ; outer < nextseq ; outer++ ) {
        if (seqs[outer].suffix)
            continue;
        seqs[kept] = seqs[outer];
        kept++;
    }
    nextseq = kept;

    /* Flat listing: one line per surviving sequence. */
    if (flat) {
        for ( outer=0 ; outer < nextseq ; outer++ )
            printseq(outer, 0);
    }

    /* Hierarchical listing.  A sequence that is a proper prefix of
       another necessarily has a higher execution count; the longer
       sequence is printed underneath it as a child.

       printhseq() reuses the suffix flag to mark sequences it has
       already printed, so such entries are skipped at this level. */
    if (hierarchical) {
        if (flat)
            printf("\n\n----------\n\n");
        for ( outer=0 ; outer < nextseq ; outer++ ) {
            if (seqs[outer].suffix)
                continue;
            printseq(outer, 0);
            printhseq(outer, 1);
        }
    }
}
Пример #2
0
/*
 * Program entry point.
 *
 * Reads lamps.in: the lamp count and switch-press count, then a list of
 * lamps known to be on and a list of lamps known to be off, each list
 * terminated by -1.  Runs search() for every press count of the same
 * parity down to zero and writes every configuration flagged in poss[]
 * to lamps.out, or "IMPOSSIBLE" when none is flagged.
 *
 * NOTE(review): `void main' is non-standard C; the signature is left
 * as it was so the block's interface does not change.
 */
void
main(void)
{
    FILE *fin, *fout;
    int a, i, impossible;

    fin = fopen("lamps.in", "r");
    fout = fopen("lamps.out", "w");
    assert(fin != NULL && fout != NULL);

    /* Without both counts nothing below is meaningful. */
    if(fscanf(fin, "%d %d", &nlamp, &nswitch) != 2) {
	fprintf(stderr, "lamps.in: cannot read lamp and switch counts\n");
	exit(1);
    }

    /* Lamps known to be ON.  A failed read (EOF or garbage) ends the
     * list just like the -1 terminator does; previously it left `a'
     * unchanged and the loop never finished. */
    for(;;) {
	if(fscanf(fin, "%d", &a) != 1 || a == -1)
	    break;
	/* Lamp numbers start at 1; anything lower would give a negative
	 * remainder and an out-of-range shift below. */
	assert(a >= 1);
	/* Fold the lamp number modulo MAXLAMP into a bit index, with
	 * lamp 1 on the highest of the MAXLAMP bits. */
	a = MAXLAMP-1 - (a-1) % MAXLAMP;
	ison |= 1<<a;
	known |= 1<<a;
    }

    /* Lamps known to be OFF: recorded in `known' only. */
    for(;;) {
	if(fscanf(fin, "%d", &a) != 1 || a == -1)
	    break;
	assert(a >= 1);
	a = MAXLAMP-1 - (a-1) % MAXLAMP;
	/* A lamp cannot be listed as both on and off. */
	assert((ison & (1<<a)) == 0);
	known |= 1<<a;
    }

    /* Clamp large press counts to 3 or 4, keeping the parity.
     * (Presumably longer press sequences add nothing new -- confirm
     * against search().) */
    if(nswitch > 4) {
	if(nswitch%2 == 0)
	    nswitch = 4;
	else
	    nswitch = 3;
    }

    /* Try this press count and every smaller one of equal parity. */
    for(; nswitch >= 0; nswitch -= 2)
	search(LAMPMASK, 0, nswitch);

    impossible = 1;
    for(i=0; i<(1<<MAXLAMP); i++) {
	if(poss[i]) {
	    printseq(fout, i);
	    impossible = 0;
	}
    }
    if(impossible)
	fprintf(fout, "IMPOSSIBLE\n");

    /* exit() flushes and closes both streams. */
    exit(0);
}
Пример #3
0
/* Print, as children of sequence i, every later sequence in seqs[]
 * that begins with the opcodes of sequence i, then recurse so that
 * each child gets its own children printed one level deeper.
 *
 *   i      index in seqs[] of the parent sequence
 *   level  level value passed to printseq() for the direct children
 *
 * Every sequence printed here has its suffix flag set, which keeps it
 * from being printed again by this function or by the caller's loop.
 */
void printhseq(int i, int level)
{
    int parent_len, cand, pos, is_prefix;

    parent_len = seqs[i].length;

    for ( cand=i+1 ; cand < nextseq ; cand++ ) {
        /* Already printed elsewhere, or too short to contain the parent. */
        if (seqs[cand].suffix || seqs[cand].length < parent_len)
            continue;

        /* Does the candidate start with the parent's opcodes? */
        is_prefix = 1;
        for ( pos=0 ; pos < parent_len ; pos++ ) {
            if (seqs[i].opcode[pos] != seqs[cand].opcode[pos]) {
                is_prefix = 0;
                break;
            }
        }
        if (!is_prefix)
            continue;

        printseq(cand, level);
        seqs[cand].suffix = 1;
        printhseq(cand, level+1);
    }
}
Пример #4
0
/*
 * Interleave two paired-end sequence files onto stdout.
 *
 * Options:
 *   -t  pass the mate number (1 or 2) to printseq() instead of 0
 *       (presumably so the output names get a mate tag -- confirm
 *       against printseq())
 *   -s  strict: stop with status 1 at the first pair whose names do
 *       not match according to is_interleaved_pair()
 *
 * Two input file names must follow the options.  Returns 0 on success
 * and 1 on bad options, unopenable input, or a strict-mode mismatch
 * (the usage helper's value is returned when file names are missing).
 * Calls exit(1) when the two files hold different numbers of reads.
 */
int pairs_join(int argc, char *argv[]) {
  gzFile fp[2] = {NULL, NULL};
  kseq_t *ks[2] = {NULL, NULL};
  int c, i, tag=0, strict=0, status=0, l[] = {0, 0};
  while ((c = getopt(argc, argv, "ts")) >= 0) {
    switch (c) {
    case 't': tag = 1; break;
    case 's': strict = 1; break;
    default: return 1;
    }
  }

  /* Both mate files are needed; argv[optind + 1] is read below. */
  if (argc - optind < 2) return join_usage();

  for (i = 0; i < 2; ++i) {
    fp[i] = gzopen(argv[optind + i], "r");
    if (!fp[i]) {
      fprintf(stderr, "[%s] error: could not open '%s'\n", __func__, argv[optind + i]);
      status = 1;
      goto cleanup;
    }
    ks[i] = kseq_init(fp[i]);
  }
  for (;;) {
    /* Advance both readers together; a negative value means that
       reader has no further record. */
    for (i = 0; i < 2; ++i) l[i] = kseq_read(ks[i]);
    if (l[0] < 0 || l[1] < 0)
      break;
    if (!is_interleaved_pair(ks[0]->name.s, ks[1]->name.s)) {
      fprintf(stderr, "[%s] warning: different sequence names: %s != %s\n", __func__, ks[0]->name.s, ks[1]->name.s);
      if (strict) {
        status = 1;
        goto cleanup;
      }
    }

    for (i = 0; i < 2; ++i) printseq(stdout, ks[i], ks[i]->seq.l, tag ? i+1 : 0);
  }

  /* The loop ends as soon as either reader fails.  If the other one
     still delivered a record -- including a zero-length one, hence
     >= 0 rather than > 0 -- the files are of unequal length. */
  if (l[0] >= 0 || l[1] >= 0) {
    fprintf(stderr, "[%s] error: paired end files have differing numbers of reads.\n", __func__);
    exit(1);
  }

cleanup:
  for (i = 0; i < 2; ++i) {
    if (ks[i]) kseq_destroy(ks[i]);
    if (fp[i]) gzclose(fp[i]);
  }
  return status;
}
Пример #5
0
/*
 * Split an interleaved paired-end file into three outputs.
 *
 * Options:
 *   -1 FILE  output for the first read of each intact pair   (required)
 *   -2 FILE  output for the second read of each intact pair  (required)
 *   -u FILE  output for reads whose mate was removed          (required)
 *   -m N     length threshold, N >= 1; a read whose sequence length
 *            is <= N counts as empty (default 0: only truly empty
 *            reads, plus the single-base "N" read)
 *   -n       non-strict: only warn when the two names of a pair differ
 *
 * The input file name follows the options; "-" means stdin.  Records
 * are consumed two at a time.  Pairs where both reads are empty are
 * dropped; pairs where one read is empty send the survivor to the -u
 * file.  Totals are reported on stderr.
 *
 * Returns 0 on success and 1 on any error (the usage helper's value is
 * returned when no input name is given).
 */
int pairs_split(int argc, char *argv[]) {
  kseq_t *seq[2] = {NULL, NULL}, *tmp;
  FILE *fpout[] = {NULL, NULL, NULL};
  gzFile fp;
  int c, l, i, strict=1, min_length=0, status=1;
  unsigned total[]={0, 0}, removed[]={0, 0}, both_removed = 0;
  unsigned is_empty[]={0, 0};
  char *p, *tags[] = {"/1", "/2"};
  /* "m:" has to be in the option string, otherwise getopt rejects -m
     and the case below can never run. */
  while ((c = getopt(argc, argv, "1:2:u:m:n")) >= 0) {
    switch (c) {
    case '1':
      fpout[0] = fopen(optarg, "w");
      break;
    case '2':
      fpout[1] = fopen(optarg, "w");
      break;
    case 'u':
      fpout[2] = fopen(optarg, "w");
      break;
    case 'm':
      min_length = atoi(optarg);
      if (min_length < 1) {
        fprintf(stderr, "[%s] error: minimum length must be >= 1\n", __func__);
        goto cleanup;
      }
      break;
    case 'n': strict = 0; break;
    default: goto cleanup;
    }
  }

  if (optind == argc) {
    status = split_usage();
    goto cleanup;
  }

  /* A NULL here means the option was missing or fopen() failed. */
  for (i = 0; i < 3; ++i) {
    if (!fpout[i]) {
      fprintf(stderr, "[%s] error: arguments -1, -2, and -u are required.\n", __func__);
      goto cleanup;
    }
  }

  fp = (strcmp(argv[optind], "-") == 0) ? gzdopen(fileno(stdin), "r") : gzopen(argv[optind], "r");
  if (!fp) {
    fprintf(stderr, "[%s] error: could not open input '%s'\n", __func__, argv[optind]);
    goto cleanup;
  }

  /* Zero-filled holders for the two reads of the current pair.
     (Zeroing is a precaution: cpy_kseq() is not visible here and may
     inspect the destination before overwriting it.) */
  for (i = 0; i < 2; ++i) {
    seq[i] = calloc(1, sizeof *seq[i]);
    if (!seq[i]) {
      fprintf(stderr, "[%s] error: out of memory\n", __func__);
      goto cleanup;
    }
  }
  tmp = kseq_init(fp);
  while ((l=kseq_read(tmp)) >= 0) {
    /* always read in chunks of two FASTX entries */
    cpy_kseq(seq[0], tmp);
    l = kseq_read(tmp);
    if (l < 0) {
      /* A first read without its mate: previously dropped silently. */
      fprintf(stderr, "[%s] warning: input ends with an unpaired read; it was ignored\n", __func__);
      break;
    }
    cpy_kseq(seq[1], tmp);

    for (i = 0; i < 2; ++i) {
      /* remove /1 and /2 tags.
         NOTE(review): strstr() finds the first occurrence anywhere in
         the name, so the name is cut there even if the tag is not at
         the very end. */
      p = strstr(seq[i]->name.s, tags[i]);
      if (p) {
        *p = '\0';
      }
    }

    if (strcmp(seq[0]->name.s, seq[1]->name.s) != 0) {
      fprintf(stderr, "[%s] warning: interleaved reads names differ '%s' != '%s'\n", __func__, seq[0]->name.s, seq[1]->name.s);
      if (strict) goto cleanup;
    }

    /* deal with unpaired cases (either no seq or single 'N') */
    for (i = 0; i < 2; ++i) {
      /* The length test comes first so strcmp() is never reached for
         a read at or under the threshold. */
      is_empty[i] = seq[i]->seq.l <= min_length || strcmp(seq[i]->seq.s, "N") == 0;
      removed[i] += is_empty[i];
      total[i] += 1;
    }

    if (!is_empty[0] && !is_empty[1]) {
      for (i = 0; i < 2; i++)
        printseq(fpout[i], seq[i], seq[i]->seq.l, 0);
    } else if (is_empty[0] && is_empty[1]) {
      both_removed += 1;
      continue;
    } else {
      /* Exactly one mate survives: it goes to the unpaired file. */
      i = is_empty[0] ? 1 : 0;
      printseq(fpout[2], seq[i], seq[i]->seq.l, 0);
    }
  }
  if (total[0] != total[1]) {
    fprintf(stderr, "[%s] error: mismatched totals of interleaved pairs! %u != %u\n", __func__, total[0], total[1]);
    goto cleanup;
  }
  fprintf(stderr, "totals: %u %u\nremoved: %u %u\n", total[0], total[1], removed[0], removed[1]);
  status = 0;

cleanup:
  /* Release the holders allocated above.
     NOTE(review): anything cpy_kseq() allocated inside them, the kseq
     reader and the gz handle are not released here. */
  for (i = 0; i < 2; ++i) free(seq[i]);
  /* Closing flushes buffered output; a failure means lost data. */
  for (i = 0; i < 3; ++i) {
    if (fpout[i] && fclose(fpout[i]) != 0) {
      fprintf(stderr, "[%s] error: failed to close an output file\n", __func__);
      status = 1;
    }
  }
  return status;
}