Example #1
0
/*
 * Check a list of structures against a folding constraint.
 *
 *   arfv[1]  input file with the sequence on the first line and the
 *            constraint on the second line; takes constraints of the
 *            ( ) and [ ] variety.  "-" selects stdin.
 *   arfv[2]  the list of structures output by subopt to check.
 *            "-" selects stdin.
 *
 * Prints the constraint string and the forced / forbidden pairs derived
 * from it, then hands every non-empty structure read from arfv[2] to
 * compare().  Returns 0 on completion; exits with status 1 on a usage
 * error or when either file cannot be opened.
 */
int main(int arfc, char **arfv)
{
    if (arfc != 3) {
        usage(arfv[0]);
        exit(1);
    }

    /* Note: despite its name, fout is an INPUT stream (opened "rb"). */
    FILE *fin, *fout;
    int i, *pos, *neg;
    char *C;

    if (arfv[1][0] == '-' && arfv[1][1] == '\0')
        fin = stdin;
    else if (!(fin = fopen(arfv[1], "rb"))) {
        fprintf(stderr, "unable to open %s, bailing\n", arfv[1]);
        exit(1);
    }

    if (arfv[2][0] == '-' && arfv[2][1] == '\0')
        fout = stdin;
    else if (!(fout = fopen(arfv[2], "rb"))) {
        fprintf(stderr, "unable to open %s, bailing\n", arfv[2]);
        if (stdin != fin)
            fclose(fin);
        exit(1);
    }

    /* Constraint file: drop the sequence line, keep the constraint line. */
    skip_sequence(fin);
    C = get_structure(fin);
    make_pair_table(C, &pos, &neg);

    printf("constraint string:\n%s\n", C);
    free(C);

    /*
     * The tables are indexed from 1; a non-zero entry is printed as a pair.
     * len is defined outside this function (presumably set as a side effect
     * of make_pair_table -- confirm against its definition).
     */
    printf("forced pairs:");
    for (i = 1; i <= len; ++i) {
        if (pos[i])
            printf(" (%d,%d)", i, pos[i]);
    }
    printf("\n");

    printf("forbidden pairs:");
    for (i = 1; i <= len; ++i) {
        if (neg[i])
            printf(" (%d,%d)", i, neg[i]);
    }
    printf("\n");

    /*
     * NOTE(review): feof() only turns true after a read has already hit
     * end-of-file, so the last iteration processes whatever get_structure()
     * returns at EOF.  The s[0] test skips compare() for an empty result;
     * confirm get_structure() never returns NULL.
     */
    while (!feof(fout)) {
        char *s;
        int *ps;
        s = get_structure(fout);
        make_pair_table(s, &ps, NULL);
        if (s[0]) {
            compare(s, ps, pos, neg);
        }
        free(s);
        free(ps);
    }

    free(pos);
    free(neg);
    if (stdin != fin)
        fclose(fin);
    /* Guard on fout itself: testing fin here leaked fout when fin was
       stdin and closed stdin when fout was stdin. */
    if (stdin != fout)
        fclose(fout);

    return 0;
}
Example #2
0
/* Reads one contig header line from stdin, of the form
     accession chr:coords [universal_coord]
   where coords is either "start..end" or a single position, then
   measures the sequence that follows (or skips it when rawp is true)
   and records the contig through store_accession().

   seglength: output; set to upper - lower when rawp is true, otherwise
     to the value returned by count_sequence().
   accsegmentpos_table, chrlength_table: passed through to store_accession().
   fileroot: used only in the progress message.
   ncontigs: progress messages are printed while this is below 100, and a
     one-time notice is printed when it equals 100.

   Returns false when no further line can be read from stdin, true
   otherwise.  Exits with status 1 when the header line cannot be parsed.
   The accession string is allocated here and handed to store_accession()
   (presumably ownership passes to it -- confirm against its definition). */
static bool
process_sequence_aux (Genomicpos_T *seglength, Table_T accsegmentpos_table, Tableint_T chrlength_table,
		      char *fileroot, int ncontigs) {
  char Buffer[BUFFERSIZE], accession_p[BUFFERSIZE], *accession, 
    chrpos_string[BUFFERSIZE], *chr_string, *coords;
  Genomicpos_T chrpos1, chrpos2, lower, upper;
  Genomicpos_T universal_coord = 0U;
  bool revcompp;
  int nitems;

  /* Store sequence info */
  if (fgets(Buffer,BUFFERSIZE,stdin) == NULL) {
    return false;
  }

  /* The %s targets are as large as Buffer, so a token cannot overflow them */
  if ((nitems = sscanf(Buffer,"%s %s %u",accession_p,chrpos_string,&universal_coord)) < 2) {
    fprintf(stderr,"Can't parse line %s\n",Buffer);
    exit(1);
  } else {
    if (ncontigs < 100) {
      fprintf(stderr,"Logging contig %s at %s in genome %s\n",accession_p,chrpos_string,fileroot);
    } else if (ncontigs == 100) {
      fprintf(stderr,"More than 100 contigs.  Will stop printing messages\n");
    }

    if (!index(chrpos_string,':')) {
      fprintf(stderr,"Can't parse chromosomal coordinates %s\n",chrpos_string);
      exit(1);
    } else {
      chr_string = strtok(chrpos_string,":");
      coords = strtok(NULL,":");
      if (chr_string == NULL || coords == NULL) {
	/* A colon is present but one side is empty, e.g. "chr1:" or ":100".
	   strtok returns NULL in that case, and passing NULL to sscanf is
	   undefined.  Report the whole line, because strtok has already
	   modified chrpos_string. */
	fprintf(stderr,"Can't parse chromosomal coordinates in line %s\n",Buffer);
	exit(1);
      }

      /* NOTE(review): the decrements below wrap around if the coordinate
	 being decremented is 0; input is documented as one-based. */
      if (sscanf(coords,"%u..%u",&chrpos1,&chrpos2) == 2) {
	/* 1:3..5, one-based, inclusive => (2,5), zero-based, boundaries */
	if (chrpos1 <= chrpos2) {
	  chrpos1--;
	  revcompp = false;
	  lower = chrpos1;
	  upper = chrpos2;
	} else {
	  /* start > end: contig lies on the reverse strand */
	  chrpos2--;
	  revcompp = true;
	  lower = chrpos2;
	  upper = chrpos1;
	}
      } else if (sscanf(coords,"%u",&chrpos1) == 1) {
	/* 1:3, one-based, inclusive => (3,3), zero-based, boundaries */
	revcompp = false;
	lower = upper = chrpos1;
      } else {
	fprintf(stderr,"Can't parse chromosomal coordinates %s\n",coords);
	exit(1);
      }
    }

#if 0
    /* No longer supporting strains/types */
    p = Buffer;
    while (*p != '\0' && !isspace((int) *p)) { p++; } /* Skip to first space */
    while (*p != '\0' && isspace((int) *p)) { p++; } /* Skip past first space */
    while (*p != '\0' && !isspace((int) *p)) { p++; } /* Skip to second space */
    while (*p != '\0' && isspace((int) *p)) { p++; } /* Skip past second space */

    if (*p == '\0') {
      contigtype = 0;		/* Empty type string */
    } else {
      if ((ptr = rindex(p,'\n')) != NULL) {
	while (isspace((int) *ptr)) { ptr--; } /* Erase empty space */
	ptr++;
	*ptr = '\0';
      }
      if ((contigtype = Tableint_get(contigtype_table,(void *) p)) == 0) {
	debug(printf("Storing type %s.\n",p));
	/* Store types as 1-based */
	contigtype = Tableint_length(contigtype_table) + 1;
	typestring = (char *) CALLOC(strlen(p)+1,sizeof(char));
	strcpy(typestring,p);
	Tableint_put(contigtype_table,(void *) typestring,contigtype);
	*contigtypelist = List_push(*contigtypelist,typestring);
      }
    }
#endif

    /* The '>' character was already stripped off by the last call to count_sequence() */
    accession = (char *) CALLOC(strlen(accession_p)+1,sizeof(char));
    strcpy(accession,accession_p);
  }

  if (rawp == true) {
    /* Raw mode: trust the header for the length and skip that many characters */
    *seglength = upper - lower;
    fprintf(stderr,"Skipping %u characters\n",*seglength);
    skip_sequence(*seglength);
  } else {
    /* Otherwise count the sequence and warn (without failing) on a mismatch */
    *seglength = count_sequence();
    if (*seglength != upper - lower) {
      fprintf(stderr,"%s has expected sequence length %u-%u=%u but actual length %u\n",
	      accession,upper,lower,upper-lower,*seglength);
    }
  }

  /* Third header field is optional; default the universal coordinate to 0 */
  if (nitems < 3) {
    universal_coord = 0U;
  }

  store_accession(accsegmentpos_table,chrlength_table,
		  accession,chr_string,lower,upper,revcompp,
		  *seglength,/*contigtype*/0,universal_coord);

  return true;
}