Пример #1
0
/* Assumes position is 0-based */
static char *
convert_to_chrpos_iit (unsigned int *chrpos, IIT_T chromosome_iit, unsigned int position) {
  char *chromosome;
  
  /* Subtract 1 to make 0-based */
  chromosome = IIT_string_from_position(&(*chrpos),position-1U,chromosome_iit);
  *chrpos += 1U;		/* Bring back to 1-based */
  IIT_free(&chromosome_iit);

  return chromosome;
}
Пример #2
0
/* Assumes position is 0-based */
static char *
convert_to_chrpos (unsigned int *chrpos, char *genomesubdir, char *fileroot, unsigned int position) {
  char *chromosome, *filename;
  IIT_T chromosome_iit;
  
  filename = (char *) CALLOC(strlen(genomesubdir)+strlen("/")+strlen(fileroot)+
			     strlen(".chromosome.iit")+1,sizeof(char));
  sprintf(filename,"%s/%s.chromosome.iit",genomesubdir,fileroot);
  if ((chromosome_iit = IIT_read(filename,/*name*/NULL,/*readonlyp*/true,/*divread*/READ_ALL,
				 /*divstring*/NULL,/*add_iit_p*/false,/*labels_read_p*/true)) == NULL) {
    fprintf(stderr,"Can't read IIT file %s\n",filename);
    exit(9);
  }
  FREE(filename);

  /* Subtract 1 to make 0-based */
  chromosome = IIT_string_from_position(&(*chrpos),position-1U,chromosome_iit);
  *chrpos += 1U;		/* Bring back to 1-based */
  IIT_free(&chromosome_iit);

  return chromosome;
}
Пример #3
0
int 
main (int argc, char *argv[]) {
  char *filename;
  char *divstring = NULL, *lasttypestring, *ptr;
  Univcoord_T univ_coordstart, univ_coordend;
  Chrpos_T coordstart, coordend, lastcoord = 0U;
  char Buffer[BUFLEN], nocomment[BUFLEN], query[BUFLEN], typestring[BUFLEN];
  int typeint, *types, c;
  int nargs, ntypes, ndivs;
  int *value_matches = NULL, *matches = NULL;
  int n_value_matches = 0, nmatches = 0, i;
  int *leftflanks, *rightflanks, nleftflanks = 0, nrightflanks = 0;
  long int total;
  int n;
  IIT_T iit = NULL;
  Univ_IIT_T chromosome_iit = NULL;
  bool skipp, universalp;
  
  int opt;
  extern int optind;
  extern char *optarg;
  const char *long_name;
  int long_option_index = 0;

  while ((opt = getopt_long(argc,argv,"a:b:f:LCASUu:c:HRTZN",long_options,&long_option_index)) != -1) {
    switch (opt) {
    case 0:
      long_name = long_options[long_option_index].name;
      if (!strcmp(long_name,"version")) {
	print_program_version();
	exit(0);
      } else if (!strcmp(long_name,"help")) {
	print_program_usage();
	exit(0);
      } else if (!strcmp(long_name,"exact")) {
	exactp = true;
      } else {
	/* Shouldn't reach here */
	fprintf(stderr,"Don't recognize option %s.  For usage, run 'gsnap --help'",long_name);
	exit(9);
      }
      break;

    case 'a': lowval = atof(optarg); user_lowvalue_p = true; value_matches_p = true; break;
    case 'b': highval = atof(optarg); user_highvalue_p = true; value_matches_p = true; break;

    case 'f': fieldstring = optarg; break;
    case 'L': force_label_p = true; break;
    case 'C': force_coords_p = true; break;
    case 'A': annotationonlyp = true; break;
    case 'S': sortp = true; break;
    case 'U': signedp = false; break;
    case 'u': nflanking = atoi(optarg); break;

    case 'c': centerp = true; centerlength = atoi(optarg); break;
    case 'H': centeruc = true; break;
    case 'R': runlengthp = true; break;
    case 'T': tallyp = true; break;
    case 'Z': zeroesp = true; break;
    case 'N': statsp = true; break;

    case 'V': print_program_version(); exit(0);
    case '?': print_program_usage(); exit(0);
    default: exit(9);
    }
  }

  argc -= (optind - 1);
  argv += (optind - 1);

  if (argc <= 1) {
    fprintf(stderr,"Need to specify an iit file.  Type \"iit_get --help\" for help.\n");
    exit(9);
  } else {
    filename = argv[1];
  }

  if (value_matches_p == true) {
    if ((iit = IIT_read(filename,/*name*/NULL,true,/*divread*/READ_ALL,/*divstring*/NULL,
			/*add_iit_p*/true,/*labels_read_p*/true)) == NULL) {
      if (Access_file_exists_p(filename) == false) {
	fprintf(stderr,"Cannot read file %s\n",filename);
      } else {
	fprintf(stderr,"File %s appears to be an invalid IIT file\n",filename);
      }
      exit(9);

    } else if (fieldstring != NULL) {
      if ((fieldint = IIT_fieldint(iit,fieldstring)) < 0) {
	fprintf(stderr,"No field %s defined in iit file.\n",fieldstring);
	exit(9);
      }
    }

    if (IIT_valuep(iit) == false) {
      fprintf(stderr,"Error: This IIT file does not have values stored\n");
      exit(9);
    }

    if (user_lowvalue_p == true && user_highvalue_p == true) {
      if (lowval > highval) {
	fprintf(stderr,"Cannot have lowval %f > highval %f\n",lowval,highval);
	exit(9);
      } else {
	value_matches = IIT_get_values_between(&n_value_matches,iit,lowval,highval,/*sortp*/false);
      }
    } else if (user_lowvalue_p == true) {
      value_matches = IIT_get_values_above(&n_value_matches,iit,lowval,/*sortp*/false);
      
    } else { /* user_highvalue_p == true */
      value_matches = IIT_get_values_below(&n_value_matches,iit,highval,/*sortp*/false);
    }
  }

  if (0 && statsp == true && argc == 2) {
    /* Want total over entire IIT */
    if ((iit = IIT_read(filename,NULL,true,/*divread*/READ_ALL,/*divstring*/NULL,/*add_iit_p*/true,
			/*labels_read_p*/false)) == NULL) {
      if (Access_file_exists_p(filename) == false) {
	fprintf(stderr,"Cannot read file %s\n",filename);
      } else {
	fprintf(stderr,"File %s appears to be an invalid IIT file\n",filename);
      }
      exit(9);

    } else if (fieldstring != NULL) {
      if ((fieldint = IIT_fieldint(iit,fieldstring)) < 0) {
	fprintf(stderr,"No field %s defined in iit file.\n",fieldstring);
	exit(9);
      }
    } 

    total = 0;
    n = 0;
    for (i = 0; i < IIT_total_nintervals(iit); i++) {
      debug(printf("index = %d\n",matches[i]));
      compute_totals_tally(&total,&n,/*coordstart*/0,/*coordend*/-1U,i,iit);
    }
    printf("counts:%ld non-zero-positions:%u mean-over-nonzero:%.3f\n",total,n,(double) total/(double) n);
      
    IIT_free(&iit);
    return 0;

  } else if ((universalp = IIT_universalp(filename,/*add_iit_p*/true)) == true) {
    chromosome_iit = Univ_IIT_read(filename,/*readonlyp*/true,/*add_iit_p*/true);
    if (argc != 3) {
      fprintf(stderr,"For chromosome IIT file, need to specify a query on the command line\n");
      exit(9);
    } else {
      /* Try as 0:<iitfile> 1:<query> */
      matches = get_matches_univ(&nmatches,&divstring,&univ_coordstart,&univ_coordend,
				 &leftflanks,&nleftflanks,&rightflanks,&nrightflanks,
				 argv[2],/*typestring*/NULL,&chromosome_iit,filename);

      for (i = 0; i < nmatches; i++) {
	debug(printf("\nindex = %d\n",matches[i]));
	print_interval_univ(univ_coordstart,univ_coordend,matches[i],chromosome_iit);
      }
    }

    if (divstring != NULL) {
      FREE(divstring);
    }
    Univ_IIT_free(&chromosome_iit);
    return 0;

  } else if (argc == 2 && value_matches_p == true) {
    /* Note: Could potentially handle input from stdin, but currently just deal with value_matches */

    ndivs = IIT_ndivs(iit);
    for (i = 0; i < n_value_matches; i++) {
      debug(printf("\nindex = %d\n",matches[i]));
      print_interval(&lastcoord,/*total*/0,/*divstring*/NULL,/*coordstart*/0,/*coordend*/0,
		     value_matches[i],iit,ndivs,fieldint);
    }
    
    FREE(value_matches);
    IIT_free(&iit);
    return 0;

  } else if (argc == 2) {
    debug(printf("Running under argc 2\n"));
    /* Expecting input from stdin */
      
    if ((iit = IIT_read(filename,NULL,true,/*divread*/READ_ALL,/*divstring*/NULL,/*add_iit_p*/true,
			/*labels_read_p*/true)) == NULL) {
      if (Access_file_exists_p(filename) == false) {
	fprintf(stderr,"Cannot read file %s\n",filename);
      } else {
	fprintf(stderr,"File %s appears to be an invalid IIT file\n",filename);
      }
      exit(9);
    } else if (fieldstring != NULL) {
      if ((fieldint = IIT_fieldint(iit,fieldstring)) < 0) {
	fprintf(stderr,"No field %s defined in iit file.\n",fieldstring);
	exit(9);
      }
    }
	
    while (fgets(Buffer,BUFLEN,stdin) != NULL) {
      if ((ptr = rindex(Buffer,'\n')) != NULL) {
	*ptr = '\0';
      }
      strcpy(nocomment,Buffer);
      if ((ptr = rindex(nocomment,'#')) != NULL) {
	*ptr = '\0';
      }

      skipp = false;

      if ((nargs = sscanf(nocomment,"%s %s",query,typestring)) == 2) {
	debug(printf("typestring is %s\n",typestring));
	matches = get_matches(&nmatches,&divstring,&univ_coordstart,&univ_coordend,
			      &leftflanks,&nleftflanks,&rightflanks,&nrightflanks,
			      query,typestring,&iit,filename);
	coordstart = (Chrpos_T) univ_coordstart;
	coordend = (Chrpos_T) univ_coordend;

      } else if (nargs == 1) {
	debug(printf("typestring is NULL\n"));
	matches = get_matches(&nmatches,&divstring,&univ_coordstart,&univ_coordend,
			      &leftflanks,&nleftflanks,&rightflanks,&nrightflanks,
			      query,/*typestring*/NULL,&iit,filename);
	coordstart = (Chrpos_T) univ_coordstart;
	coordend = (Chrpos_T) univ_coordend;

      } else {
	fprintf(stderr,"Can't parse line %s.  Ignoring.\n",nocomment);
	skipp = true;
      }
	
      total = 0;
      if (skipp == false) {
	fprintf(stdout,"# Query: %s\n",Buffer);
	ndivs = IIT_ndivs(iit);
	if (nflanking > 0) {
	  for (i = nleftflanks-1; i >= 0; i--) {
	    debug(printf("\nleft index = %d\n",leftflanks[i]));
	    print_interval(&lastcoord,/*total*/0,divstring,coordstart,coordend,leftflanks[i],iit,ndivs,fieldint);
	  }
	  printf("====================\n");
	  FREE(leftflanks);
	}

	lastcoord = coordstart;
	for (i = 0; i < nmatches; i++) {
	  debug(printf("\nindex = %d\n",matches[i]));
	  total = print_interval(&lastcoord,total,divstring,coordstart,coordend,matches[i],iit,ndivs,fieldint);
	}

	if (nflanking > 0) {
	  printf("====================\n");
	  for (i = 0; i < nrightflanks; i++) {
	    debug(printf("\nright index = %d\n",rightflanks[i]));
	    print_interval(&lastcoord,/*total*/0,divstring,coordstart,coordend,rightflanks[i],iit,ndivs,fieldint);
	  }
	  FREE(rightflanks);
	}

	if (zeroesp == true) {
	  while (lastcoord <= coordend) {
	    printf("%s\t%u\t%d\n",divstring,lastcoord,0);
	    lastcoord++;
	  }
	}
      }

      if (divstring != NULL) {
	FREE(divstring);
      }
      FREE(matches);
      printf("%ld\n",total);
      fprintf(stdout,"# End\n");
      fflush(stdout);
    }

    IIT_free(&iit);
    return 0;

  } else {
    /* Get coordinates/type from command line */
    if (argc == 3) {
      /* Try as 0:<iitfile> 1:<query> */
      matches = get_matches(&nmatches,&divstring,&univ_coordstart,&univ_coordend,
			    &leftflanks,&nleftflanks,&rightflanks,&nrightflanks,
			    argv[2],/*typestring*/NULL,&iit,filename);
      coordstart = (Chrpos_T) univ_coordstart;
      coordend = (Chrpos_T) univ_coordend;
    
    } else if (argc == 4) {
      /* Try as 0:<iitfile> 1:<query> 2:<type> */
      debug(printf("Running under argc 4\n"));
      matches = get_matches(&nmatches,&divstring,&univ_coordstart,&univ_coordend,
			    &leftflanks,&nleftflanks,&rightflanks,&nrightflanks,
			    argv[2],argv[3],&iit,filename);
      coordstart = (Chrpos_T) univ_coordstart;
      coordend = (Chrpos_T) univ_coordend;

    } else {
      types = (int *) CALLOC(argc-3,sizeof(int));
      for (c = 3, ntypes = 0; c < argc; c++) {
	if ((typeint = IIT_typeint(iit,argv[c])) < 0) {
	  fprintf(stderr,"No such type as %s.  Ignoring the type.\n",argv[c]);
	} else {
	  types[ntypes++] = typeint;
	  lasttypestring = argv[c];
	}
      }
      if (ntypes == 0) {
	matches = get_matches(&nmatches,&divstring,&univ_coordstart,&univ_coordend,
			      &leftflanks,&nleftflanks,&rightflanks,&nrightflanks,
			      argv[2],/*typestring*/NULL,&iit,filename);
	coordstart = (Chrpos_T) univ_coordstart;
	coordend = (Chrpos_T) univ_coordend;

      } else if (ntypes == 1) {
	matches = get_matches(&nmatches,&divstring,&univ_coordstart,&univ_coordend,
			      &leftflanks,&nleftflanks,&rightflanks,&nrightflanks,
			      argv[2],lasttypestring,&iit,filename);
	coordstart = (Chrpos_T) univ_coordstart;
	coordend = (Chrpos_T) univ_coordend;

      } else {
	matches = get_matches_multiple_typed(&nmatches,&divstring,&univ_coordstart,&univ_coordend,
					     &leftflanks,&nleftflanks,&rightflanks,&nrightflanks,
					     argv[2],types,ntypes,&iit,filename);
	coordstart = (Chrpos_T) univ_coordstart;
	coordend = (Chrpos_T) univ_coordend;
      }
    }

    if (value_matches_p == true) {
      matches = match_intersection(&nmatches,/*matches1*/matches,/*nmatches1*/nmatches,
				   /*matches2*/value_matches,/*nmatches2*/n_value_matches);
      FREE(value_matches);
    }

#if 0
    if (centerp == true) {
      print_spaces(centerlength);
      printf("*");
      print_spaces(centerlength-1);
      printf("\n");
    }
#endif

    if (statsp == true) {
      total = 0;
      n = 0;
      for (i = 0; i < nmatches; i++) {
	debug(printf("index = %d\n",matches[i]));
	compute_totals_tally(&total,&n,coordstart,coordend,matches[i],iit);
      }
      n = coordend - coordstart + 1;
      printf("counts:%ld width:%u mean:%.3f\n",total,n,(double)total/(double) n);

#if 0
    } else if (geomeanp == true) {
      logtotal = 0.0;
      total = 0;
      n = 0;
      for (i = 0; i < nmatches; i++) {
	debug(printf("index = %d\n",matches[i]));
	logtotal = compute_logtotal_tally(&total,&n,coordstart,coordend,matches[i],iit);
      }
      printf("geomean:%f totalcounts:%ld posrange:%d\n",
	     exp(logtotal/(double) (coordend - coordstart + 1)) - 1.0,total,n);
#endif

    } else {
      ndivs = IIT_ndivs(iit);
      if (nflanking > 0) {
	for (i = nleftflanks-1; i >= 0; i--) {
	  debug(printf("\nleft index = %d\n",leftflanks[i]));
	  print_interval(&lastcoord,/*total*/0,divstring,coordstart,coordend,leftflanks[i],iit,ndivs,fieldint);
	}
	printf("====================\n");
	FREE(leftflanks);
      }

      lastcoord = coordstart;
      for (i = 0; i < nmatches; i++) {
	debug(printf("\nindex = %d\n",matches[i]));
	print_interval(&lastcoord,/*total*/0,divstring,coordstart,coordend,matches[i],iit,ndivs,fieldint);
      }
      
      if (nflanking > 0) {
	printf("====================\n");
	for (i = 0; i < nrightflanks; i++) {
	  debug(printf("\nright index = %d\n",rightflanks[i]));
	  print_interval(&lastcoord,/*total*/0,divstring,coordstart,coordend,rightflanks[i],iit,ndivs,fieldint);
	}
	FREE(rightflanks);
      }
    }

    if (divstring != NULL) {
      FREE(divstring);
    }
    FREE(matches);
    IIT_free(&iit);
    return 0;
  }
}
Пример #4
0
bool
Parserange_universal_iit (char **div, bool *revcomp,
			  Genomicpos_T *genomicstart, Genomicpos_T *genomiclength,
			  Genomicpos_T *chrstart, Genomicpos_T *chrend,
			  Genomicpos_T *chroffset, Genomicpos_T *chrlength,
			  char *query, IIT_T chromosome_iit, IIT_T contig_iit) {
  char *coords;
  Genomicpos_T result, left, length;
  Interval_T interval;
  int theindex;
  int rc;
  
  *revcomp = false;
  if (index(query,':')) {
    /* Segment must be a genome, chromosome, or contig */
    debug(printf("Parsed query %s into ",query));
    *div = strtok(query,":");
    if ((*div)[0] == '+') {
      *revcomp = false;
      *div = &((*div)[1]);
    } else if ((*div)[0] == '_') {
      *revcomp = true;
      *div = &((*div)[1]);
    }
    coords = strtok(NULL,":");
    debug(printf("segment %s and coords %s\n",*div,coords));


    debug(printf("Interpreting segment %s as a chromosome\n",*div));
    if (coords == NULL) {
      debug(printf("  entire chromosome\n"));
      rc = translate_chromosomepos_universal(&(*genomicstart),&(*genomiclength),*div,left=0,length=0,chromosome_iit);
    } else if (isnumberp(&result,coords)) {
      debug(printf("  and coords %s as a number\n",coords));
      rc = translate_chromosomepos_universal(&(*genomicstart),&(*genomiclength),*div,left=result-1,length=1,chromosome_iit);
    } else if (isrange(&left,&length,&(*revcomp),coords)) {
      debug(printf("  and coords %s as a range starting at %u with length %u and revcomp = %d\n",
		   coords,left,length,*revcomp));
      rc = translate_chromosomepos_universal(&(*genomicstart),&(*genomiclength),*div,left,length,chromosome_iit);
    } else {
      debug(printf("  but coords %s is neither a number nor a range\n",coords));
      rc = -1;
    }

    /* Compute chromosomal coordinates */
    *chrstart = left;
    *chrend = *chrstart + *genomiclength;
    *chrstart += 1U;		/* Make 1-based */

    /* Get chromosomal information */
    if ((theindex = IIT_find_one(chromosome_iit,*div)) < 0) {
      fprintf(stderr,"Cannot find chromosome %s in chromosome IIT file\n",*div);
      /* exit(9); */
    } else {
      interval = IIT_interval(chromosome_iit,theindex);
      *chroffset = Interval_low(interval);
      *chrlength = Interval_length(interval);
    }

    if (rc != 0) {
      /* Try contig */
      debug(printf("Interpreting segment %s as a contig\n",*div));
      if (isnumberp(&result,coords)) {
	debug(printf("  and coords %s as a number\n",coords));
	rc = translate_contig(&(*genomicstart),&(*genomiclength),*div,left=result-1,length=1,contig_iit);
      } else if (isrange(&left,&length,&(*revcomp),coords)) {
	debug(printf("  and coords %s as a range starting at %u with length %u and revcomp = %d\n",
		     coords,left,length,*revcomp));
	rc = translate_contig(&(*genomicstart),&(*genomiclength),*div,left,length,contig_iit);
      } else {
	debug(printf("  but coords %s is neither a number nor a range\n",coords));
	rc = -1;
      }
    }

    if (rc != 0) {
      fprintf(stderr,"Can't find coordinates %s:%s\n",*div,coords);
      return false;
    } else {
      return true;
    }

  } else {
    /* Query must be a genomic position, genomic range, or contig */
    *chrstart = *chroffset = *chrlength = 0;

    debug(printf("Parsed query %s as atomic ",query));
    if (isnumberp(&result,query)) {
      debug(printf("number\n"));
      *genomicstart = result-1;
      *genomiclength = 1;

    } else if (isrange(&left,&length,&(*revcomp),query)) {
      debug(printf("range\n"));
      *genomicstart = left;
      *genomiclength = length;

    } else {

      debug(printf("contig\n"));
      return false;
#if 0
      rc = translate_contig(&(*genomicstart),&(*genomiclength),query,left=0,length=0,contig_iit);
      IIT_free(&contig_iit);
#endif
    }

    *div = convert_to_chrpos_iit(&(*chrstart),chromosome_iit,*genomicstart);
    *chrend = *chrstart + *genomiclength;
    *chrstart += 1U;		/* Make 1-based */

    /* Try chromosome first */
    if ((theindex = IIT_find_one(chromosome_iit,*div)) < 0) {
      fprintf(stderr,"Cannot find chromosome %s in chromosome IIT file\n",*div);
      return false;
    } else {
      interval = IIT_interval(chromosome_iit,theindex);
      *chroffset = Interval_low(interval);
      *chrlength = Interval_length(interval);
      return true;
    }
  }
}
Пример #5
0
bool
Parserange_universal (char **div, bool *revcomp,
		      Genomicpos_T *genomicstart, Genomicpos_T *genomiclength,
		      Genomicpos_T *chrstart, Genomicpos_T *chrend,
		      Genomicpos_T *chroffset, Genomicpos_T *chrlength,
		      char *query, char *genomesubdir, char *fileroot) {
  char *coords, *filename;
  Genomicpos_T result, left, length;
  IIT_T chromosome_iit, contig_iit;
  Interval_T interval;
  int theindex;
  int rc;
  
  *revcomp = false;
  if (index(query,':')) {
    /* Segment must be a genome, chromosome, or contig */
    debug(printf("Parsed query %s into ",query));
    *div = strtok(query,":");
    if ((*div)[0] == '+') {
      *revcomp = false;
      *div = &((*div)[1]);
    } else if ((*div)[0] == '_') {
      *revcomp = true;
      *div = &((*div)[1]);
    }
    coords = strtok(NULL,":");
    debug(printf("segment %s and coords %s\n",*div,coords));

    /* Try chromosome first */
    filename = (char *) CALLOC(strlen(genomesubdir)+strlen("/")+strlen(fileroot)+
			       strlen(".chromosome.iit")+1,sizeof(char));
    sprintf(filename,"%s/%s.chromosome.iit",genomesubdir,fileroot);
    chromosome_iit = IIT_read(filename,/*name*/NULL,/*readonlyp*/true,/*divread*/READ_ALL,
			      /*divstring*/NULL,/*add_iit_p*/false,/*labels_read_p*/true);
    FREE(filename);

    debug(printf("Interpreting segment %s as a chromosome\n",*div));
    if (coords == NULL) {
      debug(printf("  entire chromosome\n"));
      rc = translate_chromosomepos_universal(&(*genomicstart),&(*genomiclength),*div,left=0,length=0,chromosome_iit);
    } else if (isnumberp(&result,coords)) {
      debug(printf("  and coords %s as a number\n",coords));
      rc = translate_chromosomepos_universal(&(*genomicstart),&(*genomiclength),*div,left=result-1,length=1,chromosome_iit);
    } else if (isrange(&left,&length,&(*revcomp),coords)) {
      debug(printf("  and coords %s as a range starting at %u with length %u and revcomp = %d\n",
		   coords,left,length,*revcomp));
      rc = translate_chromosomepos_universal(&(*genomicstart),&(*genomiclength),*div,left,length,chromosome_iit);
    } else {
      debug(printf("  but coords %s is neither a number nor a range\n",coords));
      rc = -1;
    }

    /* Compute chromosomal coordinates */
    *chrstart = left;
    *chrend = *chrstart + *genomiclength;
    *chrstart += 1U;		/* Make 1-based */

    /* Get chromosomal information */
    if ((theindex = IIT_find_one(chromosome_iit,*div)) < 0) {
      fprintf(stderr,"Cannot find chromosome %s in chromosome IIT file\n",*div);
      /* exit(9); */
    } else {
      interval = IIT_interval(chromosome_iit,theindex);
      *chroffset = Interval_low(interval);
      *chrlength = Interval_length(interval);
    }

    IIT_free(&chromosome_iit);

#if 0
    /* Contig IIT's are of type 1, which require some work to compute
       on current div-based scheme.  Just abandoning for now. */
    if (rc != 0) {
      /* Try contig */
      filename = (char *) CALLOC(strlen(genomesubdir)+strlen("/")+strlen(fileroot)+
				 strlen(".contig.iit")+1,sizeof(char));
      sprintf(filename,"%s/%s.contig.iit",genomesubdir,fileroot);
      contig_iit = IIT_read(filename,/*name*/NULL,/*readonlyp*/true,/*divread*/READ_ALL,
			    /*divstring*/NULL,/*add_iit_p*/false,/*labels_read_p*/true);
      FREE(filename);

      debug(printf("Interpreting segment %s as a contig\n",*div));
      if (coords == NULL) {
	debug(printf("  entire contig\n"));
	rc = translate_contig_universal(&(*genomicstart),&(*genomiclength),*div,left=0,length=0,chromosome_iit);
      } else if (isnumberp(&result,coords)) {
	debug(printf("  and coords %s as a number\n",coords));
	rc = translate_contig(&(*genomicstart),&(*genomiclength),*div,left=result-1,length=1,contig_iit);
      } else if (isrange(&left,&length,&(*revcomp),coords)) {
	debug(printf("  and coords %s as a range starting at %u with length %u and revcomp = %d\n",
		     coords,left,length,*revcomp));
	rc = translate_contig(&(*genomicstart),&(*genomiclength),*div,left,length,contig_iit);
      } else {
	debug(printf("  but coords %s is neither a number nor a range\n",coords));
	rc = -1;
      }

      IIT_free(&contig_iit);
    }
#endif

    if (rc != 0) {
      fprintf(stderr,"Can't find coordinates %s:%s\n",*div,coords);
      return false;
    } else {
      return true;
    }

  } else {
    /* Query must be a genomic position, genomic range, or contig */
    *chrstart = *chroffset = *chrlength = 0;

    debug(printf("Parsed query %s as atomic ",query));
    if (isnumberp(&result,query)) {
      debug(printf("number\n"));
      *genomicstart = result-1;
      *genomiclength = 1;

    } else if (isrange(&left,&length,&(*revcomp),query)) {
      debug(printf("range\n"));
      *genomicstart = left;
      *genomiclength = length;

    } else {

      debug(printf("contig\n"));
      return false;
#if 0
      filename = (char *) CALLOC(strlen(genomesubdir)+strlen("/")+strlen(fileroot)+
				 strlen(".contig.iit")+1,sizeof(char));
      sprintf(filename,"%s/%s.contig.iit",genomesubdir,fileroot);
      contig_iit = IIT_read(filename,/*name*/NULL,/*readonlyp*/true,/*divread*/READ_ALL,
			    /*divstring*/NULL,/*add_iit_p*/false,/*labels_read_p*/true);
      FREE(filename);

      rc = translate_contig(&(*genomicstart),&(*genomiclength),query,left=0,length=0,contig_iit);
      IIT_free(&contig_iit);
#endif
    }

    *div = convert_to_chrpos(&(*chrstart),genomesubdir,fileroot,*genomicstart);
    *chrend = *chrstart + *genomiclength;
    *chrstart += 1U;		/* Make 1-based */

    /* Try chromosome first */
    filename = (char *) CALLOC(strlen(genomesubdir)+strlen("/")+strlen(fileroot)+
			       strlen(".chromosome.iit")+1,sizeof(char));
    sprintf(filename,"%s/%s.chromosome.iit",genomesubdir,fileroot);
    chromosome_iit = IIT_read(filename,/*name*/NULL,/*readonlyp*/true,/*divread*/READ_ALL,
			      /*divstring*/NULL,/*add_iit_p*/false,/*labels_read_p*/true);
    FREE(filename);

    if ((theindex = IIT_find_one(chromosome_iit,*div)) < 0) {
      fprintf(stderr,"Cannot find chromosome %s in chromosome IIT file\n",*div);
      IIT_free(&chromosome_iit);
      return false;
    } else {
      interval = IIT_interval(chromosome_iit,theindex);
      *chroffset = Interval_low(interval);
      *chrlength = Interval_length(interval);
      IIT_free(&chromosome_iit);
      return true;
    }
  }
}
Пример #6
0
bool
Parserange_query (char **divstring, unsigned int *coordstart, unsigned int *coordend, bool *revcomp,
		  char *query, char *filename) {
  char *coords;
  unsigned int result, left, length;
  int div_strlen;
  IIT_T iit;
  
  *divstring = NULL;
  *revcomp = false;

  if ((coords = find_div(&div_strlen,query,':')) != NULL) {
    /* Query may have a div */
    *divstring = (char *) CALLOC(div_strlen+1,sizeof(char));
    strncpy(*divstring,query,div_strlen);

    debug(printf("Parsed query %s into divstring %s and coords %s\n",
		 query,*divstring,coords));

    if (IIT_read_divint(filename,*divstring,/*add_iit_p*/true) < 0) {
      fprintf(stderr,"Chromosome %s not found in IIT file\n",*divstring);
      debug(printf("  but divstring not found, so treat as label\n"));
      FREE(*divstring);		/* free only when returning false */
      return false;
    } else if (coords == NULL || *coords == '\0') {
      debug(printf("  entire div\n"));
      if ((iit = IIT_read(filename,/*name*/NULL,/*readonlyp*/true,/*divread*/READ_ONE,*divstring,
			  /*add_iit_p*/true,/*labels_read_p*/false)) == NULL) {
	if (Access_file_exists_p(filename) == false) {
	  fprintf(stderr,"Cannot read file %s\n",filename);
	} else {
	  fprintf(stderr,"File %s appears to be an invalid IIT file\n",filename);
	}
	exit(9);
      } else {
	*coordstart= 0;
	*coordend = IIT_divlength(iit,*divstring);
	debug(printf("  divlength is %u\n",*coordend));
	IIT_free(&iit);
      }
      return true;
    } else if (isnumberp(&result,coords)) {
      debug(printf("  and coords %s as a number\n",coords));
      *coordstart = result;
      *coordend = result;
      return true;
    } else if (isrange(&left,&length,&(*revcomp),coords)) {
      debug(printf("  and coords %s as a range starting at %u with length %u and revcomp = %d\n",
		   coords,left,length,*revcomp));
      *coordstart = left + 1;	/* Because isrange is 0-based */
      *coordend = left + length;
      return true;
    } else {
      debug(printf("  but coords %s is neither a number nor a range.  Interpret as a label.\n",coords));
      FREE(*divstring);		/* free only when returning false */
      return false;
    }

  } else {
    /* No div.  Query must be a number, range, or label */

    debug(printf("Parsed query %s without a div ",query));
    if (isnumberp(&result,query)) {
      debug(printf("number\n"));
      *coordstart = result;
      *coordend = result;
      return true;
    } else if (isrange(&left,&length,&(*revcomp),query)) {
      debug(printf("range\n"));
      *coordstart = left + 1;	/* Because isrange is 0-based */
      *coordend = left + length;
      return true;
    } else {
      debug(printf("label\n"));
      return false;
    }
  }
}