示例#1
0
文件: sff_sort.c 项目: Brainiarc7/TS
/*
 * Reads every entry from fp_in, orders the entries by the row/col
 * co-ordinates parsed from their read names, and writes them to fp_out.
 *
 * All entries are held in memory at once.  The sort itself is skipped when
 * no entry compares less than its predecessor while reading.  A read name
 * that cannot be parsed, or a failed write, is reported through ion_error().
 */
void
sff_sort(sff_file_t *fp_in, sff_file_t *fp_out)
{
  int32_t i, row, col;
  sff_t *sff;
  int32_t requires_sort = 0;
  sff_sort_t *sffs = NULL;
  int32_t sffs_mem = 0, sffs_len = 0;

  // initialize memory
  sffs_mem = 1024;
  sffs = ion_malloc(sizeof(sff_sort_t) * sffs_mem, __func__, "sffs");

  // go through the input file
  while(NULL != (sff = sff_read(fp_in))) {
      // get the row/col co-ordinates
      if(0 == ion_readname_to_rowcol(sff->rheader->name->s, &row, &col)) {
          ion_error(__func__, "could not understand the read name", Exit, OutOfRange);
      }
      // grow the array (doubling) until there is room for one more entry
      while(sffs_mem <= sffs_len) {
          sffs_mem <<= 1; // double
          sffs = ion_realloc(sffs, sizeof(sff_sort_t) * sffs_mem, __func__, "sffs");
      }
      // the array takes over the entry; it is destroyed after being written
      sffs[sffs_len].row = row;
      sffs[sffs_len].col = col;
      sffs[sffs_len].sff = sff;
      sff = NULL;

      // check if we need to sort, for later
      if(0 < sffs_len && __sff_sort_lt(sffs[sffs_len], sffs[sffs_len-1])) {
          requires_sort = 1;
      }

      sffs_len++;
  }

  // shrink to the used size; skipped for an empty input because a zero-byte
  // realloc is implementation-defined and may hand back NULL
  if(0 < sffs_len) {
      sffs_mem = sffs_len;
      sffs = ion_realloc(sffs, sizeof(sff_sort_t) * sffs_mem, __func__, "sffs");
  }

  if(1 == requires_sort) {
      // sort
      ion_sort_introsort(sff_sort, sffs_len, sffs);
  }

  // write each entry in order, releasing it as soon as it has been written
  for(i=0;i<sffs_len;i++) {
      if(0 == sff_write(fp_out, sffs[i].sff)) {
          ion_error(__func__, "sff_write", Exit, WriteFileError);
      }
      sff_destroy(sffs[i].sff);
      sffs[i].sff = NULL;
  }

  free(sffs);
}
示例#2
0
文件: sff.c 项目: golharam/TS
/*
 * Reads one SFF entry (read header followed by read data) from fp.
 *
 * The supplied global header pointer is stored in the new entry as-is (it
 * is not copied here) and is also handed to sff_read_read().
 *
 * Returns the new entry, or NULL when either the read header or the read
 * data could not be obtained (treated as end of file); in that case the
 * partially built entry is passed to sff_destroy().
 */
sff_t *
sff_read1(FILE *fp, sff_header_t *header)
{
  sff_t *entry = sff_init();

  entry->gheader = header;

  entry->rheader = sff_read_header_read(fp);
  if(NULL != entry->rheader) {
      entry->read = sff_read_read(fp, entry->gheader, entry->rheader);
      if(NULL != entry->read) {
          return entry;
      }
  }

  // EOF while reading the header or the read itself
  sff_destroy(entry);
  return NULL;
}
示例#3
0
文件: sff.c 项目: golharam/TS
/*
 * Command-line entry point that views an SFF file.
 *
 * Options, as parsed below:
 *   -r RANGE  restrict output to a row range (opens the input with its index)
 *   -c RANGE  restrict output to a column range (opens the input with its index)
 *   -R FILE   only output reads whose names are listed (whitespace separated)
 *             in FILE
 *   -q        FASTQ output (out_mode bit 1)
 *   -b        binary SFF output to stdout (out_mode bit 2)
 *   -h        print usage
 * Exactly one positional argument, the input SFF file, is required.
 *
 * The header written/printed is a clone of the input header whose n_reads is
 * replaced by the number of names (-R) or, otherwise, by the number of index
 * hits (-r/-c).
 *
 * Returns 0 on success, or the value of usage() when the arguments are not
 * accepted.  Other failures are reported through ion_error().
 */
int
sff_view_main(int argc, char *argv[])
{
  int i, c;
  sff_file_t *sff_file_in=NULL, *sff_file_out=NULL;
  sff_iter_t *sff_iter = NULL;
  sff_t *sff = NULL;
  sff_header_t *header = NULL;
  char *fn_names = NULL;
  char **names = NULL;
  int32_t names_num = 0, names_mem = 0;
  int32_t out_mode, min_row, max_row, min_col, max_col;
  int32_t use_index = 0;

  out_mode = 0;
  min_row = max_row = min_col = max_col = -1;

  while((c = getopt(argc, argv, "r:c:R:bqh")) >= 0) {
      switch(c) {
        case 'r':
          if(ion_parse_range(optarg, &min_row, &max_row) < 0) {
              ion_error(__func__, "-r : format not recognized", Exit, OutOfRange);
          }
          break;
        case 'c':
          if(ion_parse_range(optarg, &min_col, &max_col) < 0) {
              ion_error(__func__, "-c : format not recognized", Exit, OutOfRange);
          }
          break;
        case 'R':
          // a repeated -R replaces the earlier file name
          free(fn_names);
          fn_names = strdup(optarg);
          if(NULL == fn_names) {
              ion_error(__func__, "fn_names", Exit, MallocMemory);
          }
          break;
        case 'q':
          out_mode |= 1;
          break;
        case 'b':
          out_mode |= 2;
          break;
        case 'h':
        default:
          free(fn_names);
          return usage();
      }
  }
  if(argc != 1+optind) {
      free(fn_names);
      return usage();
  }

  if(3 == out_mode) {
      ion_error(__func__, "options -b and -q cannot be used together", Exit, CommandLineArgument);
  }

  // any row/col bound means the reads are fetched through the index
  use_index = (-1 != min_row || -1 != max_row || -1 != min_col || -1 != max_col) ? 1 : 0;

  // open the input SFF
  sff_file_in = sff_fopen(argv[optind], (1 == use_index) ? "rbi" : "rb", NULL, NULL);

  header = sff_header_clone(sff_file_in->header); /* copy header, but update n_reads if using index or names */

  // create the iterator whenever a range was given; previously it was only
  // created when -R was absent, so combining -R with -r/-c handed a NULL
  // iterator to sff_iter_read() below
  if(1 == use_index) {
      sff_iter = sff_iter_query(sff_file_in, min_row, max_row, min_col, max_col);
  }

  if(NULL != fn_names) {
      // read in the names
      FILE *fp = NULL;
      char name[1024]="\0";
      if(!(fp = fopen(fn_names, "rb"))) {
          fprintf(stderr, "** Could not open %s for reading. **\n", fn_names);
          ion_error(__func__, fn_names, Exit, OpenFileError);
      }
      // the field width keeps an over-long token from overflowing name[];
      // such a token is split across several entries instead
      while(1 == fscanf(fp, "%1023s", name)) {
          if(names_num == names_mem) {
              names_mem = (0 == names_mem) ? 4 : (2 * names_mem);
              names = ion_realloc(names, sizeof(char*) * names_mem, __func__, "names");
          }
          names[names_num] = strdup(name);
          if(NULL == names[names_num]) {
              ion_error(__func__, name, Exit, MallocMemory);
          }
          names_num++;
      }
      // shrink to fit; skipped for an empty list to avoid a zero-byte realloc
      if(0 < names_num) {
          names = ion_realloc(names, sizeof(char*) * names_num, __func__, "names");
      }
      fclose(fp);
      header->n_reads = names_num;
  }
  else if(1 == use_index) {
      // iterate once through the index to count the entries so we can set
      // the count correctly in the header
      int entries = 0;
      while(NULL != (sff = sff_iter_read(sff_file_in, sff_iter))) {
          entries++;
          sff_destroy(sff); // only counting; do not leak the entry
      }
      header->n_reads = entries;
      /* reset sff_iter */
      sff_iter_destroy(sff_iter);
      sff_iter = sff_iter_query(sff_file_in, min_row, max_row, min_col, max_col);
  }

  // print the header
  switch(out_mode) {
    case 0:
      sff_header_print(stdout, header);
      break;
    case 1:
      // do nothing: FASTQ
      break;
    case 2:
      sff_file_out = sff_fdopen(fileno(stdout), "wb", header, NULL);
      break;
  }

  while(1) {
      int32_t to_print = 1;

      if(1 == use_index) {
          sff = sff_iter_read(sff_file_in, sff_iter);
      }
      else {
          sff = sff_read(sff_file_in);
      }
      if(NULL == sff) {
          break;
      }

      // filter on the name list whenever -R was given, even if the list is
      // empty (in which case nothing is printed, matching n_reads == 0)
      if(NULL != fn_names) {
          to_print = 0;
          for(i=0;i<names_num;i++) {
              if(0 == strcmp(names[i], sff_name(sff))) {
                  to_print = 1;
                  break;
              }
          }
          // remove the matched name and shift the rest down
          if(1 == to_print) { // i < names_num
              free(names[i]);
              for(;i<names_num-1;i++) {
                  names[i] = names[i+1];
              }
              names[names_num-1] = NULL;
              names_num--;
          }
      }

      if(1 == to_print) {
          switch(out_mode) {
            case 0:
              sff_print(stdout, sff);
              break;
            case 1:
              // FASTQ: bases and qualities are emitted after the key
              if(fprintf(stdout, "@%s\n%s\n+\n",
                         sff->rheader->name->s,
                         sff->read->bases->s + sff->gheader->key_length) < 0) {
                  ion_error(__func__, "stdout", Exit, WriteFileError);
              }
              for(i=sff->gheader->key_length;i<sff->read->quality->l;i++) {
                  if(fputc(QUAL2CHAR(sff->read->quality->s[i]), stdout) < 0) {
                      ion_error(__func__, "stdout", Exit, WriteFileError);
                  }
              }
              if(fputc('\n', stdout) < 0) {
                  ion_error(__func__, "stdout", Exit, WriteFileError);
              }
              break;
            case 2:
              if(0 == sff_write(sff_file_out, sff)) {
                  ion_error(__func__, "sff_write", Exit, WriteFileError);
              }
              break;
          }
      }
      sff_destroy(sff);
  }

  sff_fclose(sff_file_in);
  if(2 == out_mode) {
      sff_fclose(sff_file_out);
  }
  if(1 == use_index) {
      sff_iter_destroy(sff_iter);
  }

  // names left over were requested but never seen in the input
  if(0 != names_num) {
      fprintf(stderr, "** Did not find all the reads with (-R). **\n");
      ion_error(__func__, fn_names, Exit, OutOfRange);
  }

  sff_header_destroy(header);

  // release the name list (and any names still held) and the file name
  for(i=0;i<names_num;i++) {
      free(names[i]);
  }
  free(names);
  free(fn_names);
  return 0;
}
示例#4
0
文件: sff_index.c 项目: Brainiarc7/TS
// TODO: should we change the header:
// - must track index_length
// - assumes row-major order
/*
 * Builds an index over the SFF entries that remain to be read from fp_in.
 *
 * The offset table has one slot per row (SFF_INDEX_ROW_ONLY) or one slot per
 * row/col pair (SFF_INDEX_ALL), plus a final slot that holds the file
 * position after the last entry.  A slot with no read holds UINT64_MAX.
 * Entries must be in row-major order; otherwise ion_error() is called.
 *
 * fp_out_header is updated: index_offset is set to the position at which the
 * entries started in fp_in, and index_length to sff_index_length(idx).  All
 * stored offsets are then shifted by index_length, since the index is to be
 * inserted between the header and the entries.
 *
 * Returns the newly created index.
 */
sff_index_t*
sff_index_create(sff_file_t *fp_in, sff_header_t *fp_out_header, int32_t num_rows, int32_t num_cols, int32_t type)
{
  int64_t len = 0;
  int64_t i;
  int32_t prev_row, prev_col, row, col;
  sff_index_t *idx;
  sff_t *sff;
  uint64_t fp_in_start, prev_pos;
  long pos;

  idx = sff_index_init();

  idx->num_rows = num_rows;
  idx->num_cols = num_cols;
  idx->type = type;

  // alloc
  switch(type) {
    case SFF_INDEX_ROW_ONLY:
      len = 1 + (int64_t)idx->num_rows;
      idx->offset = ion_malloc(len * sizeof(uint64_t), __func__, "idx->offset");
      break;
    case SFF_INDEX_ALL:
      // widen before multiplying so rows*cols cannot overflow 32 bits
      len = 1 + ((int64_t)idx->num_rows * (int64_t)idx->num_cols);
      idx->offset = ion_malloc(len * sizeof(uint64_t), __func__, "idx->offset");
      break;
    default:
      ion_error(__func__, "this index type is currently not supported", Exit, OutOfRange);
  }

  // mark every slot as empty up front: slots after the last read are never
  // visited by the loop below, yet all slots are examined at the end
  for(i=0;i<len;i++) {
      idx->offset[i] = UINT64_MAX;
  }

  // save where the sff entries started
  pos = ftell(fp_in->fp);
  if(pos < 0) {
      ion_error(__func__, "ftell", Exit, ReadFileError);
  }
  prev_pos = fp_in_start = (uint64_t)pos;

  // go through the input file
  i = 0;
  prev_row = prev_col = 0;
  while(NULL != (sff = sff_read(fp_in))) {
      // out of range
      if(len-1 <= i) {
          ion_error(__func__, "bug encountered", Exit, OutOfRange);
      }

      // get the row/col co-ordinates
      if(0 == ion_readname_to_rowcol(sff->rheader->name->s, &row, &col)) {
          ion_error(__func__, "could not understand the read name", Exit, OutOfRange);
      }

      // assumes row-major order, skips over reads that are not present
      if(row < prev_row || (row == prev_row && col < prev_col)) {
          ion_error(__func__, "SFF file was not sorted in row-major order", Exit, OutOfRange);
      }
      // (prev_row, prev_col) is the next expected position; advance it one
      // position at a time until it reaches this read
      while(row != prev_row || col != prev_col) {
          // add in empty entry
          switch(type) {
            case SFF_INDEX_ROW_ONLY:
              if(0 == prev_col) { // first column
                  idx->offset[i] = UINT64_MAX;
                  // do not increment i, since we only do this when moving to a new row
              }
              break;
            case SFF_INDEX_ALL:
              // all rows and columns
              idx->offset[i] = UINT64_MAX;
              i++;
              break;
            default:
              ion_error(__func__, "this index type is currently not supported", Exit, OutOfRange);
          }
          if(len-1 <= i) {
              ion_error(__func__, "x/y was out of range", Exit, OutOfRange);
          }

          prev_col++;
          if(prev_col == idx->num_cols) {
              // new row
              prev_col = 0;
              prev_row++;
              if(SFF_INDEX_ROW_ONLY == type) {
                  i++;
              }
          }
      }

      // add to the index
      switch(type) {
        case SFF_INDEX_ROW_ONLY:
          if(0 == col) { // first column
              idx->offset[i] = prev_pos;
          }
          else if(0 < col && UINT64_MAX == idx->offset[i]) {
              // first read seen in this row is not in column zero
              idx->offset[i] = prev_pos;
              // do not move onto the next
          }
          break;
        case SFF_INDEX_ALL:
          // all rows and columns
          idx->offset[i] = prev_pos;
          i++;
          break;
        default:
          ion_error(__func__, "this index type is currently not supported", Exit, OutOfRange);
      }
      prev_row = row;
      prev_col = col;

      // destroy
      sff_destroy(sff);

      // next
      prev_col++;
      if(prev_col == idx->num_cols) {
          // new row
          prev_col = 0;
          prev_row++;
          if(SFF_INDEX_ROW_ONLY == type) {
              i++;
          }
      }

      // position of the next entry (or end of the entries)
      pos = ftell(fp_in->fp);
      if(pos < 0) {
          ion_error(__func__, "ftell", Exit, ReadFileError);
      }
      prev_pos = (uint64_t)pos;
  }
  // get the last offset
  idx->offset[len-1] = prev_pos;

  // update the index offset in the header
  fp_out_header->index_offset = fp_in_start; // insert between the header and sff entries
  // update the index length in the header
  fp_out_header->index_length = sff_index_length(idx);
  // update the offsets based on the index length
  for(i=0;i<len;i++) {
      if(UINT64_MAX != idx->offset[i]) {
          idx->offset[i] += fp_out_header->index_length;
      }
  }

  return idx;
}
示例#5
0
文件: sff_index.c 项目: Brainiarc7/TS
int
sff_index_create_main(int argc, char *argv[])
{
  int c;
  sff_file_t *fp_in, *fp_out;
  int32_t num_rows, num_cols, type;
  sff_header_t *fp_out_header;
  sff_index_t* index;
  sff_t *sff;

  num_rows = num_cols = -1;
  type = SFF_INDEX_ALL;

  while((c = getopt(argc, argv, "r:c:C:Rh")) >= 0) {
      switch(c) {
        case 'r':
          num_rows = atoi(optarg);
          break;
        case 'c':
          num_cols = atoi(optarg);
          break;
        case 'C':
          switch(atoi(optarg)) {
            case 0:
              num_rows = 1152;
              num_cols = 1280;
              break;
            case 1:
              num_rows = 2640;
              num_cols = 2736;
              break;
            case 2:
              num_rows = 3792;
              num_cols = 3392;
              break;
            default:
              break;
          }
        case 'R':
          type = SFF_INDEX_ROW_ONLY;
          break;
        case 'h':
        default:
          return usage();
      }
  }

  if(argc != 1+optind) {
      return usage();
  }
  else {
      // check cmd line args
      if(num_rows < 0) {
          ion_error(__func__, "-r must be specified and greater than zero", Exit, CommandLineArgument);
      }
      if(num_cols < 0) {
          ion_error(__func__, "-c must be specified and greater than zero", Exit, CommandLineArgument);
      }
      switch(type) {
        case SFF_INDEX_ROW_ONLY:
        case SFF_INDEX_ALL:
          break;
        default:
          ion_error(__func__, "bug encountered", Exit, OutOfRange);
          break;
      }

      fp_in = sff_fopen(argv[optind], "rb", NULL, NULL);
      fp_out_header = sff_header_clone(fp_in->header);
      index = sff_index_create(fp_in, fp_out_header, num_rows, num_cols, type);

      fp_out = sff_fdopen(fileno(stdout), "wbi", fp_out_header, index);

      // seek the input file to the beginning of the the entries, which is the same
      // location as where the index begins in the output file.
      if(0 != fseek(fp_in->fp, fp_out_header->index_offset, SEEK_SET)) {
	ion_error(__func__, "fseek", Exit, ReadFileError);
      }

      // write the sff entries
      while(NULL != (sff = sff_read(fp_in))) {
	sff_write(fp_out, sff);
	sff_destroy(sff);
      }

      // destroy the header.  Don't destroy index, sff_fclose does that
      sff_header_destroy(fp_out_header);
      //      sff_index_destroy(index);

      sff_fclose(fp_in);
      sff_fclose(fp_out);
  }

  return 0;
}