Esempio n. 1
0
/*
 * Quicksort over `count` records stored back to back in `buffer`.  Each
 * record is two consecutive uint64_t words: record i occupies buffer[2*i]
 * and buffer[2*i+1].
 *
 * Runs of at most 32 records are delegated to sort_block().  Larger runs are
 * partitioned around a pivot: every record for which compare(record, pivot)
 * is true is moved in front of the pivot, then both sides are sorted
 * recursively.  Once a run of at least SORT_BLOCK_SIZE records has been
 * processed, the result is verified with check_sorted() and a diagnostic is
 * printed to stdout if the check fails.
 *
 * NOTE(review): swap(buffer, i, j) is assumed to exchange records i and j
 * (not single words), and the resulting order is whatever compare() defines;
 * neither helper is visible here.
 */
void sort_block_q(uint64_t* buffer, int count) {
    if ( count <= 1 ) return;

    if ( count <= 32 ) {
        sort_block(buffer,count);
        return;
    }

    /* Look for the first record that differs from record 0. */
    uint64_t z[2] = {buffer[0], buffer[1]};
    int nel = -1;
    for ( int i = 1; i < count; i++ ) {
        uint64_t a[2] = {buffer[i*2], buffer[i*2+1]};
        if ( !compareeq (a,z) ) {
            nel = i;
            break;
        }
    }

    /* Every record equals record 0: nothing to reorder. */
    if ( nel < 0 ) return;

    /* The differing record becomes the pivot and is parked at index 0. */
    swap(buffer,0,nel);
    z[0] = buffer[0];
    z[1] = buffer[1];

    /* Records [1, lc) are those for which compare(record, pivot) held. */
    int lc = 1;
    for ( int i = 1; i < count; i++ ) {
        uint64_t a[2] = {buffer[i*2], buffer[i*2+1]};
        if ( compare (a,z) ) {
            swap(buffer, i,lc);
            lc++;
        }
    }

    /* Move the pivot between the two partitions, to index lc-1. */
    swap(buffer,0,lc-1);

    /*
     * The pivot is excluded from both recursive calls.  Including it in the
     * left call (length lc) can yield lc == count with the array unchanged,
     * which recurses on the same input forever.
     */
    sort_block_q(buffer, lc-1);
    sort_block_q(buffer+(lc*2), count-lc);

    if ( count < SORT_BLOCK_SIZE ) return;

    if ( !check_sorted(buffer, count) ) {
        printf( "Block sort has errors!\n" );
    }
}
Esempio n. 2
0
int ctx_sort(int argc, char **argv)
{
  const char *out_path = NULL;
  struct MemArgs memargs = MEM_ARGS_INIT;

  // Arg parsing
  char cmd[100];
  char shortopts[300];
  cmd_long_opts_to_short(longopts, shortopts, sizeof(shortopts));
  int c;

  // silence error messages from getopt_long
  // opterr = 0;

  while((c = getopt_long_only(argc, argv, shortopts, longopts, NULL)) != -1) {
    cmd_get_longopt_str(longopts, c, cmd, sizeof(cmd));
    switch(c) {
      case 0: /* flag set */ break;
      case 'h': cmd_print_usage(NULL); break;
      case 'f': cmd_check(!futil_get_force(), cmd); futil_set_force(true); break;
      case 'm': cmd_mem_args_set_memory(&memargs, optarg); break;
      case 'n': cmd_mem_args_set_nkmers(&memargs, optarg); break;
      case 'o': cmd_check(!out_path, cmd); out_path = optarg; break;
      case ':': /* BADARG */
      case '?': /* BADCH getopt_long has already printed error */
        // cmd_print_usage(NULL);
        die("`"CMD" sort -h` for help. Bad option: %s", argv[optind-1]);
      default: die("Bad option: [%c]: %s", c, cmd);
    }
  }

  if(optind+1 != argc)
    cmd_print_usage("Require exactly one input graph file (.ctx)");

  const char *ctx_path = argv[optind];

  //
  // Open Graph file
  //
  GraphFileReader gfile;
  memset(&gfile, 0, sizeof(GraphFileReader));
  graph_file_open2(&gfile, ctx_path, out_path ? "r" : "r+", true, 0);

  if(!file_filter_is_direct(&gfile.fltr))
    die("Cannot open graph file with a filter ('in.ctx:blah' syntax)");

  size_t num_kmers, memory;

  // Reading from a stream
  if(gfile.num_of_kmers < 0) {
    if(!memargs.num_kmers_set)
      die("If reading from a stream, must give -n <num_kmers>");
    num_kmers = memargs.num_kmers;
  }
  else num_kmers = gfile.num_of_kmers;

  // Open output path (if given)
  FILE *fout = out_path ? futil_fopen_create(out_path, "w") : NULL;

  size_t i;
  size_t ncols = gfile.hdr.num_of_cols;
  size_t kmer_mem = sizeof(BinaryKmer) + (sizeof(Edges)+sizeof(Covg))*ncols;

  memory = (sizeof(char*) + kmer_mem) * num_kmers;

  char mem_str[50];
  bytes_to_str(memory, 1, mem_str);

  if(memory > memargs.mem_to_use)
    die("Require at least %s memory", mem_str);

  status("[memory] Total: %s", mem_str);

  char *mem = ctx_malloc(kmer_mem * num_kmers);
  char **kmers = ctx_malloc(num_kmers*sizeof(char*));

  // Read in whole file
  // if(graph_file_fseek(gfile, gfile.hdr_size, SEEK_SET) != 0) die("fseek failed");
  size_t nkread = gfr_fread_bytes(&gfile, mem, num_kmers*kmer_mem);

  if(nkread != num_kmers*kmer_mem)
    die("Could only read %zu bytes [<%zu]", nkread, num_kmers*kmer_mem);

  // check we are at the end of the file
  char tmpc;
  if(gfr_fread_bytes(&gfile, &tmpc, 1) != 0) {
    die("More kmers in file than believed (kmers: %zu ncols: %zu).",
        num_kmers, ncols);
  }

  status("Read %zu kmers with %zu colour%s", num_kmers,
         ncols, util_plural_str(ncols));

  for(i = 0; i < num_kmers; i++)
    kmers[i] = mem + kmer_mem*i;

  sort_block(kmers, num_kmers);

  // Print
  if(out_path != NULL) {
    // saving to a different destination - write header
    graph_write_header(fout, &gfile.hdr);
  }
  else {
    // Directly manipulating gfile.fh here, using it to write later
    // Not doing any more reading
    if(fseek(gfile.fh, gfile.hdr_size, SEEK_SET) != 0) die("fseek failed");
    fout = gfile.fh;
  }

  for(i = 0; i < num_kmers; i++)
    if(fwrite(kmers[i], 1, kmer_mem, fout) != kmer_mem)
      die("Cannot write to file");

  if(out_path) fclose(fout);

  graph_file_close(&gfile);
  ctx_free(kmers);
  ctx_free(mem);

  return EXIT_SUCCESS;
}