Пример #1
0
/**
 * Allocate the memory owned by a GraphCrawler and put it in an empty state.
 * Each per-colour array gets one zeroed entry per colour in the graph, then
 * the embedded cache, graph walker and repeat walker are allocated.
 * Release with graph_crawler_dealloc().
 *
 * @param crawler  crawler to initialise; previous contents are overwritten
 * @param db_graph graph to crawl; node_in_cols must be non-NULL
 */
void graph_crawler_alloc(GraphCrawler *crawler, const dBGraph *db_graph)
{
  ctx_assert(db_graph->node_in_cols != NULL);

  const size_t num_cols = db_graph->num_of_cols;

  // Zeroed per-colour arrays
  int *paths_by_col = ctx_calloc(num_cols, sizeof(*paths_by_col));
  GCMultiColPath *multi = ctx_calloc(num_cols, sizeof(*multi));
  GCUniColPath *uni = ctx_calloc(num_cols, sizeof(*uni));
  uint32_t *cols = ctx_calloc(num_cols, sizeof(*cols));

  // Build the struct in a temporary (unnamed members are zeroed) and copy it
  // over the destination in one go
  GraphCrawler init = {.num_paths = 0,
                       .col_paths = paths_by_col,
                       .multicol_paths = multi,
                       .unicol_paths = uni,
                       .col_list = cols};

  memcpy(crawler, &init, sizeof(*crawler));

  // Embedded helpers are allocated in place
  graph_cache_alloc(&crawler->cache, db_graph);
  graph_walker_alloc(&crawler->wlk, db_graph);
  rpt_walker_alloc(&crawler->rptwlk, db_graph->ht.capacity, 22); // 4MB
}

/**
 * Release everything allocated by graph_crawler_alloc() and zero the struct.
 *
 * @param crawler crawler previously set up with graph_crawler_alloc()
 */
void graph_crawler_dealloc(GraphCrawler *crawler)
{
  // Per-colour arrays
  ctx_free(crawler->col_paths);
  ctx_free(crawler->multicol_paths);
  ctx_free(crawler->unicol_paths);
  ctx_free(crawler->col_list);

  // Embedded helpers
  graph_cache_dealloc(&crawler->cache);
  graph_walker_dealloc(&crawler->wlk);
  rpt_walker_dealloc(&crawler->rptwlk);

  // Wipe the struct so no freed pointers remain in it
  memset(crawler, 0, sizeof(*crawler));
}
Пример #2
0
/**
 * Free all memory owned by a GPathStore and zero the struct.
 *
 * @param gpstore store to tear down
 */
void gpath_store_dealloc(GPathStore *gpstore)
{
  gpath_set_dealloc(&gpstore->gpset);

  // Frees paths_traverse when it is a separate array and makes it alias
  // paths_all, so paths_all is the only array left to free below.
  gpath_store_merge_read_write(gpstore);
  ctx_free(gpstore->paths_all);

  memset(gpstore, 0, sizeof(*gpstore));
}
Пример #3
0
/**
 * Destroy a CallDecomp: release each member it holds, then the struct itself.
 *
 * @param dc object to destroy; must not be used afterwards
 */
void call_decomp_destroy(CallDecomp *dc)
{
  // Alignment state
  alignment_free(dc->aln);
  needleman_wunsch_free(dc->nw_aligner);
  ctx_free(dc->scoring);

  // VCF record and scratch string buffer
  bcf_destroy(dc->v);
  strbuf_dealloc(&dc->sbuf);

  // Finally the container
  ctx_free(dc);
}
Пример #4
0
/**
 * Calculate cleaning threshold for supernodes from a given distribution
 * of supernode coverages.
 *
 * Looks at ratios of neighbouring histogram bins (starting from covgs[1]) and
 * picks the first bin that is smaller than the bin after it. If that is not
 * usable, it tries the first point where the ratio stops shrinking, and
 * finally falls back to a threshold derived from the sequencing depth.
 *
 * @param covgs     histogram of supernode coverages
 * @param len       number of entries in covgs; asserted to be > 5
 * @param seq_depth sequencing depth to use; if <= 0 it is estimated from the
 *                  graph as sum(col_covgs) / number of kmers
 * @param db_graph  graph providing col_covgs and hash table kmer count
 * @return the chosen threshold (one of f1+1, f2+1 or a depth-based fallback)
 */
size_t cleaning_pick_supernode_threshold(const uint64_t *covgs, size_t len,
                                         double seq_depth,
                                         const dBGraph *db_graph)
{
  ctx_assert(len > 5);
  ctx_assert(db_graph->ht.num_kmers > 0);

  // delta1 and delta2 share a single allocation: delta1 occupies the first
  // d1len doubles, delta2 the d2len doubles after it
  size_t i, d1len = len-2, d2len = len-3, f1, f2;
  double *tmp = ctx_malloc((d1len+d2len) * sizeof(double));
  double *delta1 = tmp, *delta2 = tmp + d1len;

  // Get sequencing depth from coverage: total coverage over every
  // (hash entry, colour) slot divided by the number of kmers in the table
  uint64_t covg_sum = 0, capacity = db_graph->ht.capacity * db_graph->num_of_cols;
  for(i = 0; i < capacity; i++) covg_sum += db_graph->col_covgs[i];
  double seq_depth_est = (double)covg_sum / db_graph->ht.num_kmers;

  status("[cleaning] Kmer depth before cleaning supernodes: %.2f", seq_depth_est);
  if(seq_depth <= 0) seq_depth = seq_depth_est;
  else status("[cleaning] Using sequence depth argument: %f", seq_depth);

  // Fallback is (depth+1)/2, but never less than 1
  size_t fallback_thresh = (size_t)MAX2(1, (seq_depth+1)/2);

  // delta1[i] is the ratio of bin i+1 to bin i+2 (bin 0 is not used)
  // +1 to ensure covgs is never 0
  for(i = 0; i < d1len; i++) delta1[i] = (double)(covgs[i+1]+1) / (covgs[i+2]+1);

  // i == d1len after the loop above, so these assignments restate the
  // lengths computed at the top (len-2 and len-3)
  d1len = i;
  d2len = d1len - 1;

  // With the len > 5 assertion active d1len is at least 4, so this branch is
  // only reachable when assertions are compiled out
  // NOTE(review): this path returns fallback_thresh while the final fallback
  // below returns fallback_thresh+1 -- confirm which is intended
  if(d1len <= 2) {
    status("[cleaning]  (using fallback1)\n");
    ctx_free(tmp);
    return fallback_thresh;
  }

  // delta2[i] is the ratio of consecutive delta1 values
  // d2len is d1len-1
  for(i = 0; i < d2len; i++) delta2[i] = delta1[i] / delta1[i+1];

  // f1: first index where delta1 < 1, i.e. covgs[f1+1] < covgs[f1+2]
  // f2: first index where delta2 <= 1
  // Either equals its array length if no such index exists
  for(f1 = 0; f1 < d1len && delta1[f1] >= 1; f1++);
  for(f2 = 0; f2 < d2len && delta2[f2] > 1; f2++);

  ctx_free(tmp);

  // Prefer f1, but only if it lies below 75% of the sequencing depth;
  // otherwise use f2 if found, else the depth-based fallback
  if(f1 < d1len && f1 < (seq_depth*0.75))
  { status("[cleaning]   (using f1)"); return f1+1; }
  else if(f2 < d2len)
  { status("[cleaning]   (using f2)"); return f2+1; }
  else
  { status("[cleaning]   (using fallback1)"); return fallback_thresh+1; }
}
Пример #5
0
size_t infer_edges(size_t nthreads, bool add_all_edges, const dBGraph *db_graph)
{
  ctx_assert(db_graph->node_in_cols != NULL);
  ctx_assert(db_graph->col_edges != NULL);

  size_t i, num_nodes_modified = 0;
  status("[inferedges] Processing stream");

  InferEdgesWorker *wrkrs = ctx_calloc(nthreads, sizeof(InferEdgesWorker));

  for(i = 0; i < nthreads; i++) {
    InferEdgesWorker tmp = {.threadid = i, .nthreads = nthreads,
                            .add_all_edges = add_all_edges,
                            .db_graph = db_graph,
                            .num_nodes_modified = 0};
    memcpy(&wrkrs[i], &tmp, sizeof(InferEdgesWorker));
  }

  util_run_threads(wrkrs, nthreads, sizeof(InferEdgesWorker),
                   nthreads, infer_edges_worker);

  // Sum up nodes modified
  for(i = 0; i < nthreads; i++)
    num_nodes_modified += wrkrs[i].num_nodes_modified;

  ctx_free(wrkrs);

  return num_nodes_modified;
}
Пример #6
0
/* Execute one instruction from each running context. */
void ke_run(void)
{
	struct ctx_t *ctx, *ctx_trav; 
	int k = 0;

	/* Run an instruction from every running process */
	for (k=0, ctx = ke->suspended_list_head; ctx; ctx = ctx->suspended_next, k++);
	//printf ("Instruction number: %lld, suspended processes:%d, queue_size: %d\n", instr_num, k, sched_count);
	for (ctx = ke->running_list_head; ctx; ctx = ctx->running_next) {
		int i;
		for ( i = 0 ; i < ctx->instr_slice && ctx_get_status(ctx, ctx_running); ++i) {
			while (interrupts_exist() && instr_num >= next_interrupt_num())
				handle_interrupt (pop_interrupt());
			
			ctx_execute_inst(ctx);
			instr_num++;
		}
	}
	
	/* Free finished contexts */
	while (ke->finished_list_head)
		ctx_free(ke->finished_list_head);
	
	/* Process list of suspended contexts */
	//ke_process_events();
	while (!(ke->running_list_head) && interrupts_exist()) {
		//printf ("Instruction number updated from %lld to %lld\n", instr_num, next_interrupt_num());
		instr_num = next_interrupt_num();
		handle_interrupt(pop_interrupt());
	}
}
Пример #7
0
// Merge temporary files, closes tmp files
void futil_merge_tmp_files(FILE **tmp_files, size_t num_files, FILE *fout)
{
  #define TMP_BUF_SIZE (32 * ONE_MEGABYTE)

  char *data = ctx_malloc(TMP_BUF_SIZE);
  size_t i, len;
  FILE *fh;

  for(i = 0; i < num_files; i++)
  {
    fh = tmp_files[i];
    if(fseek(fh, 0L, SEEK_SET) != 0) die("fseek error");

    while((len = fread(data, 1, TMP_BUF_SIZE, fh)) > 0)
      if(fwrite(data, 1, len, fout) != len)
        die("write error [%s]", strerror(errno));

    if(ferror(fh)) warn("fread error: %s", strerror(errno));
    fclose(fh);
  }

  ctx_free(data);

  #undef TMP_BUF_SIZE
}
Пример #8
0
// Store the directory part of `path`, followed by a '/', in `dir`.
// The caller owns `dir`; nothing is returned.
// NOTE(review): the strdup() result is not checked, and the copy is released
// with ctx_free() rather than free() -- confirm both are acceptable here.
void futil_get_strbuf_of_dir_path(const char *path, StrBuf *dir)
{
  // dirname() is given a private copy of the path rather than the caller's
  // string
  char *path_copy = strdup(path);
  char *parent = dirname(path_copy);

  strbuf_set(dir, parent);
  strbuf_append_char(dir, '/');

  ctx_free(path_copy);
}
Пример #9
0
/* Look up this module's context on the PAM handle, creating and registering
   a fresh one if none is stored yet. An existing context that belongs to a
   different user is cleared before it is returned.
   On success *pctx is set and PAM_SUCCESS returned; otherwise a PAM error
   code is returned and *pctx is left untouched. */
static int ctx_get(pam_handle_t *pamh,const char *username,struct pld_ctx **pctx)
{
  struct pld_ctx *ctx=NULL;
  int rc;
  /* ask PAM for a previously stored context */
  rc=pam_get_data(pamh,PLD_CTX,(const void **)&ctx);
  if ((rc!=PAM_SUCCESS)||(ctx==NULL))
  {
    /* nothing stored: allocate a new context */
    ctx=calloc(1,sizeof(struct pld_ctx));
    if (ctx==NULL)
    {
      pam_syslog(pamh,LOG_CRIT,"calloc(): failed to allocate memory: %s",strerror(errno));
      return PAM_BUF_ERR;
    }
    ctx_clear(ctx);
    /* hand ownership to PAM, with ctx_free as the cleanup handler */
    rc=pam_set_data(pamh,PLD_CTX,ctx,ctx_free);
    if (rc!=PAM_SUCCESS)
    {
      /* PAM did not take the context, so release it ourselves */
      ctx_free(pamh,ctx,0);
      pam_syslog(pamh,LOG_ERR,"failed to store context: %s",pam_strerror(pamh,rc));
      return rc;
    }
  }
  else if ((ctx->user!=NULL)&&(strcmp(ctx->user,username)!=0))
  {
    /* stored context is for another user: reset it */
    ctx_clear(ctx);
  }
  /* hand the context back to the caller */
  *pctx=ctx;
  return PAM_SUCCESS;
}
Пример #10
0
/**
 * Run the ABC traversal experiment on `nthreads` worker threads, then merge
 * and print the per-thread results.
 *
 * @param db_graph             graph to run the experiment on
 * @param prime_AB             passed through to every worker
 * @param nthreads             number of worker threads
 * @param num_repeats          total test limit, shared out between workers
 * @param max_AB_dist          passed to every worker; 0 means no limit
 *                             (replaced by SIZE_MAX)
 * @param print_failed_contigs passed through to every worker
 */
static void run_exp_abc(const dBGraph *db_graph, bool prime_AB,
                        size_t nthreads, size_t num_repeats,
                        size_t max_AB_dist, bool print_failed_contigs)
{
  ExpABCWorker *wrkrs = ctx_calloc(nthreads, sizeof(ExpABCWorker));
  size_t i, j;

  if(max_AB_dist == 0) max_AB_dist = SIZE_MAX;

  for(i = 0; i < nthreads; i++) {
    wrkrs[i].colour = 0;
    wrkrs[i].nthreads = nthreads;
    wrkrs[i].db_graph = db_graph;
    wrkrs[i].prime_AB = prime_AB;
    // Share num_repeats between workers; the first (num_repeats % nthreads)
    // workers take one extra so the limits add up to exactly num_repeats
    // instead of losing the remainder of the division
    wrkrs[i].num_limit = num_repeats / nthreads +
                         (i < num_repeats % nthreads ? 1 : 0);
    wrkrs[i].max_AB_dist = max_AB_dist;
    wrkrs[i].print_failed_contigs = print_failed_contigs;
    db_node_buf_alloc(&wrkrs[i].nbuf, 1024);
    graph_walker_alloc(&wrkrs[i].gwlk, db_graph);
    rpt_walker_alloc(&wrkrs[i].rptwlk, db_graph->ht.capacity, 22); // 4MB
  }

  util_run_threads(wrkrs, nthreads, sizeof(ExpABCWorker),
                   nthreads, run_exp_abc_thread);

  // Merge results
  size_t num_tests = 0, results[NUM_RESULT_VALUES] = {0};
  size_t ab_fail_state[GRPHWLK_NUM_STATES] = {0};
  size_t bc_fail_state[GRPHWLK_NUM_STATES] = {0};

  // Sum each worker's counters, releasing its buffers as we go
  for(i = 0; i < nthreads; i++) {
    num_tests += wrkrs[i].num_tests;
    for(j = 0; j < NUM_RESULT_VALUES; j++) results[j] += wrkrs[i].results[j];
    for(j = 0; j < GRPHWLK_NUM_STATES; j++) ab_fail_state[j] += wrkrs[i].ab_fail_state[j];
    for(j = 0; j < GRPHWLK_NUM_STATES; j++) bc_fail_state[j] += wrkrs[i].bc_fail_state[j];
    db_node_buf_dealloc(&wrkrs[i].nbuf);
    graph_walker_dealloc(&wrkrs[i].gwlk);
    rpt_walker_dealloc(&wrkrs[i].rptwlk);
  }

  // Print results
  char nrunstr[50];
  ulong_to_str(num_tests, nrunstr);
  status("Ran %s tests with %zu threads", nrunstr, nthreads);

  // NOTE(review): this list has 8 entries and is indexed up to
  // NUM_RESULT_VALUES by util_print_nums -- confirm the two stay in sync
  const char *titles[] = {"RES_ABC_SUCCESS", "RES_AB_WRONG",
                          "RES_AB_FAILED",   "RES_BC_WRONG",
                          "RES_BC_FAILED",   "RES_BC_OVERSHOT",
                          "RES_LOST_IN_RPT", "RES_NO_TRAVERSAL"};

  util_print_nums(titles, results, NUM_RESULT_VALUES, 30);

  status("AB_FAILED:");
  graph_step_print_state_hist(ab_fail_state);
  status("BC_FAILED:");
  graph_step_print_state_hist(bc_fail_state);

  ctx_free(wrkrs);
}
Пример #11
0
/* Program entry point: parse the arguments, set up the loader context
 * (working directory and XML logger), then check the source file, load it
 * and write the meta entry. Returns the first non-zero rc encountered,
 * or 0 on success. */
rc_t CC KMain ( int argc, char *argv [] )
{
    Args * args;

    rc_t rc = ArgsMakeAndHandle ( &args, argc, argv, 2,
                                  MyOptions, sizeof MyOptions / sizeof ( OptDef ),
                                  XMLLogger_Args, XMLLogger_ArgsQty  );

    KLogHandlerSetStdErr();
    if ( rc != 0 )
    {
        LOGERR( klogErr, rc, "error creating internal structure" );
    }
    else
    {
        ld_context lctx;

        lctx_init( &lctx );

        /* both setup steps report failure with the same message */
        rc = KDirectoryNativeDir ( &lctx.wd );
        if ( rc == 0 )
        {
            rc = XMLLogger_Make( &lctx.xml_logger, lctx.wd, args );
        }

        if ( rc != 0 )
        {
            LOGERR( klogErr, rc, "error creating internal structure" );
        }
        else
        {
            context ctx;
            rc = ctx_init( args, &ctx );
            if ( rc == 0 )
            {
                rc = pacbio_check_sourcefile( &ctx, &lctx );
                if ( rc == 0 )
                {
                    /* copy the settings the loader needs from ctx */
                    lctx.with_progress = ctx.with_progress;
                    ctx_show( &ctx );
                    lctx.dst_path = ctx.dst_path;

                    rc = pacbio_load( &ctx, &lctx, false, false );
                    if ( rc == 0 )
                    {
                        rc = pacbio_meta_entry( &lctx, argv[ 0 ] );
                    }
                }
                /* ctx is only released when ctx_init succeeded */
                ctx_free( &ctx );
            }
        }

        /* runs whenever argument parsing succeeded */
        lctx_free( &lctx );
        ArgsWhack ( args );
    }

    return rc;
}
Пример #12
0
/**
 * Allocate a hash table able to hold at least the requested number of
 * entries and initialise it as empty. The actual capacity, bucket count and
 * bucket size are chosen by hash_table_cap().
 *
 * @param ht           table to initialise; previous contents are overwritten
 * @param req_capacity requested number of entries
 */
void hash_table_alloc(HashTable *ht, uint64_t req_capacity)
{
  uint64_t nbuckets, cap;
  uint8_t bkt_size;

  cap = hash_table_cap(req_capacity, &nbuckets, &bkt_size);
  uint_fast32_t mask = (uint_fast32_t)(nbuckets - 1);

  // Memory for the kmer table plus two bytes of bookkeeping per bucket
  size_t mem = cap * sizeof(BinaryKmer) +
               nbuckets * sizeof(uint8_t[2]);

  // Report what is about to be allocated
  char nbuckets_txt[100], bkt_size_txt[100], cap_txt[100], mem_txt[100];
  ulong_to_str(nbuckets, nbuckets_txt);
  ulong_to_str(bkt_size, bkt_size_txt);
  ulong_to_str(cap, cap_txt);
  bytes_to_str(mem, 1, mem_txt);
  status("[hasht] Allocating table with %s entries, using %s", cap_txt, mem_txt);
  status("[hasht]  number of buckets: %s, bucket size: %s", nbuckets_txt, bkt_size_txt);

  // The bucket array must start zeroed (calloc) so that the first element of
  // each bucket is at position 0; the kmer table is filled explicitly below
  BinaryKmer *kmers = ctx_malloc(cap * sizeof(BinaryKmer));
  uint8_t (*const bkts)[2] = ctx_calloc(nbuckets, sizeof(uint8_t[2]));

  // Mark every slot as unused
  size_t slot;
  for(slot = 0; slot < cap; slot++) {
    kmers[slot] = unset_bkmer;
  }

  // Assemble in a temporary, then copy over the destination
  HashTable init = {
    .table = kmers,
    .num_of_buckets = nbuckets,
    .hash_mask = mask,
    .bucket_size = bkt_size,
    .capacity = cap,
    .buckets = bkts,
    .num_kmers = 0,
    .collisions = {0},
    .seed = rand()};

  memcpy(ht, &init, sizeof(init));
}

/**
 * Free the memory allocated by hash_table_alloc() and zero the struct, so
 * that no dangling table/bucket pointers are left behind (matching the other
 * *_dealloc functions, which also reset their struct).
 *
 * @param hash_table table to release
 */
void hash_table_dealloc(HashTable *hash_table)
{
  ctx_free(hash_table->table);
  ctx_free(hash_table->buckets);
  memset(hash_table, 0, sizeof(*hash_table)); // reset
}
Пример #13
0
/**
 * Initialise a SupernodeCleaner: one coverage buffer per thread, zeroed
 * before/after histograms for coverage, mean coverage and length, and all
 * counters set to zero. Release with supernode_cleaner_dealloc().
 *
 * @param cl             cleaner to initialise; previous contents overwritten
 * @param nthreads       number of per-thread coverage buffers to create
 * @param covg_threshold stored in the cleaner
 * @param min_keep_tip   stored in the cleaner
 * @param keep_flags     stored in the cleaner (pointer is not copied or
 *                       freed here)
 * @param db_graph       graph the cleaner refers to
 */
static void supernode_cleaner_alloc(SupernodeCleaner *cl, size_t nthreads,
                                    size_t covg_threshold, size_t min_keep_tip,
                                    uint8_t *keep_flags,
                                    const dBGraph *db_graph)
{
  // Per-thread coverage buffers
  CovgBuffer *bufs = ctx_calloc(nthreads, sizeof(*bufs));
  size_t t;
  for(t = 0; t < nthreads; t++) {
    covg_buf_alloc(&bufs[t], 1024);
  }

  // Zeroed histograms, an "init" and a "cleaned" copy of each kind
  uint64_t *covg_init      = ctx_calloc(DUMP_COVG_ARRSIZE, sizeof(uint64_t));
  uint64_t *covg_cleaned   = ctx_calloc(DUMP_COVG_ARRSIZE, sizeof(uint64_t));
  uint64_t *mean_init      = ctx_calloc(DUMP_MEAN_COVG_ARRSIZE, sizeof(uint64_t));
  uint64_t *mean_cleaned   = ctx_calloc(DUMP_MEAN_COVG_ARRSIZE, sizeof(uint64_t));
  uint64_t *len_init       = ctx_calloc(DUMP_LEN_ARRSIZE, sizeof(uint64_t));
  uint64_t *len_cleaned    = ctx_calloc(DUMP_LEN_ARRSIZE, sizeof(uint64_t));

  // Assemble in a temporary, then copy over the destination
  SupernodeCleaner init = {.nthreads = nthreads,
                           .covg_threshold = covg_threshold,
                           .min_keep_tip = min_keep_tip,
                           .cbufs = bufs,
                           .covg_hist_init    = covg_init,
                           .covg_hist_cleaned = covg_cleaned,
                           .covg_arrsize = DUMP_COVG_ARRSIZE,
                           .mean_covg_hist_init = mean_init,
                           .mean_covg_hist_cleaned = mean_cleaned,
                           .mean_covg_arrsize = DUMP_MEAN_COVG_ARRSIZE,
                           .len_hist_init     = len_init,
                           .len_hist_cleaned  = len_cleaned,
                           .len_arrsize = DUMP_LEN_ARRSIZE,
                           .keep_flags = keep_flags,
                           .num_tips = 0,
                           .num_low_covg_snodes = 0,
                           .num_tip_and_low_snodes = 0,
                           .num_tip_kmers = 0,
                           .num_low_covg_snode_kmers = 0,
                           .num_tip_and_low_snode_kmers = 0,
                           .db_graph = db_graph};

  memcpy(cl, &init, sizeof(init));
}

/**
 * Release everything allocated by supernode_cleaner_alloc() and zero the
 * struct. keep_flags is not freed here.
 */
static void supernode_cleaner_dealloc(SupernodeCleaner *cl)
{
  // Per-thread coverage buffers, then the array holding them
  size_t t;
  for(t = 0; t < cl->nthreads; t++) {
    covg_buf_dealloc(&cl->cbufs[t]);
  }
  ctx_free(cl->cbufs);

  // Histograms
  ctx_free(cl->covg_hist_init);
  ctx_free(cl->covg_hist_cleaned);
  ctx_free(cl->mean_covg_hist_init);
  ctx_free(cl->mean_covg_hist_cleaned);
  ctx_free(cl->len_hist_init);
  ctx_free(cl->len_hist_cleaned);

  memset(cl, 0, sizeof(*cl));
}
Пример #14
0
/**
 * Empty a GPathStore without releasing its main allocations: resets the path
 * set, zeroes the counters and the paths_all array, and drops any separate
 * paths_traverse array so that it aliases paths_all again.
 */
void gpath_store_reset(GPathStore *gpstore)
{
  gpath_set_reset(&gpstore->gpset);

  // Counters back to zero
  gpstore->path_bytes = 0;
  gpstore->num_paths = 0;
  gpstore->num_kmers_with_paths = 0;

  // Forget every per-kmer path pointer
  memset(gpstore->paths_all, 0, gpstore->graph_capacity * sizeof(GPath*));

  // A separate traversal array is no longer needed
  if(gpstore->paths_traverse != gpstore->paths_all) {
    ctx_free(gpstore->paths_traverse);
  }
  gpstore->paths_traverse = gpstore->paths_all;
}
Пример #15
0
/**
 * Destroy an AlignedCall: release every line buffer, the lines and gts
 * arrays, the info buffer and finally the struct itself.
 * NOTE(review): lines/gts are released with free() while the struct uses
 * ctx_free() -- confirm this matches how each was allocated.
 */
void acall_destroy(AlignedCall *call)
{
  size_t idx = 0;

  while(idx < call->n_lines) {
    strbuf_dealloc(&call->lines[idx]);
    idx++;
  }

  free(call->lines);
  free(call->gts);
  strbuf_dealloc(&call->info);

  ctx_free(call);
}
Пример #16
0
/**
 * If the store has a separate paths_traverse array, free it and make
 * paths_traverse alias paths_all. Does nothing when they already alias.
 */
void gpath_store_merge_read_write(GPathStore *gpstore)
{
  // Already a single shared array: nothing to merge
  if(gpstore->paths_traverse == gpstore->paths_all) return;

  status("[GPathStore] Merging read/write GraphPath linked lists");
  ctx_free(gpstore->paths_traverse); // does nothing if NULL
  gpstore->paths_traverse = gpstore->paths_all;
}
Пример #17
0
/**
 * Initialise a UnitigCleaner: one coverage buffer per thread, zeroed
 * before/after histograms for kmer coverage, unitig coverage and length,
 * and all counters set to zero. Release with unitig_cleaner_dealloc().
 *
 * @param cl             cleaner to initialise; previous contents overwritten
 * @param nthreads       number of per-thread coverage buffers to create
 * @param covg_threshold stored in the cleaner
 * @param min_keep_tip   stored in the cleaner
 * @param keep_flags     stored in the cleaner (pointer is not copied or
 *                       freed here)
 * @param db_graph       graph the cleaner refers to
 */
static void unitig_cleaner_alloc(UnitigCleaner *cl, size_t nthreads,
                                 size_t covg_threshold, size_t min_keep_tip,
                                 uint8_t *keep_flags,
                                 const dBGraph *db_graph)
{
  // Per-thread coverage buffers
  CovgBuffer *bufs = ctx_calloc(nthreads, sizeof(*bufs));
  size_t t;
  for(t = 0; t < nthreads; t++) {
    covg_buf_alloc(&bufs[t], 1024);
  }

  // Zeroed histograms, an "init" and a "clean" copy of each kind
  uint64_t *kmer_init    = ctx_calloc(DUMP_COVG_ARRSIZE, sizeof(uint64_t));
  uint64_t *kmer_clean   = ctx_calloc(DUMP_COVG_ARRSIZE, sizeof(uint64_t));
  uint64_t *unitig_init  = ctx_calloc(DUMP_COVG_ARRSIZE, sizeof(uint64_t));
  uint64_t *unitig_clean = ctx_calloc(DUMP_COVG_ARRSIZE, sizeof(uint64_t));
  uint64_t *len_init     = ctx_calloc(DUMP_LEN_ARRSIZE, sizeof(uint64_t));
  uint64_t *len_clean    = ctx_calloc(DUMP_LEN_ARRSIZE, sizeof(uint64_t));

  // Assemble in a temporary, then copy over the destination
  UnitigCleaner init = {.nthreads = nthreads,
                        .covg_threshold = covg_threshold,
                        .min_keep_tip = min_keep_tip,
                        .cbufs = bufs,
                        .kmer_covgs_init   = kmer_init,
                        .kmer_covgs_clean  = kmer_clean,
                        .unitig_covgs_init = unitig_init,
                        .unitig_covg_clean = unitig_clean,
                        .covg_arrsize      = DUMP_COVG_ARRSIZE,
                        .len_hist_init   = len_init,
                        .len_hist_clean  = len_clean,
                        .len_arrsize     = DUMP_LEN_ARRSIZE,
                        .keep_flags = keep_flags,
                        .num_tips = 0,
                        .num_low_covg_snodes = 0,
                        .num_tip_and_low_snodes = 0,
                        .num_tip_kmers = 0,
                        .num_low_covg_snode_kmers = 0,
                        .num_tip_and_low_snode_kmers = 0,
                        .db_graph = db_graph};

  memcpy(cl, &init, sizeof(init));
}

/**
 * Release everything allocated by unitig_cleaner_alloc() and zero the
 * struct. keep_flags is not freed here.
 */
static void unitig_cleaner_dealloc(UnitigCleaner *cl)
{
  // Per-thread coverage buffers, then the array holding them
  size_t t;
  for(t = 0; t < cl->nthreads; t++) {
    covg_buf_dealloc(&cl->cbufs[t]);
  }
  ctx_free(cl->cbufs);

  // Histograms
  ctx_free(cl->kmer_covgs_init);
  ctx_free(cl->kmer_covgs_clean);
  ctx_free(cl->unitig_covgs_init);
  ctx_free(cl->unitig_covg_clean);
  ctx_free(cl->len_hist_init);
  ctx_free(cl->len_hist_clean);

  memset(cl, 0, sizeof(*cl));
}
Пример #18
0
/**
 * Write the graph's paths to an already-open gzip stream: a JSON header,
 * the format explanation comment, then the paths themselves, produced by
 * `nthreads` worker threads that share one output lock.
 *
 * @param gzout         gzFile to write to
 * @param path          path of output file (used in the header and messages)
 * @param nthreads      number of worker threads; must be > 0
 * @param save_path_seq if true, save seq= and juncpos= for links, requires
 *                      exactly one colour in the graph
 * @param cmdstr        passed to the header builder
 * @param cmdhdr        passed to the header builder
 * @param hdrs          array of JSON headers of input files
 * @param nhdrs         number of entries in hdrs
 * @param contig_hists  passed to the header builder
 * @param ncols         must equal the number of colours in the path set
 * @param db_graph      graph whose path store is saved
 */
void gpath_save(gzFile gzout, const char *path,
                size_t nthreads, bool save_path_seq,
                const char *cmdstr, cJSON *cmdhdr,
                cJSON **hdrs, size_t nhdrs,
                const ZeroSizeBuffer *contig_hists, size_t ncols,
                dBGraph *db_graph)
{
  ctx_assert(nthreads > 0);
  ctx_assert(gpath_set_has_nseen(&db_graph->gpstore.gpset));
  ctx_assert(ncols == db_graph->gpstore.gpset.ncols);
  ctx_assert(!save_path_seq || db_graph->num_of_cols == 1); // save_path => 1 colour

  char num_paths_txt[50];
  ulong_to_str(db_graph->gpstore.num_paths, num_paths_txt);

  status("Saving %s paths to: %s", num_paths_txt, path);
  status("  using %zu threads", nthreads);

  // Header: build, print, discard
  cJSON *hdr = gpath_save_mkhdr(path, cmdstr, cmdhdr, hdrs, nhdrs,
                                contig_hists, ncols, db_graph);
  json_hdr_gzprint(hdr, gzout);
  cJSON_Delete(hdr);

  // Comment block explaining the file format
  gzputs(gzout, ctp_explanation_comment);

  // One saver per thread, all sharing the output stream and its lock
  GPathSaver *savers = ctx_calloc(nthreads, sizeof(*savers));
  pthread_mutex_t write_lock;
  size_t tid;

  if(pthread_mutex_init(&write_lock, NULL) != 0) die("Mutex init failed");

  for(tid = 0; tid < nthreads; tid++) {
    GPathSaver job = {.threadid = tid,
                      .nthreads = nthreads,
                      .save_seq = save_path_seq,
                      .gzout = gzout,
                      .outlock = &write_lock,
                      .db_graph = db_graph};
    savers[tid] = job;
  }

  // Workers iterate over kmers writing their paths
  util_run_threads(savers, nthreads, sizeof(*savers), nthreads, gpath_save_thread);

  pthread_mutex_destroy(&write_lock);
  ctx_free(savers);

  status("[GPathSave] Graph paths saved to %s", path);
}
Пример #19
0
/**
 * Open every sequence file in `paths` and pass the open handles to
 * chrom_hash_load2(). Dies if any file cannot be opened.
 * NOTE(review): only the handle array is freed here; presumably
 * chrom_hash_load2() closes the files themselves -- confirm.
 *
 * @param paths     paths of the sequence files to read
 * @param num_files number of entries in paths
 * @param chroms    passed through to chrom_hash_load2()
 * @param genome    passed through to chrom_hash_load2()
 */
void chrom_hash_load(char const*const* paths, size_t num_files,
                     ReadBuffer *chroms, ChromHash *genome)
{
  seq_file_t **files = ctx_malloc(num_files * sizeof(seq_file_t*));
  size_t f;

  for(f = 0; f < num_files; f++) {
    files[f] = seq_open(paths[f]);
    if(files[f] == NULL)
      die("Cannot read sequence file: %s", paths[f]);
  }

  chrom_hash_load2(files, num_files, chroms, genome);

  ctx_free(files);
}
Пример #20
0
/**
 * Test helper: check that the supernodes extracted from `graph` match the
 * expected strings, first by iterating over the hash table and then by
 * looking up every kmer of every input sequence.
 *
 * @param seq   input sequences; every kmer of seq[i] must be in the graph
 * @param ans   expected supernode strings; ans[i] is the expected normalised
 *              supernode for all kmers of seq[i]
 * @param n     number of entries in seq and ans
 * @param graph graph to test
 */
static void pull_out_supernodes(const char **seq, const char **ans, size_t n,
                                const dBGraph *graph)
{
  dBNodeBuffer nbuf;
  db_node_buf_alloc(&nbuf, 1024);

  // 1. Check pulling out supernodes works for iterating over the graph
  // `visited` is a zeroed array of 8-byte words sized from the hash table
  // capacity; it is handed to supernode_from_kmer for each hash entry
  uint64_t *visited;
  visited = ctx_calloc(roundup_bits2words64(graph->ht.capacity), 8);
  HASH_ITERATE(&graph->ht, supernode_from_kmer,
               &nbuf, visited, graph, ans, n);
  ctx_free(visited);

  // 2. Check pulling out supernodes works when we iterate over inputs
  size_t i, j, len;
  dBNode node;
  char tmpstr[SNODEBUF];

  for(i = 0; i < n; i++) {
    len = strlen(seq[i]);
    // Slide a kmer-sized window over the sequence
    for(j = 0; j+graph->kmer_size <= len; j++)
    {
      // Find node
      node = db_graph_find_str(graph, seq[i]+j);
      TASSERT(node.key != HASH_NOT_FOUND);

      // Fetch supernode
      db_node_buf_reset(&nbuf);
      supernode_find(node.key, &nbuf, graph);
      supernode_normalise(nbuf.b, nbuf.len, graph);

      // Compare
      // NOTE(review): this bounds the node count, not the string length
      // written into tmpstr -- confirm db_nodes_to_str() cannot exceed
      // SNODEBUF bytes for nbuf.len nodes
      TASSERT(nbuf.len < SNODEBUF);
      db_nodes_to_str(nbuf.b, nbuf.len, graph, tmpstr);
      // Print both strings before the assertion so a mismatch is visible
      if(strcmp(tmpstr, ans[i]) != 0) {
        test_status("Got: %s from ans[i]:%s\n", tmpstr, ans[i]);
      }
      TASSERT(strcmp(tmpstr, ans[i]) == 0);
    }
  }

  db_node_buf_dealloc(&nbuf);
}
Пример #21
0
/* Finalization: finish every context that has not finished yet, free all
 * contexts, shut down the GPU kernel, release the kernel structure and then
 * finalize the ISA module and print the syscall summary. */
void ke_done(void)
{
  struct ctx_t *cur;

  /* Mark every still-active context as finished (status 0) */
  cur = ke->context_list_head;
  while (cur) {
    if (!ctx_get_status(cur, ctx_finished)) {
      ctx_finish(cur, 0);
    }
    cur = cur->context_next;
  }

  /* Free contexts; the loop relies on ctx_free() unlinking the list head */
  while (ke->context_list_head) {
    ctx_free(ke->context_list_head);
  }

  /* Finalize GPU kernel */
  gk_done();

  /* Release the kernel structure, then finalize the remaining modules */
  free(ke);
  isa_done();
  syscall_summary();
}
Пример #22
0
/*
 * Step the running contexts: for each context on the running list, loop up
 * to ctx->instr_slice times, first unblocking the context attached to the
 * least interrupt when its time equals the instruction counter, then
 * executing one instruction if the current context is not blocked.
 * Afterwards free finished contexts and process suspended-context events.
 */
void ke_run(void)
{
	struct ctx_t *ctx;

	/* Run instructions from every running process */
	for (ctx = ke->running_list_head; ctx; ctx = ctx->running_next) {
		int i;

		for ( i = 0 ; i < ctx->instr_slice ; ++i) {
			/* Interrupt due at this instruction count: unblock its context.
			 * NOTE(review): get_least_interrupt_time() is evaluated before the
			 * least_interrupt NULL check -- confirm it is safe to call when no
			 * interrupt is pending. */
			if((no_instructions == get_least_interrupt_time()) && least_interrupt!=NULL){
				struct interrupt_t* curr_interrupt = get_least_interrupt();
				curr_interrupt->details->p_ctx->blocked = 0;

				printf("program with pid %d unblocked at %d\n",curr_interrupt->details->p_ctx->pid,no_instructions);


				delete_interrupt(curr_interrupt);
			}

			/* Blocked contexts do not execute and do not advance the counter */
			if(!ctx->blocked){
				ctx_execute_inst(ctx);
				no_instructions++;
				printf("%d,%d\n",ctx->pid,no_instructions);
			}
			/* Only the context at the head of the running list keeps going for
			 * its full slice; any other context stops after one iteration */
			if (ctx!=ke->running_list_head)
				break;
		}
	}

	/* Free finished contexts; the loop relies on ctx_free() unlinking the head */
	while (ke->finished_list_head)
		ctx_free(ke->finished_list_head);

	/* Process list of suspended contexts */
	ke_process_events();

}
Пример #23
0
// Merge temporary files, closes tmp files.
// Each file in tmp_files is rewound, appended to fout in array order and
// then closed. Dies on seek, read or write failure.
void futil_merge_tmp_files(FILE **tmp_files, size_t num_files, FILE *fout)
{
  #define TMP_BUF_SIZE (1<<25) /* 32MB */

  char *data = ctx_malloc(TMP_BUF_SIZE);
  size_t i, len;
  FILE *tmp_file;

  for(i = 0; i < num_files; i++)
  {
    tmp_file = tmp_files[i];
    // fseek() returns non-zero on failure
    if(fseek(tmp_file, 0L, SEEK_SET) != 0) die("fseek error");

    while((len = fread(data, 1, TMP_BUF_SIZE, tmp_file)) > 0)
      if(fwrite(data, 1, len, fout) != len)
        die("write error [%s]", strerror(errno));

    // fread() returns 0 for both end-of-file and error; without this check a
    // failed read would silently produce a truncated merge
    if(ferror(tmp_file)) die("read error [%s]", strerror(errno));

    fclose(tmp_file);
  }

  ctx_free(data);

  #undef TMP_BUF_SIZE
}
Пример #24
0
/*
 * Create one context and one thread per configured worker, then wait for
 * all threads to finish and free their contexts.
 * Exits the process with status 1 if a context cannot be created or a
 * thread cannot be started.
 */
void start(struct config *config) {
    assert(config != NULL);
    assert(config->num_threads > 0);
    assert(config->port > 0);
    assert(config->nodes != NULL);
    assert(config->num_nodes > 0);

    pthread_t threads[config->num_threads];
    struct ctx *ctxs[config->num_threads];

    int i;

    for (i = 0; i < config->num_threads; i++) {
        if ((ctxs[i] = ctx_new(config->nodes, config->num_nodes, config->port,
                               config->flush_interval)) == NULL)
            exit(1);
        /* If the thread fails to start, threads[i] is left indeterminate and
         * must not be passed to pthread_join() below, so bail out the same
         * way as for a failed context. */
        if (pthread_create(&threads[i], NULL, &thread_start, ctxs[i]) != 0)
            exit(1);
    }

    /* Wait for each worker, then release its context */
    for (i = 0; i < config->num_threads; i++) {
        pthread_join(threads[i], NULL);
        ctx_free(ctxs[i]);
    }
}
Пример #25
0
/**
 * `clean` sub-command: load one or more graph files, optionally remove tips
 * and low-coverage supernodes, and write the cleaned graph to the --out path.
 * The "before"/"after" coverage and supernode-length distributions are saved
 * to the paths given on the command line, if any.
 *
 * @param argc number of entries in argv
 * @param argv options followed by the input graph file paths
 * @return EXIT_SUCCESS (the only return statement in this function); bad
 *         arguments and fatal errors go through cmd_print_usage() / die(),
 *         which presumably do not return -- TODO confirm
 */
int ctx_clean(int argc, char **argv)
{
  size_t nthreads = 0, use_ncols = 0;
  struct MemArgs memargs = MEM_ARGS_INIT;
  const char *out_ctx_path = NULL;
  bool tip_cleaning = false, supernode_cleaning = false;
  size_t min_keep_tip = 0;
  Covg threshold = 0, fallback_thresh = 0;
  const char *len_before_path = NULL, *len_after_path = NULL;
  const char *covg_before_path = NULL, *covg_after_path = NULL;

  // Arg parsing
  char cmd[100];
  char shortopts[300];
  cmd_long_opts_to_short(longopts, shortopts, sizeof(shortopts));
  int c;

  // silence error messages from getopt_long
  // opterr = 0;

  while((c = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1) {
    cmd_get_longopt_str(longopts, c, cmd, sizeof(cmd));
    switch(c) {
      case 0: /* flag set */ break;
      case 'h': cmd_print_usage(NULL); break;
      case 'f': cmd_check(!futil_get_force(), cmd); futil_set_force(true); break;
      case 'o':
        if(out_ctx_path != NULL) cmd_print_usage(NULL);
        out_ctx_path = optarg;
        break;
      case 'm': cmd_mem_args_set_memory(&memargs, optarg); break;
      case 'n': cmd_mem_args_set_nkmers(&memargs, optarg); break;
      case 'N': use_ncols = cmd_uint32_nonzero(cmd, optarg); break;
      case 't': cmd_check(!nthreads, cmd); nthreads = cmd_uint32_nonzero(cmd, optarg); break;
      case 'T':
        cmd_check(!tip_cleaning, cmd);
        min_keep_tip = cmd_uint32_nonzero(cmd, optarg);
        tip_cleaning = true;
        break;
      case 'S':
        cmd_check(!supernode_cleaning, cmd);
        // optarg may be NULL: threshold then stays 0 and is estimated later
        if(optarg != NULL) threshold = cmd_uint32_nonzero(cmd, optarg);
        supernode_cleaning = true;
        break;
      case 'B': cmd_check(!fallback_thresh, cmd); fallback_thresh = cmd_uint32_nonzero(cmd, optarg); break;
      case 'l': cmd_check(!len_before_path, cmd); len_before_path = optarg; break;
      case 'L': cmd_check(!len_after_path, cmd); len_after_path = optarg; break;
      case 'c': cmd_check(!covg_before_path, cmd); covg_before_path = optarg; break;
      case 'C': cmd_check(!covg_after_path, cmd); covg_after_path = optarg; break;
      case ':': /* BADARG */
      case '?': /* BADCH getopt_long has already printed error */
        // cmd_print_usage(NULL);
        die("`"CMD" clean -h` for help. Bad option: %s", argv[optind-1]);
      default: abort();
    }
  }

  if(nthreads == 0) nthreads = DEFAULT_NTHREADS;

  if(optind >= argc) cmd_print_usage("Please give input graph files");

  // Default behaviour: with an output path and no explicit choice, do both
  // kinds of cleaning; with no output path, only warn
  if(!tip_cleaning && !supernode_cleaning) {
    if(out_ctx_path != NULL)
      supernode_cleaning = tip_cleaning = true; // do both
    else
      warn("No cleaning being done: you did not specify --out <out.ctx>");
  }

  bool doing_cleaning = (supernode_cleaning || tip_cleaning);

  if(doing_cleaning && out_ctx_path == NULL) {
    cmd_print_usage("Please specify --out <out.ctx> for cleaned graph");
  }

  if(!doing_cleaning && (covg_after_path || len_after_path)) {
    cmd_print_usage("You gave --len-after <out> / --covg-after <out> without "
                    "any cleaning (set -s, --supernodes or -t, --tips)");
  }

  // Refuse to overwrite an existing output file unless forced ("-" is exempt)
  if(doing_cleaning && strcmp(out_ctx_path,"-") != 0 &&
     !futil_get_force() && futil_file_exists(out_ctx_path))
  {
    cmd_print_usage("Output file already exists: %s", out_ctx_path);
  }

  if(fallback_thresh && !supernode_cleaning)
    cmd_print_usage("-B, --fallback <T> without --supernodes");

  // Use remaining args as graph files
  char **gfile_paths = argv + optind;
  size_t i, j, num_gfiles = (size_t)(argc - optind);

  // Open graph files
  GraphFileReader *gfiles = ctx_calloc(num_gfiles, sizeof(GraphFileReader));
  size_t ncols, ctx_max_kmers = 0, ctx_sum_kmers = 0;

  ncols = graph_files_open(gfile_paths, gfiles, num_gfiles,
                           &ctx_max_kmers, &ctx_sum_kmers);

  size_t kmer_size = gfiles[0].hdr.kmer_size;

  // default to one colour for now
  if(use_ncols == 0) use_ncols = 1;

  // Flatten if we don't have to remember colours / output a graph
  if(!doing_cleaning)
  {
    ncols = use_ncols = 1;
    for(i = 0; i < num_gfiles; i++)
      file_filter_flatten(&gfiles[i].fltr, 0);
  }

  if(ncols < use_ncols) {
    warn("I only need %zu colour%s ('--ncols %zu' ignored)",
         ncols, util_plural_str(ncols), use_ncols);
    use_ncols = ncols;
  }

  // NOTE(review): use_ncols is recomputed in the memory section below, so the
  // colour count printed here may differ from the one actually used
  char max_kmers_str[100];
  ulong_to_str(ctx_max_kmers, max_kmers_str);
  status("%zu input graph%s, max kmers: %s, using %zu colours",
         num_gfiles, util_plural_str(num_gfiles), max_kmers_str, use_ncols);

  // If no arguments given we default to removing tips < 2*kmer_size
  if(tip_cleaning && min_keep_tip == 0)
    min_keep_tip = 2 * kmer_size;

  // Warn if any graph files already cleaned
  size_t fromcol, intocol;
  ErrorCleaning *cleaning;

  for(i = 0; i < num_gfiles; i++) {
    for(j = 0; j < file_filter_num(&gfiles[i].fltr); j++) {
      fromcol = file_filter_fromcol(&gfiles[i].fltr, j);
      cleaning = &gfiles[i].hdr.ginfo[fromcol].cleaning;
      if(cleaning->cleaned_snodes && supernode_cleaning) {
        warn("%s:%zu already has supernode cleaning with threshold: <%zu",
             file_filter_path(&gfiles[i].fltr), fromcol,
             (size_t)cleaning->clean_snodes_thresh);
      }
      if(cleaning->cleaned_tips && tip_cleaning) {
        warn("%s:%zu already has had tip cleaned",
             file_filter_path(&gfiles[i].fltr), fromcol);
      }
    }
  }

  // Print steps
  // NOTE(review): step starts at 0 and is post-incremented, so the printed
  // list is numbered from 0 -- confirm that is intended
  size_t step = 0;
  status("Actions:\n");
  if(covg_before_path != NULL)
    status("%zu. Saving kmer coverage distribution to: %s", step++, covg_before_path);
  if(len_before_path != NULL)
    status("%zu. Saving supernode length distribution to: %s", step++, len_before_path);
  if(tip_cleaning)
    status("%zu. Cleaning tips shorter than %zu nodes", step++, min_keep_tip);
  if(supernode_cleaning && threshold > 0)
    status("%zu. Cleaning supernodes with coverage < %u", step++, threshold);
  if(supernode_cleaning && threshold <= 0)
    status("%zu. Cleaning supernodes with auto-detected threshold", step++);
  if(covg_after_path != NULL)
    status("%zu. Saving kmer coverage distribution to: %s", step++, covg_after_path);
  if(len_after_path != NULL)
    status("%zu. Saving supernode length distribution to: %s", step++, len_after_path);

  //
  // Decide memory usage
  //
  // NOTE(review): all_colours_loaded is derived from use_ncols *before*
  // use_ncols is recomputed from the memory limit a few lines below; it is
  // not refreshed afterwards -- confirm the two cannot disagree
  bool all_colours_loaded = (ncols <= use_ncols);
  bool use_mem_limit = (memargs.mem_to_use_set && num_gfiles > 1) || !ctx_max_kmers;

  size_t kmers_in_hash, bits_per_kmer, graph_mem;
  size_t per_kmer_per_col_bits = (sizeof(BinaryKmer)+sizeof(Covg)+sizeof(Edges)) * 8;
  // One extra Edges per kmer is budgeted when not every colour is loaded
  size_t pop_edges_per_kmer_bits = (all_colours_loaded ? 0 : sizeof(Edges) * 8);

  bits_per_kmer = per_kmer_per_col_bits * use_ncols + pop_edges_per_kmer_bits;

  kmers_in_hash = cmd_get_kmers_in_hash(memargs.mem_to_use,
                                        memargs.mem_to_use_set,
                                        memargs.num_kmers,
                                        memargs.num_kmers_set,
                                        bits_per_kmer,
                                        ctx_max_kmers, ctx_sum_kmers,
                                        use_mem_limit, &graph_mem);

  // Maximise the number of colours we load to fill the mem
  // NOTE(review): unsigned arithmetic -- this assumes kmers_in_hash > 0 and
  // mem_to_use*8 >= pop_edges_per_kmer_bits*kmers_in_hash; the memory limit is
  // only checked after this computation -- confirm
  size_t max_usencols = (memargs.mem_to_use*8 - pop_edges_per_kmer_bits * kmers_in_hash) /
                        (per_kmer_per_col_bits * kmers_in_hash);
  use_ncols = MIN2(max_usencols, ncols);

  cmd_check_mem_limit(memargs.mem_to_use, graph_mem);

  //
  // Check output files are writable
  //
  futil_create_output(out_ctx_path);

  // Does nothing if arg is NULL
  futil_create_output(covg_before_path);
  futil_create_output(covg_after_path);
  futil_create_output(len_before_path);
  futil_create_output(len_after_path);

  // Create db_graph
  // Load as many colours as possible
  // Use an extra set of edge to take intersections
  dBGraph db_graph;
  db_graph_alloc(&db_graph, gfiles[0].hdr.kmer_size, use_ncols, use_ncols,
                 kmers_in_hash, DBG_ALLOC_COVGS);

  // Edges is a special case: allocated here rather than by db_graph_alloc(),
  // with one extra block of ht.capacity entries when !all_colours_loaded
  size_t num_edges = db_graph.ht.capacity * (use_ncols + !all_colours_loaded);
  db_graph.col_edges = ctx_calloc(num_edges, sizeof(Edges));

  // Load graph into a single colour
  LoadingStats stats = LOAD_STATS_INIT_MACRO;

  GraphLoadingPrefs gprefs = {.db_graph = &db_graph,
                              .boolean_covgs = false,
                              .must_exist_in_graph = false,
                              .must_exist_in_edges = NULL,
                              .empty_colours = false};

  // Construct cleaned graph header
  GraphFileHeader outhdr;
  memset(&outhdr, 0, sizeof(GraphFileHeader));
  outhdr.version = CTX_GRAPH_FILEFORMAT;
  outhdr.kmer_size = db_graph.kmer_size;
  outhdr.num_of_cols = ncols;
  outhdr.num_of_bitfields = (db_graph.kmer_size*2+63)/64;
  graph_header_alloc(&outhdr, ncols);

  // Merge info into header
  size_t gcol = 0;
  for(i = 0; i < num_gfiles; i++) {
    for(j = 0; j < file_filter_num(&gfiles[i].fltr); j++, gcol++) {
      fromcol = file_filter_fromcol(&gfiles[i].fltr, j);
      intocol = file_filter_intocol(&gfiles[i].fltr, j);
      graph_info_merge(&outhdr.ginfo[intocol], &gfiles[i].hdr.ginfo[fromcol]);
    }
  }

  // Flat load when there are more colours than we hold in memory
  if(ncols > use_ncols) {
    graph_files_load_flat(gfiles, num_gfiles, gprefs, &stats);
  } else {
    for(i = 0; i < num_gfiles; i++)
      graph_load(&gfiles[i], gprefs, &stats);
  }

  char num_kmers_str[100];
  ulong_to_str(db_graph.ht.num_kmers, num_kmers_str);
  status("Total kmers loaded: %s\n", num_kmers_str);

  size_t initial_nkmers = db_graph.ht.num_kmers;
  hash_table_print_stats(&db_graph.ht);

  // One bit per hash table slot for each of the two bitsets
  uint8_t *visited = ctx_calloc(roundup_bits2bytes(db_graph.ht.capacity), 1);
  uint8_t *keep = ctx_calloc(roundup_bits2bytes(db_graph.ht.capacity), 1);

  if((supernode_cleaning && threshold <= 0) || covg_before_path || len_before_path)
  {
    // Get coverage distribution and estimate cleaning threshold
    int est_threshold = cleaning_get_threshold(nthreads,
                                               covg_before_path,
                                               len_before_path,
                                               visited, &db_graph);

    if(est_threshold < 0) status("Cannot find recommended cleaning threshold");
    else status("Recommended cleaning threshold is: %i", est_threshold);

    // Use estimated threshold if threshold not set
    if(threshold <= 0) {
      // The fallback wins whenever the estimate is below it, which includes a
      // failed (negative) estimate
      if(fallback_thresh > 0 && est_threshold < (int)fallback_thresh) {
        status("Using fallback threshold: %i", fallback_thresh);
        threshold = fallback_thresh;
      }
      else if(est_threshold >= 0) threshold = est_threshold;
    }
  }

  // Die if we failed to find suitable cleaning threshold
  if(supernode_cleaning && threshold <= 0)
    die("Need cleaning threshold (--supernodes=<D> or --fallback <D>)");

  if(doing_cleaning) {
    // Clean graph of tips (if min_keep_tip > 0) and supernodes (if threshold > 0)
    clean_graph(nthreads, threshold, min_keep_tip,
                covg_after_path, len_after_path,
                visited, keep, &db_graph);
  }

  ctx_free(visited);
  ctx_free(keep);

  if(doing_cleaning)
  {
    // Output graph file
    Edges *intersect_edges = NULL;
    bool kmers_loaded = true;
    size_t col, thresh;

    // Set output header ginfo cleaned
    for(col = 0; col < ncols; col++)
    {
      cleaning = &outhdr.ginfo[col].cleaning;
      cleaning->cleaned_snodes |= supernode_cleaning;
      cleaning->cleaned_tips |= tip_cleaning;

      // if(tip_cleaning) {
      //   strbuf_append_str(&outhdr.ginfo[col].sample_name, ".tipclean");
      // }

      if(supernode_cleaning) {
        // cleaned_snodes was OR-ed with supernode_cleaning just above, so in
        // this branch the MAX2() arm is always the one taken
        thresh = cleaning->clean_snodes_thresh;
        thresh = cleaning->cleaned_snodes ? MAX2(thresh, (uint32_t)threshold)
                                          : (uint32_t)threshold;
        cleaning->clean_snodes_thresh = thresh;

        // char name_append[200];
        // sprintf(name_append, ".supclean%zu", thresh);
        // strbuf_append_str(&outhdr.ginfo[col].sample_name, name_append);
      }
    }

    if(!all_colours_loaded)
    {
      // We haven't loaded all the colours
      // intersect_edges are edges to mask with
      // resets graph edges
      intersect_edges = db_graph.col_edges;
      db_graph.col_edges += db_graph.ht.capacity;
    }

    // Print stats on removed kmers
    // NOTE(review): if initial_nkmers is 0 the percentage is 0.0/0
    size_t removed_nkmers = initial_nkmers - db_graph.ht.num_kmers;
    double removed_pct = (100.0 * removed_nkmers) / initial_nkmers;
    char removed_str[100], init_str[100];
    ulong_to_str(removed_nkmers, removed_str);
    ulong_to_str(initial_nkmers, init_str);
    status("Removed %s of %s (%.2f%%) kmers", removed_str, init_str, removed_pct);

    graph_files_merge(out_ctx_path, gfiles, num_gfiles,
                      kmers_loaded, all_colours_loaded,
                      intersect_edges, &outhdr, &db_graph);

    // Swap back so col_edges again points at the start of the allocation
    if(!all_colours_loaded)
      db_graph.col_edges = intersect_edges;
  }

  ctx_check(db_graph.ht.num_kmers == hash_table_count_kmers(&db_graph.ht));

  graph_header_dealloc(&outhdr);

  for(i = 0; i < num_gfiles; i++) graph_file_close(&gfiles[i]);
  ctx_free(gfiles);

  db_graph_dealloc(&db_graph);

  return EXIT_SUCCESS;
}
Пример #26
0
/*
 * Command-line driver for the file converter.
 *
 * Accepted invocations:
 *   prog -v | --version               print the version, exit 0
 *   prog -h | --help                  print usage, exit 0
 *   prog <input> <output>             convert input to output
 *   prog <input> <catalog> <output>   as above, reading labels from a catalog
 *
 * The input is parsed twice: pass 1 collects the frequency weight and value
 * labels (from the catalog when one is given), pass 2 reads variables and
 * values. The output file must not already exist; it is removed again if
 * either pass fails.
 *
 * Returns 0 on success and 1 on bad arguments or any open/parse failure.
 */
int main(int argc, char** argv) {
    struct timeval t_begin, t_end;
    readstat_error_t error = READSTAT_OK;
    char *input_filename = NULL;
    char *catalog_filename = NULL;
    char *output_filename = NULL;
    int args_ok = 0;

    /* Single-flag invocations */
    if (argc == 2) {
        const char *flag = argv[1];
        if (strcmp(flag, "-v") == 0 || strcmp(flag, "--version") == 0) {
            print_version();
            return 0;
        }
        if (strcmp(flag, "-h") == 0 || strcmp(flag, "--help") == 0) {
            print_usage(argv[0]);
            return 0;
        }
    }

    /* Positional arguments: either 2 or 3 file names */
    if (argc == 3) {
        args_ok = can_read(argv[1]) && can_write(argv[2]);
        if (args_ok) {
            input_filename = argv[1];
            output_filename = argv[2];
        }
    } else if (argc == 4) {
        args_ok = can_read(argv[1]) && is_catalog(argv[2]) && can_write(argv[3]);
        if (args_ok) {
            input_filename = argv[1];
            catalog_filename = argv[2];
            output_filename = argv[3];
        }
    }

    if (!args_ok) {
        print_usage(argv[0]);
        return 1;
    }

    int input_format = format(input_filename);
    int output_format = format(output_filename);

    gettimeofday(&t_begin, NULL);

    /* O_EXCL: never clobber an existing output file */
    int fd = open(output_filename, O_CREAT | O_WRONLY | O_EXCL, 0644);
    if (fd == -1) {
        dprintf(STDERR_FILENO, "Error opening %s for writing: %s\n", output_filename, strerror(errno));
        return 1;
    }

    readstat_parser_t *pass1_parser = readstat_parser_init();
    readstat_parser_t *pass2_parser = readstat_parser_init();
    readstat_writer_t *writer = readstat_writer_init();
    readstat_writer_set_file_label(writer, "Created by ReadStat <https://github.com/WizardMac/ReadStat>");

    rs_ctx_t *rs_ctx = ctx_init();

    rs_ctx->writer = writer;
    rs_ctx->out_fd = fd;
    rs_ctx->out_format = output_format;

    readstat_set_data_writer(writer, &write_data);

    /* Pass 1: frequency weight and value labels */
    readstat_set_error_handler(pass1_parser, &handle_error);
    readstat_set_info_handler(pass1_parser, &handle_info);
    readstat_set_value_label_handler(pass1_parser, &handle_value_label);
    readstat_set_fweight_handler(pass1_parser, &handle_fweight);

    if (catalog_filename != NULL)
        error = parse_file(pass1_parser, catalog_filename, RS_FORMAT_SAS_CATALOG, rs_ctx);
    else
        error = parse_file(pass1_parser, input_filename, input_format, rs_ctx);

    /* Pass 2: full file, only attempted when pass 1 succeeded */
    if (error == READSTAT_OK) {
        readstat_set_error_handler(pass2_parser, &handle_error);
        readstat_set_info_handler(pass2_parser, &handle_info);
        readstat_set_variable_handler(pass2_parser, &handle_variable);
        readstat_set_value_handler(pass2_parser, &handle_value);

        error = parse_file(pass2_parser, input_filename, input_format, rs_ctx);
    }

    if (error == READSTAT_OK) {
        gettimeofday(&t_end, NULL);

        dprintf(STDERR_FILENO, "Converted %ld variables and %ld rows in %.2lf seconds\n",
                rs_ctx->var_count, rs_ctx->row_count,
                (t_end.tv_sec + 1e-6 * t_end.tv_usec) -
                (t_begin.tv_sec + 1e-6 * t_begin.tv_usec));
    }

    /* Teardown runs on both the success and the failure path */
    readstat_parser_free(pass1_parser);
    readstat_parser_free(pass2_parser);
    readstat_writer_free(writer);
    ctx_free(rs_ctx);

    close(fd);

    if (error == READSTAT_OK)
        return 0;

    /* Report the failure and remove the partial output */
    dprintf(STDERR_FILENO, "%s\n", readstat_error_message(error));
    unlink(output_filename);
    return 1;
}
Пример #27
0
/**
 * `thread` sub-command: load a graph plus any existing path files, run the
 * path-generation workers over the inputs, and save the resulting paths to
 * args.out_ctp_path.
 *
 * @param argc number of entries in argv
 * @param argv command-line arguments, parsed by read_thread_args_parse()
 * @return EXIT_SUCCESS (the only return statement in this function); errors
 *         are reported through cmd_print_usage() / die(), which presumably do
 *         not return -- TODO confirm
 */
int ctx_thread(int argc, char **argv)
{
  struct ReadThreadCmdArgs args;
  read_thread_args_alloc(&args);
  read_thread_args_parse(&args, argc, argv, longopts, false);

  GraphFileReader *gfile = &args.gfile;
  GPathFileBuffer *gpfiles = &args.gpfiles;
  CorrectAlnInputBuffer *inputs = &args.inputs;
  size_t i;

  if(args.zero_link_counts && gpfiles->len == 0)
    cmd_print_usage("-0,--zero-paths without -p,--paths <in.ctp> has no meaning");

  // Check each path file only loads one colour
  gpaths_only_for_colour(gpfiles->b, gpfiles->len, 0);

  //
  // Decide on memory
  //
  size_t bits_per_kmer, kmers_in_hash, graph_mem, total_mem;
  size_t path_hash_mem, path_store_mem, path_mem;
  // Existing paths are kept separate only when new paths are not to be used
  // while they are being added and there are path files to load
  bool sep_path_list = (!args.use_new_paths && gpfiles->len > 0);

  bits_per_kmer = sizeof(BinaryKmer)*8 + sizeof(Edges)*8 + sizeof(GPath*)*8 +
                  2 * args.nthreads; // Have traversed

  // false -> don't use mem_to_use to decide how many kmers to store in hash
  // since we need some of that memory for storing paths
  kmers_in_hash = cmd_get_kmers_in_hash(args.memargs.mem_to_use,
                                        args.memargs.mem_to_use_set,
                                        args.memargs.num_kmers,
                                        args.memargs.num_kmers_set,
                                        bits_per_kmer,
                                        gfile->num_of_kmers,
                                        gfile->num_of_kmers,
                                        false, &graph_mem);

  // Paths memory
  size_t min_path_mem = 0;
  gpath_reader_sum_mem(gpfiles->b, gpfiles->len, 1, true, true, &min_path_mem);

  // This check also guarantees the subtraction below cannot wrap
  if(graph_mem + min_path_mem > args.memargs.mem_to_use) {
    char buf[50];
    die("Require at least %s memory", bytes_to_str(graph_mem+min_path_mem, 1, buf));
  }

  // Whatever memory the graph does not need goes to paths
  path_mem = args.memargs.mem_to_use - graph_mem;
  // NOTE(review): 0.7 looks like a hash load factor -- confirm
  size_t pentry_hash_mem = sizeof(GPEntry)/0.7;
  size_t pentry_store_mem = sizeof(GPath) + 8 + // struct + sequence
                            1 + // in colour
                            sizeof(uint8_t) + // counts
                            sizeof(uint32_t); // kmer length

  // Path memory is divided per entry: each path is budgeted one store entry
  // and one hash entry
  size_t max_paths = path_mem / (pentry_store_mem + pentry_hash_mem);
  path_store_mem = max_paths * pentry_store_mem;
  path_hash_mem = max_paths * pentry_hash_mem;
  cmd_print_mem(path_hash_mem, "paths hash");
  cmd_print_mem(path_store_mem, "paths store");

  total_mem = graph_mem + path_mem;
  cmd_check_mem_limit(args.memargs.mem_to_use, total_mem);

  //
  // Open output file
  //
  gzFile gzout = futil_gzopen_create(args.out_ctp_path, "w");

  status("Creating paths file: %s", futil_outpath_str(args.out_ctp_path));

  //
  // Allocate memory
  //
  dBGraph db_graph;
  size_t kmer_size = gfile->hdr.kmer_size;
  db_graph_alloc(&db_graph, kmer_size, 1, 1, kmers_in_hash,
                 DBG_ALLOC_EDGES | DBG_ALLOC_NODE_IN_COL);

  // Create a path store that tracks path counts, sized with the store share
  // (path_store_mem) computed above
  gpath_store_alloc(&db_graph.gpstore,
                    db_graph.num_of_cols, db_graph.ht.capacity,
                    0, path_store_mem, true, sep_path_list);

  // Create path hash table for fast lookup
  gpath_hash_alloc(&db_graph.gphash, &db_graph.gpstore, path_hash_mem);

  if(args.use_new_paths) {
    status("Using paths as they are added (risky)");
  } else {
    status("Not using new paths as they are added (safe)");
  }

  //
  // Start up workers to add paths to the graph
  //
  GenPathWorker *workers;
  workers = gen_paths_workers_alloc(args.nthreads, &db_graph);

  // Setup for loading graph
  LoadingStats gstats;
  loading_stats_init(&gstats);

  // Path statistics
  LoadingStats *load_stats = gen_paths_get_stats(workers);
  CorrectAlnStats *aln_stats = gen_paths_get_aln_stats(workers);

  // Load contig hist distribution
  for(i = 0; i < gpfiles->len; i++) {
    gpath_reader_load_contig_hist(gpfiles->b[i].json,
                                  gpfiles->b[i].fltr.path.b,
                                  file_filter_fromcol(&gpfiles->b[i].fltr, 0),
                                  &aln_stats->contig_histgrm);
  }

  GraphLoadingPrefs gprefs = {.db_graph = &db_graph,
                              .boolean_covgs = false,
                              .must_exist_in_graph = false,
                              .must_exist_in_edges = NULL,
                              .empty_colours = false}; // already loaded paths

  // Load graph, print stats, close file
  graph_load(gfile, gprefs, &gstats);
  hash_table_print_stats_brief(&db_graph.ht);
  graph_file_close(gfile);

  // Load existing paths
  for(i = 0; i < gpfiles->len; i++)
    gpath_reader_load(&gpfiles->b[i], GPATH_DIE_MISSING_KMERS, &db_graph);

  // zero link counts of already loaded links
  if(args.zero_link_counts) {
    status("Zeroing link counts for loaded links");
    gpath_set_zero_nseen(&db_graph.gpstore.gpset);
  }

  if(!args.use_new_paths)
    gpath_store_split_read_write(&db_graph.gpstore);

  // Deal with a set of files at once
  // Can have different numbers of inputs vs threads
  size_t start, end;
  for(start = 0; start < inputs->len; start += MAX_IO_THREADS)
  {
    end = MIN2(inputs->len, start+MAX_IO_THREADS);
    generate_paths(inputs->b+start, end-start, workers, args.nthreads);
  }

  // Print memory statistics
  gpath_hash_print_stats(&db_graph.gphash);
  gpath_store_print_stats(&db_graph.gpstore);

  correct_aln_dump_stats(aln_stats, load_stats,
                         args.dump_seq_sizes,
                         args.dump_frag_sizes,
                         db_graph.ht.num_kmers);

  // Don't need GPathHash anymore
  gpath_hash_dealloc(&db_graph.gphash);

  // Collect the JSON headers of the input path files for the output header
  cJSON **hdrs = ctx_malloc(gpfiles->len * sizeof(cJSON*));
  for(i = 0; i < gpfiles->len; i++) hdrs[i] = gpfiles->b[i].json;

  size_t output_threads = MIN2(args.nthreads, MAX_IO_THREADS);

  // Generate a cJSON header for all inputs
  // NOTE(review): thread_hdr is never deleted in this function; presumably
  // gpath_save() takes ownership of it -- confirm, otherwise it leaks
  cJSON *thread_hdr = cJSON_CreateObject();
  cJSON *inputs_hdr = cJSON_CreateArray();
  cJSON_AddItemToObject(thread_hdr, "inputs", inputs_hdr);
  for(i = 0; i < inputs->len; i++)
    cJSON_AddItemToArray(inputs_hdr, correct_aln_input_json_hdr(&inputs->b[i]));

  // Write output file
  gpath_save(gzout, args.out_ctp_path, output_threads, true,
             "thread", thread_hdr, hdrs, gpfiles->len,
             &aln_stats->contig_histgrm, 1,
             &db_graph);

  // NOTE(review): the gzclose() result is ignored, so a failure while
  // flushing the output would go unreported
  gzclose(gzout);
  ctx_free(hdrs);

  // Optionally run path checks for debugging
  // gpath_checks_all_paths(&db_graph, args.nthreads);

  // ins_gap, err_gap no longer allocated after this line
  gen_paths_workers_dealloc(workers, args.nthreads);

  // Close and free input files etc.
  read_thread_args_dealloc(&args);
  db_graph_dealloc(&db_graph);

  return EXIT_SUCCESS;
}
Пример #28
0
/**
 * `calls2vcf` sub-command: convert a bubble or breakpoint call file into VCF.
 *
 * Each call is placed on the reference (bubble calls use the flank
 * alignments from the -F SAM/BAM file), then decomposed into variants and
 * written to the output.
 *
 * @param argc number of entries in argv
 * @param argv options, then <in.txt.gz>, then one or more reference files
 * @return EXIT_SUCCESS; bad arguments, unreadable inputs and output failures
 *         are reported through cmd_print_usage() / die()
 */
int ctx_calls2vcf(int argc, char **argv)
{
  const char *in_path = NULL, *out_path = NULL, *out_type = NULL;
  // Filtering parameters (negative means "not set on the command line")
  int32_t min_mapq = -1, max_align_len = -1, max_allele_len = -1;
  // Alignment parameters
  int nwmatch = 1, nwmismatch = -2, nwgapopen = -4, nwgapextend = -1;
  // ref paths
  char const*const* ref_paths = NULL;
  size_t nref_paths = 0;
  // flank file
  const char *sam_path = NULL;

  //
  // Things we figure out by looking at the input
  //
  bool isbubble = false;
  // samples in VCF, (0 for bubble, does not include ref in breakpoint calls)
  size_t i, kmer_size, num_samples;

  //
  // Reference genome
  //
  // Hash map of chromosome name -> sequence
  ChromHash *genome;
  ReadBuffer chroms;

  // Arg parsing
  char cmd[100];
  char shortopts[300];
  cmd_long_opts_to_short(longopts, shortopts, sizeof(shortopts));
  int c;

  // silence error messages from getopt_long
  // opterr = 0;

  while((c = getopt_long_only(argc, argv, shortopts, longopts, NULL)) != -1) {
    cmd_get_longopt_str(longopts, c, cmd, sizeof(cmd));
    switch(c) {
      case 0: /* flag set */ break;
      case 'h': cmd_print_usage(NULL); break;
      case 'o': cmd_check(!out_path, cmd); out_path = optarg; break;
      case 'O': cmd_check(!out_type, cmd); out_type = optarg; break;
      case 'f': cmd_check(!futil_get_force(), cmd); futil_set_force(true); break;
      case 'F': cmd_check(!sam_path,cmd); sam_path = optarg; break;
      case 'Q': cmd_check(min_mapq < 0,cmd); min_mapq = cmd_uint32(cmd, optarg); break;
      case 'A': cmd_check(max_align_len  < 0,cmd); max_align_len  = cmd_uint32(cmd, optarg); break;
      case 'L': cmd_check(max_allele_len < 0,cmd); max_allele_len = cmd_uint32(cmd, optarg); break;
      case 'm': nwmatch = cmd_int32(cmd, optarg); break;
      case 'M': nwmismatch = cmd_int32(cmd, optarg); break;
      case 'g': nwgapopen = cmd_int32(cmd, optarg); break;
      case 'G': nwgapextend = cmd_int32(cmd, optarg); break;
      case ':': /* BADARG */
      case '?': /* BADCH getopt_long has already printed error */
        die("`"CMD" "SUBCMD" -h` for help. Bad option: %s", argv[optind-1]);
      default: ctx_assert2(0, "shouldn't reach here: %c", c);
    }
  }

  // Defaults for unset values ("-" is the default output path)
  if(out_path == NULL) out_path = "-";
  if(max_align_len  < 0) max_align_len  = DEFAULT_MAX_ALIGN;
  if(max_allele_len < 0) max_allele_len = DEFAULT_MAX_ALLELE;

  if(optind+2 > argc)
    cmd_print_usage("Require <in.txt.gz> and at least one reference");

  in_path = argv[optind++];
  ref_paths = (char const*const*)argv + optind;
  nref_paths = (size_t)(argc - optind); // >= 1 thanks to the check above

  // These functions call die() on error
  gzFile gzin = futil_gzopen(in_path, "r");

  // Read call file header
  cJSON *json = json_hdr_load(gzin, in_path);

  // Check we can handle the kmer size
  kmer_size = json_hdr_get_kmer_size(json, in_path);
  db_graph_check_kmer_size(kmer_size, in_path);

  // Get format (bubble or breakpoint file)
  cJSON *json_fmt = json_hdr_get(json, "file_format", cJSON_String, in_path);
  if(strcmp(json_fmt->valuestring,"CtxBreakpoints") == 0) isbubble = false;
  else if(strcmp(json_fmt->valuestring,"CtxBubbles") == 0) isbubble = true;
  else die("Unknown format: '%s'", json_fmt->valuestring);

  status("Reading %s in %s format", futil_inpath_str(in_path),
         isbubble ? "bubble" : "breakpoint");

  if(isbubble) {
    // bubble specific
    if(sam_path == NULL)
      cmd_print_usage("Require -F <flanks.sam> with bubble file");
    if(min_mapq < 0) min_mapq = DEFAULT_MIN_MAPQ;
  }
  else {
    // breakpoint specific
    if(min_mapq >= 0)
      cmd_print_usage("-Q,--min-mapq <Q> only valid with bubble calls");
  }

  // Open flank file if it exists
  htsFile *samfh = NULL;
  bam_hdr_t *bam_hdr = NULL;
  bam1_t *mflank = NULL;

  if(sam_path)
  {
    if((samfh = hts_open(sam_path, "r")) == NULL)
      die("Cannot open SAM/BAM %s", sam_path);

    // Load BAM header
    bam_hdr = sam_hdr_read(samfh);
    if(bam_hdr == NULL) die("Cannot load BAM header: %s", sam_path);
    mflank = bam_init1();
  }

  // Output VCF has 0 samples if bubbles file, otherwise has N where N is
  // number of samples/colours in the breakpoint graph
  size_t num_graph_samples = json_hdr_get_ncols(json, in_path);
  size_t num_graph_nonref = json_hdr_get_nonref_ncols(json, in_path);

  num_samples = 0;
  if(!isbubble) {
    // If last colour has "is_ref", drop number of samples by one
    num_samples = num_graph_nonref < num_graph_samples ? num_graph_samples-1
                                                       : num_graph_samples;
  }

  //
  // Open output file
  //
  int mode = vcf_misc_get_outtype(out_type, out_path);
  futil_create_output(out_path);
  htsFile *vcffh = hts_open(out_path, modes_htslib[mode]);

  // hts_open() returns NULL on failure; the header write below would
  // otherwise dereference it
  if(vcffh == NULL)
    die("Cannot open VCF output: %s", futil_outpath_str(out_path));

  status("[calls2vcf] Reading %s call file with %zu samples",
         isbubble ? "Bubble" : "Breakpoint", num_graph_samples);
  status("[calls2vcf] %zu sample output to: %s format: %s",
         num_samples, futil_outpath_str(out_path), hsmodes_htslib[mode]);

  if(isbubble) status("[calls2vcf] min. MAPQ: %i", min_mapq);
  status("[calls2vcf] max alignment length: %i", max_align_len);
  status("[calls2vcf] max VCF allele length: %i", max_allele_len);
  status("[calls2vcf] alignment match:%i mismatch:%i gap open:%i extend:%i",
         nwmatch, nwmismatch, nwgapopen, nwgapextend);

  // Load reference genome
  read_buf_alloc(&chroms, 1024);
  genome = chrom_hash_init();
  chrom_hash_load(ref_paths, nref_paths, &chroms, genome);

  // convert to upper case
  // (cast: passing a negative plain char to toupper() is undefined)
  char *s;
  for(i = 0; i < chroms.len; i++)
    for(s = chroms.b[i].seq.b; *s; s++)
      *s = (char)toupper((unsigned char)*s);

  if(!isbubble) brkpnt_check_refs_match(json, genome, in_path);

  bcf_hdr_t *vcfhdr = make_vcf_hdr(json, in_path, !isbubble, kmer_size,
                                   ref_paths, nref_paths,
                                   chroms.b, chroms.len);

  if(bcf_hdr_write(vcffh, vcfhdr) != 0) die("Cannot write VCF header");

  AlignedCall *call = acall_init();
  CallDecomp *aligner = call_decomp_init(vcffh, vcfhdr);

  scoring_t *scoring = call_decomp_get_scoring(aligner);
  scoring_init(scoring, nwmatch, nwmismatch, nwgapopen, nwgapextend,
               false, false, 0, 0, 0, 0);

  CallFileEntry centry;
  call_file_entry_alloc(&centry);

  // Appended to the info string of every call
  char kmer_str[50];
  snprintf(kmer_str, sizeof(kmer_str), ";K%zu", kmer_size);

  if(isbubble)
  {
    // Bubble calls
    DecompBubble *bubbles = decomp_bubble_init();

    // Set scoring for aligning 3' flank
    scoring = decomp_bubble_get_scoring(bubbles);
    scoring_init(scoring, nwmatch, nwmismatch, nwgapopen, nwgapextend,
                 true, true, 0, 0, 0, 0);

    while(call_file_read(gzin, in_path, &centry)) {
      // Take the next primary SAM record for this call, skipping secondary
      // and supplementary alignments
      do {
        if(sam_read1(samfh, bam_hdr, mflank) < 0)
          die("We've run out of SAM entries!");
      } while(mflank->core.flag & (BAM_FSECONDARY | BAM_FSUPPLEMENTARY));

      // Align call
      strbuf_reset(&call->info);
      decomp_bubble_call(bubbles, genome, kmer_size, min_mapq,
                         &centry, mflank, bam_hdr, call);
      strbuf_append_str(&call->info, kmer_str);
      acall_decompose(aligner, call, max_align_len, max_allele_len);
    }

    // print bubble stats
    DecompBubbleStats *bub_stats = ctx_calloc(1, sizeof(*bub_stats));
    decomp_bubble_cpy_stats(bub_stats, bubbles);
    print_bubble_stats(bub_stats);
    ctx_free(bub_stats);

    decomp_bubble_destroy(bubbles);
  }
  else
  {
    // Breakpoint calls
    DecompBreakpoint *breakpoints = decomp_brkpt_init();

    while(call_file_read(gzin, in_path, &centry)) {
      strbuf_reset(&call->info);
      decomp_brkpt_call(breakpoints, genome, num_samples, &centry, call);
      strbuf_append_str(&call->info, kmer_str);
      acall_decompose(aligner, call, max_align_len, max_allele_len);
    }

    // print breakpoint stats
    DecompBreakpointStats *brk_stats = ctx_calloc(1, sizeof(*brk_stats));
    decomp_brkpt_cpy_stats(brk_stats, breakpoints);
    print_breakpoint_stats(brk_stats);
    ctx_free(brk_stats);

    decomp_brkpt_destroy(breakpoints);
  }

  // Print stats
  DecomposeStats *astats = ctx_calloc(1, sizeof(*astats));
  call_decomp_cpy_stats(astats, aligner);
  print_acall_stats(astats);
  ctx_free(astats);

  call_file_entry_dealloc(&centry);
  call_decomp_destroy(aligner);
  acall_destroy(call);

  // Finished - clean up
  cJSON_Delete(json);
  gzclose(gzin);

  bcf_hdr_destroy(vcfhdr);

  // Closing flushes buffered output: a failure here means the VCF may be
  // incomplete, so do not report success
  if(hts_close(vcffh) != 0)
    die("Error closing VCF output: %s", futil_outpath_str(out_path));

  for(i = 0; i < chroms.len; i++) seq_read_dealloc(&chroms.b[i]);
  read_buf_dealloc(&chroms);
  chrom_hash_destroy(genome);

  if(sam_path) {
    hts_close(samfh);
    bam_hdr_destroy(bam_hdr);
    bam_destroy1(mflank);
  }

  return EXIT_SUCCESS;
}
Пример #29
0
/**
 * Command entry point for `rmsubstr`.
 *
 * Loads every read from the input sequence files into memory, indexes their
 * kmers with kograph_create(), then classifies each read with _is_substr()
 * and prints the reads that are NOT reported as substrings (or, when the
 * invert flag 'v' is given, only those that are).
 *
 * Options handled (short forms, long names come from `longopts`):
 *   h: usage, f: force, o: output path, t: threads, m: memory, n: kmers,
 *   k: kmer size, F: output format, v: invert selection.
 *
 * @param argc number of command line arguments
 * @param argv command line arguments; non-option arguments are input files
 * @return EXIT_SUCCESS once all reads have been processed
 */
int ctx_rmsubstr(int argc, char **argv)
{
  struct MemArgs memargs = MEM_ARGS_INIT;
  size_t kmer_size = 0, nthreads = 0;
  const char *output_file = NULL;
  seq_format fmt = SEQ_FMT_FASTA;
  bool invert = false;

  // Arg parsing
  char cmd[100], shortopts[100];
  cmd_long_opts_to_short(longopts, shortopts, sizeof(shortopts));
  int c;

  while((c = getopt_long_only(argc, argv, shortopts, longopts, NULL)) != -1) {
    cmd_get_longopt_str(longopts, c, cmd, sizeof(cmd));
    switch(c) {
      case 0: /* flag set */ break;
      case 'h': cmd_print_usage(NULL); break;
      case 'f': cmd_check(!futil_get_force(), cmd); futil_set_force(true); break;
      case 'o': cmd_check(!output_file, cmd); output_file = optarg; break;
      case 't': cmd_check(!nthreads, cmd); nthreads = cmd_uint32_nonzero(cmd, optarg); break;
      case 'm': cmd_mem_args_set_memory(&memargs, optarg); break;
      case 'n': cmd_mem_args_set_nkmers(&memargs, optarg); break;
      case 'k': cmd_check(!kmer_size,cmd); kmer_size = cmd_uint32(cmd, optarg); break;
      case 'F': cmd_check(fmt==SEQ_FMT_FASTA, cmd); fmt = cmd_parse_format(cmd, optarg); break;
      case 'v': cmd_check(!invert,cmd); invert = true; break;
      case ':': /* BADARG */
      case '?': /* BADCH getopt_long has already printed error */
        // NOTE(review): cmd_print_usage() presumably does not return; if it
        // did, control would fall through to abort() below.
        cmd_print_usage("`"CMD" rmsubstr -h` for help. Bad option: %s", argv[optind-1]);
      default: abort();
    }
  }

  // Defaults
  if(!nthreads) nthreads = DEFAULT_NTHREADS;
  if(!kmer_size) kmer_size = DEFAULT_KMER;

  if(!(kmer_size&1)) cmd_print_usage("Kmer size must be odd");
  if(kmer_size < MIN_KMER_SIZE) cmd_print_usage("Kmer size too small (recompile)");
  if(kmer_size > MAX_KMER_SIZE) cmd_print_usage("Kmer size too large (recompile?)");

  if(optind >= argc)
    cmd_print_usage("Please specify at least one input sequence file (.fa, .fq etc.)");

  // Every remaining argument is an input sequence file
  size_t i, num_seq_files = argc - optind;
  char **seq_paths = argv + optind;
  seq_file_t **seq_files = ctx_calloc(num_seq_files, sizeof(seq_file_t*));

  for(i = 0; i < num_seq_files; i++)
    if((seq_files[i] = seq_open(seq_paths[i])) == NULL)
      die("Cannot read sequence file %s", seq_paths[i]);

  // Estimate number of bases
  // set to -1 if we cannot calc
  int64_t est_num_bases = seq_est_seq_bases(seq_files, num_seq_files);
  if(est_num_bases < 0) {
    warn("Cannot get file sizes, using pipes");
    est_num_bases = memargs.num_kmers * IDEAL_OCCUPANCY;
  }

  status("[memory] Estimated number of bases: %li", (long)est_num_bases);

  //
  // Decide on memory, using the estimated number of bases as the kmer count
  //
  size_t bits_per_kmer, kmers_in_hash, graph_mem;

  // All terms are in bits: sizeof() yields bytes, so each is scaled by 8
  bits_per_kmer = sizeof(BinaryKmer)*8 +
                  sizeof(KONodeList)*8 + sizeof(KOccur)*8 + // see kmer_occur.h
                  8; // 1 byte per kmer for each base to load sequence files

  kmers_in_hash = cmd_get_kmers_in_hash(memargs.mem_to_use,
                                        memargs.mem_to_use_set,
                                        memargs.num_kmers,
                                        memargs.num_kmers_set,
                                        bits_per_kmer,
                                        est_num_bases, est_num_bases,
                                        false, &graph_mem);

  cmd_check_mem_limit(memargs.mem_to_use, graph_mem);

  //
  // Open output file ("-" is used when no output path was given)
  //
  if(output_file == NULL) output_file = "-";
  FILE *fout = futil_fopen_create(output_file, "w");

  //
  // Set up memory
  //
  dBGraph db_graph;
  db_graph_alloc(&db_graph, kmer_size, 1, 0, kmers_in_hash, DBG_ALLOC_BKTLOCKS);

  //
  // Load reference sequence into a read buffer
  //
  ReadBuffer rbuf;
  read_buf_alloc(&rbuf, 1024);
  seq_load_all_reads(seq_files, num_seq_files, &rbuf);

  // Check for reads too short: stop at the first read shorter than kmer_size
  for(i = 0; i < rbuf.len && rbuf.b[i].seq.end >= kmer_size; i++) {}
  if(i < rbuf.len)
    warn("Reads shorter than kmer size (%zu) will not be filtered", kmer_size);

  KOGraph kograph = kograph_create(rbuf.b, rbuf.len, true, 0,
                                   nthreads, &db_graph);

  size_t num_reads = rbuf.len, num_reads_printed = 0, num_bad_reads = 0;

  // Loop over reads printing those that are not substrings
  // (_is_substr() result of -1 is counted as a bad read and never printed)
  int ret;
  for(i = 0; i < rbuf.len; i++) {
    ret = _is_substr(&rbuf, i, &kograph, &db_graph);
    if(ret == -1) num_bad_reads++;
    else if((ret && invert) || (!ret && !invert)) {
      seqout_print_read(&rbuf.b[i], fmt, fout);
      num_reads_printed++;
    }
  }

  char num_reads_str[100], num_reads_printed_str[100], num_bad_reads_str[100];
  ulong_to_str(num_reads, num_reads_str);
  ulong_to_str(num_reads_printed, num_reads_printed_str);
  ulong_to_str(num_bad_reads, num_bad_reads_str);

  status("Printed %s / %s (%.1f%%) to %s",
         num_reads_printed_str, num_reads_str,
         !num_reads ? 0.0 : (100.0 * num_reads_printed) / num_reads,
         futil_outpath_str(output_file));

  // num_bad_reads > 0 implies num_reads > 0, so the division is safe
  if(num_bad_reads > 0) {
    status("Bad reads: %s / %s (%.1f%%) - no kmer {ACGT} of length %zu",
           num_bad_reads_str, num_reads_str,
           (100.0 * num_bad_reads) / num_reads,
           kmer_size);
  }

  // fclose() flushes buffered output; report a failed flush/close
  if(fclose(fout) != 0)
    warn("Cannot close output file: %s", futil_outpath_str(output_file));
  kograph_dealloc(&kograph);

  // Free sequence memory
  for(i = 0; i < rbuf.len; i++) seq_read_dealloc(&rbuf.b[i]);
  read_buf_dealloc(&rbuf);
  ctx_free(seq_files);

  db_graph_dealloc(&db_graph);

  return EXIT_SUCCESS;
}
Пример #30
0
int ctx_correct(int argc, char **argv)
{
  size_t i;
  struct ReadThreadCmdArgs args;
  read_thread_args_alloc(&args);
  read_thread_args_parse(&args, argc, argv, longopts, true);

  GraphFileReader *gfile = &args.gfile;
  GPathFileBuffer *gpfiles = &args.gpfiles;
  CorrectAlnInputBuffer *inputs = &args.inputs;

  // Update colours in graph file - sample in 0, all others in 1
  size_t ncols = gpath_load_sample_pop(gfile, 1, gpfiles->b, gpfiles->len,
                                       args.colour);

  // Check for compatibility between graph files and link files
  graphs_gpaths_compatible(gfile, 1, gpfiles->b, gpfiles->len, 1);

  int64_t ctx_num_kmers = gfile->num_of_kmers;

  //
  // Decide on memory
  //
  size_t bits_per_kmer, kmers_in_hash, graph_mem, path_mem, total_mem;

  // 1 bit needed per kmer if we need to keep track of noreseed
  bits_per_kmer = sizeof(BinaryKmer)*8 + sizeof(Edges)*8 +
                  (gpfiles->len > 0 ? sizeof(GPath*)*8 : 0) +
                  ncols; // in colour

  kmers_in_hash = cmd_get_kmers_in_hash(args.memargs.mem_to_use,
                                        args.memargs.mem_to_use_set,
                                        args.memargs.num_kmers,
                                        args.memargs.num_kmers_set,
                                        bits_per_kmer,
                                        ctx_num_kmers, ctx_num_kmers,
                                        false, &graph_mem);

  // Paths memory
  size_t rem_mem = args.memargs.mem_to_use - MIN2(args.memargs.mem_to_use, graph_mem);
  path_mem = gpath_reader_mem_req(gpfiles->b, gpfiles->len, ncols, rem_mem, false,
                                  kmers_in_hash, false);

  cmd_print_mem(path_mem, "paths");

  // Shift path store memory from graphs->paths
  graph_mem -= sizeof(GPath*)*kmers_in_hash;
  path_mem  += sizeof(GPath*)*kmers_in_hash;

  // Total memory
  total_mem = graph_mem + path_mem;
  cmd_check_mem_limit(args.memargs.mem_to_use, total_mem);

  //
  // Check we can write all output files
  //
  // Open output files
  SeqOutput *outputs = ctx_calloc(inputs->len, sizeof(SeqOutput));
  bool err_occurred = false;

  for(i = 0; i < inputs->len && !err_occurred; i++)
  {
    CorrectAlnInput *input = &inputs->b[i];
    // We loaded target colour into colour zero
    input->crt_params.ctxcol = input->crt_params.ctpcol = 0;
    bool is_pe = asyncio_task_is_pe(&input->files);
    err_occurred = !seqout_open(&outputs[i], input->out_base, args.fmt, is_pe);
    input->output = &outputs[i];
  }

  // Abandon if some of the output files already exist
  if(err_occurred) {
    for(i = 0; i < inputs->len; i++)
      seqout_close(&outputs[i], true);
    die("Error creating output files");
  }

  //
  // Allocate memory
  //

  dBGraph db_graph;
  db_graph_alloc(&db_graph, gfile->hdr.kmer_size, ncols, 1, kmers_in_hash,
                 DBG_ALLOC_EDGES | DBG_ALLOC_NODE_IN_COL);

  // Create a path store that does not tracks path counts
  gpath_reader_alloc_gpstore(gpfiles->b, gpfiles->len, path_mem, false, &db_graph);

  //
  // Load Graph and link files
  //
  GraphLoadingPrefs gprefs = graph_loading_prefs(&db_graph);
  gprefs.empty_colours = true;

  // Load graph, print stats, close file
  graph_load(gfile, gprefs, NULL);
  hash_table_print_stats_brief(&db_graph.ht);
  graph_file_close(gfile);

  // Load link files
  for(i = 0; i < gpfiles->len; i++) {
    gpath_reader_load(&gpfiles->b[i], GPATH_DIE_MISSING_KMERS, &db_graph);
    gpath_reader_close(&gpfiles->b[i]);
  }

  //
  // Run alignment
  //
  correct_reads(inputs->b, inputs->len,
                args.dump_seq_sizes, args.dump_frag_sizes,
                args.fq_zero, args.append_orig_seq,
                args.nthreads, &db_graph);

  // Close and free output files
  for(i = 0; i < inputs->len; i++)
    seqout_close(&outputs[i], false);
  ctx_free(outputs);

  // Closes input files
  read_thread_args_dealloc(&args);
  db_graph_dealloc(&db_graph);

  return EXIT_SUCCESS;
}