Exemple #1
0
// Initialise `crawler` for use with `db_graph`.
//
// Allocates four zeroed arrays with one entry per colour in the graph
// (col_paths, multicol_paths, unicol_paths, col_list), sets num_paths to 0,
// then allocates the crawler's cache, graph walker and repeat walker.
// Whatever `crawler` held before is overwritten.
// Release with graph_crawler_dealloc().
//
// db_graph->node_in_cols must be non-NULL (asserted).
void graph_crawler_alloc(GraphCrawler *crawler, const dBGraph *db_graph)
{
  ctx_assert(db_graph->node_in_cols != NULL);

  const size_t num_cols = db_graph->num_of_cols;

  // Per-colour working arrays, all zero-filled
  int *paths_per_col = ctx_calloc(num_cols, sizeof(*paths_per_col));
  GCMultiColPath *multi_paths = ctx_calloc(num_cols, sizeof(*multi_paths));
  GCUniColPath *uni_paths = ctx_calloc(num_cols, sizeof(*uni_paths));
  uint32_t *colours = ctx_calloc(num_cols, sizeof(*colours));

  // Build the initial state in a temporary and copy it over in one go;
  // members not named here start out zeroed
  GraphCrawler init = {.num_paths = 0,
                       .col_paths = paths_per_col,
                       .multicol_paths = multi_paths,
                       .unicol_paths = uni_paths,
                       .col_list = colours};

  memcpy(crawler, &init, sizeof(GraphCrawler));

  // Sub-objects are set up in place once the struct has been initialised
  graph_cache_alloc(&crawler->cache, db_graph);
  graph_walker_alloc(&crawler->wlk, db_graph);
  rpt_walker_alloc(&crawler->rptwlk, db_graph->ht.capacity, 22); // 4MB
}

// Release everything a crawler owns: the four per-colour arrays, then the
// cache, graph walker and repeat walker. Afterwards the whole struct is
// zeroed, so no stale pointers remain in it.
void graph_crawler_dealloc(GraphCrawler *crawler)
{
  // Per-colour arrays
  ctx_free(crawler->col_paths);
  ctx_free(crawler->multicol_paths);
  ctx_free(crawler->unicol_paths);
  ctx_free(crawler->col_list);

  // Embedded sub-objects
  graph_cache_dealloc(&crawler->cache);
  graph_walker_dealloc(&crawler->wlk);
  rpt_walker_dealloc(&crawler->rptwlk);

  // Wipe the struct now that nothing it points to is live
  memset(crawler, 0, sizeof(*crawler));
}
Exemple #2
0
static void run_exp_abc(const dBGraph *db_graph, bool prime_AB,
                        size_t nthreads, size_t num_repeats,
                        size_t max_AB_dist, bool print_failed_contigs)
{
  ExpABCWorker *wrkrs = ctx_calloc(nthreads, sizeof(ExpABCWorker));
  size_t i, j;

  if(max_AB_dist == 0) max_AB_dist = SIZE_MAX;

  for(i = 0; i < nthreads; i++) {
    wrkrs[i].colour = 0;
    wrkrs[i].nthreads = nthreads;
    wrkrs[i].db_graph = db_graph;
    wrkrs[i].prime_AB = prime_AB;
    wrkrs[i].num_limit = num_repeats / nthreads;
    wrkrs[i].max_AB_dist = max_AB_dist;
    wrkrs[i].print_failed_contigs = print_failed_contigs;
    db_node_buf_alloc(&wrkrs[i].nbuf, 1024);
    graph_walker_alloc(&wrkrs[i].gwlk, db_graph);
    rpt_walker_alloc(&wrkrs[i].rptwlk, db_graph->ht.capacity, 22); // 4MB
  }

  util_run_threads(wrkrs, nthreads, sizeof(ExpABCWorker),
                   nthreads, run_exp_abc_thread);

  // Merge results
  size_t num_tests = 0, results[NUM_RESULT_VALUES] = {0};
  size_t ab_fail_state[GRPHWLK_NUM_STATES] = {0};
  size_t bc_fail_state[GRPHWLK_NUM_STATES] = {0};

  for(i = 0; i < nthreads; i++) {
    num_tests += wrkrs[i].num_tests;
    for(j = 0; j < NUM_RESULT_VALUES; j++) results[j] += wrkrs[i].results[j];
    for(j = 0; j < GRPHWLK_NUM_STATES; j++) ab_fail_state[j] += wrkrs[i].ab_fail_state[j];
    for(j = 0; j < GRPHWLK_NUM_STATES; j++) bc_fail_state[j] += wrkrs[i].bc_fail_state[j];
    db_node_buf_dealloc(&wrkrs[i].nbuf);
    graph_walker_dealloc(&wrkrs[i].gwlk);
    rpt_walker_dealloc(&wrkrs[i].rptwlk);
  }

  // Print results
  char nrunstr[50];
  ulong_to_str(num_tests, nrunstr);
  status("Ran %s tests with %zu threads", nrunstr, nthreads);

  const char *titles[] = {"RES_ABC_SUCCESS", "RES_AB_WRONG",
                          "RES_AB_FAILED",   "RES_BC_WRONG",
                          "RES_BC_FAILED",   "RES_BC_OVERSHOT",
                          "RES_LOST_IN_RPT", "RES_NO_TRAVERSAL"};

  util_print_nums(titles, results, NUM_RESULT_VALUES, 30);

  status("AB_FAILED:");
  graph_step_print_state_hist(ab_fail_state);
  status("BC_FAILED:");
  graph_step_print_state_hist(bc_fail_state);

  ctx_free(wrkrs);
}
// Walk through a tandem repeat three times, with increasing amounts of path
// information, and check the contig produced each time.
//
// The test sequence is a 15bp flank, eleven copies of a 12bp repeat unit and
// a second 15bp flank. It is loaded into a one-colour graph with kmer size 11.
// Walks always start from the first kmer of the sequence:
//   1) no paths loaded
//   2) two short paths (p0, p1), each spanning one flank plus two repeat units
//   3) a path covering the whole sequence
// The expected node count and sequence for each walk are passed to
// test_walk(), which is defined elsewhere.
static void test_repeat_loop(void)
{
  TASSERT(sizeof(FollowPath) == 20);

  // Construct 1 colour graph with kmer-size=11
  dBGraph graph;
  size_t kmer_size = 11, ncols = 1;

  // Set up alignment correction params
  CorrectAlnParam params = {.ctpcol = 0, .ctxcol = 0,
                            .ins_gap_min = 0, .ins_gap_max = 0,
                            .one_way_gap_traverse = true, .use_end_check = true,
                            .max_context = 10,
                            .gap_variance = 0.1, .gap_wiggle = 5};

  // Sequence with repeat
  char seq[] = "ATTTGGAACTCCGGA"
               "GATAGGGCCAGT"
               "GATAGGGCCAGT"
               "GATAGGGCCAGT"
               "GATAGGGCCAGT"
               "GATAGGGCCAGT"
               "GATAGGGCCAGT"
               "GATAGGGCCAGT"
               "GATAGGGCCAGT"
               "GATAGGGCCAGT"
               "GATAGGGCCAGT"
               "GATAGGGCCAGT"
               "CGTCAGGAGCTAACT";

  // Short paths: left flank + two repeat units, two repeat units + right flank
  char p0[] = "ATTTGGAACTCCGGA""GATAGGGCCAGT""GATAGGGCCAGT";
  char p1[] = "GATAGGGCCAGT""GATAGGGCCAGT""CGTCAGGAGCTAACT";

  // Allocate graph, but don't add any sequence
  _construct_graph_with_paths(&graph, kmer_size, ncols, NULL, 0, params);

  GenPathWorker *gen_path_wrkr = gen_paths_workers_alloc(1, &graph, NULL);

  GraphWalker gwlk;
  RepeatWalker rptwlk;
  // The walker is allocated against the graph it will traverse
  graph_walker_alloc(&gwlk, &graph);
  rpt_walker_alloc(&rptwlk, graph.ht.capacity, 12);

  dBNodeBuffer nbuf;
  db_node_buf_alloc(&nbuf, 1024);

  // Construct graph but no paths
  build_graph_from_str_mt(&graph, 0, seq, strlen(seq));
  // Repeat copies collapse onto the same kmers, hence 15+12+15 kmers only
  TASSERT2(graph.ht.num_kmers == 15+12+15, "%zu", (size_t)graph.ht.num_kmers);

  // Find first node in sequence
  dBNode node0 = db_graph_find_str(&graph, seq);
  TASSERT(node0.key != HASH_NOT_FOUND);

  // 1) With no paths
  char ans0[] = "ATTTGGAACTCCGGA""GATAGGGCCAGT";
  test_walk(&gwlk, &rptwlk, node0, &nbuf, &graph, 15+2, ans0);

  // 2) Add small paths - produces collapsed down seq with two copy repeat
  gen_paths_from_str_mt(gen_path_wrkr, p0, params);
  gen_paths_from_str_mt(gen_path_wrkr, p1, params);
  char ans1[] = "ATTTGGAACTCCGGA""GATAGGGCCAGT""GATAGGGCCAGT""CGTCAGGAGCTAACT";
  test_walk(&gwlk, &rptwlk, node0, &nbuf, &graph, 15+12+12+5, ans1);

  // 3) Add long paths
  gen_paths_from_str_mt(gen_path_wrkr, seq, params);
  // A sequence of length n contains n+1-kmer_size kmers
  test_walk(&gwlk, &rptwlk, node0, &nbuf, &graph, strlen(seq)+1-kmer_size, seq);

  graph_walker_dealloc(&gwlk);
  rpt_walker_dealloc(&rptwlk);
  db_node_buf_dealloc(&nbuf);
  gen_paths_workers_dealloc(gen_path_wrkr, 1);
  db_graph_dealloc(&graph);
}

// Entry point for the repeat walker tests: announces the suite through
// test_status(), then runs the repeat-loop test.
void test_repeat_walker()
{
  test_status("Testing repeat_walker.h");

  test_repeat_loop();
}
Exemple #4
0
// Create an array of `num_callers` bubble callers.
//
// Every caller receives its index as threadid, a copy of `prefs`, and the
// same `db_graph`, `gzout`, output mutex and bubble counter; the mutex and
// the counter are allocated once here and shared by pointer. Each caller also
// gets its own haploid_seen array, node buffers, walkers, cache, step-pointer
// buffers and output string buffer.
//
// num_callers must be > 0 (asserted). Dies if the mutex cannot be
// initialised. Free the result with bubble_callers_destroy().
BubbleCaller* bubble_callers_new(size_t num_callers,
                                 BubbleCallingPrefs prefs,
                                 gzFile gzout,
                                 const dBGraph *db_graph)
{
  ctx_assert(num_callers > 0);

  // Max usage is 4 * max_allele_len * cols
  const size_t path_cap = MAX2(prefs.max_flank_len, prefs.max_allele_len);

  BubbleCaller *callers = ctx_malloc(num_callers * sizeof(BubbleCaller));

  // State shared between all callers: one output lock, one bubble counter
  pthread_mutex_t *shared_lock = ctx_malloc(sizeof(pthread_mutex_t));
  if(pthread_mutex_init(shared_lock, NULL) != 0) die("mutex init failed");

  size_t *shared_count = ctx_calloc(1, sizeof(size_t));

  size_t idx;
  for(idx = 0; idx < num_callers; idx++)
  {
    BubbleCaller *bc = &callers[idx];

    // Members not named in the initialiser start out zeroed
    BubbleCaller init = {.threadid = idx, .nthreads = num_callers,
                         .haploid_seen = ctx_calloc(1+prefs.num_haploid, sizeof(bool)),
                         .num_bubbles_ptr = shared_count,
                         .prefs = prefs,
                         .db_graph = db_graph, .gzout = gzout,
                         .out_lock = shared_lock};

    memcpy(bc, &init, sizeof(BubbleCaller));

    // First two buffers don't actually need to grow
    db_node_buf_alloc(&bc->flank5p, prefs.max_flank_len);
    db_node_buf_alloc(&bc->pathbuf, path_cap);

    graph_walker_alloc(&bc->wlk, db_graph);
    rpt_walker_alloc(&bc->rptwlk, db_graph->ht.capacity, 22); // 4MB

    graph_cache_alloc(&bc->cache, db_graph);
    cache_stepptr_buf_alloc(&bc->spp_forward, 1024);
    cache_stepptr_buf_alloc(&bc->spp_reverse, 1024);
    strbuf_alloc(&bc->output_buf, 2048);
  }

  return callers;
}

// Free an array of callers created by bubble_callers_new().
//
// Each caller's own resources are released first. The output mutex and the
// bubble counter are then released once, through callers[0], and finally the
// array itself is freed.
//
// num_callers must be > 0 (asserted).
void bubble_callers_destroy(BubbleCaller *callers, size_t num_callers)
{
  ctx_assert(num_callers > 0);

  size_t idx;
  for(idx = 0; idx < num_callers; idx++)
  {
    BubbleCaller *bc = &callers[idx];

    ctx_free(bc->haploid_seen);

    db_node_buf_dealloc(&bc->flank5p);
    db_node_buf_dealloc(&bc->pathbuf);

    rpt_walker_dealloc(&bc->rptwlk);
    graph_walker_dealloc(&bc->wlk);

    graph_cache_dealloc(&bc->cache);
    cache_stepptr_buf_dealloc(&bc->spp_forward);
    cache_stepptr_buf_dealloc(&bc->spp_reverse);
    strbuf_dealloc(&bc->output_buf);
  }

  // Shared state: destroy and free exactly once, via the first caller
  BubbleCaller *first = &callers[0];
  pthread_mutex_destroy(first->out_lock);
  ctx_free(first->out_lock);
  ctx_free(first->num_bubbles_ptr);

  ctx_free(callers);
}