Beispiel #1
0
END_TEST

/*
 * Unit test: feeds every line of the system word list into `graph` via
 * insert_word(), then prints each entry of the list that search_prefix()
 * returns for the prefix "app".
 *
 * The test fails (instead of crashing inside fgetws) when the word list
 * cannot be opened.
 */
START_TEST (ut_graph_load_dictionary_prefix_search)
{
  /* Open the word list before touching the graph so that a missing file
     fails the test without leaving an initialised graph behind. */
  FILE *fp = fopen("/usr/share/dict/words", "r");
  ck_assert_msg(fp != NULL, "cannot open /usr/share/dict/words");

  init_node(graph);

  wchar_t word[1000];
  /* Let fgetws use the whole buffer rather than a hard-coded limit. */
  const int word_cap = (int)(sizeof word / sizeof word[0]);

  while (fgetws(word, word_cap, fp))
  {
    /* Strip the trailing newline only when one is actually present: the
       last line of the file (or an over-long line) may not end in '\n',
       and blindly dropping the final character would corrupt the word. */
    size_t len = wcslen(word);
    if (len > 0 && word[len - 1] == L'\n')
    {
      word[len - 1] = L'\0';
    }

    printf("%ls\n", word);
    insert_word(graph, word);
  }
  fclose(fp);

  /* NOTE(review): the result nodes are only walked here, never released;
     confirm who owns the list returned by search_prefix(). */
  PrefixResult *result = search_prefix(graph, L"app");

  while (result != NULL)
  {
    printf("%ls\n", result->word);
    result = result->next;
  }

  clear_node(graph);
}
Beispiel #2
0
/*
 * Looks up the seed that starts at r_seq[read_pos] in the suffix-array
 * index and records every hit that lies on `chromosome` and falls entirely
 * inside the genomic window [start, end] in `suffix_mng`.
 *
 * search_prefix() reports the matching suffix-array rows as the inclusive
 * range [low, high]; its last argument (0) is passed through unchanged from
 * the original code (meaning not visible here).
 *
 * Returns the seed length used (sa_index->k_value) when the index reported
 * at least one match, 0 otherwise.
 */
static int suffix_mng_seed_region_hits(char *r_seq, int read_pos,
                                       sa_index3_t *sa_index, int chromosome,
                                       size_t start, size_t end,
                                       suffix_mng_t *suffix_mng) {
  size_t low = 0, high = 0;
  int num_prefixes = search_prefix(&r_seq[read_pos], &low, &high, sa_index, 0);
  if (num_prefixes <= 0) {
    return 0;
  }

  int suffix_len = sa_index->k_value;
  for (size_t suff = low; suff <= high; suff++) {
    int chrom = sa_index->CHROM[suff];
    if (chrom != chromosome) {
      continue;
    }

    // seed coordinates on the read
    size_t r_start_suf = read_pos;
    size_t r_end_suf = r_start_suf + suffix_len - 1;

    // seed coordinates on the genome: suffix-array entry minus the
    // offset stored for this chromosome
    size_t g_start_suf = sa_index->SA[suff] - sa_index->genome->chrom_offsets[chrom];
    size_t g_end_suf = g_start_suf + suffix_len - 1;

    // keep only hits fully contained in the requested region
    if (start <= g_start_suf && end >= g_end_suf) {
      suffix_mng_update(chrom, r_start_suf, r_end_suf, g_start_suf, g_end_suf, suffix_mng);
    }
  }
  return suffix_len;
}

/*
 * Seeds a read against the suffix-array index, restricted to one strand,
 * one chromosome and the genomic window [start, end], then asks the suffix
 * manager to build CALs into `cal_list`.
 *
 *   read        read to seed; `sequence` is used when strand == 0,
 *               `revcomp` otherwise
 *   num_seeds   desired number of seeds; the seed step is
 *               read->length / num_seeds, but never less than k_value / 2
 *   sa_index    suffix-array index (seed length is sa_index->k_value)
 *   strand      0 selects the forward sequence, anything else the
 *               reverse complement
 *   chromosome  only hits on this chromosome are kept
 *   start, end  inclusive genomic window a hit must lie in
 *   cal_list    passed through to suffix_mng_create_cals()
 *   suffix_mng  receives the hits via suffix_mng_update()
 *
 * Reads shorter than k_value produce no seeds; suffix_mng_create_cals() is
 * still invoked so the caller sees the same call sequence as for any read.
 */
void suffix_mng_search_read_cals_by_region(fastq_read_t *read, int num_seeds, 
					   sa_index3_t *sa_index, 
					   int strand, int chromosome, 
					   size_t start, size_t end, 
					   array_list_t *cal_list, 
					   suffix_mng_t *suffix_mng) {
  // distance between consecutive seeds; guard against num_seeds <= 0 and
  // against a zero step (which would divide by zero below and never
  // advance the loop)
  int read_inc = num_seeds > 0 ? read->length / num_seeds : 0;
  if (read_inc < sa_index->k_value / 2) {
    read_inc = sa_index->k_value / 2;
  }
  if (read_inc < 1) {
    read_inc = 1;
  }

  // last position at which a full seed still fits in the read, and the
  // remainder left over after stepping by read_inc
  int read_end_pos = read->length - sa_index->k_value;
  int extra_seed = (read->length - sa_index->k_value) % read_inc;

  char *r_seq = strand == 0 ? read->sequence : read->revcomp;

  // regularly spaced seeds
  int suffix_len = 0;
  for (int read_pos = 0; read_pos < read_end_pos; read_pos += read_inc) {
    suffix_len = suffix_mng_seed_region_hits(&r_seq[0], read_pos, sa_index,
                                             chromosome, start, end, suffix_mng);
  }

  // one extra seed anchored at the end of the read when the regular steps
  // did not land there. The read_end_pos > 0 check prevents indexing before
  // the start of the sequence when the read is shorter than the seed length
  // (the remainder is then non-zero and negative).
  if (read_end_pos > 0 && suffix_len != read->length && extra_seed) {
    suffix_mng_seed_region_hits(r_seq, read_end_pos, sa_index,
                                chromosome, start, end, suffix_mng);
  }

  // using suffix manager instead of the previous cal manager
  // NOTE(review): the second argument (read->length / 3) is kept as in the
  // original; its meaning is defined by suffix_mng_create_cals().
  suffix_mng_create_cals(read, read->length / 3, strand, 
			 sa_index, cal_list, suffix_mng);
}