END_TEST

/*
 * Loads every line of /usr/share/dict/words into `graph`, echoing each word
 * as it is inserted, then prints every entry of the list returned by
 * search_prefix() for the prefix "app".
 *
 * If the dictionary file cannot be opened, the reason is reported with
 * perror() and the load/search steps are skipped; the graph is still
 * cleared before the test ends.
 */
START_TEST (ut_graph_load_dictionary_prefix_search)
{
    init_node(graph);

    FILE *fp = fopen("/usr/share/dict/words", "r");
    if (fp == NULL) {
        /* The word list is not installed on every system. */
        perror("/usr/share/dict/words");
    } else {
        wchar_t word[1000];

        /* At most 79 wide characters are read per call. */
        while (fgetws(word, 80, fp)) {
            /*
             * fgetws() keeps the newline when it fits. Strip it only when it
             * is really there, so a final line without '\n' (or an overlong
             * line split by the read limit) does not lose its last letter.
             */
            size_t len = wcslen(word);
            if (len > 0 && word[len - 1] == L'\n') {
                word[len - 1] = L'\0';
            }

            printf("%ls\n", word);
            insert_word(graph, word);
        }
        fclose(fp);

        /*
         * Walk the list with a separate cursor so the head pointer stays
         * available.
         * NOTE(review): if search_prefix() hands ownership of the list to the
         * caller, `result` should be released here -- confirm against its
         * contract.
         */
        PrefixResult *result = search_prefix(graph, L"app");
        for (PrefixResult *cursor = result; cursor != NULL; cursor = cursor->next) {
            printf("%ls\n", cursor->word);
        }
    }

    clear_node(graph);
}
/*
 * Looks up the seed that starts at r_seq[read_pos] in the suffix array and
 * registers every hit that lies on `chromosome` and fully inside the genomic
 * window [start, end] with the suffix manager.
 *
 * Returns the seed length used (sa_index->k_value) when search_prefix()
 * reported at least one hit, or 0 when it reported none.
 */
static int seed_cals_in_region(char *r_seq, int read_pos, sa_index3_t *sa_index,
                               int chromosome, size_t start, size_t end,
                               suffix_mng_t *suffix_mng)
{
    size_t low, high;
    int num_suffixes = search_prefix(&r_seq[read_pos], &low, &high, sa_index, 0);

    /* low/high are only consulted when the search reported hits. */
    if (num_suffixes <= 0) {
        return 0;
    }

    int suffix_len = sa_index->k_value;

    for (size_t suff = low; suff <= high; suff++) {
        int chrom = sa_index->CHROM[suff];
        if (chrom != chromosome) {
            continue;
        }

        /* Seed coordinates on the read ... */
        size_t r_start_suf = read_pos;
        size_t r_end_suf = r_start_suf + suffix_len - 1;

        /* ... and on the chromosome (suffix-array position minus the chromosome offset). */
        size_t g_start_suf = sa_index->SA[suff] - sa_index->genome->chrom_offsets[chrom];
        size_t g_end_suf = g_start_suf + suffix_len - 1;

        if (start <= g_start_suf && end >= g_end_suf) {
            suffix_mng_update(chrom, r_start_suf, r_end_suf,
                              g_start_suf, g_end_suf, suffix_mng);
        }
    }

    return suffix_len;
}

/*
 * Seeds `read` against the suffix array, keeping only the hits located on
 * `chromosome` within [start, end], and then asks the suffix manager to
 * build the CALs into `cal_list`.
 *
 *   read        read to seed; read->sequence is used when strand == 0,
 *               read->revcomp otherwise
 *   num_seeds   requested number of seeds; values below 1 are treated as 1
 *   sa_index    suffix-array index (k_value, SA, CHROM, genome offsets)
 *   strand      selects the sequence to seed and is forwarded to
 *               suffix_mng_create_cals()
 *   chromosome  only hits on this chromosome are kept
 *   start, end  inclusive genomic window a seed hit must fit in
 *   cal_list    forwarded to suffix_mng_create_cals()
 *   suffix_mng  suffix manager that accumulates the seed hits
 *
 * Reads shorter than the seed length are not seeded at all;
 * suffix_mng_create_cals() is still invoked for them.
 */
void suffix_mng_search_read_cals_by_region(fastq_read_t *read, int num_seeds,
                                           sa_index3_t *sa_index, int strand,
                                           int chromosome, size_t start, size_t end,
                                           array_list_t *cal_list,
                                           suffix_mng_t *suffix_mng)
{
    int suffix_len = 0;

    /* Distance between consecutive seeds; a zero seed count would divide by zero. */
    int read_inc = read->length / (num_seeds > 0 ? num_seeds : 1);
    if (read_inc < sa_index->k_value / 2) {
        read_inc = sa_index->k_value / 2;
    }
    /* A zero stride would never advance the loop and would break the modulo below. */
    if (read_inc < 1) {
        read_inc = 1;
    }

    /*
     * Last position where a full seed still fits. It is negative when the
     * read is shorter than the seed; no position may be seeded in that case,
     * otherwise the extra seed below would read before the start of r_seq.
     */
    int read_end_pos = read->length - sa_index->k_value;
    int extra_seed = read_end_pos > 0 ? read_end_pos % read_inc : 0;

    char *r_seq = strand == 0 ? read->sequence : read->revcomp;

    /*
     * Regularly spaced seeds.
     * NOTE(review): position read_end_pos itself is never seeded when it is
     * an exact multiple of read_inc (strict '<' here, extra_seed == 0 below)
     * -- kept as in the original; confirm whether that is intended.
     */
    for (int read_pos = 0; read_pos < read_end_pos; read_pos += read_inc) {
        suffix_len = seed_cals_in_region(r_seq, read_pos, sa_index, chromosome,
                                         start, end, suffix_mng);
    }

    /* One more seed anchored at the end of the read when the stride left a remainder. */
    if (read_end_pos > 0 && suffix_len != read->length && extra_seed) {
        suffix_len = seed_cals_in_region(r_seq, read_end_pos, sa_index, chromosome,
                                         start, end, suffix_mng);
    }

    /* using suffix manager instead of the previous cal manager */
    suffix_mng_create_cals(read, read->length / 3, strand, sa_index, cal_list, suffix_mng);
}