Ejemplo n.º 1
0
/// Recursively collects the words stored at and below one trie node whose
/// Damerau-Levenshtein distance to `word` does not exceed `distance`.
///
/// The node record is decoded from `file_mmap_` as four consecutive fields:
/// value, frequency, children offset and number of children. The node's value
/// is appended (as a single char) to `current_word`, and the subtree is
/// abandoned early when the path is already too far from `word_partial`.
///
/// @param offset        Offset of the node record inside `file_mmap_`.
/// @param current_word  Characters accumulated on the path above this node.
/// @param word          The query word candidates are compared against.
/// @param distance      Maximum accepted distance between a candidate and `word`.
/// @param word_partial  Portion of `word` used for the pruning comparison;
///                      presumably the caller keeps it as long as the path
///                      (TODO confirm against the initial caller).
/// @return One tuple (candidate word, distance to `word`, frequency) per
///         accepted node, this node first, then its children in stored order.
std::vector<std::tuple<std::string, unsigned int, unsigned int> > CompactTrie::get_dlwords_aux( unsigned int offset, std::string current_word, std::string word, unsigned int distance, std::string word_partial )
{
  // Decode the fixed-layout node record field by field.
  const unsigned int value = read_binary_unsigned_int_void_ptr( offset_void_pointer(file_mmap_, offset), COMPACT_TRIE_VALUE_SIZE );
  offset += COMPACT_TRIE_VALUE_SIZE;
  const unsigned int frequency = read_binary_unsigned_int_void_ptr( offset_void_pointer(file_mmap_, offset), COMPACT_TRIE_FREQUENCY_SIZE );
  offset += COMPACT_TRIE_FREQUENCY_SIZE;
  const unsigned int children_offset = read_binary_unsigned_int_void_ptr( offset_void_pointer(file_mmap_, offset), COMPACT_TRIE_CHILDREN_OFFSET_SIZE );
  offset += COMPACT_TRIE_CHILDREN_OFFSET_SIZE;
  const unsigned int n_children = read_binary_unsigned_int_void_ptr( offset_void_pointer(file_mmap_, offset), COMPACT_TRIE_N_CHILDREN_SIZE );

  current_word += static_cast<char>(value);

  std::vector<std::tuple<std::string, unsigned int, unsigned int> > ret;

  // Pruning only needs the distance to the partial query, so compute that one
  // first and postpone the comparison with the full word until it is required.
  const unsigned int current_distance_partial = damerau_levenshtein_distance(current_word.c_str(), word_partial.c_str());

  // Exact search: any deviation from the partial query ends this branch.
  if ( distance == 0 && current_distance_partial > 0 )
    return ret;

  // Fuzzy search: give up once the path is more than twice the budget away.
  if ( current_distance_partial > distance*2 )
    return ret;

  // A node is a candidate when it is a leaf or carries a non-zero frequency.
  if ( n_children == 0 || frequency != 0 )
  {
    const unsigned int current_distance = damerau_levenshtein_distance(current_word.c_str(), word.c_str());
    if ( current_distance <= distance )
      ret.push_back( std::make_tuple( current_word, current_distance, frequency ) );
  }

  if ( n_children == 0 )
    return ret;

  // Every child receives the same extended partial query, so build it once.
  // The second condition keeps the index inside `word`: the original indexed by
  // current_word.size() while only checking word_partial.size(), which reads
  // out of bounds whenever the two lengths are not in step.
  if ( word_partial.size() < word.size() && current_word.size() < word.size() )
    word_partial += word[current_word.size()];

  for ( unsigned int i = 0; i < n_children; ++i )
  {
    auto vect = get_dlwords_aux( children_offset + i*COMPACT_TRIE_NODE_SIZE, current_word, word, distance, word_partial );
    ret.insert(ret.end(), vect.begin(), vect.end());
  }

  return ret;
}
Ejemplo n.º 2
0
/*
 * Python-callable wrapper around damerau_levenshtein_distance().
 *
 * Parses two string arguments ("ss") from `args`, passes them to
 * damerau_levenshtein_distance() and returns the result as a Python integer.
 * Returns NULL with an exception set when argument parsing fails, and raises
 * MemoryError when the distance function reports -1.
 */
static PyObject* jellyfish_damerau_levenshtein_distance(PyObject *self,
                                                     PyObject *args)
{
    const char *first = NULL;
    const char *second = NULL;
    int distance;

    /* PyArg_ParseTuple has already set the exception on failure. */
    if (!PyArg_ParseTuple(args, "ss", &first, &second))
        return NULL;

    distance = damerau_levenshtein_distance(first, second);
    if (distance != -1)
        return Py_BuildValue("i", distance);

    /* -1 is treated as an allocation failure; PyErr_NoMemory() returns NULL. */
    return PyErr_NoMemory();
}