struct hash_search_state hash_search( struct hash * root, char * key ) { struct hash * parent = NULL; struct hash * node = root; struct hash * child; char * str = key; int num_matched; char config; char parent_choice = '\0'; char next_char = '\0'; struct charhash * child_charhash; while ( node ) { num_matched = longest_common_prefix( str, node->sigstr, &config ); if ( config != 'c' ) { break; } next_char = *( str + num_matched ); if ( !( child_charhash = charhashlookup( node->child, next_char ) ) ) { break; // No child corresponding to this character. } str += ( num_matched + 1 ); parent = node; node = child_charhash->data; parent_choice = next_char; } struct hash_search_state search_state = { parent, node, parent_choice, str, num_matched, config }; return search_state; }
// Checks longest_common_prefix(first_argument, candidate_list) against fixed
// expectations.  Every assertion compares the returned value with a string
// literal using ==.
// NOTE(review): that is a content comparison only if the return type is a
// string class (e.g. std::string) rather than a raw char pointer - confirm
// against the declaration, which is not visible here.
void test_longest_common_prefix() {
    // One candidate is itself a prefix of the other: the shorter one is expected.
    assert(longest_common_prefix("foo", {"foob", "foobar"}) == "foob");
    // A single candidate is expected back unchanged.
    assert(longest_common_prefix("foo", {"foob"}) == "foob");
    // A candidate equal to the first argument: the result is expected to be that argument.
    assert(longest_common_prefix("foo", {"foo", "foobar"}) == "foo");
    // The candidate list contains "kbc2" twice; the expected result is still "kbc".
    assert(longest_common_prefix("k", {"kbc1", "kbc2", "kbc2"}) == "kbc");
    // Single candidate again, with a one-character first argument.
    assert(longest_common_prefix("k", {"kbc1"}) == "kbc1");
    // "kbb" differs from the others at the third character, so "kb" is expected.
    assert(longest_common_prefix("k", {"kbc1", "kbb", "kbc2"}) == "kb");
    // Same candidates with an empty first argument: the expectation is unchanged.
    assert(longest_common_prefix("", {"kbc1", "kbb", "kbc2"}) == "kb");
    // Candidates that differ only in their last character.
    assert(longest_common_prefix("k", {"kba1", "kba2", "kba3"}) == "kba");
}
unsigned int CSD_PFC::locateInBlock(size_t block, const unsigned char *str, unsigned int len) { if(block>=nblocks){ return 0; } uint64_t delta = 0; unsigned int idInBlock = 0; unsigned int commonPrefix = 0; size_t pos = blocks->get(block); // Read the first string std::string tmpStr((char*)text+pos); pos+=tmpStr.length()+1; idInBlock++; // Read the rest while ( (idInBlock<blocksize) && (pos<bytes)) { // Decode the prefix pos += VByte::decode(text+pos, text+bytes, &delta); // Copy the suffix tmpStr.resize(delta); tmpStr.append((char*)(text+pos)); if (delta >= commonPrefix) { // Compare tmpString with the searched one, only after commonPrefix characters. // (We already knew that commonPrefix was common anyway). commonPrefix += longest_common_prefix( (unsigned char*)tmpStr.c_str()+commonPrefix, str+commonPrefix, tmpStr.length()-commonPrefix, len-commonPrefix ); // We found it! if ((commonPrefix == len) && (tmpStr.length() == len)) { return idInBlock; } } else { // The common prefix is even worse than before, not found. return 0; } pos += tmpStr.length()+1-delta; idInBlock++; } // We checked the whole block but did not find it. return 0; }
/**
 * g_filename_completer_get_completion_suffix:
 * @completer: the filename completer.
 * @initial_text: text to be completed.
 *
 * Obtains the text that can be appended to @initial_text as a completion.
 *
 * The candidates and the prefix to match are obtained from
 * init_completion(). For every candidate that starts with that prefix, the
 * part after the prefix is taken; the result is what
 * longest_common_prefix() reports as shared by all of those parts. The
 * scan stops early once the shared part becomes empty.
 *
 * Returns: the completion suffix (possibly an empty string), or %NULL if
 *     no candidate starts with the prefix. This string is not owned by
 *     GIO, so remember to g_free() it when finished.
 **/
char *
g_filename_completer_get_completion_suffix (GFilenameCompleter *completer,
                                            const char *initial_text)
{
  GList *candidates, *iter;
  char *prefix;
  char *suffix = NULL;

  g_return_val_if_fail (G_IS_FILENAME_COMPLETER (completer), NULL);
  g_return_val_if_fail (initial_text != NULL, NULL);

  candidates = init_completion (completer, initial_text, &prefix);

  for (iter = candidates; iter != NULL; iter = iter->next)
    {
      char *candidate = iter->data;
      char *tail;
      char *narrowed;

      if (!g_str_has_prefix (candidate, prefix))
        continue;

      /* Part of the candidate that follows the matched prefix. */
      tail = candidate + strlen (prefix);

      if (suffix == NULL)
        {
          /* First matching candidate: its whole tail is the starting point. */
          suffix = g_strdup (tail);
          continue;
        }

      /* Shrink the suffix to what it shares with this candidate's tail. */
      narrowed = longest_common_prefix (suffix, tail);
      g_free (suffix);
      suffix = narrowed;

      /* Nothing in common any more; further candidates cannot change that. */
      if (*suffix == 0)
        break;
    }

  g_free (prefix);

  return suffix;
}
/**
 * Build the encoded dictionary from the strings produced by `it`.
 *
 * Strings are grouped into blocks of `blocksize` entries. The first string
 * of each block is stored in full and its byte offset is appended to
 * `blocks`; every other string is stored as a VByte-encoded count of leading
 * characters shared with the previous string, followed by the remaining
 * characters. Every entry is terminated with a NUL byte. After the last
 * string, the final byte offset is appended to `blocks` as well.
 *
 * `listener` is passed to NOTIFYCOND after every processed string.
 *
 * NOTE(review): the results of malloc()/realloc() are used without being
 * checked, and the final realloc() is called with size 0 when the iterator
 * yields no strings - confirm how allocation failure should be reported.
 */
CSD_PFC::CSD_PFC(hdt::IteratorUCharString *it, uint32_t blocksize, hdt::ProgressListener *listener) :
    isMapped(false)
{
    this->type = PFC;
    this->numstrings = 0;
    this->bytes = 0;
    this->blocksize = blocksize;
    this->nblocks = 0;

    // Growable output buffer for the encoded sequence.
    uint64_t capacity = 1024;
    text = (unsigned char*)malloc(capacity*sizeof(unsigned char));

    // Byte offsets of the first string of each block.
    blocks = new hdt::LogSequence2(sizeof(size_t)==8 ? 34 : 32);

    string lastStr;

    while (it->hasNext()) {
        unsigned char *cur = it->next();
        size_t curLen = strlen((char*)cur);

        // Grow the buffer when this entry might not fit:
        // +1 for the terminator, +10 for the VByte encoding (worst case).
        if ((bytes+curLen+11) > capacity) {
            capacity = (bytes+curLen+10)*2;
            text = (unsigned char*)realloc(text, capacity*sizeof(unsigned char));
        }

        // Number of leading characters that are NOT stored for this entry.
        unsigned int skip = 0;

        if ((numstrings % blocksize) == 0) {
            // Block start: remember where it begins; the string is stored in full.
            blocks->push_back(bytes);
            nblocks++;
        } else {
            // Inside a block: store only what differs from the previous string,
            // preceded by the encoded length of the shared part.
            skip = longest_common_prefix((unsigned char *)lastStr.c_str(), cur, lastStr.length(), curLen);
            bytes += VByte::encode(text+bytes, skip);
        }

        // Copy the stored part (exactly curLen-skip bytes, no terminator)...
        strncpy((char*)(text+bytes), (char*)cur+skip, curLen-skip);
        bytes += curLen-skip;

        // ...and terminate it explicitly.
        text[bytes] = '\0';
        bytes++;

        numstrings++;

        // Keep a copy for the next iteration before the iterator releases it.
        lastStr.assign((char*)cur);

        NOTIFYCOND(listener, "Converting dictionary to PFC", numstrings, it->getNumberOfElements());
        it->freeStr(cur);
    }

    // Closing offset: one past the last encoded byte.
    blocks->push_back(bytes);

    // Give back the unused tail of the buffer.
    text = (unsigned char *) realloc(text, bytes*sizeof(unsigned char));
    blocks->reduceBits();
}
/*
 * Insert `value` under `key` into the subtree rooted at `n`.
 *
 *   n       - node currently being examined (may be NULL, or a tagged leaf
 *             as recognised by IS_LEAF)
 *   ref     - slot that holds `n`; it is overwritten whenever the node at
 *             this position is replaced, and is also handed to
 *             add_child4()/add_child()
 *   key     - key bytes, key_len of them
 *   value   - value to store
 *   depth   - index into `key` at which this node starts comparing
 *   old     - set to 1 when an existing leaf's value was replaced; it is
 *             not written otherwise
 *
 * Returns the previous value when an existing leaf was updated, NULL in all
 * other cases.
 */
static void* recursive_insert(art_node *n, art_node **ref, const unsigned char *key, int key_len, void *value, int depth, int *old) {
    // If we are at a NULL node, inject a leaf
    if (!n) {
        *ref = (art_node*)SET_LEAF(make_leaf(key, key_len, value));
        return NULL;
    }

    // If we are at a leaf, we need to replace it with a node
    if (IS_LEAF(n)) {
        art_leaf *l = LEAF_RAW(n);

        // Check if we are updating an existing value.
        // A zero result from leaf_matches() is treated as "same key".
        if (!leaf_matches(l, key, key_len, depth)) {
            *old = 1;
            void *old_val = l->value;
            l->value = value;
            return old_val;
        }

        // New value, we must split the leaf into a node4
        art_node4 *new_node = (art_node4*)alloc_node(NODE4);

        // Create a new leaf
        art_leaf *l2 = make_leaf(key, key_len, value);

        // Determine longest prefix shared by the two leaves from `depth` on.
        // The full length is recorded, but at most MAX_PREFIX_LEN bytes of it
        // are copied into the node.
        int longest_prefix = longest_common_prefix(l, l2, depth);
        new_node->n.partial_len = longest_prefix;
        memcpy(new_node->n.partial, key+depth, min(MAX_PREFIX_LEN, longest_prefix));

        // Add the leafs to the new node4, each under the key byte that
        // follows the shared prefix
        *ref = (art_node*)new_node;
        add_child4(new_node, ref, l->key[depth+longest_prefix], SET_LEAF(l));
        add_child4(new_node, ref, l2->key[depth+longest_prefix], SET_LEAF(l2));
        return NULL;
    }

    // Check if given node has a prefix
    if (n->partial_len) {
        // Determine if the prefixes differ, since we need to split
        int prefix_diff = prefix_mismatch(n, key, key_len, depth);
        if ((uint32_t)prefix_diff >= n->partial_len) {
            // No mismatch inside the prefix: skip it and descend.
            depth += n->partial_len;
            goto RECURSE_SEARCH;
        }

        // Create a new node that takes over the first prefix_diff bytes of
        // the prefix and replaces `n` in its slot
        art_node4 *new_node = (art_node4*)alloc_node(NODE4);
        *ref = (art_node*)new_node;
        new_node->n.partial_len = prefix_diff;
        memcpy(new_node->n.partial, n->partial, min(MAX_PREFIX_LEN, prefix_diff));

        // Adjust the prefix of the old node
        if (n->partial_len <= MAX_PREFIX_LEN) {
            // The byte at prefix_diff becomes the edge to the old node; it
            // must be read before the remaining prefix is shifted down.
            add_child4(new_node, ref, n->partial[prefix_diff], n);
            n->partial_len -= (prefix_diff+1);
            memmove(n->partial, n->partial+prefix_diff+1, min(MAX_PREFIX_LEN, n->partial_len));
        } else {
            // Prefix longer than MAX_PREFIX_LEN: the edge byte and the new
            // prefix bytes are taken from the key of a leaf obtained via
            // minimum(n) rather than from n->partial.
            // NOTE(review): presumably n->partial holds only the first
            // MAX_PREFIX_LEN bytes of the prefix - confirm against the
            // art_node definition.
            n->partial_len -= (prefix_diff+1);
            art_leaf *l = minimum(n);
            add_child4(new_node, ref, l->key[depth+prefix_diff], n);
            memcpy(n->partial, l->key+depth+prefix_diff+1, min(MAX_PREFIX_LEN, n->partial_len));
        }

        // Insert the new leaf under the key byte at the mismatch position
        art_leaf *l = make_leaf(key, key_len, value);
        add_child4(new_node, ref, key[depth+prefix_diff], SET_LEAF(l));
        return NULL;
    }

RECURSE_SEARCH:;

    // Find a child to recurse to
    art_node **child = find_child(n, key[depth]);
    if (child) {
        return recursive_insert(*child, child, key, key_len, value, depth+1, old);
    }

    // No child, node goes within us
    art_leaf *l = make_leaf(key, key_len, value);
    add_child(n, ref, key[depth], SET_LEAF(l));
    return NULL;
}