/*
 * Return the next chunk of the chunker's buffered data as a Python buffer
 * object, or NULL with a Python exception set.
 *
 * Once c->done is set, StopIteration is raised when bytes_read equals
 * bytes_yielded; otherwise a generic Exception reports the mismatch.
 * NULL is also returned whenever chunker_fill() reports failure
 * (presumably with its own exception already set -- confirm in chunker_fill).
 */
static PyObject *
chunker_process(Chunker *c)
{
    const uint32_t mask = c->chunk_mask;
    const uint32_t min_len = c->min_size;
    const uint32_t win = c->window_size;
    uint32_t hash;
    int n = 0;
    int old_last;

    if (c->done) {
        goto exhausted;
    }

    /* Ask for more data when a full window (plus one byte) is not buffered. */
    if (c->remaining <= win && !chunker_fill(c)) {
        return NULL;
    }

    /* Less than one window left even after filling: emit the tail and finish. */
    if (c->remaining < win) {
        c->done = 1;
        if (!c->remaining) {
            goto exhausted;
        }
        c->bytes_yielded += c->remaining;
        return PyBuffer_FromMemory(c->data + c->position, c->remaining);
    }

    hash = buzhash(c->data + c->position, win, c->table);
    for (;;) {
        /* Stop when there is no byte beyond the current window ... */
        if (!(c->remaining > c->window_size)) {
            break;
        }
        /* ... or when the masked hash is zero after at least min_len steps. */
        if (!(hash & mask) && !(n < min_len)) {
            break;
        }

        /* Slide the window: drop the byte at position, add the one after it. */
        hash = buzhash_update(hash,
                              c->data[c->position],
                              c->data[c->position + win],
                              win, c->table);
        c->position++;
        c->remaining--;
        n++;

        if (c->remaining <= win && !chunker_fill(c)) {
            return NULL;
        }
    }

    /* At most one window left: fold it into the chunk being emitted. */
    if (c->remaining <= win) {
        c->position += c->remaining;
        c->remaining = 0;
    }

    /* The chunk spans from the previous cut point to the current position. */
    old_last = c->last;
    c->last = c->position;
    n = c->last - old_last;
    c->bytes_yielded += n;
    return PyBuffer_FromMemory(c->data + old_last, n);

exhausted:
    if (c->bytes_read == c->bytes_yielded) {
        PyErr_SetNone(PyExc_StopIteration);
    } else {
        PyErr_SetString(PyExc_Exception, "chunkifier byte count mismatch");
    }
    return NULL;
}
/**
 * Scan haystack for the patterns registered in search.
 *
 * *match is reset to 0 on entry. Every time a pattern is confirmed at the
 * current position, the bloom mask of the current hash is OR-ed into *match.
 *
 * Returns 1 as soon as a pattern matches when return_on_first_match is
 * non-zero, or once the bits accumulated in *match cover all of
 * search->bloom. Returns 0 otherwise, including when haystack is shorter
 * than search->m bytes; *match may still hold bits of partial results
 * in that case.
 */
static inline int
str_search_find (kk_str_search_t *search, const char *haystack,
                 kk_str_match_t *match, int return_on_first_match)
{
  uint64_t mask;
  uint32_t hash;
  size_t idx;

  /* Drop whatever a previous call left behind. */
  *match = 0ull;

  /* A haystack shorter than search->m bytes cannot be hashed at all. */
  for (idx = 0; idx < search->m; idx++)
    {
      if (haystack[idx] == '\0')
        return 0;
    }

  /* Hash of the first search->m bytes. */
  buzhash_init (&hash, (const unsigned char *) haystack, search->m);

  for (;; haystack++)
    {
      mask = bloom_mask (hash);

      /*
       * The bloom filter says the current position might match one of the
       * patterns (needles); check every pattern to see whether it really does.
       */
      if ((search->bloom & mask) == mask)
        {
          for (idx = 0; idx < search->len; idx++)
            {
              if (str_pattern_is_match (search->pattern + idx, haystack, hash))
                {
                  *match |= mask;
                  if (return_on_first_match
                      || (search->bloom & *match) == search->bloom)
                    return 1;
                }
            }
        }

      /* The window already ends at the terminator: nothing left to scan. */
      if (haystack[search->m] == '\0')
        return 0;

      /* Presumably rolls the hash forward by one byte -- see buzhash_update. */
      buzhash_update (&hash, (const unsigned char *) haystack, search->m);
    }
}
/*
 * Return the next chunk of the chunker's buffered data as a read-only
 * memoryview, or NULL with a Python exception set.
 *
 * Once c->done is set, StopIteration is raised when bytes_read equals
 * bytes_yielded; otherwise a generic Exception reports the mismatch.
 * NULL is also returned whenever chunker_fill() reports failure
 * (presumably with its own exception already set -- confirm in chunker_fill).
 */
static PyObject *
chunker_process(Chunker *c)
{
    const uint32_t mask = c->chunk_mask;
    const size_t min_len = c->min_size;
    const size_t win = c->window_size;
    /* Bytes wanted in the buffer before cutting; see assert in Chunker init. */
    const size_t wanted = min_len + win + 1;
    uint32_t hash;
    size_t n = 0;
    size_t old_last;

    if (c->done) {
        goto exhausted;
    }

    while (c->remaining < wanted && !c->eof) {
        if (!chunker_fill(c)) {
            return NULL;
        }
    }

    /* Here we are either at eof: everything still buffered is the last chunk ... */
    if (c->eof) {
        c->done = 1;
        if (!c->remaining) {
            goto exhausted;
        }
        c->bytes_yielded += c->remaining;
        return PyMemoryView_FromMemory((char *)(c->data + c->position),
                                       c->remaining, PyBUF_READ);
    }

    /*
     * ... or at least min_len + win + 1 bytes remain. A chunk must not be cut
     * shorter than min_len, and the hash window starts at the potential
     * cutting place, so skip min_len bytes before hashing.
     */
    c->position += min_len;
    c->remaining -= min_len;
    n += min_len;  /* running count only; the final length is recomputed below */

    hash = buzhash(c->data + c->position, win, c->table);
    for (;;) {
        /* Stop when no byte lies beyond the window or the masked hash is zero. */
        if (!(c->remaining > c->window_size) || !(hash & mask)) {
            break;
        }

        /* Slide the window: drop the byte at position, add the one after it. */
        hash = buzhash_update(hash,
                              c->data[c->position],
                              c->data[c->position + win],
                              win, c->table);
        c->position++;
        c->remaining--;
        n++;

        if (c->remaining <= win && !chunker_fill(c)) {
            return NULL;
        }
    }

    /* At most one window left: fold it into the chunk being emitted. */
    if (c->remaining <= win) {
        c->position += c->remaining;
        c->remaining = 0;
    }

    /* The chunk spans from the previous cut point to the current position. */
    old_last = c->last;
    c->last = c->position;
    n = c->last - old_last;
    c->bytes_yielded += n;
    return PyMemoryView_FromMemory((char *)(c->data + old_last), n, PyBUF_READ);

exhausted:
    if (c->bytes_read == c->bytes_yielded) {
        PyErr_SetNone(PyExc_StopIteration);
    } else {
        PyErr_SetString(PyExc_Exception, "chunkifier byte count mismatch");
    }
    return NULL;
}