Example #1
0
/*
 * Produce the next chunk from the chunker's buffer.
 *
 * A hash over window_size bytes starting at c->position is advanced one byte
 * at a time.  Scanning stops when the masked hash (sum & chunk_mask) is zero
 * and at least min_size steps were taken in this call, or when no more than
 * window_size bytes remain.  The bytes between the previous cut (c->last) and
 * the new position form the chunk.
 *
 * Returns the object created by PyBuffer_FromMemory() over the chunk bytes
 * inside c->data, or NULL with an exception pending:
 *   - StopIteration once bytes_read equals bytes_yielded and nothing is left,
 *   - Exception("chunkifier byte count mismatch") if those counters differ,
 *   - NULL is also returned as-is when chunker_fill() reports failure
 *     (presumably with its own exception set -- confirm in chunker_fill).
 */
static PyObject *
chunker_process(Chunker *c)
{
    uint32_t mask = c->chunk_mask;
    uint32_t min_len = c->min_size;
    uint32_t window = c->window_size;
    uint32_t hash;
    int stepped = 0;
    int chunk_start, chunk_len;

    if(c->done)
        goto exhausted;

    /* Not more than one window buffered: ask chunker_fill() for more. */
    if(c->remaining <= window && !chunker_fill(c))
        return NULL;

    /* Still less than a full window: whatever is left is the last chunk. */
    if(c->remaining < window) {
        c->done = 1;
        if(!c->remaining)
            goto exhausted;
        c->bytes_yielded += c->remaining;
        return PyBuffer_FromMemory(c->data + c->position, c->remaining);
    }

    hash = buzhash(c->data + c->position, window, c->table);
    for(;;) {
        /* Stop when there is no byte beyond the current window ... */
        if(c->remaining <= c->window_size)
            break;
        /* ... or when the hash hits a cut point past the minimum length. */
        if(!(hash & mask) && !(stepped < min_len))
            break;

        /* Slide the window: drop the byte at position, add the one after it. */
        hash = buzhash_update(hash, c->data[c->position],
                              c->data[c->position + window],
                              window, c->table);
        c->position++;
        c->remaining--;
        stepped++;

        if(c->remaining <= window && !chunker_fill(c))
            return NULL;
    }

    /* Too little left for another window: the tail joins this chunk. */
    if(c->remaining <= window) {
        c->position += c->remaining;
        c->remaining = 0;
    }

    chunk_start = c->last;
    c->last = c->position;
    chunk_len = c->last - chunk_start;
    c->bytes_yielded += chunk_len;
    return PyBuffer_FromMemory(c->data + chunk_start, chunk_len);

exhausted:
    /* End of iteration; verify that every byte read was also yielded. */
    if(c->bytes_read == c->bytes_yielded)
        PyErr_SetNone(PyExc_StopIteration);
    else
        PyErr_SetString(PyExc_Exception, "chunkifier byte count mismatch");
    return NULL;
}
Example #2
0
/**
 * Scan haystack for the patterns registered in search.
 *
 * *match is reset to 0 first.  For every position whose rolling hash passes
 * the bloom filter test, each pattern is checked with str_pattern_is_match();
 * on a hit the position's bloom mask is OR-ed into *match.
 *
 * Returns 1 as soon as a pattern matches and either return_on_first_match is
 * non-zero or the accumulated *match covers every bit of search->bloom.
 * Returns 0 otherwise: when haystack is shorter than search->m characters, or
 * when the end of haystack is reached (bits may still be set in *match).
 */
static inline int
str_search_find (kk_str_search_t *search, const char *haystack,
    kk_str_match_t *match, int return_on_first_match)
{
  uint64_t mask;
  uint32_t hash;
  size_t idx;

  /* Forget whatever a previous call left behind */
  *match = 0ull;

  /* A haystack with fewer than search->m characters cannot be hashed */
  idx = 0;
  while (idx < search->m) {
    if (haystack[idx] == '\0')
      return 0;
    idx++;
  }

  buzhash_init (&hash, (const unsigned char *) haystack, search->m);
  for (;; haystack++) {
    mask = bloom_mask (hash);

    /**
     * The bloom filter only says that the current position might match one
     * of the patterns (needles), so every pattern is verified explicitly.
     */
    if ((search->bloom & mask) == mask) {
      for (idx = 0; idx < search->len; idx++) {
        if (!str_pattern_is_match (search->pattern + idx, haystack, hash))
          continue;

        *match |= mask;
        if (return_on_first_match
            || (search->bloom & *match) == search->bloom)
          return 1;
      }
    }

    /* No character beyond the current window: the scan is finished */
    if (haystack[search->m] == '\0')
      break;

    /* Update the hash before the loop header advances haystack */
    buzhash_update (&hash, (const unsigned char *) haystack, search->m);
  }

  return 0;
}
Example #3
0
/*
 * Produce the next chunk from the chunker's buffer.
 *
 * The buffer is filled until at least min_size + window_size + 1 bytes are
 * available or eof is flagged.  At eof the remaining bytes (if any) are
 * returned as the final chunk.  Otherwise min_size bytes are skipped, a hash
 * over window_size bytes is advanced one byte at a time until its masked
 * value (sum & chunk_mask) is zero or no more than window_size bytes remain,
 * and the bytes between the previous cut (c->last) and the new position are
 * returned.
 *
 * Returns a read-only (PyBUF_READ) memoryview over the chunk bytes inside
 * c->data, or NULL with an exception pending:
 *   - StopIteration once bytes_read equals bytes_yielded and nothing is left,
 *   - Exception("chunkifier byte count mismatch") if those counters differ,
 *   - NULL is also returned as-is when chunker_fill() reports failure
 *     (presumably with its own exception set -- confirm in chunker_fill).
 */
static PyObject *
chunker_process(Chunker *c)
{
    uint32_t mask = c->chunk_mask;
    uint32_t hash;
    size_t min_len = c->min_size;
    size_t window = c->window_size;
    size_t chunk_start, chunk_len;

    if(c->done)
        goto exhausted;

    /* Buffer enough data for a minimum-size chunk plus one hash window and
     * one extra byte (see assert in Chunker init), unless eof comes first. */
    while(c->remaining < min_len + window + 1 && !c->eof) {
        if(!chunker_fill(c))
            return NULL;
    }

    /* At eof everything that is left becomes the final chunk. */
    if(c->eof) {
        c->done = 1;
        if(!c->remaining)
            goto exhausted;
        c->bytes_yielded += c->remaining;
        return PyMemoryView_FromMemory((char *)(c->data + c->position), c->remaining, PyBUF_READ);
    }

    /* Not at eof, so at least min_size + window_size + 1 bytes are buffered.
     * No chunk may be cut shorter than min_size, and the hash window begins
     * at the candidate cut position, so skip min_size bytes up front. */
    c->position += min_len;
    c->remaining -= min_len;

    hash = buzhash(c->data + c->position, window, c->table);
    for(;;) {
        /* Stop when there is no byte beyond the current window ... */
        if(c->remaining <= c->window_size)
            break;
        /* ... or when the masked hash is zero, which marks a cut point. */
        if(!(hash & mask))
            break;

        /* Slide the window: drop the byte at position, add the one after it. */
        hash = buzhash_update(hash, c->data[c->position],
                              c->data[c->position + window],
                              window, c->table);
        c->position++;
        c->remaining--;

        if(c->remaining <= window && !chunker_fill(c))
            return NULL;
    }

    /* Too little left for another window: the tail joins this chunk. */
    if(c->remaining <= window) {
        c->position += c->remaining;
        c->remaining = 0;
    }

    chunk_start = c->last;
    c->last = c->position;
    chunk_len = c->last - chunk_start;
    c->bytes_yielded += chunk_len;
    return PyMemoryView_FromMemory((char *)(c->data + chunk_start), chunk_len, PyBUF_READ);

exhausted:
    /* End of iteration; verify that every byte read was also yielded. */
    if(c->bytes_read == c->bytes_yielded)
        PyErr_SetNone(PyExc_StopIteration);
    else
        PyErr_SetString(PyExc_Exception, "chunkifier byte count mismatch");
    return NULL;
}