/*!
 * \brief Set lock granularity for the RRL table.
 *
 * Initializes the table-wide lookup lock 'll' and an array of 'granularity'
 * granular bucket locks.  Must be called before any locks are in use
 * (asserted), and granularity must not exceed size/10 due to integer
 * division error in lock assignment.
 *
 * \param rrl          RRL table (must not be NULL).
 * \param granularity  Number of granular locks to create.
 *
 * \retval KNOT_EOK     on success.
 * \retval KNOT_EINVAL  if rrl is NULL.
 * \retval KNOT_ENOMEM  if a mutex or the lock array cannot be created.
 * \retval KNOT_ERROR   if only some granular locks could be initialized
 *                      (everything is rolled back in that case).
 */
int rrl_setlocks(rrl_table_t *rrl, unsigned granularity)
{
	if (!rrl) return KNOT_EINVAL;
	assert(!rrl->lk); /* Cannot change while locks are used. */
	assert(granularity <= rrl->size / 10); /* Due to int. division err. */

	/* pthread_mutex_init() returns 0 on success and a *positive* errno
	 * value on failure, so the failure test must be != 0, not < 0
	 * (the original '< 0' could never fire). */
	if (pthread_mutex_init(&rrl->ll, NULL) != 0) {
		return KNOT_ENOMEM;
	}

	/* Alloc new locks (calloc zero-initializes and checks the
	 * count*size multiplication for overflow). */
	rrl->lk = calloc(granularity, sizeof(pthread_mutex_t));
	if (!rrl->lk) {
		pthread_mutex_destroy(&rrl->ll); /* Don't leak the lookup lock. */
		return KNOT_ENOMEM;
	}

	/* Initialize. */
	for (size_t i = 0; i < granularity; ++i) {
		if (pthread_mutex_init(rrl->lk + i, NULL) != 0) break;
		++rrl->lk_count;
	}

	/* Incomplete initialization: roll back everything.  NULL out
	 * rrl->lk so a later rrl_destroy() cannot double-free it. */
	if (rrl->lk_count != granularity) {
		for (size_t i = 0; i < rrl->lk_count; ++i) {
			pthread_mutex_destroy(rrl->lk + i);
		}
		free(rrl->lk);
		rrl->lk = NULL;
		rrl->lk_count = 0;
		pthread_mutex_destroy(&rrl->ll);
		dbg_rrl("%s: failed to init locks\n", __func__);
		return KNOT_ERROR;
	}

	dbg_rrl("%s: set granularity to '%u'\n", __func__, granularity);
	return KNOT_EOK;
}
/*!
 * \brief Shorten the distance between a free bucket [f] and home bucket [id]
 *        by one hopscotch displacement.
 *
 * Scans backwards from the free slot for a bucket whose occupant may legally
 * move into [f] (its hop offset stays within HOP_LEN of its own home), moves
 * that occupant, and returns the reduced distance.  If no relocation is
 * possible, the home bucket itself is forcibly vacated.
 *
 * \param t   RRL table.
 * \param id  Home bucket index of the item being inserted.
 * \param d   Current distance of the free bucket from [id].
 * \param f   In/out: index of the free bucket; updated to the newly
 *            vacated slot.
 *
 * \return New (smaller) distance, or 0 when the home bucket was vacated.
 */
static inline unsigned reduce_dist(rrl_table_t *t, unsigned id, unsigned d, unsigned *f)
{
	unsigned rd = HOP_LEN - 1; /* try the farthest legal donor first */
	while (rd > 0) {
		unsigned s = (t->size + *f - rd) % t->size; /* bucket to be vacated */
		unsigned o = __builtin_ctz(t->arr[s].hop); /* offset of first valid bucket */
		/* NOTE(review): __builtin_ctz(0) is undefined; safe here only
		 * because 'o' is used after confirming hop != 0 below. */
		if (t->arr[s].hop != 0 && o < rd) { /* only offsets in <s, f> are interesting */
			unsigned e = (s + o) % t->size; /* this item will be displaced to [f] */
			unsigned keep_hop = t->arr[*f].hop; /* preserve [f]'s own hop bitmap across the copy */
			memcpy(t->arr + *f, t->arr + e, sizeof(rrl_item_t));
			t->arr[*f].hop = keep_hop;
			t->arr[e].cls = CLS_NULL;          /* mark the donor slot empty */
			t->arr[s].hop &= ~(1<<o);          /* occupant left offset o ...  */
			t->arr[s].hop |= 1<<rd;            /* ... and now lives at offset rd */
			*f = e;
			return d - (rd - o);               /* distance shrank by the hop */
		}
		--rd;
	}

	/* No legal relocation found — evict the home bucket itself. */
	assert(rd == 0); /* this happens with p=1/fact(HOP_LEN) */
	*f = id;
	d = 0; /* force vacate initial element */
	dbg_rrl("%s: no potential relocation, freeing bucket %u\n", __func__, id);
	return d;
}
/*!
 * \brief Release an RRL table and all locks it owns.
 *
 * Safe to call with NULL.  The table-wide lock is only destroyed when
 * granular locks were configured (lk_count > 0), matching how it is
 * created in rrl_setlocks().
 *
 * \param rrl  Table to destroy (may be NULL).
 *
 * \return KNOT_EOK always.
 */
int rrl_destroy(rrl_table_t *rrl)
{
	if (rrl != NULL) {
		dbg_rrl("%s: freeing table %p\n", __func__, rrl);
		if (rrl->lk_count > 0) {
			pthread_mutex_destroy(&rrl->ll);
		}
		size_t lk = 0;
		while (lk < rrl->lk_count) {
			pthread_mutex_destroy(rrl->lk + lk);
			++lk;
		}
		free(rrl->lk); /* free(NULL) is a no-op */
	}
	free(rrl);
	return KNOT_EOK;
}
/*!
 * \brief Create an RRL table with 'size' buckets.
 *
 * The header is zeroed here; the bucket array is zeroed by the initial
 * rrl_reseed() call (which also picks a random hash seed).  Granular
 * locks are NOT created — call rrl_setlocks() separately if needed.
 *
 * \param size  Number of buckets (must be > 0).
 *
 * \return New table, or NULL on bad size, size overflow, or allocation
 *         failure.  Caller owns the result and frees it with rrl_destroy().
 */
rrl_table_t *rrl_create(size_t size)
{
	if (size == 0) {
		return NULL;
	}

	/* Guard the length computation against size_t overflow, which
	 * would otherwise under-allocate the bucket array. */
	if (size > (SIZE_MAX - sizeof(rrl_table_t)) / sizeof(rrl_item_t)) {
		return NULL;
	}

	const size_t tbl_len = sizeof(rrl_table_t) + size * sizeof(rrl_item_t);
	rrl_table_t *t = malloc(tbl_len);
	if (!t) return NULL;
	memset(t, 0, sizeof(rrl_table_t)); /* buckets are wiped by rrl_reseed() */
	t->size = size;
	rrl_reseed(t);
	dbg_rrl("%s: created table size '%zu'\n", __func__, t->size);
	return t;
}
/*!
 * \brief Find the distance from bucket [i] to the nearest free bucket.
 *
 * Scans forward from [i] to the end of the table, then wraps around and
 * scans from the start back up to [i].
 *
 * \param t    RRL table.
 * \param i    Starting bucket index.
 * \param now  Current timestamp for bucket expiry checks.
 *
 * \return Distance to the first free bucket, or 'i' when the table is
 *         completely full (caller force-vacates the current element).
 */
static int find_free(rrl_table_t *t, unsigned i, uint32_t now)
{
	/* Forward scan: <i, size). */
	for (size_t j = i; j < t->size; ++j) {
		if (bucket_free(t->arr + j, now)) {
			return j - i;
		}
	}

	/* Wrapped scan: <0, i). */
	for (size_t j = 0; j < i; ++j) {
		if (bucket_free(t->arr + j, now)) {
			return j + (t->size - i);
		}
	}

	/* this happens if table is full... force vacate current elm */
	dbg_rrl("%s: out of free buckets, freeing bucket %u\n", __func__, i);
	return i;
}
/*!
 * \brief Wipe all buckets and pick a fresh random hash seed.
 *
 * Takes the table-wide lock plus every granular lock (when configured)
 * so no concurrent lookup can observe the half-reset state.
 *
 * \param rrl  RRL table.
 *
 * \return KNOT_EOK always.
 */
int rrl_reseed(rrl_table_t *rrl)
{
	/* Lock entire table. */
	const unsigned nlocks = rrl->lk_count;
	if (nlocks > 0) {
		pthread_mutex_lock(&rrl->ll);
		for (unsigned lk = 0; lk < nlocks; ++lk) {
			rrl_lock(rrl, lk);
		}
	}

	memset(rrl->arr, 0, rrl->size * sizeof(rrl_item_t));
	rrl->seed = dnssec_random_uint32_t();
	dbg_rrl("%s: reseed to '%u'\n", __func__, rrl->seed);

	/* Release granular locks (reverse order), then the table lock. */
	if (nlocks > 0) {
		unsigned lk = nlocks;
		while (lk-- > 0) {
			rrl_unlock(rrl, lk);
		}
		pthread_mutex_unlock(&rrl->ll);
	}

	return KNOT_EOK;
}
/*!
 * \brief Serialize the classification name (owner or zone name) into 'dst'.
 *
 * For error/NXDOMAIN/wildcard classes the zone name (or the 1-byte root
 * fallback "\x00") is used; otherwise the query name is used when present.
 *
 * \param dst     Output buffer.
 * \param maxlen  Capacity of dst.
 * \param cls     Classification (CLS_* constant).
 * \param p       Request descriptor (qst->qname may be read).
 * \param z       Associated zone, or NULL.
 *
 * \return Number of bytes written (> 0), or KNOT_ESPACE if the name does
 *         not fit.
 */
static int rrl_clsname(char *dst, size_t maxlen, uint8_t cls, rrl_req_t *p, const knot_zone_t *z)
{
	const knot_dname_t *dn = NULL;
	const uint8_t *n = (const uint8_t*)"\x00"; /* Fallback zone (for errors etc.) */
	int nb = 1;
	if (z) { /* Found associated zone. */
		dn = knot_zone_name(z);
	}
	switch (cls) {
	case CLS_ERROR:    /* Could be a non-existent zone or garbage. */
	case CLS_NXDOMAIN: /* Queries to non-existent names in zone. */
	case CLS_WILDCARD: /* Queries to names covered by a wildcard. */
		dbg_rrl_verb("%s: using zone/fallback name\n", __func__);
		break;
	default:
		if (p->qst) dn = p->qst->qname;
		break;
	}

	if (dn) { /* Use the resolved dname instead of the fallback. */
		n = knot_dname_name(dn);
		nb = (int)knot_dname_size(dn);
	}

	/* Write to wire.  Cast nb (always positive here) to avoid a
	 * signed/unsigned comparison against size_t maxlen. */
	if ((size_t)nb > maxlen) {
		return KNOT_ESPACE;
	}
	/* memcpy() returns its dst argument and can never be NULL, so the
	 * former '== NULL' error branch was dead code and is removed. */
	memcpy(dst, n, nb);
	return nb;
}
/*!
 * \brief Query the RRL table for one incoming packet (token-bucket check).
 *
 * Hashes the packet to a bucket, refills tokens for the elapsed time,
 * and consumes one token.  Logs state transitions in/out of the rate
 * limit via rrl_log_state().
 *
 * \param rrl   RRL table.
 * \param a     Source address of the query.
 * \param req   Request descriptor.
 * \param zone  Associated zone (may be NULL).
 *
 * \retval KNOT_EOK     query is within the rate limit.
 * \retval KNOT_ELIMIT  bucket is empty — query should be limited.
 * \retval KNOT_EINVAL  on NULL rrl/req/a.
 * \retval KNOT_ERROR   when the bucket could not be computed.
 */
int rrl_query(rrl_table_t *rrl, const struct sockaddr_storage *a, rrl_req_t *req, const zone_t *zone)
{
	if (!rrl || !req || !a) return KNOT_EINVAL;

	/* Calculate hash and fetch */
	int ret = KNOT_EOK;
	int lock = -1; /* -1 = no granular lock held; rrl_hash may set it */
	uint32_t now = time(NULL);
	rrl_item_t *b = rrl_hash(rrl, a, req, zone, now, &lock);
	if (!b) {
		dbg_rrl("%s: failed to compute bucket from packet\n", __func__);
		if (lock > -1) rrl_unlock(rrl, lock);
		return KNOT_ERROR;
	}

	/* Calculate rate for dT (seconds since last visit, clamped so the
	 * refill below cannot exceed one full bucket). */
	uint32_t dt = now - b->time;
	if (dt > RRL_CAPACITY) {
		dt = RRL_CAPACITY;
	}

	/* Visit bucket. */
	b->time = now;
	dbg_rrl("%s: bucket=0x%x tokens=%hu flags=%x dt=%u\n",
	        __func__, (unsigned)(b - rrl->arr), b->ntok, b->flags, dt);
	if (dt > 0) { /* Window moved. */

		/* Check state change: leaving the limited state is logged. */
		if ((b->ntok > 0 || dt > 1) && (b->flags & RRL_BF_ELIMIT)) {
			b->flags &= ~RRL_BF_ELIMIT;
			rrl_log_state(a, b->flags, b->cls);
		}

		/* Add new tokens (rate per second of elapsed time). */
		uint32_t dn = rrl->rate * dt;
		if (b->flags & RRL_BF_SSTART) { /* Bucket in slow-start. */
			b->flags &= ~RRL_BF_SSTART;
			dbg_rrl("%s: bucket '0x%x' slow-start finished\n",
			        __func__, (unsigned)(b - rrl->arr));
		}
		b->ntok += dn;
		if (b->ntok > RRL_CAPACITY * rrl->rate) { /* cap at full bucket */
			b->ntok = RRL_CAPACITY * rrl->rate;
		}
	}

	/* Last item taken — entering the limited state is logged. */
	if (b->ntok == 1 && !(b->flags & RRL_BF_ELIMIT)) {
		b->flags |= RRL_BF_ELIMIT;
		rrl_log_state(a, b->flags, b->cls);
	}

	/* Decay current bucket: consume one token, or report the limit. */
	if (b->ntok > 0) {
		--b->ntok;
	} else if (b->ntok == 0) {
		ret = KNOT_ELIMIT;
	}

	if (lock > -1) rrl_unlock(rrl, lock);
	return ret;
}
/*!
 * \brief Classify a packet and locate (or claim) its bucket in the table.
 *
 * Uses hopscotch hashing: an exact match is searched within
 * <id, id + HOP_LEN); otherwise a free bucket is found and relocated
 * into the neighborhood.  On success the bucket's granular lock is held
 * and its index stored in *lock — the caller must rrl_unlock() it.
 *
 * \param t      RRL table.
 * \param a      Source address.
 * \param p      Request descriptor.
 * \param zone   Associated zone (may be NULL).
 * \param stamp  Current timestamp.
 * \param lock   Out: granular lock index held, or left at -1 when no
 *               granular locks are configured.
 *
 * \return Bucket for this classification, or NULL if classification failed.
 */
rrl_item_t* rrl_hash(rrl_table_t *t, const struct sockaddr_storage *a, rrl_req_t *p, const zone_t *zone, uint32_t stamp, int *lock)
{
	char buf[RRL_CLSBLK_MAXLEN];
	int len = rrl_classify(buf, sizeof(buf), a, p, zone, t->seed);
	if (len < 0) {
		return NULL;
	}

	uint32_t id = hash(buf, len) % t->size;

	/* Lock for lookup. */
	pthread_mutex_lock(&t->ll);

	/* Find an exact match in <id, id + HOP_LEN).
	 * NOTE(review): the casts below read the class block at unaligned
	 * offsets and through incompatible pointer types — relies on the
	 * platform tolerating this; verify buf layout against rrl_classify(). */
	uint16_t *qname = (uint16_t*)(buf + sizeof(uint8_t) + sizeof(uint64_t));
	rrl_item_t match = {
		0, *((uint64_t*)(buf + 1)), t->rate,    /* hop, netblk, ntok */
		buf[0], RRL_BF_NULL,                    /* cls, flags */
		hash((char*)(qname + 1), *qname), stamp /* qname, time */
	};
	unsigned d = find_match(t, id, &match);
	if (d > HOP_LEN) { /* not an exact match, find free element [f] */
		d = find_free(t, id, stamp);
	}

	/* Reduce distance to fit <id, id + HOP_LEN) */
	unsigned f = (id + d) % t->size;
	while (d >= HOP_LEN) {
		d = reduce_dist(t, id, d, &f);
	}

	/* Assign granular lock and unlock lookup.
	 * Guard against modulo-by-zero (undefined behavior) when no granular
	 * locks were configured, i.e. rrl_setlocks() was never called:
	 * *lock stays -1 and the caller skips rrl_unlock(). */
	if (t->lk_count > 0) {
		*lock = f % t->lk_count;
		rrl_lock(t, *lock);
	}
	pthread_mutex_unlock(&t->ll);

	/* found free elm 'k' which is in <id, id + HOP_LEN) */
	t->arr[id].hop |= (1 << d);
	rrl_item_t* b = t->arr + f;
	assert(f == (id+d) % t->size);
	dbg_rrl("%s: classified pkt as %4x '%u+%u' bucket=%p \n", __func__, f, id, d, b);

	/* Inspect bucket state: claim an empty bucket, preserving its
	 * hop bitmap (which describes neighbors, not this item). */
	unsigned hop = b->hop;
	if (b->cls == CLS_NULL) {
		memcpy(b, &match, sizeof(rrl_item_t));
		b->hop = hop;
	}

	/* Check for collisions: a different flow hashed here — evict it and
	 * enter slow-start, unless it is already in slow-start. */
	if (!bucket_match(b, &match)) {
		dbg_rrl("%s: collision in bucket '%4x'\n", __func__, id);
		if (!(b->flags & RRL_BF_SSTART)) {
			memcpy(b, &match, sizeof(rrl_item_t));
			b->hop = hop;
			b->ntok = t->rate + t->rate / RRL_SSTART;
			b->flags |= RRL_BF_SSTART;
			dbg_rrl("%s: bucket '%4x' slow-start\n", __func__, id);
		}
	}
	return b;
}