/* This function updates the server trees according to server <srv>'s new
 * state. It should be called when server <srv>'s status changes to up.
 * It is not important whether the server was already down or not. It is not
 * important either that the new state is completely UP (the caller may not
 * know all the variables of a server's state). This function will not change
 * the weight of a server which was already up.
 */
static void fwrr_set_server_status_up(struct server *srv)
{
	struct proxy *p = srv->proxy;
	struct fwrr_group *grp;

	/* nothing to do if neither the state nor the effective weight changed */
	if (srv->state == srv->prev_state &&
	    srv->eweight == srv->prev_eweight)
		return;

	/* new state is not usable: only commit the new state/weight */
	if (!srv_is_usable(srv->state, srv->eweight))
		goto out_update_state;

	if (srv_is_usable(srv->prev_state, srv->prev_eweight))
		/* server was already up */
		goto out_update_backend;

	/* the server transitions down->up: add its weight to its group */
	grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
	grp->next_weight += srv->eweight;

	if (srv->state & SRV_BACKUP) {
		p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
		p->srv_bck++;

		if (!(p->options & PR_O_USE_ALL_BK)) {
			if (!p->lbprm.fbck) {
				/* there was no backup server anymore */
				p->lbprm.fbck = srv;
			} else {
				/* we may have restored a backup server prior to fbck,
				 * in which case it should replace it.
				 */
				struct server *srv2 = srv;

				do {
					srv2 = srv2->next;
				} while (srv2 && (srv2 != p->lbprm.fbck));
				if (srv2)
					p->lbprm.fbck = srv;
			}
		}
	} else {
		p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
		p->srv_act++;
	}

	/* note that eweight cannot be 0 here */
	fwrr_get_srv(srv);
	srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->eweight;
	fwrr_queue_srv(srv);

 out_update_backend:
	/* check/update tot_used, tot_weight */
	update_backend_weight(p);
 out_update_state:
	/* commit the new state so further calls can detect changes */
	srv->prev_state = srv->state;
	srv->prev_eweight = srv->eweight;
}
/* This function updates the server trees according to server <srv>'s new
 * state. It should be called when server <srv>'s status changes to down.
 * It is not important whether the server was already down or not. It is not
 * important either that the new state is completely down (the caller may not
 * know all the variables of a server's state).
 */
static void fwrr_set_server_status_down(struct server *srv)
{
	struct proxy *p = srv->proxy;
	struct fwrr_group *grp;

	/* nothing to do if neither the state nor the effective weight changed */
	if (srv->state == srv->prev_state &&
	    srv->eweight == srv->prev_eweight)
		return;

	/* new state is usable: only commit the new state/weight */
	if (srv_is_usable(srv->state, srv->eweight))
		goto out_update_state;

	if (!srv_is_usable(srv->prev_state, srv->prev_eweight))
		/* server was already down */
		goto out_update_backend;

	/* the server transitions up->down: subtract its previous weight */
	grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
	grp->next_weight -= srv->prev_eweight;

	if (srv->state & SRV_BACKUP) {
		p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
		p->srv_bck--;

		if (srv == p->lbprm.fbck) {
			/* we lost the first backup server in a single-backup
			 * configuration, we must search another one.
			 */
			struct server *srv2 = p->lbprm.fbck;

			do {
				srv2 = srv2->next;
			} while (srv2 &&
				 !((srv2->state & SRV_BACKUP) &&
				   srv_is_usable(srv2->state, srv2->eweight)));
			p->lbprm.fbck = srv2;
		}
	} else {
		p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
		p->srv_act--;
	}

	/* remove the server from both the group queues and the init/next trees */
	fwrr_dequeue_srv(srv);
	fwrr_remove_from_tree(srv);

 out_update_backend:
	/* check/update tot_used, tot_weight */
	update_backend_weight(p);
 out_update_state:
	/* commit the new state so further calls can detect changes */
	srv->prev_state = srv->state;
	srv->prev_eweight = srv->eweight;
}
/* This function is responsible for building the trees in case of fast
 * weighted least-conns. It also sets p->lbprm.wdiv to the eweight to
 * uweight ratio. Both active and backup groups are initialized.
 */
void fas_init_server_tree(struct proxy *p)
{
	struct server *srv;
	struct eb_root init_head = EB_ROOT;

	/* wire the LB callbacks for this algorithm */
	p->lbprm.set_server_status_up   = fas_set_server_status_up;
	p->lbprm.set_server_status_down = fas_set_server_status_down;
	p->lbprm.update_server_eweight  = fas_update_server_weight;
	p->lbprm.server_take_conn = fas_srv_reposition;
	p->lbprm.server_drop_conn = fas_srv_reposition;

	p->lbprm.wdiv = BE_WEIGHT_SCALE;
	for (srv = p->srv; srv; srv = srv->next) {
		/* scale the user weight into the effective weight, rounding up */
		srv->eweight = (srv->uweight * p->lbprm.wdiv + p->lbprm.wmult - 1) / p->lbprm.wmult;
		srv_lb_commit_status(srv);
	}

	recount_servers(p);
	update_backend_weight(p);

	/* both trees start empty */
	p->lbprm.fas.act = init_head;
	p->lbprm.fas.bck = init_head;

	/* queue active and backup servers in two distinct groups */
	for (srv = p->srv; srv; srv = srv->next) {
		if (!srv_is_usable(srv))
			continue;
		srv->lb_tree = (srv->state & SRV_BACKUP) ?
			&p->lbprm.fas.bck : &p->lbprm.fas.act;
		fas_queue_srv(srv);
	}
}
/* This function updates the server trees according to server <srv>'s new
 * state. It should be called when server <srv>'s status changes to down.
 * It is not important whether the server was already down or not. It is not
 * important either that the new state is completely down (the caller may not
 * know all the variables of a server's state).
 */
static void fas_set_server_status_down(struct server *srv)
{
	struct proxy *p = srv->proxy;

	if (!srv_lb_status_changed(srv))
		return;

	/* new state is usable: only commit the new state/weight */
	if (srv_is_usable(srv))
		goto out_update_state;

	if (!srv_was_usable(srv))
		/* server was already down */
		goto out_update_backend;

	if (srv->state & SRV_BACKUP) {
		p->lbprm.tot_wbck -= srv->prev_eweight;
		p->srv_bck--;

		if (srv == p->lbprm.fbck) {
			/* we lost the first backup server in a single-backup
			 * configuration, we must search another one.
			 */
			struct server *srv2 = p->lbprm.fbck;

			do {
				srv2 = srv2->next;
			} while (srv2 &&
				 !((srv2->state & SRV_BACKUP) &&
				   srv_is_usable(srv2)));
			p->lbprm.fbck = srv2;
		}
	} else {
		p->lbprm.tot_wact -= srv->prev_eweight;
		p->srv_act--;
	}

	/* take the server out of its tree */
	fas_dequeue_srv(srv);
	fas_remove_from_tree(srv);

 out_update_backend:
	/* check/update tot_used, tot_weight */
	update_backend_weight(p);
 out_update_state:
	srv_lb_commit_status(srv);
}
/* This function updates the server trees according to server <srv>'s new
 * state. It should be called when server <srv>'s status changes to up.
 * It is not important whether the server was already down or not. It is not
 * important either that the new state is completely UP (the caller may not
 * know all the variables of a server's state). This function will not change
 * the weight of a server which was already up.
 */
static void fas_set_server_status_up(struct server *srv)
{
	struct proxy *p = srv->proxy;

	if (!srv_lb_status_changed(srv))
		return;

	/* new state is not usable: only commit the new state/weight */
	if (!srv_is_usable(srv))
		goto out_update_state;

	if (srv_was_usable(srv))
		/* server was already up */
		goto out_update_backend;

	if (srv->state & SRV_BACKUP) {
		srv->lb_tree = &p->lbprm.fas.bck;
		p->lbprm.tot_wbck += srv->eweight;
		p->srv_bck++;

		if (!(p->options & PR_O_USE_ALL_BK)) {
			if (!p->lbprm.fbck) {
				/* there was no backup server anymore */
				p->lbprm.fbck = srv;
			} else {
				/* we may have restored a backup server prior to fbck,
				 * in which case it should replace it.
				 */
				struct server *srv2 = srv;

				do {
					srv2 = srv2->next;
				} while (srv2 && (srv2 != p->lbprm.fbck));
				if (srv2)
					p->lbprm.fbck = srv;
			}
		}
	} else {
		srv->lb_tree = &p->lbprm.fas.act;
		p->lbprm.tot_wact += srv->eweight;
		p->srv_act++;
	}

	/* note that eweight cannot be 0 here */
	fas_queue_srv(srv);

 out_update_backend:
	/* check/update tot_used, tot_weight */
	update_backend_weight(p);
 out_update_state:
	srv_lb_commit_status(srv);
}
/* This function must be called after an update to server <srv>'s effective * weight. It may be called after a state change too. */ static void fas_update_server_weight(struct server *srv) { int old_state, new_state; struct proxy *p = srv->proxy; if (!srv_lb_status_changed(srv)) return; /* If changing the server's weight changes its state, we simply apply * the procedures we already have for status change. If the state * remains down, the server is not in any tree, so it's as easy as * updating its values. If the state remains up with different weights, * there are some computations to perform to find a new place and * possibly a new tree for this server. */ old_state = srv_was_usable(srv); new_state = srv_is_usable(srv); if (!old_state && !new_state) { srv_lb_commit_status(srv); return; } else if (!old_state && new_state) { fas_set_server_status_up(srv); return; } else if (old_state && !new_state) { fas_set_server_status_down(srv); return; } if (srv->lb_tree) fas_dequeue_srv(srv); if (srv->state & SRV_BACKUP) { p->lbprm.tot_wbck += srv->eweight - srv->prev_eweight; srv->lb_tree = &p->lbprm.fas.bck; } else { p->lbprm.tot_wact += srv->eweight - srv->prev_eweight; srv->lb_tree = &p->lbprm.fas.act; } fas_queue_srv(srv); update_backend_weight(p); srv_lb_commit_status(srv); }
/* This function is responsible for building the weight trees in case of fast
 * weighted round-robin. It also sets p->lbprm.wdiv to the eweight to uweight
 * ratio. Both active and backup groups are initialized.
 */
void fwrr_init_server_groups(struct proxy *p)
{
	struct server *srv;
	struct eb_root init_head = EB_ROOT;

	/* wire the LB callbacks for this algorithm */
	p->lbprm.set_server_status_up   = fwrr_set_server_status_up;
	p->lbprm.set_server_status_down = fwrr_set_server_status_down;
	p->lbprm.update_server_eweight  = fwrr_update_server_weight;

	p->lbprm.wdiv = BE_WEIGHT_SCALE;
	for (srv = p->srv; srv; srv = srv->next) {
		/* scale the user weight and commit the initial state */
		srv->prev_eweight = srv->eweight = srv->uweight * BE_WEIGHT_SCALE;
		srv->prev_state = srv->state;
	}

	recount_servers(p);
	update_backend_weight(p);

	/* prepare the active servers group */
	p->lbprm.fwrr.act.curr_pos = p->lbprm.fwrr.act.curr_weight =
		p->lbprm.fwrr.act.next_weight = p->lbprm.tot_wact;
	p->lbprm.fwrr.act.curr = p->lbprm.fwrr.act.t0 =
		p->lbprm.fwrr.act.t1 = init_head;
	p->lbprm.fwrr.act.init = &p->lbprm.fwrr.act.t0;
	p->lbprm.fwrr.act.next = &p->lbprm.fwrr.act.t1;

	/* prepare the backup servers group */
	p->lbprm.fwrr.bck.curr_pos = p->lbprm.fwrr.bck.curr_weight =
		p->lbprm.fwrr.bck.next_weight = p->lbprm.tot_wbck;
	p->lbprm.fwrr.bck.curr = p->lbprm.fwrr.bck.t0 =
		p->lbprm.fwrr.bck.t1 = init_head;
	p->lbprm.fwrr.bck.init = &p->lbprm.fwrr.bck.t0;
	p->lbprm.fwrr.bck.next = &p->lbprm.fwrr.bck.t1;

	/* queue active and backup servers in two distinct groups */
	for (srv = p->srv; srv; srv = srv->next) {
		if (!srv_is_usable(srv->state, srv->eweight))
			continue;
		fwrr_queue_by_weight((srv->state & SRV_BACKUP) ?
				p->lbprm.fwrr.bck.init :
				p->lbprm.fwrr.act.init,
				srv);
	}
}
/* This function updates the map according to server <srv>'s new state */ static void map_set_server_status_up(struct server *srv) { struct proxy *p = srv->proxy; if (srv->state == srv->prev_state && srv->eweight == srv->prev_eweight) return; if (!srv_is_usable(srv->state, srv->eweight)) goto out_update_state; /* FIXME: could be optimized since we know what changed */ recount_servers(p); update_backend_weight(p); p->lbprm.map.state |= LB_MAP_RECALC; out_update_state: srv->prev_state = srv->state; srv->prev_eweight = srv->eweight; }
/* * This function recounts the number of usable active and backup servers for * proxy <p>. These numbers are returned into the p->srv_act and p->srv_bck. * This function also recomputes the total active and backup weights. However, * it does not update tot_weight nor tot_used. Use update_backend_weight() for * this. */ void recount_servers(struct proxy *px) { struct server *srv; px->srv_act = px->srv_bck = 0; px->lbprm.tot_wact = px->lbprm.tot_wbck = 0; px->lbprm.fbck = NULL; for (srv = px->srv; srv != NULL; srv = srv->next) { if (!srv_is_usable(srv)) continue; if (srv->flags & SRV_F_BACKUP) { if (!px->srv_bck && !(px->options & PR_O_USE_ALL_BK)) px->lbprm.fbck = srv; px->srv_bck++; px->lbprm.tot_wbck += srv->eweight; } else { px->srv_act++; px->lbprm.tot_wact += srv->eweight; } } }
/* queues a server into the appropriate group and tree depending on its
 * backup status, and ->npos. If the server is disabled, simply assign
 * it to the NULL tree.
 */
static void fwrr_queue_srv(struct server *s)
{
	struct proxy *p = s->proxy;
	struct fwrr_group *grp;

	grp = (s->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;

	/* Delay everything which does not fit into the window and everything
	 * which does not fit into the theorical new window.
	 */
	if (!srv_is_usable(s->state, s->eweight)) {
		fwrr_remove_from_tree(s);
	}
	else if (s->eweight <= 0 ||
		 s->npos >= 2 * grp->curr_weight ||
		 s->npos >= grp->curr_weight + grp->next_weight) {
		/* put into next tree, and readjust npos in case we could
		 * finally take this back to current.
		 */
		s->npos -= grp->curr_weight;
		fwrr_queue_by_weight(grp->next, s);
	}
	else {
		/* The sorting key is stored in units of s->npos * user_weight
		 * in order to avoid overflows. As stated in backend.h, the
		 * lower the scale, the rougher the weights modulation, and the
		 * higher the scale, the lower the number of servers without
		 * overflow. With this formula, the result is always positive,
		 * so we can use eb32_insert().
		 */
		s->lb_node.key = SRV_UWGHT_RANGE * s->npos +
			(unsigned)(SRV_EWGHT_MAX + s->rweight - s->eweight) / BE_WEIGHT_SCALE;
		eb32_insert(&grp->curr, &s->lb_node);
		s->lb_tree = &grp->curr;
	}
}
/* Assigns a target to session <s>, storing it into s->target. Depending on
 * the backend's configuration this is either a server picked by the LB
 * algorithm, the backend itself (dispatch/transparent/HTTP proxy modes), or
 * nothing. Returns one of the SRV_STATUS_* codes: OK on success, NOSRV when
 * no server is available, FULL when all servers are saturated, INTERNAL on
 * unexpected conditions. On success SN_ASSIGNED is set in s->flags.
 * NOTE(review): this must not be called when s->pend_pos is set or when
 * SN_ASSIGNED is already present (guarded below).
 */
int assign_server(struct session *s)
{
	struct connection *conn;
	struct server *conn_slot;
	struct server *srv, *prev_srv;
	int err;

	DPRINTF(stderr,"assign_server : s=%p\n",s);

	err = SRV_STATUS_INTERNAL;
	if (unlikely(s->pend_pos || s->flags & SN_ASSIGNED))
		goto out_err;

	prev_srv  = objt_server(s->target);
	conn_slot = s->srv_conn;

	/* We have to release any connection slot before applying any LB algo,
	 * otherwise we may erroneously end up with no available slot.
	 */
	if (conn_slot)
		sess_change_server(s, NULL);

	/* We will now try to find the good server and store it into <objt_server(s->target)>.
	 * Note that <objt_server(s->target)> may be NULL in case of dispatch or proxy mode,
	 * as well as if no server is available (check error code).
	 */

	srv = NULL;
	s->target = NULL;
	conn = objt_conn(s->si[1].end);

	if (conn &&
	    (conn->flags & CO_FL_CONNECTED) &&
	    objt_server(conn->target) && __objt_server(conn->target)->proxy == s->be &&
	    ((s->txn.flags & TX_PREFER_LAST) ||
	     ((s->be->options & PR_O_PREF_LAST) &&
	      (!s->be->max_ka_queue ||
	       server_has_room(__objt_server(conn->target)) ||
	       (__objt_server(conn->target)->nbpend + 1) < s->be->max_ka_queue))) &&
	    srv_is_usable(__objt_server(conn->target))) {
		/* This session was relying on a server in a previous request
		 * and the proxy has "option prefer-last-server" set, so
		 * let's try to reuse the same server.
		 */
		srv = __objt_server(conn->target);
		s->target = &srv->obj_type;
	}
	else if (s->be->lbprm.algo & BE_LB_KIND) {
		/* we must check if we have at least one server available */
		if (!s->be->lbprm.tot_weight) {
			err = SRV_STATUS_NOSRV;
			goto out;
		}

		/* First check whether we need to fetch some data or simply call
		 * the LB lookup function. Only the hashing functions will need
		 * some input data in fact, and will support multiple algorithms.
		 */
		switch (s->be->lbprm.algo & BE_LB_LKUP) {
		case BE_LB_LKUP_RRTREE:
			srv = fwrr_get_next_server(s->be, prev_srv);
			break;

		case BE_LB_LKUP_FSTREE:
			srv = fas_get_next_server(s->be, prev_srv);
			break;

		case BE_LB_LKUP_LCTREE:
			srv = fwlc_get_next_server(s->be, prev_srv);
			break;

		case BE_LB_LKUP_CHTREE:
		case BE_LB_LKUP_MAP:
			if ((s->be->lbprm.algo & BE_LB_KIND) == BE_LB_KIND_RR) {
				/* plain round robin over the consistent-hash
				 * tree or the static map */
				if (s->be->lbprm.algo & BE_LB_LKUP_CHTREE)
					srv = chash_get_next_server(s->be, prev_srv);
				else
					srv = map_get_server_rr(s->be, prev_srv);
				break;
			}
			else if ((s->be->lbprm.algo & BE_LB_KIND) != BE_LB_KIND_HI) {
				/* unknown balancing algorithm */
				err = SRV_STATUS_INTERNAL;
				goto out;
			}

			/* hash-based lookup: fetch the hashing input first */
			switch (s->be->lbprm.algo & BE_LB_PARM) {
			case BE_LB_HASH_SRC:
				/* source IP hashing */
				conn = objt_conn(s->si[0].end);
				if (conn && conn->addr.from.ss_family == AF_INET) {
					srv = get_server_sh(s->be,
							    (void *)&((struct sockaddr_in *)&conn->addr.from)->sin_addr,
							    4);
				}
				else if (conn && conn->addr.from.ss_family == AF_INET6) {
					srv = get_server_sh(s->be,
							    (void *)&((struct sockaddr_in6 *)&conn->addr.from)->sin6_addr,
							    16);
				}
				else {
					/* unknown IP family */
					err = SRV_STATUS_INTERNAL;
					goto out;
				}
				break;

			case BE_LB_HASH_URI:
				/* URI hashing */
				if (s->txn.req.msg_state < HTTP_MSG_BODY)
					break;
				srv = get_server_uh(s->be,
						    b_ptr(s->req.buf, -http_uri_rewind(&s->txn.req)),
						    s->txn.req.sl.rq.u_l);
				break;

			case BE_LB_HASH_PRM:
				/* URL Parameter hashing */
				if (s->txn.req.msg_state < HTTP_MSG_BODY)
					break;

				srv = get_server_ph(s->be,
						    b_ptr(s->req.buf, -http_uri_rewind(&s->txn.req)),
						    s->txn.req.sl.rq.u_l);

				/* the parameter may live in the POST body */
				if (!srv && s->txn.meth == HTTP_METH_POST)
					srv = get_server_ph_post(s);
				break;

			case BE_LB_HASH_HDR:
				/* Header Parameter hashing */
				if (s->txn.req.msg_state < HTTP_MSG_BODY)
					break;
				srv = get_server_hh(s);
				break;

			case BE_LB_HASH_RDP:
				/* RDP Cookie hashing */
				srv = get_server_rch(s);
				break;

			default:
				/* unknown balancing algorithm */
				err = SRV_STATUS_INTERNAL;
				goto out;
			}

			/* If the hashing parameter was not found, let's fall
			 * back to round robin on the map.
			 */
			if (!srv) {
				if (s->be->lbprm.algo & BE_LB_LKUP_CHTREE)
					srv = chash_get_next_server(s->be, prev_srv);
				else
					srv = map_get_server_rr(s->be, prev_srv);
			}

			/* end of map-based LB */
			break;

		default:
			/* unknown balancing algorithm */
			err = SRV_STATUS_INTERNAL;
			goto out;
		}

		if (!srv) {
			err = SRV_STATUS_FULL;
			goto out;
		}
		else if (srv != prev_srv) {
			/* the LB decision changed the server: count it */
			s->be->be_counters.cum_lbconn++;
			srv->counters.cum_lbconn++;
		}
		s->target = &srv->obj_type;
	}
	else if (s->be->options & (PR_O_DISPATCH | PR_O_TRANSP)) {
		s->target = &s->be->obj_type;
	}
	else if ((s->be->options & PR_O_HTTP_PROXY) &&
		 (conn = objt_conn(s->si[1].end)) &&
		 is_addr(&conn->addr.to)) {
		/* in proxy mode, we need a valid destination address */
		s->target = &s->be->obj_type;
	}
	else {
		err = SRV_STATUS_NOSRV;
		goto out;
	}

	s->flags |= SN_ASSIGNED;
	err = SRV_STATUS_OK;
 out:

	/* Either we take back our connection slot, or we offer it to someone
	 * else if we don't need it anymore.
	 */
	if (conn_slot) {
		if (conn_slot == srv) {
			sess_change_server(s, srv);
		} else {
			if (may_dequeue_tasks(conn_slot, s->be))
				process_srv_queue(conn_slot);
		}
	}

 out_err:
	return err;
}
/* This function must be called after an update to server <srv>'s effective
 * weight. It may be called after a state change too.
 */
static void fwrr_update_server_weight(struct server *srv)
{
	int old_state, new_state;
	struct proxy *p = srv->proxy;
	struct fwrr_group *grp;

	/* nothing to do if neither the state nor the effective weight changed */
	if (srv->state == srv->prev_state &&
	    srv->eweight == srv->prev_eweight)
		return;

	/* If changing the server's weight changes its state, we simply apply
	 * the procedures we already have for status change. If the state
	 * remains down, the server is not in any tree, so it's as easy as
	 * updating its values. If the state remains up with different weights,
	 * there are some computations to perform to find a new place and
	 * possibly a new tree for this server.
	 */
	old_state = srv_is_usable(srv->prev_state, srv->prev_eweight);
	new_state = srv_is_usable(srv->state, srv->eweight);

	if (!old_state && !new_state) {
		/* still down: just commit the new values */
		srv->prev_state = srv->state;
		srv->prev_eweight = srv->eweight;
		return;
	}
	else if (!old_state && new_state) {
		fwrr_set_server_status_up(srv);
		return;
	}
	else if (old_state && !new_state) {
		fwrr_set_server_status_down(srv);
		return;
	}

	/* server stays up with a different weight: adjust the group and the
	 * proxy-wide totals by the weight delta.
	 */
	grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
	grp->next_weight = grp->next_weight - srv->prev_eweight + srv->eweight;

	p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
	p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;

	if (srv->lb_tree == grp->init) {
		/* still in the init tree: requeue it by its new weight */
		fwrr_dequeue_srv(srv);
		fwrr_queue_by_weight(grp->init, srv);
	}
	else if (!srv->lb_tree) {
		/* FIXME: server was down. This is not possible right now but
		 * may be needed soon for slowstart or graceful shutdown.
		 */
		fwrr_dequeue_srv(srv);
		fwrr_get_srv(srv);
		srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->eweight;
		fwrr_queue_srv(srv);
	} else {
		/* The server is either active or in the next queue. If it's
		 * still in the active queue and it has not consumed all of its
		 * places, let's adjust its next position.
		 */
		fwrr_get_srv(srv);

		if (srv->eweight > 0) {
			int prev_next = srv->npos;
			int step = grp->next_weight / srv->eweight;

			srv->npos = srv->lpos + step;
			srv->rweight = 0;

			/* never move the next position backwards, and keep it
			 * ahead of the current position */
			if (srv->npos > prev_next)
				srv->npos = prev_next;
			if (srv->npos < grp->curr_pos + 2)
				srv->npos = grp->curr_pos + step;
		} else {
			/* push it into the next tree */
			srv->npos = grp->curr_pos + grp->curr_weight;
		}

		fwrr_dequeue_srv(srv);
		fwrr_queue_srv(srv);
	}

	update_backend_weight(p);
	/* commit the new state so further calls can detect changes */
	srv->prev_state = srv->state;
	srv->prev_eweight = srv->eweight;
}