/* Queue a timer that is not currently pending on the timer wheels.
 *
 * \param netif  stack whose scheduler receives the timer
 * \param ts     timer to queue; its expiry time and list link are written
 * \param t      absolute expiry time; asserted to be later than the
 *               scheduler's current tick
 */
void __ci_ip_timer_set(ci_netif *netif, ci_ip_timer *ts, ci_iptime_t t)
{
  ci_iptime_t stime = IPTIMER_STATE(netif)->sched_ticks;
  ci_iptime_t differing_bits;
  ci_ni_dllist_t* bucket;
  int w;

  ci_assert(TIME_GT(t, stime));

  /* The expiry is stored as an absolute time, not a delta. */
  ts->time = t;

  /* Pull the cached "closest timer" estimate forward if this one is sooner. */
  if( TIME_LT(t, IPTIMER_STATE(netif)->closest_timer) )
    IPTIMER_STATE(netif)->closest_timer = t;

  /* The wheels behave like the hands of a clock, so the wheel has to be
   * picked from the absolute expiry time rather than from the delay until
   * the timer fires (an earlier version of this code got that wrong).
   *
   * Two times agree under a mask exactly when their XOR has no bit set
   * under that mask, so test the masks from the widest prefix down.
   */
  differing_bits = stime ^ t;
  if( (differing_bits & WHEEL0_MASK) == 0 )
    w = 0;        /* top three wheels' digits match: lowest wheel */
  else if( (differing_bits & WHEEL1_MASK) == 0 )
    w = 1;        /* top two wheels' digits match */
  else if( (differing_bits & WHEEL2_MASK) == 0 )
    w = 2;        /* only the top wheel's digit matches */
  else
    w = 3;

  bucket = BUCKET(netif, w, t);

  LOG_ITV(log("%s: delta=0x%x (t=0x%x-s=0x%x), w=0x%x, b=0x%x",
              __FUNCTION__, ts->time-stime, ts->time, stime, w,
              BUCKETNO(w, ts->time)));

  /* Timers are appended to the tail of their bucket.  The resulting order
   * is not stable: a timer inserted later with a smaller relative time can
   * end up ahead of one inserted earlier with a larger relative time.
   * That is accepted as harmless.
   */
  ci_ni_dllist_push_tail(netif, bucket, &ts->link);

  ci_assert(ci_ip_timer_is_link_valid(netif, ts));
  DETAILED_CHECK_TIMERS(netif);
}
/* Log a dump of every timer wheel for debugging.
 *
 * Logs the scheduler's current time, then walks each bucket of each wheel.
 * For every bucket it computes the time range [min_time, max_time) that the
 * bucket covers, logs each timer found there, and logs a complaint when:
 *   - a bucket whose range starts at or before the current time is not empty;
 *   - a timer's expiry is not after the current time;
 *   - a timer's expiry lies outside its bucket's range.
 * Nothing is modified; problems are only reported through ci_log().
 *
 * \param ni  stack whose timer state is dumped
 */
void ci_ip_timer_state_dump(ci_netif* ni)
{
  ci_ip_timer_state* ipts;
  ci_ip_timer* ts;
  ci_ni_dllist_t* bucket;
  ci_ni_dllist_link* l;
  ci_iptime_t stime, wheel_base, max_time, min_time;
  int w, b, bit_shift;

  /* Shifting a 32 bit integer left or right by 32 bits has undefined
   * results (i.e. not the 0 that is required), so the per-wheel masks come
   * from a table instead of being computed by shifting.
   */
  unsigned wheel_mask[CI_IPTIME_WHEELS] =
    { WHEEL0_MASK, WHEEL1_MASK, WHEEL2_MASK, 0 };

  ipts = IPTIMER_STATE(ni);
  stime = ipts->sched_ticks;

  ci_log("%s: time is 0x%x", __FUNCTION__, stime);

  /* for each wheel */
  for( w = 0; w < CI_IPTIME_WHEELS; w++ ) {
    /* base time of wheel */
    wheel_base = stime & wheel_mask[w];
    /* width (in bits) of one bucket of this wheel; same for all buckets */
    bit_shift = CI_IPTIME_BUCKETBITS * w;

    /* for each bucket in wheel */
    for( b = 0; b < CI_IPTIME_BUCKETS; b++ ) {
      /* Time range covered by this bucket.  The shifts are done on
       * unsigned operands: for the higher wheels the shifted value may not
       * be representable in a signed int, and overflowing a signed left
       * shift is undefined behaviour.
       */
      min_time = wheel_base + ((unsigned)b << bit_shift);
      max_time = min_time + (1u << bit_shift);

      bucket = &ipts->warray[w*CI_IPTIME_BUCKETS + b];

      /* check buckets that should be empty are! */
      if( TIME_LE(min_time, stime) && !ci_ni_dllist_is_empty(ni, bucket) )
        ci_log("w:%d, b:%d, [0x%x->0x%x] - bucket should be empty",
               w, b, min_time, max_time);

      /* run through timers in bucket */
      for( l = ci_ni_dllist_start(ni, bucket);
           l != ci_ni_dllist_end(ni, bucket);
           ci_ni_dllist_iter(ni, l) ) {
        /* get timer */
        ts = LINK2TIMER(l);
        ci_log(" ts = 0x%x %s w:%d, b:%d, [0x%x->0x%x]",
               ts->time, ci_ip_timer_dump(ts), w, b, min_time, max_time);

        if( TIME_LE(ts->time, stime) )
          ci_log(" ERROR: timer before current time");
        if( !(TIME_LT(ts->time, max_time) && TIME_GE(ts->time, min_time)) )
          ci_log(" ERROR: timer in wrong bucket");
      }
    }
  }
  ci_log("----------------------");
}
/* Run the handler for a timer that has just expired.
 *
 * The handler is chosen by ts->fn.  For most timer kinds ts->param1 is
 * converted to the socket the handler acts on.  Unknown codes are logged
 * and, via CI_DEBUG(), passed to ci_fail_stop_fn().
 *
 * \param netif  stack that owns the timer
 * \param ts     expired timer; asserted to have an expiry equal to the
 *               scheduler's current tick and not later than the current time
 */
static void ci_ip_timer_docallback(ci_netif *netif, ci_ip_timer* ts)
{
  ci_assert( TIME_LE(ts->time, ci_ip_time_now(netif)) );
  ci_assert( ts->time == IPTIMER_STATE(netif)->sched_ticks );

  switch( ts->fn ) {

  /* ---- TCP timers whose socket is checked before the handler runs ---- */
  case CI_IP_TIMER_TCP_RTO:
    CHECK_TS(netif, SP_TO_TCP(netif, ts->param1));
    ci_tcp_timeout_rto(netif, SP_TO_TCP(netif, ts->param1));
    break;

  case CI_IP_TIMER_TCP_DELACK:
    CHECK_TS(netif, SP_TO_TCP(netif, ts->param1));
    ci_tcp_timeout_delack(netif, SP_TO_TCP(netif, ts->param1));
    break;

  case CI_IP_TIMER_TCP_ZWIN:
    CHECK_TS(netif, SP_TO_TCP(netif, ts->param1));
    ci_tcp_timeout_zwin(netif, SP_TO_TCP(netif, ts->param1));
    break;

  case CI_IP_TIMER_TCP_KALIVE:
    CHECK_TS(netif, SP_TO_TCP(netif, ts->param1));
    ci_tcp_timeout_kalive(netif, SP_TO_TCP(netif, ts->param1));
    break;

  /* ---- timers dispatched without a prior CHECK_TS ---- */
  case CI_IP_TIMER_TCP_LISTEN:
    ci_tcp_timeout_listen(netif, SP_TO_TCP_LISTEN(netif, ts->param1));
    break;

  case CI_IP_TIMER_TCP_CORK:
    ci_tcp_timeout_cork(netif, SP_TO_TCP(netif, ts->param1));
    break;

  case CI_IP_TIMER_PMTU_DISCOVER:
    ci_pmtu_timeout_pmtu(netif, SP_TO_TCP(netif, ts->param1));
    break;

  /* ---- stack-wide timer: takes no per-socket argument ---- */
  case CI_IP_TIMER_NETIF_TIMEOUT:
    ci_netif_timeout_state(netif);
    break;

  /* ---- optional timers, present only when configured in ---- */
#if CI_CFG_TCP_SOCK_STATS
  case CI_IP_TIMER_TCP_STATS:
    ci_tcp_stats_action(netif, SP_TO_TCP(netif, ts->param1),
                        CI_IP_STATS_FLUSH, CI_IP_STATS_OUTPUT_NONE,
                        NULL, NULL );
    break;
#endif

#if CI_CFG_SUPPORT_STATS_COLLECTION
  case CI_IP_TIMER_NETIF_STATS:
    ci_netif_stats_action(netif, CI_IP_STATS_FLUSH,
                          CI_IP_STATS_OUTPUT_NONE, NULL, NULL );
    break;
#endif

#if CI_CFG_IP_TIMER_DEBUG
  case CI_IP_TIMER_DEBUG_HOOK:
    ci_ip_timer_debug_fn(netif, ts->link.addr, ts->param1);
    break;
#endif

  default:
    LOG_U(log( LPF "unknown timer callback code:%x param1:%d",
               ts->fn, OO_SP_FMT(ts->param1)));
    CI_DEBUG(ci_fail_stop_fn());
  }
}
/* Set up the IP timer scheduler of a stack.
 *
 * Derives the cycle-counter conversion constants from the CPU frequency,
 * synchronises the scheduler's clock, computes the ms->tick fixed-point
 * factor, lets the TCP code initialise its own time constants, and
 * initialises the fire list and every wheel bucket as an empty list.
 *
 * \param netif    stack to initialise
 * \param cpu_khz  CPU frequency in kHz
 */
void ci_ip_timer_state_init(ci_netif* netif, unsigned cpu_khz)
{
  ci_ip_timer_state* ipts = IPTIMER_STATE(netif);
  int isn_shift;
  int slot;

  /* Constants converting cycle counts to ticks and to microseconds. */
  ipts->khz = cpu_khz;
  ipts->ci_ip_time_frc2tick = shift_for_gran(CI_IP_TIME_APP_GRANULARITY,
                                             ipts->khz);
  ipts->ci_ip_time_frc2us = shift_for_gran(1, ipts->khz);

  /* Rate of the clock-driven TCP initial sequence number.
   *
   * The Linux kernel advances the ISN it would use for a given tuple every
   * 64 ns, and Onload matches that under EF_TCP_ISN_MODE=clocked.  Under
   * EF_TCP_ISN_MODE=clocked+cache old sequence numbers are remembered when
   * the clock-driven ISN is not known to be safe, so the clock need not run
   * as fast and is allowed to tick at most every 256 ns.  At that rate a
   * half wrap takes more than eight minutes, against an assumed maximum
   * peer-MSL of four minutes, which in practice cuts down how often old
   * sequence numbers must be remembered.
   *
   * A non-zero tcp_isn_mode selects the slower clock (shift of 2 relative
   * to microseconds); otherwise the shift is 4.
   */
  if( NI_OPTS(netif).tcp_isn_mode != 0 )
    isn_shift = 2;
  else
    isn_shift = 4;

  /* Clamp at zero rather than letting the shift go negative. */
  if( ipts->ci_ip_time_frc2us > isn_shift )
    ipts->ci_ip_time_frc2isn = ipts->ci_ip_time_frc2us - isn_shift;
  else
    ipts->ci_ip_time_frc2isn = 0;

  /* Start the scheduler at "now" with no timer known to be pending. */
  ci_ip_time_initial_sync(ipts);
  ipts->sched_ticks = ci_ip_time_now(netif);
  ipts->closest_timer = ipts->sched_ticks + IPTIME_INFINITY;

  /* Milliseconds are converted to ticks with fixed point arithmetic.  The
   * conversion factor is expected to lie in the range <0.5,1], which the
   * assertions below check in 32.32 representation.
   */
  ipts->ci_ip_time_ms2tick_fxp =
    (((ci_uint64)ipts->khz) << 32) / (1u << ipts->ci_ip_time_frc2tick);
  ci_assert_gt(ipts->ci_ip_time_ms2tick_fxp, 1ull<<31);
  ci_assert_le(ipts->ci_ip_time_ms2tick_fxp, 1ull<<32);

  /* Module specific time constants that depend on frc2tick. */
  ci_tcp_timer_init(netif);

  /* The list that timers are moved onto while they are being fired. */
  ci_ni_dllist_init(netif, &ipts->fire_list,
                    oo_ptr_to_statep(netif, &ipts->fire_list), "fire");

  /* Every bucket of every wheel starts out empty. */
  slot = 0;
  while( slot < CI_IPTIME_WHEELSIZE ) {
    ci_ni_dllist_init(netif, &ipts->warray[slot],
                      oo_ptr_to_statep(netif, &ipts->warray[slot]), "timw");
    slot++;
  }
}
/* Called when the statistics report timer fires OR at start/end of * the session or for a manual update through a sockopt * \param ni netif context * \param ts TCP state context * \param reason Action to perform * \param type Type of output (0=default, 1 = text, 2 = XML) * \param ptr Pointer to the memory where statistics is put on STATS_GET * action. It has no sense with other actions and should be set to * NULL. * \param which Type of statistics to report (TCP, netif or both) */ extern void ci_tcp_stats_action(__NI_STRUCT__ *ni, __STATE_STRUCT__ *ts, ci_ip_stats_action_type action, ci_ip_stats_output_fmt fmt, void *data, socklen_t *size) { ci_iptime_t it; ci_assert(ni); ci_assert( IPTIMER_STATE(ni) ); ci_assert(ts); LOG_STATS( ci_log( "%s( %p, %p, %d, %d, %p )", __FUNCTION__, ni, ts, action, fmt, data)); /* update snapshot timestamp */ ci_ip_time_get(IPTIMER_STATE(ni), &it); /* ci_ip_time_ticks2ms() is not defined in KERNEL space */ #ifndef __KERNEL__ ts->stats_snapshot.now = ci_ip_time_ticks2ms(ni, it); #endif switch (action) { case CI_IP_STATS_START: ci_tcp_stats_init_data( &ts->stats_snapshot); ci_tcp_stats_init_data( &ts->stats_cumulative); it = NI_CONF(ni).tconst_stats; ci_tcp_stats_handle_timer(ni, ts, it ); break; case CI_IP_STATS_GET: if ((data != NULL) && (size != NULL) && (*size >= 2 * sizeof(ci_ip_stats))){ /* assumed to be a valid user memory area to update */ ci_ip_sock_stats* ii = (ci_ip_sock_stats*)data; memcpy( &ii[0], &ts->stats_snapshot, sizeof(*ii) ); memcpy( &ii[1], &ts->stats_cumulative, sizeof(*ii)); *size = 2 * sizeof(ci_ip_sock_stats); } break; case CI_IP_STATS_REPORT: #if CI_CFG_SEND_STATS_TO_LOG ci_tcp_stats_report(ni, ts, ni->state->stats_fmt, NULL, 0); #else if ((data != NULL) && (size != NULL)) { *size = ci_tcp_stats_report(ni, ts, ni->state->stats_fmt, data, *size); } #endif break; case CI_IP_STATS_END: case CI_IP_STATS_FLUSH: ci_tcp_stats_update( ts ); /* Stop stats timer on CI_IP_STATS_END */ it = action != CI_IP_STATS_END ? 
NI_CONF(ni).tconst_stats : 0; ci_tcp_stats_handle_timer(ni, ts, it ); break; default: break; } }
/* Assert that the timer wheels are internally consistent.
 *
 * Walks every bucket of every wheel and, for each timer found, checks that:
 *   - its expiry is after the scheduler's current time (a1);
 *   - its expiry lies in the range [min_time, max_time) covered by the
 *     bucket it is queued on (a2);
 *   - the bucket is one that is allowed to hold timers, i.e. its range
 *     starts after the current time (a3).
 * When a check fails the timer's details and the caller's location are
 * logged before the assertion fires.
 *
 * \param ni    stack whose timer state is checked
 * \param file  caller's source file, used only in the failure log
 * \param line  caller's source line, used only in the failure log
 */
void ci_ip_timer_state_assert_valid(ci_netif* ni, const char* file, int line)
{
  ci_ip_timer_state* ipts;
  ci_ip_timer* ts;
  ci_ni_dllist_t* bucket;
  ci_ni_dllist_link* l;
  ci_iptime_t stime, wheel_base, max_time, min_time;
  int a1, a2, a3, w, b, bit_shift;

  /* Shifting a 32 bit integer left or right by 32 bits has undefined
   * results (i.e. not the 0 that is required), so the per-wheel masks come
   * from a table instead of being computed by shifting.
   */
  unsigned wheel_mask[CI_IPTIME_WHEELS] =
    { WHEEL0_MASK, WHEEL1_MASK, WHEEL2_MASK, 0 };

  ipts = IPTIMER_STATE(ni);
  stime = ipts->sched_ticks;

  /* for each wheel */
  for( w = 0; w < CI_IPTIME_WHEELS; w++ ) {
    /* base time of wheel */
    wheel_base = stime & wheel_mask[w];
    /* width (in bits) of one bucket of this wheel; same for all buckets */
    bit_shift = CI_IPTIME_BUCKETBITS * w;

    /* for each bucket in wheel */
    for( b = 0; b < CI_IPTIME_BUCKETS; b++ ) {
      /* Time range covered by this bucket.  The shifts are done on
       * unsigned operands: for the higher wheels the shifted value may not
       * be representable in a signed int, and overflowing a signed left
       * shift is undefined behaviour.
       */
      min_time = wheel_base + ((unsigned)b << bit_shift);
      max_time = min_time + (1u << bit_shift);

      bucket = &ipts->warray[w*CI_IPTIME_BUCKETS + b];

      /* check list looks valid */
      if( ci_ni_dllist_start(ni, bucket) == ci_ni_dllist_end(ni, bucket) ) {
        ci_assert( ci_ni_dllist_is_empty(ni, bucket) );
      }

      /* check buckets that should be empty are! */
      a3 = TIME_GT(min_time, stime) || ci_ni_dllist_is_empty(ni, bucket);

      /* run through timers in bucket */
      for( l = ci_ni_dllist_start(ni, bucket);
           l != ci_ni_dllist_end(ni, bucket);
           ci_ni_dllist_iter(ni, l) ) {
        ci_ni_dllist_link_assert_valid(ni, l);

        /* get timer */
        ts = LINK2TIMER(l);

        /* must be in the future */
        a1 = TIME_GT(ts->time, stime);
        /* must be within time range of bucket */
        a2 = TIME_LT(ts->time, max_time) && TIME_GE(ts->time, min_time);

        /* if any of the checks fail then print out timer details */
        if( !a1 || !a2 || !a3 ) {
          ci_log("%s: [w=0x%x/b=0x%x] stime=0x%x", __FUNCTION__, w, b, stime);
          ci_log(" --> t=0x%x, min=0x%x, max=0x%x",
                 ts->time, min_time, max_time);
          ci_log(" [%s line=%d]", file, line);
        }
        /* stop if assertion failed */
        ci_assert(a1 && a2 && a3);
      }
    }
  }
}
/* Advance the scheduler to the current time, firing every timer that has
 * become due on the way, then refresh the cached "closest timer" estimate.
 *
 * The caller is expected to have made sure the stack's notion of the
 * current time is sufficiently up to date.
 *
 * \param netif  stack whose timers are polled
 */
void ci_ip_timer_poll(ci_netif *netif)
{
  ci_ip_timer_state* ipts = IPTIMER_STATE(netif);
  ci_iptime_t* stime = &ipts->sched_ticks;
  ci_iptime_t rtime = ci_ip_time_now(netif);
  ci_ip_timer* ts;
  ci_ni_dllist_link* fired;
  ci_iptime_t base, b;
  int cascaded = 0;

  /* Sanity: time only ever moves forwards. */
  ci_assert( TIME_GE(rtime, *stime) );

  /* Chasing Bug 2855: the temporary fire list must be sane and empty
   * before we begin. */
  ci_assert( ci_ni_dllist_is_valid(netif, &ipts->fire_list.l) );
  ci_assert( ci_ni_dllist_is_empty(netif, &ipts->fire_list));

  /* Step the scheduler one tick at a time until it has caught up. */
  for( ; TIME_LT(*stime, rtime); ) {
    DETAILED_CHECK_TIMERS(netif);

    /* Move the scheduler's view of time on by one tick. */
    (*stime)++;

    /* When wheel 0 wraps, refill it from wheel 1; when that wraps too,
     * refill from the next wheel up first, and so on.  Higher wheels are
     * always cascaded before lower ones.  Only the wheel 1 cascade's
     * result is remembered. */
    if( BUCKETNO(0, *stime) == 0 ) {
      if( BUCKETNO(1, *stime) == 0 ) {
        if( BUCKETNO(2, *stime) == 0 )
          ci_ip_timer_cascadewheel(netif, 3, *stime);
        ci_ip_timer_cascadewheel(netif, 2, *stime);
      }
      cascaded = ci_ip_timer_cascadewheel(netif, 1, *stime);
    }

    /* Bug 1828: care is needed here, because
     *   - ci_ip_timer_docallback() can set and clear timers;
     *   - the timers it sets or clears need not be the ones now firing;
     *   - but they could live in this very bucket.
     * So the list being drained must stay valid at every step.  This is
     * made slightly awkward by the fact that indirected linked lists
     * cannot be held on the stack, hence the dedicated fire list.
     */
    ci_assert( ci_ni_dllist_is_valid(netif, &ipts->fire_list.l));
    ci_assert( ci_ni_dllist_is_empty(netif, &ipts->fire_list));

    /* Take the whole of the current bucket onto the fire list... */
    ci_ni_dllist_rehome( netif, &ipts->fire_list,
                         &ipts->warray[BUCKETNO(0, *stime)] );
    DETAILED_CHECK_TIMERS(netif);

    /* ...and fire its timers one at a time. */
    for( ;; ) {
      fired = ci_ni_dllist_try_pop(netif, &ipts->fire_list);
      if( !fired )
        break;

      ts = LINK2TIMER(fired);
      ci_assert_equal(ts->time, *stime);

      /* Mark the timer as NOT pending before its handler runs. */
      ci_ni_dllist_self_link(netif, &ts->link);

      /* The handler may safely set or clear this or any other timer. */
      ci_ip_timer_docallback(netif, ts);
    }

    ci_assert( ci_ni_dllist_is_valid(netif, &ipts->fire_list.l) );
    ci_assert( ci_ni_dllist_is_empty(netif, &ipts->fire_list));
    DETAILED_CHECK_TIMERS(netif);
  }

  ci_assert( ci_ni_dllist_is_valid(netif, &ipts->fire_list.l) );
  ci_assert( ci_ni_dllist_is_empty(netif, &ipts->fire_list));

  /* Which timer is next?  The estimate only needs recomputing when the
   * previous closest timer has already been handled, or when timers were
   * cascaded and the previous estimate was "infinity". */
  if( !TIME_GE(ipts->sched_ticks, ipts->closest_timer) &&
      !(cascaded &&
        ipts->closest_timer - ipts->sched_ticks > IPTIME_INFINITY_LOW) )
    return;

  /* Only the first wheel is examined: look for the first non-empty bucket
   * after the current one. */
  base = ipts->sched_ticks & WHEEL0_MASK;
  b = ipts->sched_ticks - base + 1;
  while( b < CI_IPTIME_BUCKETS &&
         ci_ni_dllist_is_empty(netif, &ipts->warray[b]) )
    b++;

  if( b < CI_IPTIME_BUCKETS )
    ipts->closest_timer = base + b;
  else
    /* Next timer unknown: record a sort of infinity. */
    ipts->closest_timer = ipts->sched_ticks + IPTIME_INFINITY;
}