From 130d3d68b52097c7ae081109f700b02776adcb9c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 Jun 2013 13:56:19 -0700 Subject: [PATCH] net_sched: psched_ratecfg_precompute() improvements Before allowing 64bits bytes rates, refactor psched_ratecfg_precompute() to get better comments and increased accuracy. rate_bps field is renamed to rate_bytes_ps, as we only have to worry about bytes per second. Signed-off-by: Eric Dumazet Cc: Ben Greear Signed-off-by: David S. Miller --- include/net/sch_generic.h | 4 ++-- net/sched/sch_generic.c | 44 ++++++++++++++++++--------------------- 2 files changed, 22 insertions(+), 26 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index df5676029827..6eab63363e59 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -680,7 +680,7 @@ static inline struct sk_buff *skb_act_clone(struct sk_buff *skb, gfp_t gfp_mask, #endif struct psched_ratecfg { - u64 rate_bps; + u64 rate_bytes_ps; /* bytes per second */ u32 mult; u16 overhead; u8 shift; @@ -698,7 +698,7 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res, const struct psched_ratecfg *r) { memset(res, 0, sizeof(*res)); - res->rate = r->rate_bps >> 3; + res->rate = r->rate_bytes_ps; res->overhead = r->overhead; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 20224086cc28..4626cef4b76e 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -901,37 +901,33 @@ void dev_shutdown(struct net_device *dev) void psched_ratecfg_precompute(struct psched_ratecfg *r, const struct tc_ratespec *conf) { - u64 factor; - u64 mult; - int shift; - memset(r, 0, sizeof(*r)); r->overhead = conf->overhead; - r->rate_bps = (u64)conf->rate << 3; + r->rate_bytes_ps = conf->rate; r->mult = 1; /* - * Calibrate mult, shift so that token counting is accurate - * for smallest packet size (64 bytes). Token (time in ns) is - * computed as (bytes * 8) * NSEC_PER_SEC / rate_bps. 
It will - * work as long as the smallest packet transfer time can be - * accurately represented in nanosec. + * The deal here is to replace a divide by a reciprocal one + * in fast path (a reciprocal divide is a multiply and a shift) + * + * Normal formula would be : + * time_in_ns = (NSEC_PER_SEC * len) / rate_bytes_ps + * + * We compute mult/shift to use instead : + * time_in_ns = (len * mult) >> shift; + * + * We try to get the highest possible mult value for accuracy, + * but have to make sure no overflows will ever happen. */ - if (r->rate_bps > 0) { - /* - * Higher shift gives better accuracy. Find the largest - * shift such that mult fits in 32 bits. - */ - for (shift = 0; shift < 16; shift++) { - r->shift = shift; - factor = 8LLU * NSEC_PER_SEC * (1 << r->shift); - mult = div64_u64(factor, r->rate_bps); - if (mult > UINT_MAX) + if (r->rate_bytes_ps > 0) { + u64 factor = NSEC_PER_SEC; + + for (;;) { + r->mult = div64_u64(factor, r->rate_bytes_ps); + if (r->mult & (1U << 31) || factor & (1ULL << 63)) break; + factor <<= 1; + r->shift++; } - - r->shift = shift - 1; - factor = 8LLU * NSEC_PER_SEC * (1 << r->shift); - r->mult = div64_u64(factor, r->rate_bps); } } EXPORT_SYMBOL(psched_ratecfg_precompute); -- 2.20.1