| 1 | From 4a8056dfeef49b306ad6af24a5563d7d6867aae0 Mon Sep 17 00:00:00 2001 |
| 2 | From: Eric Dumazet <edumazet@google.com> |
| 3 | Date: Sat, 12 May 2012 03:32:13 +0000 |
| 4 | Subject: [PATCH] codel: use Newton method instead of sqrt() and divides |
| 5 | |
| 6 | commit 536edd67109df5e0cdb2c4ee759e9bade7976367 upstream. |
| 7 | |
| 8 | As Van pointed out, interval/sqrt(count) can be implemented using |
| 9 | multiplies only. |
| 10 | |
| 11 | http://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Iterative_methods_for_reciprocal_square_roots |
| 12 | |
| 13 | This patch implements the Newton method and reciprocal divide. |
| 14 | |
| 15 | Total cost is 15 cycles instead of 120 on my Corei5 machine (64bit |
| 16 | kernel). |
| 17 | |
| 18 | There is a small 'error' for count values < 5, but we don't really care. |
| 19 | |
| 20 | I reuse a hole in struct codel_vars : |
| 21 | - pack the dropping boolean into one bit |
| 22 | - use 31bit to store the reciprocal value of sqrt(count). |
| 23 | |
| 24 | Suggested-by: Van Jacobson <van@pollere.net> |
| 25 | Signed-off-by: Eric Dumazet <edumazet@google.com> |
| 26 | Cc: Dave Taht <dave.taht@bufferbloat.net> |
| 27 | Cc: Kathleen Nichols <nichols@pollere.com> |
| 28 | Cc: Tom Herbert <therbert@google.com> |
| 29 | Cc: Matt Mathis <mattmathis@google.com> |
| 30 | Cc: Yuchung Cheng <ycheng@google.com> |
| 31 | Cc: Nandita Dukkipati <nanditad@google.com> |
| 32 | Cc: Stephen Hemminger <shemminger@vyatta.com> |
| 33 | Signed-off-by: David S. Miller <davem@davemloft.net> |
| 34 | --- |
| 35 | include/net/codel.h | 68 ++++++++++++++++++++++++++++----------------------- |
| 36 | 1 file changed, 37 insertions(+), 31 deletions(-) |
| 37 | |
| 38 | --- a/include/net/codel.h |
| 39 | +++ b/include/net/codel.h |
| 40 | @@ -46,6 +46,7 @@ |
| 41 | #include <linux/skbuff.h> |
| 42 | #include <net/pkt_sched.h> |
| 43 | #include <net/inet_ecn.h> |
| 44 | +#include <linux/reciprocal_div.h> |
| 45 | |
| 46 | /* Controlling Queue Delay (CoDel) algorithm |
| 47 | * ========================================= |
| 48 | @@ -123,6 +124,7 @@ struct codel_params { |
| 49 | * entered dropping state |
| 50 | * @lastcount: count at entry to dropping state |
| 51 | * @dropping: set to true if in dropping state |
| 52 | + * @rec_inv_sqrt: reciprocal value of sqrt(count) >> 1 |
| 53 | * @first_above_time: when we went (or will go) continuously above target |
| 54 | * for interval |
| 55 | * @drop_next: time to drop next packet, or when we dropped last |
| 56 | @@ -131,7 +133,8 @@ struct codel_params { |
| 57 | struct codel_vars { |
| 58 | u32 count; |
| 59 | u32 lastcount; |
| 60 | - bool dropping; |
| 61 | + bool dropping:1; |
| 62 | + u32 rec_inv_sqrt:31; |
| 63 | codel_time_t first_above_time; |
| 64 | codel_time_t drop_next; |
| 65 | codel_time_t ldelay; |
| 66 | @@ -158,11 +161,7 @@ static void codel_params_init(struct cod |
| 67 | |
| 68 | static void codel_vars_init(struct codel_vars *vars) |
| 69 | { |
| 70 | - vars->drop_next = 0; |
| 71 | - vars->first_above_time = 0; |
| 72 | - vars->dropping = false; /* exit dropping state */ |
| 73 | - vars->count = 0; |
| 74 | - vars->lastcount = 0; |
| 75 | + memset(vars, 0, sizeof(*vars)); |
| 76 | } |
| 77 | |
| 78 | static void codel_stats_init(struct codel_stats *stats) |
| 79 | @@ -170,38 +169,37 @@ static void codel_stats_init(struct code |
| 80 | stats->maxpacket = 256; |
| 81 | } |
| 82 | |
| 83 | -/* return interval/sqrt(x) with good precision |
| 84 | - * relies on int_sqrt(unsigned long x) kernel implementation |
| 85 | +/* |
| 86 | + * http://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Iterative_methods_for_reciprocal_square_roots |
| 87 | + * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2) |
| 88 | + * |
| 89 | + * Here, invsqrt is a fixed point number (< 1.0), 31bit mantissa) |
| 90 | */ |
| 91 | -static u32 codel_inv_sqrt(u32 _interval, u32 _x) |
| 92 | +static void codel_Newton_step(struct codel_vars *vars) |
| 93 | { |
| 94 | - u64 interval = _interval; |
| 95 | - unsigned long x = _x; |
| 96 | - |
| 97 | - /* Scale operands for max precision */ |
| 98 | + u32 invsqrt = vars->rec_inv_sqrt; |
| 99 | + u32 invsqrt2 = ((u64)invsqrt * invsqrt) >> 31; |
| 100 | + u64 val = (3LL << 31) - ((u64)vars->count * invsqrt2); |
| 101 | |
| 102 | -#if BITS_PER_LONG == 64 |
| 103 | - x <<= 32; /* On 64bit arches, we can prescale x by 32bits */ |
| 104 | - interval <<= 16; |
| 105 | -#endif |
| 106 | + val = (val * invsqrt) >> 32; |
| 107 | |
| 108 | - while (x < (1UL << (BITS_PER_LONG - 2))) { |
| 109 | - x <<= 2; |
| 110 | - interval <<= 1; |
| 111 | - } |
| 112 | - do_div(interval, int_sqrt(x)); |
| 113 | - return (u32)interval; |
| 114 | + vars->rec_inv_sqrt = val; |
| 115 | } |
| 116 | |
| 117 | +/* |
| 118 | + * CoDel control_law is t + interval/sqrt(count) |
| 119 | + * We maintain in rec_inv_sqrt the reciprocal value of sqrt(count) to avoid |
| 120 | + * both sqrt() and divide operation. |
| 121 | + */ |
| 122 | static codel_time_t codel_control_law(codel_time_t t, |
| 123 | codel_time_t interval, |
| 124 | - u32 count) |
| 125 | + u32 rec_inv_sqrt) |
| 126 | { |
| 127 | - return t + codel_inv_sqrt(interval, count); |
| 128 | + return t + reciprocal_divide(interval, rec_inv_sqrt << 1); |
| 129 | } |
| 130 | |
| 131 | |
| 132 | -static bool codel_should_drop(struct sk_buff *skb, |
| 133 | +static bool codel_should_drop(const struct sk_buff *skb, |
| 134 | unsigned int *backlog, |
| 135 | struct codel_vars *vars, |
| 136 | struct codel_params *params, |
| 137 | @@ -274,14 +272,16 @@ static struct sk_buff *codel_dequeue(str |
| 138 | */ |
| 139 | while (vars->dropping && |
| 140 | codel_time_after_eq(now, vars->drop_next)) { |
| 141 | - if (++vars->count == 0) /* avoid zero divides */ |
| 142 | - vars->count = ~0U; |
| 143 | + vars->count++; /* dont care of possible wrap |
| 144 | + * since there is no more divide |
| 145 | + */ |
| 146 | + codel_Newton_step(vars); |
| 147 | if (params->ecn && INET_ECN_set_ce(skb)) { |
| 148 | stats->ecn_mark++; |
| 149 | vars->drop_next = |
| 150 | codel_control_law(vars->drop_next, |
| 151 | params->interval, |
| 152 | - vars->count); |
| 153 | + vars->rec_inv_sqrt); |
| 154 | goto end; |
| 155 | } |
| 156 | qdisc_drop(skb, sch); |
| 157 | @@ -296,7 +296,7 @@ static struct sk_buff *codel_dequeue(str |
| 158 | vars->drop_next = |
| 159 | codel_control_law(vars->drop_next, |
| 160 | params->interval, |
| 161 | - vars->count); |
| 162 | + vars->rec_inv_sqrt); |
| 163 | } |
| 164 | } |
| 165 | } |
| 166 | @@ -319,12 +319,18 @@ static struct sk_buff *codel_dequeue(str |
| 167 | if (codel_time_before(now - vars->drop_next, |
| 168 | 16 * params->interval)) { |
| 169 | vars->count = (vars->count - vars->lastcount) | 1; |
| 170 | + /* we dont care if rec_inv_sqrt approximation |
| 171 | + * is not very precise : |
| 172 | + * Next Newton steps will correct it quadratically. |
| 173 | + */ |
| 174 | + codel_Newton_step(vars); |
| 175 | } else { |
| 176 | vars->count = 1; |
| 177 | + vars->rec_inv_sqrt = 0x7fffffff; |
| 178 | } |
| 179 | vars->lastcount = vars->count; |
| 180 | vars->drop_next = codel_control_law(now, params->interval, |
| 181 | - vars->count); |
| 182 | + vars->rec_inv_sqrt); |
| 183 | } |
| 184 | end: |
| 185 | return skb; |
| 186 | |