From da210f559019ba1cd4ebee2a28ad158bfb95bab2 Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Thu, 30 Aug 2012 12:02:47 +0000 Subject: [PATCH] bonding: add some slack to arp monitoring time limits Currently, all the time limits in the bonding ARP monitor are in multiples of arp_interval -- the time interval at which the ARP monitor is periodically scheduled. With a fast network round-trip and a little scheduling latency of the ARP monitor work, a limit of n*delta_in_ticks may effectively mean (n-1)*delta_in_ticks. This is fatal in case of n==1 (the link will stay down forever) and makes the behaviour non-deterministic in all the other cases. Add a delta_in_ticks/2 time slack to all the time limits. Signed-off-by: Jiri Bohac Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index b24ce257ac7b..7858c58df4a3 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2811,12 +2811,13 @@ void bond_loadbalance_arp_mon(struct work_struct *work) arp_work.work); struct slave *slave, *oldcurrent; int do_failover = 0; - int delta_in_ticks; + int delta_in_ticks, extra_ticks; int i; read_lock(&bond->lock); delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); + extra_ticks = delta_in_ticks / 2; if (bond->slave_cnt == 0) goto re_arm; @@ -2839,10 +2840,10 @@ void bond_loadbalance_arp_mon(struct work_struct *work) if (slave->link != BOND_LINK_UP) { if (time_in_range(jiffies, trans_start - delta_in_ticks, - trans_start + delta_in_ticks) && + trans_start + delta_in_ticks + extra_ticks) && time_in_range(jiffies, slave->dev->last_rx - delta_in_ticks, - slave->dev->last_rx + delta_in_ticks)) { + slave->dev->last_rx + delta_in_ticks + extra_ticks)) { slave->link = BOND_LINK_UP; bond_set_active_slave(slave); @@ -2872,10 +2873,10 @@ void bond_loadbalance_arp_mon(struct work_struct *work) */ if (!time_in_range(jiffies, trans_start - delta_in_ticks, - trans_start + 2 * delta_in_ticks) || + trans_start + 2 * delta_in_ticks + extra_ticks) || !time_in_range(jiffies, slave->dev->last_rx - delta_in_ticks, - slave->dev->last_rx + 2 * delta_in_ticks)) { + slave->dev->last_rx + 2 * delta_in_ticks + extra_ticks)) { slave->link = BOND_LINK_DOWN; bond_set_backup_slave(slave); @@ -2933,6 +2934,14 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks) struct slave *slave; int i, commit = 0; unsigned long trans_start; + int extra_ticks; + + /* All the time comparisons below need some extra time. Otherwise, on + * fast networks the ARP probe/reply may arrive within the same jiffy + * as it was sent. Then, the next time the ARP monitor is run, one + * arp_interval will already have passed in the comparisons. + */ + extra_ticks = delta_in_ticks / 2; bond_for_each_slave(bond, slave, i) { slave->new_link = BOND_LINK_NOCHANGE; @@ -2940,7 +2949,7 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks) if (slave->link != BOND_LINK_UP) { if (time_in_range(jiffies, slave_last_rx(bond, slave) - delta_in_ticks, - slave_last_rx(bond, slave) + delta_in_ticks)) { + slave_last_rx(bond, slave) + delta_in_ticks + extra_ticks)) { slave->new_link = BOND_LINK_UP; commit++; @@ -2956,7 +2965,7 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks) */ if (time_in_range(jiffies, slave->jiffies - delta_in_ticks, - slave->jiffies + 2 * delta_in_ticks)) + slave->jiffies + 2 * delta_in_ticks + extra_ticks)) continue; /* @@ -2976,7 +2985,7 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks) !bond->current_arp_slave && !time_in_range(jiffies, slave_last_rx(bond, slave) - delta_in_ticks, - slave_last_rx(bond, slave) + 3 * delta_in_ticks)) { + slave_last_rx(bond, slave) + 3 * delta_in_ticks + extra_ticks)) { slave->new_link = BOND_LINK_DOWN; commit++; @@ -2992,10 +3001,10 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks) if (bond_is_active_slave(slave) && (!time_in_range(jiffies, trans_start - delta_in_ticks, - trans_start + 2 * delta_in_ticks) || + trans_start + 2 * delta_in_ticks + extra_ticks) || !time_in_range(jiffies, slave_last_rx(bond, slave) - delta_in_ticks, - slave_last_rx(bond, slave) + 2 * delta_in_ticks))) { + slave_last_rx(bond, slave) + 2 * delta_in_ticks + extra_ticks))) { slave->new_link = BOND_LINK_DOWN; commit++; @@ -3027,7 +3036,7 @@ static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks) if ((!bond->curr_active_slave && time_in_range(jiffies, trans_start - delta_in_ticks, - trans_start + delta_in_ticks)) || + trans_start + delta_in_ticks + delta_in_ticks / 2)) || bond->curr_active_slave != slave) { slave->link = BOND_LINK_UP; if (bond->current_arp_slave) { -- 2.20.1