From 671889e6740ac7ab84d1420525b50d1d47001102 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 2 Dec 2016 12:33:00 -0800 Subject: [PATCH] i40e: avoid race condition when sending filters to firmware for addition Refactor how we add new filters to firmware to avoid a race condition that can occur due to removing filters from the hash temporarily. To understand the race condition, suppose that you have a number of MAC filters, but have not yet added any VLANs. Now, add two VLANs in rapid succession. A possible resulting flow would look something like the following: (1) lock hash for add VLAN (2) add the new MAC/VLAN combos for each current MAC filter (3) unlock hash (4) lock hash for filter sync (5) notice that we have a VLAN, so prepare to update all MAC filters with VLAN=-1 to be VLAN=0. (6) move NEW and REMOVE filters to temporary list (7) unlock hash (8) lock hash for add VLAN (9) add new MAC/VLAN combos. Notice that no MAC filters are currently in the hash list, so we don't add any VLANs <--- BUG! (10) unlock hash (11) sync the temporary lists to firmware (12) lock hash for post-sync (13) move the temporary elements back to the main list .... Because we take filters out of the main hash into temporary lists, we introduce a narrow window where it is possible that other callers to the list will not see some of the filters which were previously added but have not yet been finalized. This results in sometimes dropping VLAN additions, and could also result in failing to add a MAC address on the newly added VLAN. One obvious way to avoid this race condition would be to lock the entire firmware process. Unfortunately this does not work because adminq firmware commands take a mutex which results in a sleep while atomic BUG(). So, we can't use the simplest approach. An alternative approach is to simply not remove the filters from the hash list while adding. Instead, add an i40e_new_mac_filter structure which we will use to track added filters. This avoids the need to remove the filter from the hash list. We'll store a pointer to the original i40e_mac_filter, along with our own copy of the state. We won't update the state directly, so as to avoid race with other code that may modify the state while under the lock. We are safe to read f->macaddr and f->vlan since these only change in two locations. The first is on filter creation, which must have already occurred. The second is inside i40e_correct_vlan_filters which was previously run after creation of this object and can't be run again until after. Thus, we should be safe to read the MAC address and VLAN while outside the lock. We also aren't going to run into a use-after-free issue because the only place where we free filters is when they are marked FAILED or when we remove them inside the sync subtask. Since the subtask has its own critical flag to prevent duplicate runs, we know this won't happen. We also know that the only location to transition a filter from NEW to FAILED is inside the subtask also, so we aren't worried about that either. Use the wrapper i40e_new_mac_filter for additions, and once we've finalized the addition to firmware, we will update the filter state inside a lock, and then free the wrapper structure. In order to avoid a possible race condition with filter deletion, we won't update the original filter state unless it is still I40E_FILTER_NEW when we finish the firmware sync. This approach is more complex, but avoids race conditions related to filters being temporarily removed from the list. We do not need the same behavior for deletion because we always unconditionally removed the filters from the list regardless of the firmware status. Change-Id: I14b74bc2301f8e69433fbe77ebca532db20c5317 Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 16 +++ drivers/net/ethernet/intel/i40e/i40e_main.c | 147 +++++++++++++------- 2 files changed, 112 insertions(+), 51 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index fdd9069b6cec..7a23d3e47c6f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -467,6 +467,22 @@ struct i40e_mac_filter { enum i40e_filter_state state; }; +/* Wrapper structure to keep track of filters while we are preparing to send + * firmware commands. We cannot send firmware commands while holding a + * spinlock, since it might sleep. To avoid this, we wrap the added filters in + * a separate structure, which will track the state change and update the real + * filter while under lock. We can't simply hold the filters in a separate + * list, as this opens a window for a race condition when adding new MAC + * addresses to all VLANs, or when adding new VLANs to all MAC addresses. + */ +struct i40e_new_mac_filter { + struct hlist_node hlist; + struct i40e_mac_filter *f; + + /* Track future changes to state separately */ + enum i40e_filter_state state; +}; + struct i40e_veb { struct i40e_pf *pf; u16 idx; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 06c80d4162af..e83a8ca5dd65 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1255,6 +1255,7 @@ static int i40e_correct_mac_vlan_filters(struct i40e_vsi *vsi, int vlan_filters) { struct i40e_mac_filter *f, *add_head; + struct i40e_new_mac_filter *new; struct hlist_node *h; int bkt, new_vlan; @@ -1273,13 +1274,13 @@ static int i40e_correct_mac_vlan_filters(struct i40e_vsi *vsi, */ /* Update the filters about to be added in place */ - hlist_for_each_entry(f, tmp_add_list, hlist) { - if (vsi->info.pvid && f->vlan != vsi->info.pvid) - f->vlan = vsi->info.pvid; - else if (vlan_filters && f->vlan == I40E_VLAN_ANY) - f->vlan = 0; - else if (!vlan_filters && f->vlan == 0) - f->vlan = I40E_VLAN_ANY; + hlist_for_each_entry(new, tmp_add_list, hlist) { + if (vsi->info.pvid && new->f->vlan != vsi->info.pvid) + new->f->vlan = vsi->info.pvid; + else if (vlan_filters && new->f->vlan == I40E_VLAN_ANY) + new->f->vlan = 0; + else if (!vlan_filters && new->f->vlan == 0) + new->f->vlan = I40E_VLAN_ANY; } /* Update the remaining active filters */ @@ -1305,9 +1306,16 @@ static int i40e_correct_mac_vlan_filters(struct i40e_vsi *vsi, if (!add_head) return -ENOMEM; - /* Put the replacement filter into the add list */ - hash_del(&add_head->hlist); - hlist_add_head(&add_head->hlist, tmp_add_list); + /* Create a temporary i40e_new_mac_filter */ + new = kzalloc(sizeof(*new), GFP_ATOMIC); + if (!new) + return -ENOMEM; + + new->f = add_head; + new->state = add_head->state; + + /* Add the new filter to the tmp list */ + hlist_add_head(&new->hlist, tmp_add_list); /* Put the original filter into the delete list */ f->state = I40E_FILTER_REMOVE; @@ -1819,16 +1827,15 @@ static void i40e_set_rx_mode(struct net_device *netdev) } /** - * i40e_undo_filter_entries - Undo the changes made to MAC filter entries + * i40e_undo_del_filter_entries - Undo the changes made to MAC filter entries * @vsi: Pointer to VSI struct * @from: Pointer to list which contains MAC filter entries - changes to * those entries needs to be undone. * - * MAC filter entries from list were slated to be sent to firmware, either for - * addition or deletion. + * MAC filter entries from this list were slated for deletion. **/ -static void i40e_undo_filter_entries(struct i40e_vsi *vsi, - struct hlist_head *from) +static void i40e_undo_del_filter_entries(struct i40e_vsi *vsi, + struct hlist_head *from) { struct i40e_mac_filter *f; struct hlist_node *h; @@ -1842,29 +1849,51 @@ static void i40e_undo_filter_entries(struct i40e_vsi *vsi, } } +/** + * i40e_undo_add_filter_entries - Undo the changes made to MAC filter entries + * @vsi: Pointer to vsi struct + * @from: Pointer to list which contains MAC filter entries - changes to + * those entries needs to be undone. + * + * MAC filter entries from this list were slated for addition. + **/ +static void i40e_undo_add_filter_entries(struct i40e_vsi *vsi, + struct hlist_head *from) +{ + struct i40e_new_mac_filter *new; + struct hlist_node *h; + + hlist_for_each_entry_safe(new, h, from, hlist) { + /* We can simply free the wrapper structure */ + hlist_del(&new->hlist); + kfree(new); + } +} + /** * i40e_next_entry - Get the next non-broadcast filter from a list - * @f: pointer to filter in list + * @next: pointer to filter in list * * Returns the next non-broadcast filter in the list. Required so that we * ignore broadcast filters within the list, since these are not handled via * the normal firmware update path. */ -static struct i40e_mac_filter *i40e_next_filter(struct i40e_mac_filter *f) +static +struct i40e_new_mac_filter *i40e_next_filter(struct i40e_new_mac_filter *next) { - while (f) { - f = hlist_entry(f->hlist.next, - typeof(struct i40e_mac_filter), - hlist); + while (next) { + next = hlist_entry(next->hlist.next, + typeof(struct i40e_new_mac_filter), + hlist); /* keep going if we found a broadcast filter */ - if (f && is_broadcast_ether_addr(f->macaddr)) + if (next && is_broadcast_ether_addr(next->f->macaddr)) continue; break; } - return f; + return next; } /** @@ -1880,7 +1909,7 @@ static struct i40e_mac_filter *i40e_next_filter(struct i40e_mac_filter *f) static int i40e_update_filter_state(int count, struct i40e_aqc_add_macvlan_element_data *add_list, - struct i40e_mac_filter *add_head) + struct i40e_new_mac_filter *add_head) { int retval = 0; int i; @@ -1958,7 +1987,7 @@ void i40e_aqc_del_filters(struct i40e_vsi *vsi, const char *vsi_name, static void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name, struct i40e_aqc_add_macvlan_element_data *list, - struct i40e_mac_filter *add_head, + struct i40e_new_mac_filter *add_head, int num_add, bool *promisc_changed) { struct i40e_hw *hw = &vsi->back->hw; @@ -1986,10 +2015,12 @@ void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name, * This function sets or clears the promiscuous broadcast flags for VLAN * filters in order to properly receive broadcast frames. Assumes that only * broadcast filters are passed. + * + * Returns status indicating success or failure; **/ -static -void i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, const char *vsi_name, - struct i40e_mac_filter *f) +static i40e_status +i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, const char *vsi_name, + struct i40e_mac_filter *f) { bool enable = f->state == I40E_FILTER_NEW; struct i40e_hw *hw = &vsi->back->hw; @@ -2008,15 +2039,13 @@ void i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, const char *vsi_name, NULL); } - if (aq_ret) { + if (aq_ret) dev_warn(&vsi->back->pdev->dev, "Error %s setting broadcast promiscuous mode on %s\n", i40e_aq_str(hw, hw->aq.asq_last_status), vsi_name); - f->state = I40E_FILTER_FAILED; - } else if (enable) { - f->state = I40E_FILTER_ACTIVE; - } + + return aq_ret; } /** @@ -2030,7 +2059,8 @@ void i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, const char *vsi_name, int i40e_sync_vsi_filters(struct i40e_vsi *vsi) { struct hlist_head tmp_add_list, tmp_del_list; - struct i40e_mac_filter *f, *add_head = NULL; + struct i40e_mac_filter *f; + struct i40e_new_mac_filter *new, *add_head = NULL; struct i40e_hw *hw = &vsi->back->hw; unsigned int failed_filters = 0; unsigned int vlan_filters = 0; @@ -2084,8 +2114,17 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) continue; } if (f->state == I40E_FILTER_NEW) { - hash_del(&f->hlist); - hlist_add_head(&f->hlist, &tmp_add_list); + /* Create a temporary i40e_new_mac_filter */ + new = kzalloc(sizeof(*new), GFP_ATOMIC); + if (!new) + goto err_no_memory_locked; + + /* Store pointer to the real filter */ + new->f = f; + new->state = f->state; + + /* Add it to the hash list */ + hlist_add_head(&new->hlist, &tmp_add_list); } /* Count the number of active (current and new) VLAN @@ -2178,32 +2217,37 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) goto err_no_memory; num_add = 0; - hlist_for_each_entry_safe(f, h, &tmp_add_list, hlist) { + hlist_for_each_entry_safe(new, h, &tmp_add_list, hlist) { if (test_bit(__I40E_FILTER_OVERFLOW_PROMISC, &vsi->state)) { - f->state = I40E_FILTER_FAILED; + new->state = I40E_FILTER_FAILED; continue; } /* handle broadcast filters by updating the broadcast * promiscuous flag instead of adding a MAC filter. */ - if (is_broadcast_ether_addr(f->macaddr)) { - i40e_aqc_broadcast_filter(vsi, vsi_name, f); + if (is_broadcast_ether_addr(new->f->macaddr)) { + if (i40e_aqc_broadcast_filter(vsi, vsi_name, + new->f)) + new->state = I40E_FILTER_FAILED; + else + new->state = I40E_FILTER_ACTIVE; continue; } /* add to add array */ if (num_add == 0) - add_head = f; + add_head = new; cmd_flags = 0; - ether_addr_copy(add_list[num_add].mac_addr, f->macaddr); - if (f->vlan == I40E_VLAN_ANY) { + ether_addr_copy(add_list[num_add].mac_addr, + new->f->macaddr); + if (new->f->vlan == I40E_VLAN_ANY) { add_list[num_add].vlan_tag = 0; cmd_flags |= I40E_AQC_MACVLAN_ADD_IGNORE_VLAN; } else { add_list[num_add].vlan_tag = - cpu_to_le16((u16)(f->vlan)); + cpu_to_le16((u16)(new->f->vlan)); } add_list[num_add].queue_number = 0; /* set invalid match method for later detection */ @@ -2229,11 +2273,12 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) * the VSI's list. */ spin_lock_bh(&vsi->mac_filter_hash_lock); - hlist_for_each_entry_safe(f, h, &tmp_add_list, hlist) { - u64 key = i40e_addr_to_hkey(f->macaddr); - - hlist_del(&f->hlist); - hash_add(vsi->mac_filter_hash, &f->hlist, key); + hlist_for_each_entry_safe(new, h, &tmp_add_list, hlist) { + /* Only update the state if we're still NEW */ + if (new->f->state == I40E_FILTER_NEW) + new->f->state = new->state; + hlist_del(&new->hlist); + kfree(new); } spin_unlock_bh(&vsi->mac_filter_hash_lock); kfree(add_list); @@ -2394,8 +2439,8 @@ err_no_memory: /* Restore elements on the temporary add and delete lists */ spin_lock_bh(&vsi->mac_filter_hash_lock); err_no_memory_locked: - i40e_undo_filter_entries(vsi, &tmp_del_list); - i40e_undo_filter_entries(vsi, &tmp_add_list); + i40e_undo_del_filter_entries(vsi, &tmp_del_list); + i40e_undo_add_filter_entries(vsi, &tmp_add_list); spin_unlock_bh(&vsi->mac_filter_hash_lock); vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED; -- 2.20.1