* netdev_map consistent in this case. From the devmap side BPF programs
* calling into these operations are the same as multiple user space threads
* making system calls.
+ *
+ * Finally, any of the above may race with a netdev_unregister notifier. The
+ * unregister notifier must search for net devices in the map structure that
+ * contain a reference to the net device and remove them. This is a two step
+ * process (a) dereference the bpf_dtab_netdev object in netdev_map and (b)
+ * check to see if the ifindex is the same as the net_device being removed.
+ * Unfortunately, the xchg() operations do not protect against this. To avoid
+ * potentially removing incorrect objects the dev_map_list_mutex protects
+ * conflicting netdev unregister and BPF syscall operations. Updates and
+ * deletes from a BPF program (done in rcu critical section) are blocked
+ * because of this mutex.
*/
#include <linux/bpf.h>
#include <linux/jhash.h>
struct bpf_map map;
struct bpf_dtab_netdev **netdev_map;
unsigned long int __percpu *flush_needed;
+ struct list_head list;
};
+static DEFINE_MUTEX(dev_map_list_mutex);
+static LIST_HEAD(dev_map_list);
+
static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
{
struct bpf_dtab *dtab;
if (!dtab->netdev_map)
goto free_dtab;
+ mutex_lock(&dev_map_list_mutex);
+ list_add_tail(&dtab->list, &dev_map_list);
+ mutex_unlock(&dev_map_list_mutex);
return &dtab->map;
free_dtab:
cpu_relax();
}
+ /* Although we should no longer have datapath or bpf syscall operations
+ * at this point we we can still race with netdev notifier, hence the
+ * lock.
+ */
+ mutex_lock(&dev_map_list_mutex);
for (i = 0; i < dtab->map.max_entries; i++) {
struct bpf_dtab_netdev *dev;
/* At this point bpf program is detached and all pending operations
* _must_ be complete
*/
+ list_del(&dtab->list);
+ mutex_unlock(&dev_map_list_mutex);
free_percpu(dtab->flush_needed);
bpf_map_area_free(dtab->netdev_map);
kfree(dtab);
* the driver tear down ensures all soft irqs are complete before
* removing the net device in the case of dev_put equals zero.
*/
+ mutex_lock(&dev_map_list_mutex);
old_dev = xchg(&dtab->netdev_map[k], NULL);
if (old_dev)
call_rcu(&old_dev->rcu, __dev_map_entry_free);
+ mutex_unlock(&dev_map_list_mutex);
return 0;
}
* Remembering the driver side flush operation will happen before the
* net device is removed.
*/
+ mutex_lock(&dev_map_list_mutex);
old_dev = xchg(&dtab->netdev_map[i], dev);
if (old_dev)
call_rcu(&old_dev->rcu, __dev_map_entry_free);
+ mutex_unlock(&dev_map_list_mutex);
return 0;
}
.map_update_elem = dev_map_update_elem,
.map_delete_elem = dev_map_delete_elem,
};
+
+static int dev_map_notification(struct notifier_block *notifier,
+ ulong event, void *ptr)
+{
+ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+ struct bpf_dtab *dtab;
+ int i;
+
+ switch (event) {
+ case NETDEV_UNREGISTER:
+ mutex_lock(&dev_map_list_mutex);
+ list_for_each_entry(dtab, &dev_map_list, list) {
+ for (i = 0; i < dtab->map.max_entries; i++) {
+ struct bpf_dtab_netdev *dev;
+
+ dev = dtab->netdev_map[i];
+ if (!dev ||
+ dev->dev->ifindex != netdev->ifindex)
+ continue;
+ dev = xchg(&dtab->netdev_map[i], NULL);
+ if (dev)
+ call_rcu(&dev->rcu,
+ __dev_map_entry_free);
+ }
+ }
+ mutex_unlock(&dev_map_list_mutex);
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block dev_map_notifier = {
+ .notifier_call = dev_map_notification,
+};
+
+static int __init dev_map_init(void)
+{
+ register_netdevice_notifier(&dev_map_notifier);
+ return 0;
+}
+
+subsys_initcall(dev_map_init);