MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
};
+struct mlxsw_sp_nexthop_group;
+
struct mlxsw_sp_fib_entry {
struct rhash_head ht_node;
struct mlxsw_sp_fib_key key;
u8 added:1;
u16 rif; /* used for action local */
struct mlxsw_sp_vr *vr;
+ struct list_head nexthop_group_node;
+ struct mlxsw_sp_nexthop_group *nh_group;
};
struct mlxsw_sp_fib {
struct delayed_work dw;
struct mlxsw_sp_port *mlxsw_sp_port;
unsigned char ha[ETH_ALEN];
+ struct list_head nexthop_list; /* list of nexthops using
+ * this neigh entry
+ */
};
static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
neigh_entry->rif = rif;
neigh_entry->n = n;
INIT_DELAYED_WORK(&neigh_entry->dw, mlxsw_sp_router_neigh_update_hw);
+ INIT_LIST_HEAD(&neigh_entry->nexthop_list);
return neigh_entry;
}
mlxsw_sp_router_neighs_update_work_schedule(mlxsw_sp);
}
+static void
+mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_neigh_entry *neigh_entry,
+ bool removing);
+
static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work)
{
struct mlxsw_sp_neigh_entry *neigh_entry =
} else {
neigh_entry->offloaded = true;
}
+ mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, false);
} else if (removing) {
mlxsw_reg_rauht_pack4(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE,
neigh_entry->rif,
} else {
neigh_entry->offloaded = false;
}
+ mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, true);
}
neigh_release(n);
rhashtable_destroy(&mlxsw_sp->router.neigh_ht);
}
+struct mlxsw_sp_nexthop {
+ struct list_head neigh_list_node; /* member of neigh entry list */
+ struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
+ * this belongs to
+ */
+ u8 should_offload:1, /* set indicates this neigh is connected and
+ * should be put to KVD linear area of this group.
+ */
+ offloaded:1, /* set in case the neigh is actually put into
+ * KVD linear area of this group.
+ */
+ update:1; /* set indicates that MAC of this neigh should be
+ * updated in HW
+ */
+ struct mlxsw_sp_neigh_entry *neigh_entry;
+};
+
+struct mlxsw_sp_nexthop_group {
+ struct list_head list; /* node in mlxsw->router.nexthop_group_list */
+ struct list_head fib_list; /* list of fib entries that use this group */
+ u8 adj_index_valid:1;
+ u32 adj_index;
+ u16 ecmp_size;
+ u16 count;
+ struct mlxsw_sp_nexthop nexthops[0];
+};
+
+static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_vr *vr,
+ u32 adj_index, u16 ecmp_size,
+ u32 new_adj_index,
+ u16 new_ecmp_size)
+{
+ char raleu_pl[MLXSW_REG_RALEU_LEN];
+
+ mlxsw_reg_raleu_pack(raleu_pl, vr->proto, vr->id,
+ adj_index, ecmp_size,
+ new_adj_index, new_ecmp_size);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
+}
+
+static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop_group *nh_grp,
+ u32 old_adj_index, u16 old_ecmp_size)
+{
+ struct mlxsw_sp_fib_entry *fib_entry;
+ struct mlxsw_sp_vr *vr = NULL;
+ int err;
+
+ list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
+ if (vr == fib_entry->vr)
+ continue;
+ vr = fib_entry->vr;
+ err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr,
+ old_adj_index,
+ old_ecmp_size,
+ nh_grp->adj_index,
+ nh_grp->ecmp_size);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
+ struct mlxsw_sp_nexthop *nh)
+{
+ struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
+ char ratr_pl[MLXSW_REG_RATR_LEN];
+
+ mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
+ true, adj_index, neigh_entry->rif);
+ mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
+}
+
+static int
+mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop_group *nh_grp)
+{
+ u32 adj_index = nh_grp->adj_index; /* base */
+ struct mlxsw_sp_nexthop *nh;
+ int i;
+ int err;
+
+ for (i = 0; i < nh_grp->count; i++) {
+ nh = &nh_grp->nexthops[i];
+
+ if (!nh->should_offload) {
+ nh->offloaded = 0;
+ continue;
+ }
+
+ if (nh->update) {
+ err = mlxsw_sp_nexthop_mac_update(mlxsw_sp,
+ adj_index, nh);
+ if (err)
+ return err;
+ nh->update = 0;
+ nh->offloaded = 1;
+ }
+ adj_index++;
+ }
+ return 0;
+}
+
+static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry);
+
+static int
+mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop_group *nh_grp)
+{
+ struct mlxsw_sp_fib_entry *fib_entry;
+ int err;
+
+ list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
+ err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static void
+mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop_group *nh_grp)
+{
+ struct mlxsw_sp_nexthop *nh;
+ bool offload_change = false;
+ u32 adj_index;
+ u16 ecmp_size = 0;
+ bool old_adj_index_valid;
+ u32 old_adj_index;
+ u16 old_ecmp_size;
+ int ret;
+ int i;
+ int err;
+
+ for (i = 0; i < nh_grp->count; i++) {
+ nh = &nh_grp->nexthops[i];
+
+ if (nh->should_offload ^ nh->offloaded) {
+ offload_change = true;
+ if (nh->should_offload)
+ nh->update = 1;
+ }
+ if (nh->should_offload)
+ ecmp_size++;
+ }
+ if (!offload_change) {
+ /* Nothing was added or removed, so no need to reallocate. Just
+ * update MAC on existing adjacency indexes.
+ */
+ err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp);
+ if (err) {
+ dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
+ goto set_trap;
+ }
+ return;
+ }
+ if (!ecmp_size)
+ /* No neigh of this group is connected so we just set
+ * the trap and let everthing flow through kernel.
+ */
+ goto set_trap;
+
+ ret = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size);
+ if (ret < 0) {
+ /* We ran out of KVD linear space, just set the
+ * trap and let everything flow through kernel.
+ */
+ dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
+ goto set_trap;
+ }
+ adj_index = ret;
+ old_adj_index_valid = nh_grp->adj_index_valid;
+ old_adj_index = nh_grp->adj_index;
+ old_ecmp_size = nh_grp->ecmp_size;
+ nh_grp->adj_index_valid = 1;
+ nh_grp->adj_index = adj_index;
+ nh_grp->ecmp_size = ecmp_size;
+ err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp);
+ if (err) {
+ dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
+ goto set_trap;
+ }
+
+ if (!old_adj_index_valid) {
+ /* The trap was set for fib entries, so we have to call
+ * fib entry update to unset it and use adjacency index.
+ */
+ err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
+ if (err) {
+ dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
+ goto set_trap;
+ }
+ return;
+ }
+
+ err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
+ old_adj_index, old_ecmp_size);
+ mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
+ if (err) {
+ dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
+ goto set_trap;
+ }
+ return;
+
+set_trap:
+ old_adj_index_valid = nh_grp->adj_index_valid;
+ nh_grp->adj_index_valid = 0;
+ for (i = 0; i < nh_grp->count; i++) {
+ nh = &nh_grp->nexthops[i];
+ nh->offloaded = 0;
+ }
+ err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
+ if (err)
+ dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
+ if (old_adj_index_valid)
+ mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
+}
+
+static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
+ bool removing)
+{
+ if (!removing && !nh->should_offload)
+ nh->should_offload = 1;
+ else if (removing && nh->offloaded)
+ nh->should_offload = 0;
+ nh->update = 1;
+}
+
+static void
+mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_neigh_entry *neigh_entry,
+ bool removing)
+{
+ struct mlxsw_sp_nexthop *nh;
+
+ /* Take RTNL mutex here to prevent lists from changes */
+ rtnl_lock();
+ list_for_each_entry(nh, &neigh_entry->nexthop_list,
+ neigh_list_node) {
+ __mlxsw_sp_nexthop_neigh_update(nh, removing);
+ mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
+ }
+ rtnl_unlock();
+}
+
+static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop_group *nh_grp,
+ struct mlxsw_sp_nexthop *nh,
+ struct fib_nh *fib_nh)
+{
+ struct mlxsw_sp_neigh_entry *neigh_entry;
+ u32 gwip = ntohl(fib_nh->nh_gw);
+ struct net_device *dev = fib_nh->nh_dev;
+ struct neighbour *n;
+ u8 nud_state;
+
+ neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &gwip,
+ sizeof(gwip), dev);
+ if (!neigh_entry) {
+ __be32 gwipn = htonl(gwip);
+
+ n = neigh_create(&arp_tbl, &gwipn, dev);
+ if (IS_ERR(n))
+ return PTR_ERR(n);
+ neigh_event_send(n, NULL);
+ neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &gwip,
+ sizeof(gwip), dev);
+ if (!neigh_entry) {
+ neigh_release(n);
+ return -EINVAL;
+ }
+ } else {
+ /* Take a reference of neigh here ensuring that neigh would
+ * not be detructed before the nexthop entry is finished.
+ * The second branch takes the reference in neith_create()
+ */
+ n = neigh_entry->n;
+ neigh_clone(n);
+ }
+ nh->nh_grp = nh_grp;
+ nh->neigh_entry = neigh_entry;
+ list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
+ read_lock_bh(&n->lock);
+ nud_state = n->nud_state;
+ read_unlock_bh(&n->lock);
+ __mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID));
+
+ return 0;
+}
+
+static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh)
+{
+ struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
+
+ list_del(&nh->neigh_list_node);
+ neigh_release(neigh_entry->n);
+}
+
+static struct mlxsw_sp_nexthop_group *
+mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
+{
+ struct mlxsw_sp_nexthop_group *nh_grp;
+ struct mlxsw_sp_nexthop *nh;
+ struct fib_nh *fib_nh;
+ size_t alloc_size;
+ int i;
+ int err;
+
+ alloc_size = sizeof(*nh_grp) +
+ fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
+ nh_grp = kzalloc(alloc_size, GFP_KERNEL);
+ if (!nh_grp)
+ return ERR_PTR(-ENOMEM);
+ INIT_LIST_HEAD(&nh_grp->fib_list);
+ nh_grp->count = fi->fib_nhs;
+ for (i = 0; i < nh_grp->count; i++) {
+ nh = &nh_grp->nexthops[i];
+ fib_nh = &fi->fib_nh[i];
+ err = mlxsw_sp_nexthop_init(mlxsw_sp, nh_grp, nh, fib_nh);
+ if (err)
+ goto err_nexthop_init;
+ }
+ list_add_tail(&nh_grp->list, &mlxsw_sp->router.nexthop_group_list);
+ mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
+ return nh_grp;
+
+err_nexthop_init:
+ for (i--; i >= 0; i--)
+ mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
+ kfree(nh_grp);
+ return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop_group *nh_grp)
+{
+ struct mlxsw_sp_nexthop *nh;
+ int i;
+
+ list_del(&nh_grp->list);
+ for (i = 0; i < nh_grp->count; i++) {
+ nh = &nh_grp->nexthops[i];
+ mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
+ }
+ kfree(nh_grp);
+}
+
+static bool mlxsw_sp_nexthop_match(struct mlxsw_sp_nexthop *nh,
+ struct fib_info *fi)
+{
+ int i;
+
+ for (i = 0; i < fi->fib_nhs; i++) {
+ struct fib_nh *fib_nh = &fi->fib_nh[i];
+ u32 gwip = ntohl(fib_nh->nh_gw);
+
+ if (memcmp(nh->neigh_entry->key.addr,
+ &gwip, sizeof(u32)) == 0 &&
+ nh->neigh_entry->key.dev == fib_nh->nh_dev)
+ return true;
+ }
+ return false;
+}
+
+static bool mlxsw_sp_nexthop_group_match(struct mlxsw_sp_nexthop_group *nh_grp,
+ struct fib_info *fi)
+{
+ int i;
+
+ if (nh_grp->count != fi->fib_nhs)
+ return false;
+ for (i = 0; i < nh_grp->count; i++) {
+ struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
+
+ if (!mlxsw_sp_nexthop_match(nh, fi))
+ return false;
+ }
+ return true;
+}
+
+static struct mlxsw_sp_nexthop_group *
+mlxsw_sp_nexthop_group_find(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
+{
+ struct mlxsw_sp_nexthop_group *nh_grp;
+
+ list_for_each_entry(nh_grp, &mlxsw_sp->router.nexthop_group_list,
+ list) {
+ if (mlxsw_sp_nexthop_group_match(nh_grp, fi))
+ return nh_grp;
+ }
+ return NULL;
+}
+
+static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry,
+ struct fib_info *fi)
+{
+ struct mlxsw_sp_nexthop_group *nh_grp;
+
+ nh_grp = mlxsw_sp_nexthop_group_find(mlxsw_sp, fi);
+ if (!nh_grp) {
+ nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi);
+ if (IS_ERR(nh_grp))
+ return PTR_ERR(nh_grp);
+ }
+ list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
+ fib_entry->nh_group = nh_grp;
+ return 0;
+}
+
+static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry)
+{
+ struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
+
+ list_del(&fib_entry->nexthop_group_node);
+ if (!list_empty(&nh_grp->fib_list))
+ return;
+ mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp);
+}
+
static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
char rgcr_pl[MLXSW_REG_RGCR_LEN];
{
int err;
+ INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_group_list);
err = __mlxsw_sp_router_init(mlxsw_sp);
if (err)
return err;
__mlxsw_sp_router_fini(mlxsw_sp);
}
+static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry,
+ enum mlxsw_reg_ralue_op op)
+{
+ char ralue_pl[MLXSW_REG_RALUE_LEN];
+ u32 *p_dip = (u32 *) fib_entry->key.addr;
+ struct mlxsw_sp_vr *vr = fib_entry->vr;
+ enum mlxsw_reg_ralue_trap_action trap_action;
+ u16 trap_id = 0;
+ u32 adjacency_index = 0;
+ u16 ecmp_size = 0;
+
+ /* In case the nexthop group adjacency index is valid, use it
+ * with provided ECMP size. Otherwise, setup trap and pass
+ * traffic to kernel.
+ */
+ if (fib_entry->nh_group->adj_index_valid) {
+ trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
+ adjacency_index = fib_entry->nh_group->adj_index;
+ ecmp_size = fib_entry->nh_group->ecmp_size;
+ } else {
+ trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
+ trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
+ }
+
+ mlxsw_reg_ralue_pack4(ralue_pl, vr->proto, op, vr->id,
+ fib_entry->key.prefix_len, *p_dip);
+ mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
+ adjacency_index, ecmp_size);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
+}
+
static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_entry *fib_entry,
enum mlxsw_reg_ralue_op op)
{
switch (fib_entry->type) {
case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
- return -EINVAL;
+ return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp, fib_entry, op);
case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op);
case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
fib_entry->rif = r->rif;
return 0;
}
- return -EINVAL;
+ fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
+ return mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fi);
+}
+
+static void
+mlxsw_sp_router_fib4_entry_fini(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry)
+{
+ if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_REMOTE)
+ return;
+ mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry);
}
static int
return 0;
err_alloc_info:
+ mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry);
err_fib4_entry_init:
mlxsw_sp_fib_entry_destroy(fib_entry);
err_fib_entry_create:
err_fib_entry_add:
mlxsw_sp_fib_entry_remove(vr->fib, fib_entry);
err_fib_entry_insert:
+ mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry);
mlxsw_sp_fib_entry_destroy(fib_entry);
mlxsw_sp_vr_put(mlxsw_sp, vr);
return err;
}
mlxsw_sp_fib_entry_del(mlxsw_sp_port->mlxsw_sp, fib_entry);
mlxsw_sp_fib_entry_remove(vr->fib, fib_entry);
+ mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry);
mlxsw_sp_fib_entry_destroy(fib_entry);
mlxsw_sp_vr_put(mlxsw_sp, vr);
return 0;