aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLexi Winter <ivy@FreeBSD.org>2025-08-03 20:09:33 +0100
committerLexi Winter <ivy@FreeBSD.org>2025-08-03 20:09:33 +0100
commitd6e02a423d65d897bbabeabc190cb054e6731842 (patch)
treeaa059aa8e9f5d4f505e9fa1c0390d50d7e27a665
parentd1095367eb5116a0b9a1b9fb6eb913a77eeb5e5d (diff)
-rw-r--r--sys/conf/files1
-rw-r--r--sys/modules/if_bridge/Makefile3
-rw-r--r--sys/net/bridge_fdb.c207
-rw-r--r--sys/net/bridge_fdb.h239
-rw-r--r--sys/net/if_bridge.c61
5 files changed, 497 insertions, 14 deletions
diff --git a/sys/conf/files b/sys/conf/files
index b7c19fae0b8e..74935b8b90b3 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -4154,6 +4154,7 @@ net/ieee8023ad_lacp.c optional lagg
net/if.c standard
net/ifq.c standard
net/if_bridge.c optional bridge inet | if_bridge inet
+net/bridge_fdb.c optional bridge inet | if_bridge inet
net/if_clone.c standard
net/if_dead.c standard
net/if_disc.c optional disc
diff --git a/sys/modules/if_bridge/Makefile b/sys/modules/if_bridge/Makefile
index 6fb47110c33f..5c0e56ea0a1d 100644
--- a/sys/modules/if_bridge/Makefile
+++ b/sys/modules/if_bridge/Makefile
@@ -1,5 +1,6 @@
.PATH: ${SRCTOP}/sys/net
KMOD= if_bridge
-SRCS= if_bridge.c opt_inet.h opt_inet6.h opt_carp.h
+SRCS= if_bridge.c bridge_fdb.c opt_inet.h opt_inet6.h opt_carp.h
+CWARNFLAGS.clang+= -Wthread-safety
.include <bsd.kmod.mk>
diff --git a/sys/net/bridge_fdb.c b/sys/net/bridge_fdb.c
new file mode 100644
index 000000000000..e678588e78ec
--- /dev/null
+++ b/sys/net/bridge_fdb.c
@@ -0,0 +1,207 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause OR ISC
+ *
+ * Copyright (c) 2025 Lexi Winter.
+ */
+
+#include <sys/param.h>
+#include <sys/pctrie.h>
+
+#include <vm/uma.h>
+
+#include <net/bridge_fdb.h>
+#include <net/ethernet.h>
+#include <net/vnet.h>
+
+/*
+ * Per-vnet UMA zone that fdb_host_t entries are allocated from
+ * (created in fdb_vnet_init(), used by fdb_add_or_replace()).
+ */
+VNET_DEFINE_STATIC(uma_zone_t, fdb_host_zone);
+#define V_fdb_host_zone VNET(fdb_host_zone)
+
+/*
+ * Our pctrie: the per-vnet zone that backs the trie's internal nodes, the
+ * SMR handle obtained from that zone in fdb_vnet_init(), and the node
+ * allocator callbacks handed to PCTRIE_DEFINE_SMR().
+ */
+
+VNET_DEFINE_STATIC(uma_zone_t, fdb_node_zone);
+#define V_fdb_node_zone VNET(fdb_node_zone)
+
+VNET_DEFINE_STATIC(smr_t, fdb_smr);
+#define V_fdb_smr VNET(fdb_smr)
+
+static void *fdb_node_alloc(struct pctrie *);
+static void fdb_node_free(struct pctrie *, void *);
+
+/*
+ * Instantiate the FDB_PCTRIE_*() helpers used throughout this file for
+ * struct __fdb_host, keyed on its __fdh_key member.
+ */
+PCTRIE_DEFINE_SMR(FDB, __fdb_host, __fdh_key, fdb_node_alloc, fdb_node_free,
+ V_fdb_smr);
+
+/*
+ * pctrie node allocator registered through PCTRIE_DEFINE_SMR().  Hands out
+ * a zero-filled node from the per-vnet node zone without sleeping; the
+ * trie argument is not needed.
+ */
+static void *
+fdb_node_alloc(struct pctrie *trie __unused)
+{
+	void *node;
+
+	node = uma_zalloc_smr(V_fdb_node_zone, M_NOWAIT | M_ZERO);
+	return (node);
+}
+
+/*
+ * pctrie node deallocator registered through PCTRIE_DEFINE_SMR().  Gives
+ * the node back to the per-vnet node zone using the SMR-aware free.
+ */
+static void
+fdb_node_free(struct pctrie *trie __unused, void *addr)
+{
+	void *node = addr;
+
+	uma_zfree_smr(V_fdb_node_zone, node);
+}
+
+/*
+ * Per-vnet setup: create the SMR-enabled zone for pctrie nodes, remember
+ * that zone's SMR handle for the trie code, then create the zone that
+ * host entries come from.
+ */
+void
+fdb_vnet_init(void)
+{
+	uma_zone_t nodes, hosts;
+
+	nodes = uma_zcreate("fdb node", pctrie_node_size(),
+	    NULL, NULL, pctrie_zone_init, NULL,
+	    PCTRIE_PAD, UMA_ZONE_VM | UMA_ZONE_SMR);
+	V_fdb_node_zone = nodes;
+	V_fdb_smr = uma_zone_get_smr(nodes);
+
+	hosts = uma_zcreate("fdb host", sizeof(fdb_host_t),
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+	V_fdb_host_zone = hosts;
+}
+
+/*
+ * Per-vnet teardown: destroy the two zones created by fdb_vnet_init(),
+ * the pctrie node zone first and the host zone second.
+ */
+void
+fdb_vnet_fini(void)
+{
+	uma_zone_t nodes = V_fdb_node_zone;
+	uma_zone_t hosts = V_fdb_host_zone;
+
+	uma_zdestroy(nodes);
+	uma_zdestroy(hosts);
+}
+
+/*
+ * Initialise an fdb: empty host trie, zero host count and the write mutex.
+ *
+ * The host counter is reset explicitly so that fdb_size() is correct even
+ * when the caller's storage was not zero-filled.  The public fdb_maxaddr
+ * and fdb_timeout fields are left for the caller to set.
+ */
+void
+fdb_create(fdb_t *fdb)
+{
+	pctrie_init(&fdb->__fdb_trie);
+	fdb->__fdb_curaddr = 0;
+	mtx_init(&fdb->__fdb_mtx, "fdb", NULL, MTX_DEF);
+}
+
+/*
+ * Finalise an fdb by destroying its write mutex.
+ *
+ * NOTE(review): the header describes this as "releasing all of its hosts",
+ * but no host entries or trie nodes are released here; confirm whether
+ * callers are expected to empty the table first.
+ */
+void
+fdb_destroy(fdb_t *fdb)
+{
+	mtx_destroy(&fdb->__fdb_mtx);
+}
+
+/*
+ * Look up the host entry for (addr, vlan).
+ *
+ * Must be called inside the net epoch; the fdb lock is not required.
+ * Because the caller does not hold the write lock, the SMR-protected
+ * lookup is used so that trie nodes freed by a concurrent writer are not
+ * touched while they are being walked.
+ *
+ * On success stores the entry in *phost and returns 0; returns ENOENT
+ * (leaving *phost untouched) when no such entry exists.
+ */
+int
+fdb_find(fdb_t *fdb, const uint8_t *addr, ether_vlanid_t vlan,
+    fdb_host_t **phost)
+{
+	fdb_host_t *host;
+
+	NET_EPOCH_ASSERT();
+
+	host = FDB_PCTRIE_LOOKUP_UNLOCKED(&fdb->__fdb_trie,
+	    fdb_key(addr, vlan));
+	if (host == NULL)
+		return (ENOENT);
+
+	*phost = host;
+	return (0);
+}
+
+/*
+ * Insert a host entry for (addr, vlan), or update the existing one.
+ *
+ * udata and flags are stored in the entry.  Takes and releases the fdb
+ * write lock internally.
+ *
+ * Returns 0 on success, ENOMEM if a new host entry could not be allocated,
+ * or the error reported by the pctrie insert.
+ */
+int
+fdb_add_or_replace(fdb_t *fdb, const uint8_t *addr, ether_vlanid_t vlan,
+    void *udata, uint8_t flags)
+{
+	fdb_host_t *host;
+	fdb_key_t key;
+	int error;
+
+	key = fdb_key(addr, vlan);
+	error = 0;
+
+	fdb_lock(fdb);
+
+	/* We hold the write lock, so the locked lookup is sufficient. */
+	host = FDB_PCTRIE_LOOKUP(&fdb->__fdb_trie, key);
+	if (host != NULL) {
+		/*
+		 * Existing entry: overwrite its payload.  Writers are
+		 * serialised by the lock, but readers are not, so use
+		 * atomic stores.  (atomic_set_*() would OR the new value
+		 * into the old one rather than replace it.)
+		 */
+		atomic_store_ptr(&host->__fdh_udata, (uintptr_t)udata);
+		atomic_store_8(&host->__fdh_flags, flags);
+		goto out;
+	}
+
+	/* No entry yet: allocate and fill one before publishing it. */
+	host = uma_zalloc(V_fdb_host_zone, M_NOWAIT | M_ZERO);
+	if (host == NULL) {
+		error = ENOMEM;
+		goto out;
+	}
+
+	host->__fdh_key = key;
+	host->__fdh_udata = (uintptr_t)udata;
+	host->__fdh_flags = flags;
+
+	error = FDB_PCTRIE_INSERT(&fdb->__fdb_trie, host);
+	if (error != 0) {
+		/* Never became visible to readers, so free it directly. */
+		uma_zfree(V_fdb_host_zone, host);
+		goto out;
+	}
+
+	atomic_add_32(&fdb->__fdb_curaddr, 1);
+out:
+	fdb_unlock(fdb);
+	return (error);
+}
+
+/*
+ * Remove the host entry for (addr, vlan) from the table.
+ *
+ * Takes and releases the fdb write lock internally.  Returns 0 when an
+ * entry was removed, or ENOENT when there was none.
+ *
+ * NOTE(review): the removed fdb_host_t is only unlinked from the trie; it
+ * is not handed back to V_fdb_host_zone in this function.
+ */
+int
+fdb_remove(fdb_t *fdb, const uint8_t *addr, ether_vlanid_t vlan)
+{
+	struct pctrie_iter iter;
+	fdb_key_t key;
+	int error;
+
+	key = fdb_key(addr, vlan);
+
+	fdb_lock(fdb);
+	pctrie_iter_init(&iter, &fdb->__fdb_trie);
+
+	if (FDB_PCTRIE_ITER_LOOKUP(&iter, key) != NULL) {
+		FDB_PCTRIE_ITER_REMOVE(&iter);
+		atomic_subtract_32(&fdb->__fdb_curaddr, 1);
+		error = 0;
+	} else {
+		error = ENOENT;
+	}
+
+	fdb_unlock(fdb);
+	return (error);
+}
+
+/*
+ * Remove the entries for 'addr' on every vlan.  Takes and releases the fdb
+ * write lock internally.
+ *
+ * NOTE(review): this walks forward from the key (addr, vlan 0) and stops at
+ * the first entry whose address differs, so it relies on all keys for one
+ * address being adjacent in pctrie iteration order.  fdb_key() lays the key
+ * out with memcpy(); confirm that adjacency holds on little-endian hosts.
+ *
+ * NOTE(review): removed fdb_host_t entries are only unlinked from the trie;
+ * they are not handed back to V_fdb_host_zone in this function.
+ */
+void
+fdb_remove_all(fdb_t *fdb, const uint8_t *addr)
+{
+ struct pctrie_iter it;
+ fdb_host_t *host;
+
+ fdb_lock(fdb);
+
+ /* Get an iterator to the first host >= the one we want */
+ pctrie_iter_init(&it, &fdb->__fdb_trie);
+ host = FDB_PCTRIE_ITER_LOOKUP_GE(&it, fdb_key(addr, 0));
+
+ /* Iterate while the host matches */
+ while (host != NULL) {
+ /* The first ETHER_ADDR_LEN bytes of the key are the address. */
+ if (memcmp(addr, &host->__fdh_key, ETHER_ADDR_LEN) != 0)
+ break;
+
+ /* Unlink the current entry, then advance before counting it. */
+ FDB_PCTRIE_ITER_REMOVE(&it);
+ host = FDB_PCTRIE_ITER_NEXT(&it);
+ atomic_subtract_32(&fdb->__fdb_curaddr, 1);
+ }
+
+ fdb_unlock(fdb);
+}
+
+/*
+ * Prepare 'iter' for walking the host table of 'fdb'.  The caller must
+ * already hold the fdb lock; this is asserted.
+ */
+void
+fdb_iter_init(fdb_t *fdb, fdb_iter_t *iter)
+{
+	struct pctrie *trie = &fdb->__fdb_trie;
+
+	fdb_assert_locked(fdb);
+	pctrie_iter_init(iter, trie);
+}
+
+/*
+ * Advance an iterator set up by fdb_iter_init() and hand back the host it
+ * now refers to, or NULL once the table is exhausted.
+ */
+fdb_host_t *
+fdb_iter_next(fdb_iter_t *iter)
+{
+	fdb_host_t *next;
+
+	next = FDB_PCTRIE_ITER_NEXT(iter);
+	return (next);
+}
diff --git a/sys/net/bridge_fdb.h b/sys/net/bridge_fdb.h
new file mode 100644
index 000000000000..668856ff24f6
--- /dev/null
+++ b/sys/net/bridge_fdb.h
@@ -0,0 +1,239 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause OR ISC
+ *
+ * Copyright (c) 2025 Lexi Winter.
+ *
+ * A forwarding table suitable for use in an Ethernet bridge.
+ *
+ * An fdb stores (addr,vlan) pairs associated with a bridge, along with an
+ * associated data pointer. When a vlan id is provided, the fdb implements
+ * IVL; if an SVL fdb is required, pass the vlan id as 0.
+ *
+ * The lifetime of host entries is managed using the net epoch, therefore all
+ * operations on the table must be done inside the net epoch. Returned data
+ * (e.g., host entries or iterators) are valid until the end of the epoch.
+ */
+
+#ifndef _NET_BRIDGE_FDB_H_
+#define _NET_BRIDGE_FDB_H_
+
+/*
+ * fdb host entry flags
+ */
+#define FDH_STATIC (1U << 0) /* entry does not expire */
+
+#ifdef _KERNEL
+
+#include <sys/param.h>
+#include <sys/cdefs.h>
+#include <sys/epoch.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/pctrie.h>
+#include <sys/systm.h>
+
+#include <machine/atomic.h>
+
+#include <net/ethernet.h>
+
+/*
+ * Initialise and finalise the fdb subsystem for a particular vnet.
+ */
+void fdb_vnet_init(void);
+void fdb_vnet_fini(void);
+
+/*
+ * The lookup key for host entries. This is a 6-byte Ethernet address followed
+ * by a 2-byte vlan id. Do not construct keys manually; use fdb_key().
+ */
+typedef uint64_t fdb_key_t;
+
+/*
+ * Build the lookup key for (addr, vlan).
+ *
+ * In memory the key is the ETHER_ADDR_LEN address bytes followed by the
+ * vlan id with its most significant byte first.
+ */
+[[nodiscard]] static __inline __unused fdb_key_t
+fdb_key(const uint8_t *addr, ether_vlanid_t vlan)
+{
+	fdb_key_t key;
+	uint8_t *bytes = (uint8_t *)&key;
+
+	_Static_assert((ETHER_ADDR_LEN + sizeof(vlan)) == sizeof(key),
+	    "fdb key has the wrong size");
+
+	memcpy(bytes, addr, ETHER_ADDR_LEN);
+
+	/*
+	 * Storing the vlan id in network byte order isn't strictly needed,
+	 * but it keeps the ids in ascending byte order, which is a bit
+	 * nicer for userland tools.
+	 */
+	bytes[ETHER_ADDR_LEN] = (uint8_t)(vlan >> 8);
+	bytes[ETHER_ADDR_LEN + 1] = (uint8_t)(vlan & 0xff);
+
+	return (key);
+}
+
+/*
+ * An fdb host entry. The contents of this structure are private and should
+ * not be inspected or modified by the user; use the fdb_host_*() accessors
+ * below instead.
+ */
+typedef struct __fdb_host {
+ /* private */
+ fdb_key_t __fdh_key; /* pctrie key, as built by fdb_key() */
+ uint8_t __fdh_flags; /* address flags (FDH_*), read atomically */
+ uintptr_t __fdh_udata; /* user data pointer, read atomically */
+ uint32_t __fdh_expire; /* expiration time */
+} fdb_host_t;
+
+/*
+ * Fetch the user data pointer for an fdb host.  The result is
+ * 'const void *' for a 'const fdb_host_t *' argument and 'void *' for a
+ * 'fdb_host_t *' argument.  The argument is parenthesised in the expansion
+ * so that any pointer-valued expression may be passed.
+ */
+#define fdb_host_udata(__fdh) (_Generic((__fdh), \
+	const fdb_host_t *: \
+	    (const void *)atomic_load_ptr(&(__fdh)->__fdh_udata), \
+	fdb_host_t *: \
+	    (void *)atomic_load_ptr(&(__fdh)->__fdh_udata)))
+
+/*
+ * Return a pointer to the Ethernet address of an fdb entry.  The address
+ * occupies the leading bytes of the entry's key, so the pointer refers to
+ * storage inside the entry itself.  Must be called inside the net epoch.
+ */
+[[nodiscard]] static __inline __unused const uint8_t *
+fdb_host_addr(const fdb_host_t *host)
+{
+	const uint8_t *keybytes;
+
+	NET_EPOCH_ASSERT();
+
+	keybytes = (const uint8_t *)&host->__fdh_key;
+	return (keybytes);
+}
+
+/* Atomically read the flags (FDH_*) of an fdb entry. */
+[[nodiscard]] static __inline __unused uint8_t
+fdb_host_flags(const fdb_host_t *__host)
+{
+	uint8_t flags;
+
+	flags = atomic_load_8(&__host->__fdh_flags);
+	return (flags);
+}
+
+/*
+ * Get the VLAN ID from an fdb entry, in host byte order.
+ * Must be called inside the net epoch.
+ *
+ * fdb_key() stores the id after the address with its most significant
+ * byte first.  Assembling the two bytes with shifts already yields the
+ * host-order value on any architecture, so no further ntohs() is applied
+ * (doing so would swap the bytes a second time on little-endian hosts).
+ */
+[[nodiscard]] static __inline __unused ether_vlanid_t
+fdb_host_vid(const fdb_host_t *host)
+{
+	const uint8_t *pkey;
+	ether_vlanid_t vid;
+
+	NET_EPOCH_ASSERT();
+
+	pkey = (const uint8_t *)&host->__fdh_key + ETHER_ADDR_LEN;
+	vid = (ether_vlanid_t)pkey[0] << 8;
+	vid |= (ether_vlanid_t)pkey[1];
+
+	return (vid);
+}
+
+/*
+ * An fdb instance. Must be initialised using fdb_create() and
+ * finalised using fdb_destroy().
+ */
+typedef struct fdb {
+ /* public: not read or written by the bridge_fdb.c code shown here */
+ uint32_t fdb_maxaddr; /* maximum number of hosts */
+ uint32_t fdb_timeout; /* how long until a host expires */
+
+ /* private */
+ struct pctrie __fdb_trie; /* the host table, keyed by fdb_key() */
+ struct mtx __fdb_mtx; /* write lock, see fdb_lock() */
+ uint32_t __fdb_curaddr; /* current number of hosts, see fdb_size() */
+} __lockable fdb_t;
+
+/* Create a new fdb */
+void fdb_create(fdb_t *);
+
+/* Destroy an fdb, releasing all of its hosts */
+void fdb_destroy(fdb_t *);
+
+/*
+ * Return the number of hosts currently in an fdb.  The counter is read
+ * atomically and the fdb lock is not taken.
+ */
+static __inline __unused uint32_t
+fdb_size(fdb_t *__fdb)
+{
+	uint32_t nhosts;
+
+	nhosts = atomic_load_32(&__fdb->__fdb_curaddr);
+	return (nhosts);
+}
+
+/*
+ * Operations on the host table.
+ */
+
+/*
+ * Lock the fdb. The lock is only used for writing, so locking doesn't affect
+ * readers. This means any updates to the fdb (or its hosts) must be atomic,
+ * even while the fdb is locked.
+ */
+
+/* Acquire the fdb write lock (the mutex embedded in the fdb). */
+static __inline __unused void
+fdb_lock(fdb_t *__fdb)
+	__locks_exclusive(*__fdb)
+	__lock_annotate(no_thread_safety_analysis)
+{
+	struct mtx *wlock = &__fdb->__fdb_mtx;
+
+	mtx_lock(wlock);
+}
+
+/* Release the fdb write lock taken by fdb_lock(). */
+static __inline __unused void
+fdb_unlock(fdb_t *__fdb)
+	__unlocks(*__fdb)
+	__lock_annotate(no_thread_safety_analysis)
+{
+	struct mtx *wlock = &__fdb->__fdb_mtx;
+
+	mtx_unlock(wlock);
+}
+
+/*
+ * Check that the current thread owns the fdb write lock; mtx_assert()
+ * panics when it does not.
+ */
+static __inline __unused void
+fdb_assert_locked(fdb_t *__fdb)
+	__asserts_exclusive(__fdb)
+{
+	struct mtx *wlock = &__fdb->__fdb_mtx;
+
+	mtx_assert(wlock, MA_OWNED);
+}
+
+/*
+ * Add a new fdb entry or, if an entry for this (addr,vlan) already exists,
+ * update it.
+ *
+ * Returns 0 on success, or an errno value on failure.
+ */
+int fdb_add_or_replace(fdb_t *fdb, const uint8_t *addr, ether_vlanid_t vlan,
+ void *udata, uint8_t flags);
+
+/*
+ * Find an existing host entry and return it into 'host'.
+ * Must be called inside NET_EPOCH.
+ *
+ * Returns 0 on success, or one of the following errors:
+ * ENOENT - The host was not found in the fdb.
+ */
+int fdb_find(fdb_t *fdb, const uint8_t *addr, ether_vlanid_t vlan,
+ fdb_host_t **host);
+
+/*
+ * Find or create a host entry.
+ * Must be called inside NET_EPOCH.
+ *
+ * NOTE: unlike fdb_find(), this prototype takes no 'host' output parameter,
+ * so the entry itself is not returned to the caller.
+ *
+ * Returns 0 on success, or one of the following errors:
+ * ENOMEM - fdb could not allocate memory for the new node.
+ */
+int fdb_find_or_create(fdb_t *fdb, const uint8_t *addr, ether_vlanid_t vlan);
+
+/*
+ * Remove a host on a specific vlan.
+ *
+ * Returns 0 on success, or one of the following errors:
+ * ENOENT - the fdb entry was not found.
+ */
+int fdb_remove(fdb_t *fdb, const uint8_t *addr, ether_vlanid_t vlan);
+
+/* Remove all hosts with this address on any vlan. */
+void fdb_remove_all(fdb_t *fdb, const uint8_t *addr);
+
+/*
+ * Iterate the fdb host table. Start by calling fdb_iter_init(), then call
+ * fdb_iter_next() to retrieve each entry. After reaching the end of the
+ * table, fdb_iter_next() returns NULL.
+ */
+
+typedef struct pctrie_iter fdb_iter_t;
+
+void fdb_iter_init(fdb_t *, fdb_iter_t *);
+[[nodiscard]] fdb_host_t *fdb_iter_next(fdb_iter_t *);
+
+#endif /* _KERNEL */
+#endif /* _NET_BRIDGE_FDB_H_ */
diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c
index 0a35fb4095fb..8ed290d833d5 100644
--- a/sys/net/if_bridge.c
+++ b/sys/net/if_bridge.c
@@ -130,6 +130,7 @@
#endif
#include <machine/in_cksum.h>
#include <netinet/if_ether.h>
+#include <net/bridge_fdb.h>
#include <net/bridgestp.h>
#include <net/if_bridgevar.h>
#include <net/if_llc.h>
@@ -292,6 +293,7 @@ struct bridge_softc {
CK_LIST_HEAD(, bridge_iflist) sc_iflist; /* member interface list */
CK_LIST_HEAD(, bridge_rtnode) *sc_rthash; /* our forwarding table */
CK_LIST_HEAD(, bridge_rtnode) sc_rtlist; /* list version of above */
+ fdb_t sc_fdb; /* forwarding database */
uint32_t sc_rthash_key; /* key for hash */
CK_LIST_HEAD(, bridge_iflist) sc_spanlist; /* span ports list */
struct bstp_state sc_stp; /* STP state */
@@ -656,6 +658,7 @@ static const char bridge_name[] = "bridge";
static void
vnet_bridge_init(const void *unused __unused)
{
+ fdb_vnet_init();
V_bridge_rtnode_zone = uma_zcreate("bridge_rtnode",
sizeof(struct bridge_rtnode), NULL, NULL, NULL, NULL,
@@ -675,6 +678,7 @@ VNET_SYSINIT(vnet_bridge_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
static void
vnet_bridge_uninit(const void *unused __unused)
{
+ fdb_vnet_fini();
ifc_detach_cloner(V_bridge_cloner);
V_bridge_cloner = NULL;
@@ -841,6 +845,7 @@ bridge_clone_create(struct if_clone *ifc, char *name, size_t len,
/* Initialize our routing table. */
bridge_rtable_init(sc);
+ fdb_create(&sc->sc_fdb);
callout_init_mtx(&sc->sc_brcallout, &sc->sc_rt_mtx, 0);
@@ -929,6 +934,7 @@ bridge_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags)
/* Tear down the routing table. */
bridge_rtable_fini(sc);
+ fdb_destroy(&sc->sc_fdb);
BRIDGE_UNLOCK(sc);
@@ -1710,19 +1716,27 @@ static int
bridge_ioctl_rts(struct bridge_softc *sc, void *arg)
{
struct ifbaconf *bac = arg;
- struct bridge_rtnode *brt;
struct ifbareq bareq;
+ fdb_iter_t it;
+ fdb_host_t *host;
char *buf, *outbuf;
- int count, buflen, len, error = 0;
+ uint32_t count;
+ int buflen, len, error = 0;
if (bac->ifbac_len == 0)
return (0);
- count = 0;
- CK_LIST_FOREACH(brt, &sc->sc_rtlist, brt_list)
- count++;
- buflen = sizeof(bareq) * count;
+ /*
+ * Note we don't lock the fdb here, which means the count can change
+ * between when we call fdb_size() and when we actually iterate.
+ * If we get too many hosts, just drop the extra ones, and if we get
+ * fewer than expected, reduce the buffer that we copy out.
+ */
+
+ count = fdb_size(&sc->sc_fdb);
+printf("bridge_ioctl_rts: count=%u\n", (unsigned)count);
+ buflen = sizeof(bareq) * count;
outbuf = malloc(buflen, M_TEMP, M_NOWAIT | M_ZERO);
if (outbuf == NULL)
return (ENOMEM);
@@ -1731,26 +1745,43 @@ bridge_ioctl_rts(struct bridge_softc *sc, void *arg)
buf = outbuf;
len = min(bac->ifbac_len, buflen);
bzero(&bareq, sizeof(bareq));
- CK_LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
+
+ fdb_lock(&sc->sc_fdb);
+ fdb_iter_init(&sc->sc_fdb, &it);
+printf("bridge_ioctl_rts: iterating\n");
+ while ((host = fdb_iter_next(&it)) != NULL) {
+ struct bridge_iflist *bif;
+
+printf("bridge_ioctl_rts: got a host\n");
+
+ /* Ran out of space in the output buffer */
if (len < sizeof(bareq))
- goto out;
- strlcpy(bareq.ifba_ifsname, brt->brt_ifp->if_xname,
+ break;
+
+ bif = fdb_host_udata(host);
+
+ strlcpy(bareq.ifba_ifsname, if_name(bif->bif_ifp),
sizeof(bareq.ifba_ifsname));
- memcpy(bareq.ifba_dst, brt->brt_addr, sizeof(brt->brt_addr));
- bareq.ifba_vlan = brt->brt_vlan;
+ memcpy(bareq.ifba_dst, fdb_host_addr(host), ETHER_ADDR_LEN);
+ bareq.ifba_vlan = fdb_host_vid(host);
+
+#if 0 /* XXXLW */
if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
time_uptime < brt->brt_expire)
bareq.ifba_expire = brt->brt_expire - time_uptime;
else
bareq.ifba_expire = 0;
- bareq.ifba_flags = brt->brt_flags;
+#endif
+ bareq.ifba_flags = fdb_host_flags(host);
+ /* Add this host to the output buffer */
memcpy(buf, &bareq, sizeof(bareq));
count++;
buf += sizeof(bareq);
len -= sizeof(bareq);
}
-out:
+ fdb_unlock(&sc->sc_fdb);
+
bac->ifbac_len = sizeof(bareq) * count;
error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len);
free(outbuf, M_TEMP);
@@ -1772,6 +1803,10 @@ bridge_ioctl_saddr(struct bridge_softc *sc, void *arg)
return (EXTERROR(ENOENT, "Interface is not a bridge member"));
}
+ error = fdb_add_or_replace(&sc->sc_fdb, req->ifba_dst, req->ifba_vlan,
+ bif, req->ifba_flags);
+printf("bridge_ioctl_saddr: error=%d\n", error);
+
/* bridge_rtupdate() may acquire the lock. */
error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
req->ifba_flags);