diff options
| author | Lexi Winter <ivy@FreeBSD.org> | 2025-08-03 20:09:33 +0100 |
|---|---|---|
| committer | Lexi Winter <ivy@FreeBSD.org> | 2025-08-03 20:09:33 +0100 |
| commit | d6e02a423d65d897bbabeabc190cb054e6731842 (patch) | |
| tree | aa059aa8e9f5d4f505e9fa1c0390d50d7e27a665 | |
| parent | d1095367eb5116a0b9a1b9fb6eb913a77eeb5e5d (diff) | |
fdb wip lf/dev/bridge-pctrie
| -rw-r--r-- | sys/conf/files | 1 | ||||
| -rw-r--r-- | sys/modules/if_bridge/Makefile | 3 | ||||
| -rw-r--r-- | sys/net/bridge_fdb.c | 207 | ||||
| -rw-r--r-- | sys/net/bridge_fdb.h | 239 | ||||
| -rw-r--r-- | sys/net/if_bridge.c | 61 |
5 files changed, 497 insertions, 14 deletions
diff --git a/sys/conf/files b/sys/conf/files index b7c19fae0b8e..74935b8b90b3 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -4154,6 +4154,7 @@ net/ieee8023ad_lacp.c optional lagg net/if.c standard net/ifq.c standard net/if_bridge.c optional bridge inet | if_bridge inet +net/bridge_fdb.c optional bridge inet | if_bridge inet net/if_clone.c standard net/if_dead.c standard net/if_disc.c optional disc diff --git a/sys/modules/if_bridge/Makefile b/sys/modules/if_bridge/Makefile index 6fb47110c33f..5c0e56ea0a1d 100644 --- a/sys/modules/if_bridge/Makefile +++ b/sys/modules/if_bridge/Makefile @@ -1,5 +1,6 @@ .PATH: ${SRCTOP}/sys/net KMOD= if_bridge -SRCS= if_bridge.c opt_inet.h opt_inet6.h opt_carp.h +SRCS= if_bridge.c bridge_fdb.c opt_inet.h opt_inet6.h opt_carp.h +CWARNFLAGS.clang+= -Wthread-safety .include <bsd.kmod.mk> diff --git a/sys/net/bridge_fdb.c b/sys/net/bridge_fdb.c new file mode 100644 index 000000000000..e678588e78ec --- /dev/null +++ b/sys/net/bridge_fdb.c @@ -0,0 +1,207 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR ISC + * + * Copyright (c) 2025 Lexi Winter. 
+ */ + +#include <sys/param.h> +#include <sys/pctrie.h> + +#include <vm/uma.h> + +#include <net/bridge_fdb.h> +#include <net/ethernet.h> +#include <net/vnet.h> + +/* UMA zone for host entries */ +VNET_DEFINE_STATIC(uma_zone_t, fdb_host_zone); +#define V_fdb_host_zone VNET(fdb_host_zone) + +/* + * Our pctrie + */ + +VNET_DEFINE_STATIC(uma_zone_t, fdb_node_zone); +#define V_fdb_node_zone VNET(fdb_node_zone) + +VNET_DEFINE_STATIC(smr_t, fdb_smr); +#define V_fdb_smr VNET(fdb_smr) + +static void *fdb_node_alloc(struct pctrie *); +static void fdb_node_free(struct pctrie *, void *); + +PCTRIE_DEFINE_SMR(FDB, __fdb_host, __fdh_key, fdb_node_alloc, fdb_node_free, + V_fdb_smr); + +static void * +fdb_node_alloc(struct pctrie *trie __unused) +{ + return (uma_zalloc_smr(V_fdb_node_zone, M_NOWAIT | M_ZERO)); +} + +static void +fdb_node_free(struct pctrie *trie __unused, void *addr) +{ + uma_zfree_smr(V_fdb_node_zone, addr); +} + +void +fdb_vnet_init(void) +{ + V_fdb_node_zone = uma_zcreate("fdb node", pctrie_node_size(), + NULL, NULL, pctrie_zone_init, NULL, + PCTRIE_PAD, UMA_ZONE_VM | UMA_ZONE_SMR); + V_fdb_smr = uma_zone_get_smr(V_fdb_node_zone); + + V_fdb_host_zone = uma_zcreate("fdb host", + sizeof(fdb_host_t), NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, 0); +} + +void +fdb_vnet_fini(void) +{ + uma_zdestroy(V_fdb_node_zone); + uma_zdestroy(V_fdb_host_zone); +} + +void +fdb_create(fdb_t *fdb) +{ + pctrie_init(&fdb->__fdb_trie); + mtx_init(&fdb->__fdb_mtx, "fdb", NULL, MTX_DEF); +} + +void +fdb_destroy(fdb_t *fdb __unused) +{ + mtx_destroy(&fdb->__fdb_mtx); +} + +int +fdb_find(fdb_t *fdb, const uint8_t *addr, ether_vlanid_t vlan, + fdb_host_t **phost) +{ + fdb_host_t *host; + + NET_EPOCH_ASSERT(); + + host = FDB_PCTRIE_LOOKUP(&fdb->__fdb_trie, fdb_key(addr, vlan)); + if (host != NULL) { + *phost = host; + return (0); + } + + return (ENOENT); +} + +int +fdb_add_or_replace(fdb_t *fdb, const uint8_t *addr, ether_vlanid_t vlan, + void *udata, uint8_t flags) +{ + fdb_host_t *host; + 
fdb_key_t key; + int ret; + + key = fdb_key(addr, vlan); + + fdb_lock(fdb); + + /* See if we have an existing node */ + host = FDB_PCTRIE_LOOKUP_UNLOCKED(&fdb->__fdb_trie, key); + if (host != NULL) { + /* + * Update it and return. Although we're synchronised against + * concurrent updates, use atomic stores (atomic_set_* would OR + * the new bits into the old value) to protect unlocked readers. + */ + atomic_store_ptr(&host->__fdh_udata, (uintptr_t)udata); + atomic_store_8(&host->__fdh_flags, flags); + + fdb_unlock(fdb); + return (0); + } + + /* Otherwise, insert a new one */ + host = uma_zalloc(V_fdb_host_zone, M_NOWAIT | M_ZERO); + if (host == NULL) { + fdb_unlock(fdb); + return (ENOMEM); + } + + host->__fdh_key = key; + host->__fdh_udata = (uintptr_t)udata; + host->__fdh_flags = flags; + + ret = FDB_PCTRIE_INSERT(&fdb->__fdb_trie, host); + if (ret != 0) { + uma_zfree(V_fdb_host_zone, host); + fdb_unlock(fdb); + return (ret); + } + + atomic_add_32(&fdb->__fdb_curaddr, 1); + fdb_unlock(fdb); +printf("fdb_add_or_replace: returning okay\n"); + return (0); +} + +int +fdb_remove(fdb_t *fdb, const uint8_t *addr, ether_vlanid_t vlan) +{ + struct pctrie_iter it; + fdb_host_t *host; + + fdb_lock(fdb); + + pctrie_iter_init(&it, &fdb->__fdb_trie); + host = FDB_PCTRIE_ITER_LOOKUP(&it, fdb_key(addr, vlan)); + + if (host == NULL) { + fdb_unlock(fdb); + return (ENOENT); + } + + FDB_PCTRIE_ITER_REMOVE(&it); + atomic_subtract_32(&fdb->__fdb_curaddr, 1); + fdb_unlock(fdb); + return (0); +} + +void +fdb_remove_all(fdb_t *fdb, const uint8_t *addr) +{ + struct pctrie_iter it; + fdb_host_t *host; + + fdb_lock(fdb); + + /* Get an iterator to the first host >= the one we want */ + pctrie_iter_init(&it, &fdb->__fdb_trie); + host = FDB_PCTRIE_ITER_LOOKUP_GE(&it, fdb_key(addr, 0)); + + /* Iterate while the host matches */ + while (host != NULL) { + if (memcmp(addr, &host->__fdh_key, ETHER_ADDR_LEN) != 0) + break; + + FDB_PCTRIE_ITER_REMOVE(&it); + host = FDB_PCTRIE_ITER_NEXT(&it); + atomic_subtract_32(&fdb->__fdb_curaddr, 1); + } + + 
fdb_unlock(fdb); +} + +void +fdb_iter_init(fdb_t *fdb, fdb_iter_t *iter) +{ + fdb_assert_locked(fdb); + pctrie_iter_init(iter, &fdb->__fdb_trie); +} + +fdb_host_t * +fdb_iter_next(fdb_iter_t *iter) +{ + return FDB_PCTRIE_ITER_NEXT(iter); +} diff --git a/sys/net/bridge_fdb.h b/sys/net/bridge_fdb.h new file mode 100644 index 000000000000..668856ff24f6 --- /dev/null +++ b/sys/net/bridge_fdb.h @@ -0,0 +1,239 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause OR ISC + * + * Copyright (c) 2025 Lexi Winter. + * + * A forwarding table suitable for use in an Ethernet bridge. + * + * An fdb stores (addr,vlan) pairs associated with a bridge, along with an + * associated data pointer. When a vlan id is provided, the fdb implements + * IVL; if an SVL fdb is required, pass the vlan id as 0. + * + * The lifetime of host entries is managed using the net epoch, therefore all + * operations on the table must be done inside the net epoch. Returned data + * (e.g., host entries or iterators) are valid until the end of the epoch. + */ + +#ifndef _NET_BRIDGE_FDB_H_ +#define _NET_BRIDGE_FDB_H_ + +/* + * fdb host entry flags + */ +#define FDH_STATIC (1U << 0) /* entry does not expire */ + +#ifdef _KERNEL + +#include <sys/param.h> +#include <sys/cdefs.h> +#include <sys/epoch.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/pctrie.h> +#include <sys/systm.h> + +#include <machine/atomic.h> + +#include <net/ethernet.h> + +/* + * Initialise and finalise the fdb subsystem for a particular vnet. + */ +void fdb_vnet_init(void); +void fdb_vnet_fini(void); + +/* + * The lookup key for host entries. This is a 6-byte Ethernet address followed + * by a 2-byte vlan id. Do not construct keys manually; use fdb_key(). + */ +typedef uint64_t fdb_key_t; + +/* Create a key from an address and a vlan id. 
*/ +[[nodiscard]] static __inline __unused fdb_key_t +fdb_key(const uint8_t *addr, ether_vlanid_t vlan) +{ + fdb_key_t key; + uint8_t *pkey; + + _Static_assert((ETHER_ADDR_LEN + sizeof(vlan)) == sizeof(key), + "fdb key has the wrong size"); + + pkey = (uint8_t *)&key; + + memcpy(pkey, addr, ETHER_ADDR_LEN); + + /* + * We don't really need to byte-swap the vlan id, but doing so means + * they're stored in ascending order, which makes things a bit nicer + * for userland tools. + */ + vlan = htons(vlan); + memcpy(pkey + ETHER_ADDR_LEN, &vlan, sizeof(vlan)); + + return key; +} + +/* + * An fdb host entry. The contents of this structure are private and should + * not be inspected or modified by the user. + */ +typedef struct __fdb_host { + /* private */ + fdb_key_t __fdh_key; /* pctrie key */ + uint8_t __fdh_flags; /* address flags */ + uintptr_t __fdh_udata; /* user data */ + uint32_t __fdh_expire; /* expiration time */ +} fdb_host_t; + +/* Fetch the user data for an fdb host */ +#define fdb_host_udata(__fdh) (_Generic((__fdh), \ + const fdb_host_t *: \ + (const void *)atomic_load_ptr(&(__fdh)->__fdh_udata), \ + fdb_host_t *: \ + (void *)atomic_load_ptr(&(__fdh)->__fdh_udata))) + +/* Get the Ethernet address from an fdb entry */ +[[nodiscard]] static __inline __unused const uint8_t * +fdb_host_addr(const fdb_host_t *host) +{ + NET_EPOCH_ASSERT(); + + return (const uint8_t *)&host->__fdh_key; +} + +/* Get the flags from an fdb entry */ +[[nodiscard]] static __inline __unused uint8_t +fdb_host_flags(const fdb_host_t *__host) +{ + return atomic_load_8(&__host->__fdh_flags); +} + +/* Get the VLAN ID (in host byte order) from an fdb entry */ +[[nodiscard]] static __inline __unused ether_vlanid_t +fdb_host_vid(const fdb_host_t *host) +{ + const uint8_t *pkey; + ether_vlanid_t vid; + + NET_EPOCH_ASSERT(); + + pkey = (const uint8_t *)&host->__fdh_key + ETHER_ADDR_LEN; + vid = (ether_vlanid_t)pkey[0] << 8; /* fdb_key() stores the vid big-endian */ + vid |= (ether_vlanid_t)pkey[1]; /* so this is already host order; no ntohs() */ + + return (vid); +} + +/* + * An fdb instance. 
Must be initialised using fdb_create() and + * finalised using fdb_destroy(). + */ +typedef struct fdb { + uint32_t fdb_maxaddr; /* maximum number of hosts */ + uint32_t fdb_timeout; /* how long until a host expires */ + + /* private */ + struct pctrie __fdb_trie; /* the host table */ + struct mtx __fdb_mtx; /* write lock */ + uint32_t __fdb_curaddr; /* current number of hosts */ +} __lockable fdb_t; + +/* Create a new fdb */ +void fdb_create(fdb_t *); + +/* Destroy an fdb, releasing all of its hosts (XXX: the current implementation only destroys the mutex) */ +void fdb_destroy(fdb_t *); + +/* Return the number of hosts in an fdb */ +static __inline __unused uint32_t +fdb_size(fdb_t *__fdb) +{ + return atomic_load_32(&__fdb->__fdb_curaddr); +} + +/* + * Operations on the host table. + */ + +/* + * Lock the fdb. The lock is only used for writing, so locking doesn't affect + * readers. This means any updates to the fdb (or its hosts) must be atomic, + * even while the fdb is locked. + */ + +static __inline __unused void +fdb_lock(fdb_t *__fdb) + __locks_exclusive(*__fdb) + __lock_annotate(no_thread_safety_analysis) +{ + mtx_lock(&__fdb->__fdb_mtx); +} + +static __inline __unused void +fdb_unlock(fdb_t *__fdb) + __unlocks(*__fdb) + __lock_annotate(no_thread_safety_analysis) +{ + mtx_unlock(&__fdb->__fdb_mtx); +} + +/* Assert the fdb is locked by the current thread, or panic */ +static __inline __unused void +fdb_assert_locked(fdb_t *__fdb) + __asserts_exclusive(__fdb) +{ + mtx_assert(&__fdb->__fdb_mtx, MA_OWNED); +} + +/* + * Add a new fdb entry or, if an entry for this (addr,vlan) already exists, + * update it. + * + * Returns 0 on success, or an errno value on failure. + */ +int fdb_add_or_replace(fdb_t *fdb, const uint8_t *addr, ether_vlanid_t vlan, + void *udata, uint8_t flags); + +/* + * Find an existing host entry and return it into 'host'. + * Must be called inside NET_EPOCH. + * + * Returns 0 on success, or one of the following errors: + * ENOENT - The host was not found in the fdb. 
+ */ +int fdb_find(fdb_t *fdb, const uint8_t *addr, ether_vlanid_t vlan, + fdb_host_t **host); + +/* + * Find or create a host entry and return it into 'host'. + * Must be called inside NET_EPOCH. + * + * Returns 0 on success, or one of the following errors: + * ENOMEM - fdb could not allocate memory for the new node. + */ +int fdb_find_or_create(fdb_t *fdb, const uint8_t *addr, ether_vlanid_t vlan); + +/* + * Remove a host on a specific vlan. + * + * Returns 0 on success, or one of the following errors: + * ENOENT - the fdb entry was not found. + */ +int fdb_remove(fdb_t *fdb, const uint8_t *addr, ether_vlanid_t vlan); + +/* Remove all hosts with this address on any vlan. */ +void fdb_remove_all(fdb_t *fdb, const uint8_t *addr); + +/* + * Iterate the fdb host table. Start by calling fdb_iter_init(), then call + * fdb_iter_next() to retrieve each entry. After reaching the end of the + * table, fdb_iter_next() returns NULL. + */ + +typedef struct pctrie_iter fdb_iter_t; + +void fdb_iter_init(fdb_t *, fdb_iter_t *); +[[nodiscard]] fdb_host_t *fdb_iter_next(fdb_iter_t *); + +#endif /* _KERNEL */ +#endif /* _NET_BRIDGE_FDB_H_ */ diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c index 0a35fb4095fb..8ed290d833d5 100644 --- a/sys/net/if_bridge.c +++ b/sys/net/if_bridge.c @@ -130,6 +130,7 @@ #endif #include <machine/in_cksum.h> #include <netinet/if_ether.h> +#include <net/bridge_fdb.h> #include <net/bridgestp.h> #include <net/if_bridgevar.h> #include <net/if_llc.h> @@ -292,6 +293,7 @@ struct bridge_softc { CK_LIST_HEAD(, bridge_iflist) sc_iflist; /* member interface list */ CK_LIST_HEAD(, bridge_rtnode) *sc_rthash; /* our forwarding table */ CK_LIST_HEAD(, bridge_rtnode) sc_rtlist; /* list version of above */ + fdb_t sc_fdb; /* forwarding database */ uint32_t sc_rthash_key; /* key for hash */ CK_LIST_HEAD(, bridge_iflist) sc_spanlist; /* span ports list */ struct bstp_state sc_stp; /* STP state */ @@ -656,6 +658,7 @@ static const char bridge_name[] = "bridge"; static 
void vnet_bridge_init(const void *unused __unused) { + fdb_vnet_init(); V_bridge_rtnode_zone = uma_zcreate("bridge_rtnode", sizeof(struct bridge_rtnode), NULL, NULL, NULL, NULL, @@ -675,6 +678,7 @@ VNET_SYSINIT(vnet_bridge_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, static void vnet_bridge_uninit(const void *unused __unused) { + fdb_vnet_fini(); ifc_detach_cloner(V_bridge_cloner); V_bridge_cloner = NULL; @@ -841,6 +845,7 @@ bridge_clone_create(struct if_clone *ifc, char *name, size_t len, /* Initialize our routing table. */ bridge_rtable_init(sc); + fdb_create(&sc->sc_fdb); callout_init_mtx(&sc->sc_brcallout, &sc->sc_rt_mtx, 0); @@ -929,6 +934,7 @@ bridge_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags) /* Tear down the routing table. */ bridge_rtable_fini(sc); + fdb_destroy(&sc->sc_fdb); BRIDGE_UNLOCK(sc); @@ -1710,19 +1716,27 @@ static int bridge_ioctl_rts(struct bridge_softc *sc, void *arg) { struct ifbaconf *bac = arg; - struct bridge_rtnode *brt; struct ifbareq bareq; + fdb_iter_t it; + fdb_host_t *host; char *buf, *outbuf; - int count, buflen, len, error = 0; + uint32_t count; + int buflen, len, error = 0; if (bac->ifbac_len == 0) return (0); - count = 0; - CK_LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) - count++; - buflen = sizeof(bareq) * count; + /* + * Note we don't lock the fdb here, which means the count can change + * between when we call fdb_size() and when we actually iterate. + * If we get too many hosts, just drop the extra ones, and if we get + * fewer than expected, reduce the buffer that we copy out. 
+ */ + + count = fdb_size(&sc->sc_fdb); +printf("bridge_ioctl_rts: count=%u\n", (unsigned)count); + buflen = sizeof(bareq) * count; outbuf = malloc(buflen, M_TEMP, M_NOWAIT | M_ZERO); if (outbuf == NULL) return (ENOMEM); @@ -1731,26 +1745,43 @@ bridge_ioctl_rts(struct bridge_softc *sc, void *arg) buf = outbuf; len = min(bac->ifbac_len, buflen); bzero(&bareq, sizeof(bareq)); - CK_LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) { + + fdb_lock(&sc->sc_fdb); + fdb_iter_init(&sc->sc_fdb, &it); +printf("bridge_ioctl_rts: iterating\n"); + while ((host = fdb_iter_next(&it)) != NULL) { + struct bridge_iflist *bif; + +printf("bridge_ioctl_rts: got a host\n"); + + /* Ran out of space in the output buffer */ if (len < sizeof(bareq)) - goto out; - strlcpy(bareq.ifba_ifsname, brt->brt_ifp->if_xname, + break; + + bif = fdb_host_udata(host); + + strlcpy(bareq.ifba_ifsname, if_name(bif->bif_ifp), sizeof(bareq.ifba_ifsname)); - memcpy(bareq.ifba_dst, brt->brt_addr, sizeof(brt->brt_addr)); - bareq.ifba_vlan = brt->brt_vlan; + memcpy(bareq.ifba_dst, fdb_host_addr(host), ETHER_ADDR_LEN); + bareq.ifba_vlan = fdb_host_vid(host); + +#if 0 /* XXXLW */ if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC && time_uptime < brt->brt_expire) bareq.ifba_expire = brt->brt_expire - time_uptime; else bareq.ifba_expire = 0; - bareq.ifba_flags = brt->brt_flags; +#endif + bareq.ifba_flags = fdb_host_flags(host); + /* Add this host to the output buffer */ memcpy(buf, &bareq, sizeof(bareq)); count++; buf += sizeof(bareq); len -= sizeof(bareq); } -out: + fdb_unlock(&sc->sc_fdb); + bac->ifbac_len = sizeof(bareq) * count; error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len); free(outbuf, M_TEMP); @@ -1772,6 +1803,10 @@ bridge_ioctl_saddr(struct bridge_softc *sc, void *arg) return (EXTERROR(ENOENT, "Interface is not a bridge member")); } + error = fdb_add_or_replace(&sc->sc_fdb, req->ifba_dst, req->ifba_vlan, + bif, req->ifba_flags); +printf("bridge_ioctl_saddr: error=%d\n", error); + /* bridge_rtupdate() 
may acquire the lock. */ error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1, req->ifba_flags); |
