aboutsummaryrefslogtreecommitdiffstats
path: root/sys
diff options
context:
space:
mode:
Diffstat (limited to 'sys')
-rw-r--r--sys/amd64/conf/GENERIC2
-rw-r--r--sys/amd64/conf/GENERIC-NODEBUG31
-rw-r--r--sys/amd64/conf/MINIMAL2
-rw-r--r--sys/amd64/conf/MINIMAL-NODEBUG11
-rw-r--r--sys/arm/conf/GENERIC-NODEBUG31
-rw-r--r--sys/arm/conf/std.armv73
-rw-r--r--sys/arm64/arm64/machdep.c2
-rw-r--r--sys/arm64/arm64/pmap.c96
-rw-r--r--sys/arm64/conf/GENERIC-MMCCAM-NODEBUG14
-rw-r--r--sys/arm64/conf/GENERIC-NODEBUG31
-rw-r--r--sys/arm64/conf/std.arm642
-rw-r--r--sys/arm64/include/cpu.h3
-rw-r--r--sys/arm64/include/pmap.h2
-rw-r--r--sys/compat/freebsd32/freebsd32_syscall.h4
-rw-r--r--sys/compat/freebsd32/freebsd32_syscalls.c2
-rw-r--r--sys/compat/freebsd32/freebsd32_sysent.c2
-rw-r--r--sys/compat/freebsd32/freebsd32_systrace_args.c44
-rw-r--r--sys/conf/files1
-rw-r--r--sys/dev/cyapa/cyapa.c95
-rw-r--r--sys/dev/sound/pci/hda/hdac.c10
-rw-r--r--sys/dev/virtio/network/if_vtnet.c11
-rw-r--r--sys/i386/conf/GENERIC2
-rw-r--r--sys/i386/conf/GENERIC-NODEBUG33
-rw-r--r--sys/i386/conf/MINIMAL2
-rw-r--r--sys/i386/conf/MINIMAL-NODEBUG11
-rw-r--r--sys/kern/init_sysent.c2
-rw-r--r--sys/kern/kern_descrip.c2
-rw-r--r--sys/kern/kern_event.c139
-rw-r--r--sys/kern/kern_jail.c460
-rw-r--r--sys/kern/kern_jaildesc.c337
-rw-r--r--sys/kern/syscalls.c2
-rw-r--r--sys/kern/syscalls.master10
-rw-r--r--sys/kern/systrace_args.c44
-rw-r--r--sys/modules/sound/driver/hda/Makefile4
-rw-r--r--sys/netinet/tcp_sack.c21
-rw-r--r--sys/powerpc/conf/GENERIC2
-rw-r--r--sys/powerpc/conf/GENERIC-NODEBUG31
-rw-r--r--sys/powerpc/conf/GENERIC642
-rw-r--r--sys/powerpc/conf/GENERIC64-NODEBUG31
-rw-r--r--sys/powerpc/conf/GENERIC64LE2
-rw-r--r--sys/powerpc/conf/GENERIC64LE-NODEBUG31
-rw-r--r--sys/riscv/conf/GENERIC2
-rw-r--r--sys/riscv/conf/GENERIC-NODEBUG31
-rw-r--r--sys/sys/event.h19
-rw-r--r--sys/sys/file.h1
-rw-r--r--sys/sys/jail.h20
-rw-r--r--sys/sys/jaildesc.h85
-rw-r--r--sys/sys/param.h2
-rw-r--r--sys/sys/syscall.h4
-rw-r--r--sys/sys/syscall.mk4
-rw-r--r--sys/sys/sysproto.h10
-rw-r--r--sys/sys/user.h4
52 files changed, 1334 insertions, 415 deletions
diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC
index 786edc4125c9..c98cfce8613a 100644
--- a/sys/amd64/conf/GENERIC
+++ b/sys/amd64/conf/GENERIC
@@ -95,8 +95,6 @@ options RCTL # Resource limits
# Debugging support. Always need this:
options KDB # Enable kernel debugger support.
options KDB_TRACE # Print a stack trace for a panic.
-# For full debugger support use (turn off in stable branch):
-include "std.debug"
# Kernel dump features.
options EKCD # Support for encrypted kernel dumps
diff --git a/sys/amd64/conf/GENERIC-NODEBUG b/sys/amd64/conf/GENERIC-NODEBUG
deleted file mode 100644
index 1939b0efd352..000000000000
--- a/sys/amd64/conf/GENERIC-NODEBUG
+++ /dev/null
@@ -1,31 +0,0 @@
-#
-# GENERIC-NODEBUG -- WITNESS and INVARIANTS free kernel configuration file
-# for FreeBSD/amd64
-#
-# This configuration file removes several debugging options, including
-# WITNESS and INVARIANTS checking, which are known to have significant
-# performance impact on running systems. When benchmarking new features
-# this kernel should be used instead of the standard GENERIC.
-# This kernel configuration should never appear outside of the HEAD
-# of the FreeBSD tree.
-#
-# For more information on this file, please read the config(5) manual page,
-# and/or the handbook section on Kernel Configuration Files:
-#
-# https://docs.freebsd.org/en/books/handbook/kernelconfig/#kernelconfig-config
-#
-# The handbook is also available locally in /usr/share/doc/handbook
-# if you've installed the doc distribution, otherwise always see the
-# FreeBSD World Wide Web server (https://www.FreeBSD.org/) for the
-# latest information.
-#
-# An exhaustive list of options and more detailed explanations of the
-# device lines is also present in the ../../conf/NOTES and NOTES files.
-# If you are in doubt as to the purpose or necessity of a line, check first
-# in NOTES.
-#
-
-include GENERIC
-include "std.nodebug"
-
-ident GENERIC-NODEBUG
diff --git a/sys/amd64/conf/MINIMAL b/sys/amd64/conf/MINIMAL
index 0baf6d6431de..ec5ab2fcaee3 100644
--- a/sys/amd64/conf/MINIMAL
+++ b/sys/amd64/conf/MINIMAL
@@ -73,8 +73,6 @@ options INCLUDE_CONFIG_FILE # Include this file in kernel
# Debugging support. Always need this:
options KDB # Enable kernel debugger support.
options KDB_TRACE # Print a stack trace for a panic.
-# For full debugger support use (turn off in stable branch):
-include "std.debug"
# Make an SMP-capable kernel by default
options SMP # Symmetric MultiProcessor Kernel
diff --git a/sys/amd64/conf/MINIMAL-NODEBUG b/sys/amd64/conf/MINIMAL-NODEBUG
deleted file mode 100644
index 7b7c22bbcaf6..000000000000
--- a/sys/amd64/conf/MINIMAL-NODEBUG
+++ /dev/null
@@ -1,11 +0,0 @@
-#
-# MINIMAL-NODEBUG -- Non-debug MINIMAL kernel.
-#
-# This is the MINIMAL equivalent to GENERIC-NODEBUG.
-
-#NO_UNIVERSE
-
-include MINIMAL
-include "std.nodebug"
-
-ident MINIMAL-NODEBUG
diff --git a/sys/arm/conf/GENERIC-NODEBUG b/sys/arm/conf/GENERIC-NODEBUG
deleted file mode 100644
index 0b3199245187..000000000000
--- a/sys/arm/conf/GENERIC-NODEBUG
+++ /dev/null
@@ -1,31 +0,0 @@
-#
-# GENERIC-NODEBUG -- WITNESS and INVARIANTS free kernel configuration file
-# for FreeBSD/arm
-#
-# This configuration file removes several debugging options, including
-# WITNESS and INVARIANTS checking, which are known to have significant
-# performance impact on running systems. When benchmarking new features
-# this kernel should be used instead of the standard GENERIC.
-# This kernel configuration should never appear outside of the HEAD
-# of the FreeBSD tree.
-#
-# For more information on this file, please read the config(5) manual page,
-# and/or the handbook section on Kernel Configuration Files:
-#
-# https://docs.freebsd.org/en/books/handbook/kernelconfig/#kernelconfig-config
-#
-# The handbook is also available locally in /usr/share/doc/handbook
-# if you've installed the doc distribution, otherwise always see the
-# FreeBSD World Wide Web server (https://www.FreeBSD.org/) for the
-# latest information.
-#
-# An exhaustive list of options and more detailed explanations of the
-# device lines is also present in the ../../conf/NOTES and NOTES files.
-# If you are in doubt as to the purpose or necessity of a line, check first
-# in NOTES.
-#
-
-include GENERIC
-include "std.nodebug"
-
-ident GENERIC-NODEBUG
diff --git a/sys/arm/conf/std.armv7 b/sys/arm/conf/std.armv7
index 4ef60c331212..15d8304ae5f1 100644
--- a/sys/arm/conf/std.armv7
+++ b/sys/arm/conf/std.armv7
@@ -65,9 +65,6 @@ options KDB_TRACE # Print a stack trace for a panic.
options USB_DEBUG # Enable usb debug support code
-# For full debugger support use (turn off in stable branch):
-include "std.debug"
-
# Optional extras, never enabled by default:
#options BOOTVERBOSE
#options DEBUG # May result in extreme spewage
diff --git a/sys/arm64/arm64/machdep.c b/sys/arm64/arm64/machdep.c
index 627b02e82d34..47c701e8588c 100644
--- a/sys/arm64/arm64/machdep.c
+++ b/sys/arm64/arm64/machdep.c
@@ -858,7 +858,7 @@ initarm(struct arm64_bootparams *abp)
cninit();
set_ttbr0(abp->kern_ttbr0);
- cpu_tlb_flushID();
+ pmap_s1_invalidate_all_kernel();
if (!valid)
panic("Invalid bus configuration: %s",
diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 48b62442e68f..8a4395aa1c89 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -190,6 +190,8 @@ pt_entry_t __read_mostly pmap_gp_attr;
#define PMAP_SAN_PTE_BITS (ATTR_AF | ATTR_S1_XN | pmap_sh_attr | \
ATTR_KERN_GP | ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK) | ATTR_S1_AP(ATTR_S1_AP_RW))
+static bool __read_mostly pmap_multiple_tlbi = false;
+
struct pmap_large_md_page {
struct rwlock pv_lock;
struct md_page pv_page;
@@ -1297,7 +1299,7 @@ pmap_bootstrap_dmap(vm_size_t kernlen)
}
}
- cpu_tlb_flushID();
+ pmap_s1_invalidate_all_kernel();
bs_state.dmap_valid = true;
@@ -1399,7 +1401,7 @@ pmap_bootstrap(void)
/* And the l3 tables for the early devmap */
pmap_bootstrap_l3(VM_MAX_KERNEL_ADDRESS - (PMAP_MAPDEV_EARLY_SIZE));
- cpu_tlb_flushID();
+ pmap_s1_invalidate_all_kernel();
#define alloc_pages(var, np) \
(var) = bs_state.freemempos; \
@@ -1723,6 +1725,51 @@ CPU_FEAT(feat_hafdbs, "Hardware management of the Access flag and dirty state",
pmap_dbm_check, pmap_dbm_has_errata, pmap_dbm_enable,
CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU);
+static cpu_feat_en
+pmap_multiple_tlbi_check(const struct cpu_feat *feat __unused, u_int midr)
+{
+ /*
+ * Cortex-A55 erratum 2441007 (Cat B rare)
+ * Present in all revisions
+ */
+ if (CPU_IMPL(midr) == CPU_IMPL_ARM &&
+ CPU_PART(midr) == CPU_PART_CORTEX_A55)
+ return (FEAT_DEFAULT_DISABLE);
+
+ /*
+ * Cortex-A76 erratum 1286807 (Cat B rare)
+ * Present in r0p0 - r3p0
+ * Fixed in r3p1
+ */
+ if (midr_check_var_part_range(midr, CPU_IMPL_ARM, CPU_PART_CORTEX_A76,
+ 0, 0, 3, 0))
+ return (FEAT_DEFAULT_DISABLE);
+
+ /*
+ * Cortex-A510 erratum 2441009 (Cat B rare)
+ * Present in r0p0 - r1p1
+ * Fixed in r1p2
+ */
+ if (midr_check_var_part_range(midr, CPU_IMPL_ARM, CPU_PART_CORTEX_A510,
+ 0, 0, 1, 1))
+ return (FEAT_DEFAULT_DISABLE);
+
+ return (FEAT_ALWAYS_DISABLE);
+}
+
+static bool
+pmap_multiple_tlbi_enable(const struct cpu_feat *feat __unused,
+ cpu_feat_errata errata_status, u_int *errata_list __unused,
+ u_int errata_count __unused)
+{
+ pmap_multiple_tlbi = true;
+ return (true);
+}
+
+CPU_FEAT(errata_multi_tlbi, "Multiple TLBI errata",
+ pmap_multiple_tlbi_check, NULL, pmap_multiple_tlbi_enable,
+ CPU_FEAT_EARLY_BOOT | CPU_FEAT_PER_CPU);
+
/*
* Initialize the pmap module.
*
@@ -1876,9 +1923,17 @@ pmap_s1_invalidate_page(pmap_t pmap, vm_offset_t va, bool final_only)
r = TLBI_VA(va);
if (pmap == kernel_pmap) {
pmap_s1_invalidate_kernel(r, final_only);
+ if (pmap_multiple_tlbi) {
+ dsb(ish);
+ pmap_s1_invalidate_kernel(r, final_only);
+ }
} else {
r |= ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie));
pmap_s1_invalidate_user(r, final_only);
+ if (pmap_multiple_tlbi) {
+ dsb(ish);
+ pmap_s1_invalidate_user(r, final_only);
+ }
}
dsb(ish);
isb();
@@ -1920,12 +1975,24 @@ pmap_s1_invalidate_strided(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
end = TLBI_VA(eva);
for (r = start; r < end; r += TLBI_VA(stride))
pmap_s1_invalidate_kernel(r, final_only);
+
+ if (pmap_multiple_tlbi) {
+ dsb(ish);
+ for (r = start; r < end; r += TLBI_VA(stride))
+ pmap_s1_invalidate_kernel(r, final_only);
+ }
} else {
start = end = ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie));
start |= TLBI_VA(sva);
end |= TLBI_VA(eva);
for (r = start; r < end; r += TLBI_VA(stride))
pmap_s1_invalidate_user(r, final_only);
+
+ if (pmap_multiple_tlbi) {
+ dsb(ish);
+ for (r = start; r < end; r += TLBI_VA(stride))
+ pmap_s1_invalidate_user(r, final_only);
+ }
}
dsb(ish);
isb();
@@ -1961,6 +2028,19 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
pmap_s2_invalidate_range(pmap, sva, eva, final_only);
}
+void
+pmap_s1_invalidate_all_kernel(void)
+{
+ dsb(ishst);
+ __asm __volatile("tlbi vmalle1is");
+ dsb(ish);
+ if (pmap_multiple_tlbi) {
+ __asm __volatile("tlbi vmalle1is");
+ dsb(ish);
+ }
+ isb();
+}
+
/*
* Invalidates all cached intermediate- and final-level TLB entries for the
* given virtual address space.
@@ -1975,9 +2055,17 @@ pmap_s1_invalidate_all(pmap_t pmap)
dsb(ishst);
if (pmap == kernel_pmap) {
__asm __volatile("tlbi vmalle1is");
+ if (pmap_multiple_tlbi) {
+ dsb(ish);
+ __asm __volatile("tlbi vmalle1is");
+ }
} else {
r = ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie));
__asm __volatile("tlbi aside1is, %0" : : "r" (r));
+ if (pmap_multiple_tlbi) {
+ dsb(ish);
+ __asm __volatile("tlbi aside1is, %0" : : "r" (r));
+ }
}
dsb(ish);
isb();
@@ -7965,7 +8053,7 @@ pmap_mapbios(vm_paddr_t pa, vm_size_t size)
pa += L2_SIZE;
}
if ((old_l2e & ATTR_DESCR_VALID) != 0)
- pmap_s1_invalidate_all(kernel_pmap);
+ pmap_s1_invalidate_all_kernel();
else {
/*
* Because the old entries were invalid and the new
@@ -8056,7 +8144,7 @@ pmap_unmapbios(void *p, vm_size_t size)
}
}
if (preinit_map) {
- pmap_s1_invalidate_all(kernel_pmap);
+ pmap_s1_invalidate_all_kernel();
return;
}
diff --git a/sys/arm64/conf/GENERIC-MMCCAM-NODEBUG b/sys/arm64/conf/GENERIC-MMCCAM-NODEBUG
deleted file mode 100644
index b2e865129012..000000000000
--- a/sys/arm64/conf/GENERIC-MMCCAM-NODEBUG
+++ /dev/null
@@ -1,14 +0,0 @@
-#
-# GENERIC-MMCCAM-NODEBUG
-#
-# Custom kernel for arm64 plus MMCCAM as opposed to the prior MMC stack. It is
-# present to keep it building in tree since it wouldn't work in LINT. This
-# version without debugging features.
-#
-
-#NO_UNIVERSE
-
-include GENERIC-MMCCAM
-include "std.nodebug"
-
-ident GENERIC-MMCCAM-NODEBUG
diff --git a/sys/arm64/conf/GENERIC-NODEBUG b/sys/arm64/conf/GENERIC-NODEBUG
deleted file mode 100644
index 086942dfaab1..000000000000
--- a/sys/arm64/conf/GENERIC-NODEBUG
+++ /dev/null
@@ -1,31 +0,0 @@
-#
-# GENERIC-NODEBUG -- WITNESS and INVARIANTS free kernel configuration file
-# for FreeBSD/arm64
-#
-# This configuration file removes several debugging options, including
-# WITNESS and INVARIANTS checking, which are known to have significant
-# performance impact on running systems. When benchmarking new features
-# this kernel should be used instead of the standard GENERIC.
-# This kernel configuration should never appear outside of the HEAD
-# of the FreeBSD tree.
-#
-# For more information on this file, please read the config(5) manual page,
-# and/or the handbook section on Kernel Configuration Files:
-#
-# https://docs.freebsd.org/en/books/handbook/kernelconfig/#kernelconfig-config
-#
-# The handbook is also available locally in /usr/share/doc/handbook
-# if you've installed the doc distribution, otherwise always see the
-# FreeBSD World Wide Web server (https://www.FreeBSD.org/) for the
-# latest information.
-#
-# An exhaustive list of options and more detailed explanations of the
-# device lines is also present in the ../../conf/NOTES and NOTES files.
-# If you are in doubt as to the purpose or necessity of a line, check first
-# in NOTES.
-#
-
-include GENERIC
-include "std.nodebug"
-
-ident GENERIC-NODEBUG
diff --git a/sys/arm64/conf/std.arm64 b/sys/arm64/conf/std.arm64
index a0568466cfaf..58f3748e2700 100644
--- a/sys/arm64/conf/std.arm64
+++ b/sys/arm64/conf/std.arm64
@@ -74,8 +74,6 @@ options PERTHREAD_SSP # Per-thread SSP canary
# Debugging support. Always need this:
options KDB # Enable kernel debugger support.
options KDB_TRACE # Print a stack trace for a panic.
-# For full debugger support use (turn off in stable branch):
-include "std.debug"
# Kernel Sanitizers
#options COVERAGE # Generic kernel coverage. Used by KCOV
diff --git a/sys/arm64/include/cpu.h b/sys/arm64/include/cpu.h
index f07b67d18abf..07a783138f42 100644
--- a/sys/arm64/include/cpu.h
+++ b/sys/arm64/include/cpu.h
@@ -193,9 +193,6 @@
(((mask) & PCPU_GET(midr)) == \
((mask) & CPU_ID_RAW((impl), (part), (var), (rev))))
-#define CPU_MATCH_RAW(mask, devid) \
- (((mask) & PCPU_GET(midr)) == ((mask) & (devid)))
-
#if !defined(__ASSEMBLER__)
static inline bool
midr_check_var_part_range(u_int midr, u_int impl, u_int part, u_int var_low,
diff --git a/sys/arm64/include/pmap.h b/sys/arm64/include/pmap.h
index 357c1a0d8232..406b6e2c5e0a 100644
--- a/sys/arm64/include/pmap.h
+++ b/sys/arm64/include/pmap.h
@@ -175,6 +175,8 @@ int pmap_fault(pmap_t, uint64_t, uint64_t);
struct pcb *pmap_switch(struct thread *);
+void pmap_s1_invalidate_all_kernel(void);
+
extern void (*pmap_clean_stage2_tlbi)(void);
extern void (*pmap_stage2_invalidate_range)(uint64_t, vm_offset_t, vm_offset_t,
bool);
diff --git a/sys/compat/freebsd32/freebsd32_syscall.h b/sys/compat/freebsd32/freebsd32_syscall.h
index 90cd21a80923..54063150eef9 100644
--- a/sys/compat/freebsd32/freebsd32_syscall.h
+++ b/sys/compat/freebsd32/freebsd32_syscall.h
@@ -515,4 +515,6 @@
#define FREEBSD32_SYS_inotify_rm_watch 594
#define FREEBSD32_SYS_getgroups 595
#define FREEBSD32_SYS_setgroups 596
-#define FREEBSD32_SYS_MAXSYSCALL 597
+#define FREEBSD32_SYS_jail_attach_jd 597
+#define FREEBSD32_SYS_jail_remove_jd 598
+#define FREEBSD32_SYS_MAXSYSCALL 599
diff --git a/sys/compat/freebsd32/freebsd32_syscalls.c b/sys/compat/freebsd32/freebsd32_syscalls.c
index f0f8d26554b5..f7cc4c284e4d 100644
--- a/sys/compat/freebsd32/freebsd32_syscalls.c
+++ b/sys/compat/freebsd32/freebsd32_syscalls.c
@@ -602,4 +602,6 @@ const char *freebsd32_syscallnames[] = {
"inotify_rm_watch", /* 594 = inotify_rm_watch */
"getgroups", /* 595 = getgroups */
"setgroups", /* 596 = setgroups */
+ "jail_attach_jd", /* 597 = jail_attach_jd */
+ "jail_remove_jd", /* 598 = jail_remove_jd */
};
diff --git a/sys/compat/freebsd32/freebsd32_sysent.c b/sys/compat/freebsd32/freebsd32_sysent.c
index 12f1a346c3e9..18f809ef04e3 100644
--- a/sys/compat/freebsd32/freebsd32_sysent.c
+++ b/sys/compat/freebsd32/freebsd32_sysent.c
@@ -664,4 +664,6 @@ struct sysent freebsd32_sysent[] = {
{ .sy_narg = AS(inotify_rm_watch_args), .sy_call = (sy_call_t *)sys_inotify_rm_watch, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 594 = inotify_rm_watch */
{ .sy_narg = AS(getgroups_args), .sy_call = (sy_call_t *)sys_getgroups, .sy_auevent = AUE_GETGROUPS, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 595 = getgroups */
{ .sy_narg = AS(setgroups_args), .sy_call = (sy_call_t *)sys_setgroups, .sy_auevent = AUE_SETGROUPS, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 596 = setgroups */
+ { .sy_narg = AS(jail_attach_jd_args), .sy_call = (sy_call_t *)sys_jail_attach_jd, .sy_auevent = AUE_JAIL_ATTACH, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 597 = jail_attach_jd */
+ { .sy_narg = AS(jail_remove_jd_args), .sy_call = (sy_call_t *)sys_jail_remove_jd, .sy_auevent = AUE_JAIL_REMOVE, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 598 = jail_remove_jd */
};
diff --git a/sys/compat/freebsd32/freebsd32_systrace_args.c b/sys/compat/freebsd32/freebsd32_systrace_args.c
index e471c5148021..29a5497e9efa 100644
--- a/sys/compat/freebsd32/freebsd32_systrace_args.c
+++ b/sys/compat/freebsd32/freebsd32_systrace_args.c
@@ -3413,6 +3413,20 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)
*n_args = 2;
break;
}
+ /* jail_attach_jd */
+ case 597: {
+ struct jail_attach_jd_args *p = params;
+ iarg[a++] = p->fd; /* int */
+ *n_args = 1;
+ break;
+ }
+ /* jail_remove_jd */
+ case 598: {
+ struct jail_remove_jd_args *p = params;
+ iarg[a++] = p->fd; /* int */
+ *n_args = 1;
+ break;
+ }
default:
*n_args = 0;
break;
@@ -9222,6 +9236,26 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
break;
};
break;
+ /* jail_attach_jd */
+ case 597:
+ switch (ndx) {
+ case 0:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* jail_remove_jd */
+ case 598:
+ switch (ndx) {
+ case 0:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
default:
break;
};
@@ -11130,6 +11164,16 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
if (ndx == 0 || ndx == 1)
p = "int";
break;
+ /* jail_attach_jd */
+ case 597:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* jail_remove_jd */
+ case 598:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
default:
break;
};
diff --git a/sys/conf/files b/sys/conf/files
index d89813c70355..9661bafea8f9 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3808,6 +3808,7 @@ kern/kern_hhook.c standard
kern/kern_idle.c standard
kern/kern_intr.c standard
kern/kern_jail.c standard
+kern/kern_jaildesc.c standard
kern/kern_jailmeta.c standard
kern/kern_kcov.c optional kcov \
compile-with "${NOSAN_C} ${MSAN_CFLAGS}"
diff --git a/sys/dev/cyapa/cyapa.c b/sys/dev/cyapa/cyapa.c
index 50fa4faa560a..ed755f992949 100644
--- a/sys/dev/cyapa/cyapa.c
+++ b/sys/dev/cyapa/cyapa.c
@@ -761,42 +761,60 @@ again:
/*
* Generate report
*/
- c0 = 0;
- if (delta_x < 0)
- c0 |= 0x10;
- if (delta_y < 0)
- c0 |= 0x20;
- c0 |= 0x08;
- if (but & CYAPA_FNGR_LEFT)
- c0 |= 0x01;
- if (but & CYAPA_FNGR_MIDDLE)
- c0 |= 0x04;
- if (but & CYAPA_FNGR_RIGHT)
- c0 |= 0x02;
-
- fifo_write_char(sc, &sc->rfifo, c0);
- fifo_write_char(sc, &sc->rfifo, (uint8_t)delta_x);
- fifo_write_char(sc, &sc->rfifo, (uint8_t)delta_y);
- switch(sc->zenabled) {
- case 1:
- /* Z axis all 8 bits */
- fifo_write_char(sc, &sc->rfifo, (uint8_t)delta_z);
- break;
- case 2:
- /*
- * Z axis low 4 bits + 4th button and 5th button
- * (high 2 bits must be left 0). Auto-scale
- * delta_z to fit to avoid a wrong-direction
- * overflow (don't try to retain the remainder).
- */
- while (delta_z > 7 || delta_z < -8)
- delta_z >>= 1;
- c0 = (uint8_t)delta_z & 0x0F;
+ if (sc->mode.level == 1) {
+ c0 = MOUSE_SYS_SYNC;
+ if (but & CYAPA_FNGR_LEFT)
+ c0 |= MOUSE_SYS_BUTTON1UP;
+ if (but & CYAPA_FNGR_MIDDLE)
+ c0 |= MOUSE_SYS_BUTTON2UP;
+ if (but & CYAPA_FNGR_RIGHT)
+ c0 |= MOUSE_SYS_BUTTON3UP;
fifo_write_char(sc, &sc->rfifo, c0);
- break;
- default:
- /* basic PS/2 */
- break;
+ fifo_write_char(sc, &sc->rfifo, delta_x >> 1);
+ fifo_write_char(sc, &sc->rfifo, delta_y >> 1);
+ fifo_write_char(sc, &sc->rfifo, delta_x - (delta_x >> 1));
+ fifo_write_char(sc, &sc->rfifo, delta_y - (delta_y >> 1));
+ fifo_write_char(sc, &sc->rfifo, delta_z >> 1);
+ fifo_write_char(sc, &sc->rfifo, delta_z - (delta_z >> 1));
+ fifo_write_char(sc, &sc->rfifo, MOUSE_SYS_EXTBUTTONS);
+ } else {
+ c0 = 0;
+ if (delta_x < 0)
+ c0 |= 0x10;
+ if (delta_y < 0)
+ c0 |= 0x20;
+ c0 |= 0x08;
+ if (but & CYAPA_FNGR_LEFT)
+ c0 |= 0x01;
+ if (but & CYAPA_FNGR_MIDDLE)
+ c0 |= 0x04;
+ if (but & CYAPA_FNGR_RIGHT)
+ c0 |= 0x02;
+
+ fifo_write_char(sc, &sc->rfifo, c0);
+ fifo_write_char(sc, &sc->rfifo, (uint8_t)delta_x);
+ fifo_write_char(sc, &sc->rfifo, (uint8_t)delta_y);
+ switch(sc->zenabled) {
+ case 1:
+ /* Z axis all 8 bits */
+ fifo_write_char(sc, &sc->rfifo, (uint8_t)delta_z);
+ break;
+ case 2:
+ /*
+ * Z axis low 4 bits + 4th button and 5th button
+ * (high 2 bits must be left 0). Auto-scale
+ * delta_z to fit to avoid a wrong-direction
+ * overflow (don't try to retain the remainder).
+ */
+ while (delta_z > 7 || delta_z < -8)
+ delta_z >>= 1;
+ c0 = (uint8_t)delta_z & 0x0F;
+ fifo_write_char(sc, &sc->rfifo, c0);
+ break;
+ default:
+ /* basic PS/2 */
+ break;
+ }
}
cyapa_notify(sc);
}
@@ -1205,6 +1223,11 @@ cyapaioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread
((mousemode_t *)data)->packetsize =
MOUSE_PS2_PACKETSIZE;
break;
+ case 1:
+ ((mousemode_t *)data)->protocol = MOUSE_PROTO_SYSMOUSE;
+ ((mousemode_t *)data)->packetsize =
+ MOUSE_SYS_PACKETSIZE;
+ break;
case 2:
((mousemode_t *)data)->protocol = MOUSE_PROTO_PS2;
((mousemode_t *)data)->packetsize =
@@ -1223,7 +1246,7 @@ cyapaioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread
error = EINVAL;
break;
}
- sc->mode.level = *(int *)data ? 2 : 0;
+ sc->mode.level = *(int *)data;
sc->zenabled = sc->mode.level ? 1 : 0;
break;
diff --git a/sys/dev/sound/pci/hda/hdac.c b/sys/dev/sound/pci/hda/hdac.c
index 900578b73de4..90cd74d28b3d 100644
--- a/sys/dev/sound/pci/hda/hdac.c
+++ b/sys/dev/sound/pci/hda/hdac.c
@@ -1773,17 +1773,17 @@ hdac_detach(device_t dev)
struct hdac_softc *sc = device_get_softc(dev);
int i, error;
+ callout_drain(&sc->poll_callout);
+ hdac_irq_free(sc);
+ taskqueue_drain(taskqueue_thread, &sc->unsolq_task);
+
error = bus_generic_detach(dev);
if (error != 0)
return (error);
hdac_lock(sc);
- callout_stop(&sc->poll_callout);
hdac_reset(sc, false);
hdac_unlock(sc);
- callout_drain(&sc->poll_callout);
- taskqueue_drain(taskqueue_thread, &sc->unsolq_task);
- hdac_irq_free(sc);
for (i = 0; i < sc->num_ss; i++)
hdac_dma_free(sc, &sc->streams[i].bdl);
@@ -2206,4 +2206,4 @@ static driver_t hdac_driver = {
sizeof(struct hdac_softc),
};
-DRIVER_MODULE(snd_hda, pci, hdac_driver, NULL, NULL);
+DRIVER_MODULE_ORDERED(snd_hda, pci, hdac_driver, NULL, NULL, SI_ORDER_ANY);
diff --git a/sys/dev/virtio/network/if_vtnet.c b/sys/dev/virtio/network/if_vtnet.c
index 4f19af6281a3..73f27ac147ff 100644
--- a/sys/dev/virtio/network/if_vtnet.c
+++ b/sys/dev/virtio/network/if_vtnet.c
@@ -1178,6 +1178,7 @@ vtnet_setup_interface(struct vtnet_softc *sc)
if (sc->vtnet_max_mtu >= ETHERMTU_JUMBO)
if_setcapabilitiesbit(ifp, IFCAP_JUMBO_MTU, 0);
if_setcapabilitiesbit(ifp, IFCAP_VLAN_MTU, 0);
+ if_setcapabilitiesbit(ifp, IFCAP_HWSTATS, 0);
/*
* Capabilities after here are not enabled by default.
@@ -3036,16 +3037,14 @@ vtnet_get_counter(if_t ifp, ift_counter cnt)
return (rxaccum.vrxs_iqdrops);
case IFCOUNTER_IERRORS:
return (rxaccum.vrxs_ierrors);
+ case IFCOUNTER_IBYTES:
+ return (rxaccum.vrxs_ibytes);
case IFCOUNTER_OPACKETS:
return (txaccum.vtxs_opackets);
case IFCOUNTER_OBYTES:
- if (!VTNET_ALTQ_ENABLED)
- return (txaccum.vtxs_obytes);
- /* FALLTHROUGH */
+ return (txaccum.vtxs_obytes);
case IFCOUNTER_OMCASTS:
- if (!VTNET_ALTQ_ENABLED)
- return (txaccum.vtxs_omcasts);
- /* FALLTHROUGH */
+ return (txaccum.vtxs_omcasts);
default:
return (if_get_counter_default(ifp, cnt));
}
diff --git a/sys/i386/conf/GENERIC b/sys/i386/conf/GENERIC
index 88b8967cd693..f426c3d11874 100644
--- a/sys/i386/conf/GENERIC
+++ b/sys/i386/conf/GENERIC
@@ -89,8 +89,6 @@ options RCTL # Resource limits
# Debugging support. Always need this:
options KDB # Enable kernel debugger support.
options KDB_TRACE # Print a stack trace for a panic.
-# For full debugger support use (turn off in stable branch):
-include "std.debug"
# Kernel dump features.
options EKCD # Support for encrypted kernel dumps
diff --git a/sys/i386/conf/GENERIC-NODEBUG b/sys/i386/conf/GENERIC-NODEBUG
deleted file mode 100644
index a93304481b5f..000000000000
--- a/sys/i386/conf/GENERIC-NODEBUG
+++ /dev/null
@@ -1,33 +0,0 @@
-#
-# GENERIC-NODEBUG -- WITNESS and INVARIANTS free kernel configuration file
-# for FreeBSD/i386
-#
-# This configuration file removes several debugging options, including
-# WITNESS and INVARIANTS checking, which are known to have significant
-# performance impact on running systems. When benchmarking new features
-# this kernel should be used instead of the standard GENERIC.
-# This kernel configuration should never appear outside of the HEAD
-# of the FreeBSD tree.
-#
-# For more information on this file, please read the config(5) manual page,
-# and/or the handbook section on Kernel Configuration Files:
-#
-# https://docs.freebsd.org/en/books/handbook/kernelconfig/#kernelconfig-config
-#
-# The handbook is also available locally in /usr/share/doc/handbook
-# if you've installed the doc distribution, otherwise always see the
-# FreeBSD World Wide Web server (https://www.FreeBSD.org/) for the
-# latest information.
-#
-# An exhaustive list of options and more detailed explanations of the
-# device lines is also present in the ../../conf/NOTES and NOTES files.
-# If you are in doubt as to the purpose or necessity of a line, check first
-# in NOTES.
-#
-
-#NO_UNIVERSE
-
-include GENERIC
-include "std.nodebug"
-
-ident GENERIC-NODEBUG
diff --git a/sys/i386/conf/MINIMAL b/sys/i386/conf/MINIMAL
index 8019617ca4d4..6b70c4e59825 100644
--- a/sys/i386/conf/MINIMAL
+++ b/sys/i386/conf/MINIMAL
@@ -83,8 +83,6 @@ options INCLUDE_CONFIG_FILE # Include this file in kernel
# Debugging support. Always need this:
options KDB # Enable kernel debugger support.
options KDB_TRACE # Print a stack trace for a panic.
-# For full debugger support use (turn off in stable branch):
-include "std.debug"
# Make an SMP-capable kernel by default
options SMP # Symmetric MultiProcessor Kernel
diff --git a/sys/i386/conf/MINIMAL-NODEBUG b/sys/i386/conf/MINIMAL-NODEBUG
deleted file mode 100644
index 7b7c22bbcaf6..000000000000
--- a/sys/i386/conf/MINIMAL-NODEBUG
+++ /dev/null
@@ -1,11 +0,0 @@
-#
-# MINIMAL-NODEBUG -- Non-debug MINIMAL kernel.
-#
-# This is the MINIMAL equivalent to GENERIC-NODEBUG.
-
-#NO_UNIVERSE
-
-include MINIMAL
-include "std.nodebug"
-
-ident MINIMAL-NODEBUG
diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c
index fcd232cde21e..e42e7dcf8b44 100644
--- a/sys/kern/init_sysent.c
+++ b/sys/kern/init_sysent.c
@@ -663,4 +663,6 @@ struct sysent sysent[] = {
{ .sy_narg = AS(inotify_rm_watch_args), .sy_call = (sy_call_t *)sys_inotify_rm_watch, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 594 = inotify_rm_watch */
{ .sy_narg = AS(getgroups_args), .sy_call = (sy_call_t *)sys_getgroups, .sy_auevent = AUE_GETGROUPS, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 595 = getgroups */
{ .sy_narg = AS(setgroups_args), .sy_call = (sy_call_t *)sys_setgroups, .sy_auevent = AUE_SETGROUPS, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 596 = setgroups */
+ { .sy_narg = AS(jail_attach_jd_args), .sy_call = (sy_call_t *)sys_jail_attach_jd, .sy_auevent = AUE_JAIL_ATTACH, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 597 = jail_attach_jd */
+ { .sy_narg = AS(jail_remove_jd_args), .sy_call = (sy_call_t *)sys_jail_remove_jd, .sy_auevent = AUE_JAIL_REMOVE, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 598 = jail_remove_jd */
};
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index a27ab33b34da..057235574eb5 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -5250,6 +5250,8 @@ file_type_to_name(short type)
return ("eventfd");
case DTYPE_TIMERFD:
return ("timerfd");
+ case DTYPE_JAILDESC:
+ return ("jail");
default:
return ("unkn");
}
diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c
index eb77a5064113..501adc151d44 100644
--- a/sys/kern/kern_event.c
+++ b/sys/kern/kern_event.c
@@ -50,6 +50,7 @@
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/fcntl.h>
+#include <sys/jail.h>
#include <sys/kthread.h>
#include <sys/selinfo.h>
#include <sys/queue.h>
@@ -163,6 +164,9 @@ static int filt_kqueue(struct knote *kn, long hint);
static int filt_procattach(struct knote *kn);
static void filt_procdetach(struct knote *kn);
static int filt_proc(struct knote *kn, long hint);
+static int filt_jailattach(struct knote *kn);
+static void filt_jaildetach(struct knote *kn);
+static int filt_jail(struct knote *kn, long hint);
static int filt_fileattach(struct knote *kn);
static void filt_timerexpire(void *knx);
static void filt_timerexpire_l(struct knote *kn, bool proc_locked);
@@ -195,6 +199,12 @@ static const struct filterops proc_filtops = {
.f_detach = filt_procdetach,
.f_event = filt_proc,
};
+static const struct filterops jail_filtops = {
+ .f_isfd = 0,
+ .f_attach = filt_jailattach,
+ .f_detach = filt_jaildetach,
+ .f_event = filt_jail,
+};
static const struct filterops timer_filtops = {
.f_isfd = 0,
.f_attach = filt_timerattach,
@@ -365,6 +375,7 @@ static struct {
[~EVFILT_USER] = { &user_filtops, 1 },
[~EVFILT_SENDFILE] = { &null_filtops },
[~EVFILT_EMPTY] = { &file_filtops, 1 },
+ [~EVFILT_JAIL] = { &jail_filtops, 1 },
};
/*
@@ -528,7 +539,8 @@ filt_proc(struct knote *kn, long hint)
* process forked. Additionally, for each knote attached to the
* parent, check whether user wants to track the new process. If so
* attach a new knote to it, and immediately report an event with the
- * child's pid.
+ * child's pid. This is also called on jail creation, which is treated
+ * the same way by jail events.
*/
void
knote_fork(struct knlist *list, int pid)
@@ -555,6 +567,8 @@ knote_fork(struct knlist *list, int pid)
/*
* The same as knote(), activate the event.
*/
+ _Static_assert(NOTE_JAIL_CHILD == NOTE_FORK,
+ "NOTE_JAIL_CHILD should be the same as NOTE_FORK");
if ((kn->kn_sfflags & NOTE_TRACK) == 0) {
if (kn->kn_fop->f_event(kn, NOTE_FORK))
KNOTE_ACTIVATE(kn, 1);
@@ -614,6 +628,124 @@ knote_fork(struct knlist *list, int pid)
}
}
+int
+filt_jailattach(struct knote *kn)
+{
+ struct prison *pr;
+ bool immediate;
+
+ immediate = false;
+ if (kn->kn_id == 0) {
+ /* Let jid=0 watch the current prison (including prison0). */
+ pr = curthread->td_ucred->cr_prison;
+ mtx_lock(&pr->pr_mtx);
+ } else if (kn->kn_flags & (EV_FLAG1 | EV_FLAG2)) {
+ /*
+ * The kernel registers prisons before they are valid,
+ * so prison_find_child will fail.
+ */
+ TAILQ_FOREACH(pr, &allprison, pr_list) {
+ if (pr->pr_id < kn->kn_id)
+ continue;
+ if (pr->pr_id > kn->kn_id) {
+ pr = NULL;
+ break;
+ }
+ mtx_lock(&pr->pr_mtx);
+ break;
+ }
+ if (pr == NULL)
+ return (ENOENT);
+ } else {
+ sx_slock(&allprison_lock);
+ pr = prison_find_child(curthread->td_ucred->cr_prison,
+ kn->kn_id);
+ sx_sunlock(&allprison_lock);
+ if (pr == NULL)
+ return (ENOENT);
+ if (!prison_isalive(pr)) {
+ mtx_unlock(&pr->pr_mtx);
+ return (ENOENT);
+ }
+ }
+ kn->kn_ptr.p_prison = pr;
+ kn->kn_flags |= EV_CLEAR;
+
+ /*
+ * Internal flag indicating registration done by kernel for the
+ * purposes of getting a NOTE_CHILD notification.
+ */
+ if (kn->kn_flags & EV_FLAG2) {
+ kn->kn_flags &= ~EV_FLAG2;
+ kn->kn_data = kn->kn_sdata; /* parent id */
+ kn->kn_fflags = NOTE_CHILD;
+ kn->kn_sfflags &= ~NOTE_JAIL_CTRLMASK;
+ immediate = true; /* Force immediate activation of child note. */
+ }
+ /*
+ * Internal flag indicating registration done by kernel (for other than
+ * NOTE_CHILD).
+ */
+ if (kn->kn_flags & EV_FLAG1) {
+ kn->kn_flags &= ~EV_FLAG1;
+ }
+
+ knlist_add(pr->pr_klist, kn, 1);
+
+ /* Immediately activate any child notes. */
+ if (immediate)
+ KNOTE_ACTIVATE(kn, 0);
+
+ mtx_unlock(&pr->pr_mtx);
+ return (0);
+}
+
+void
+filt_jaildetach(struct knote *kn)
+{
+ if (kn->kn_ptr.p_prison != NULL) {
+ knlist_remove(kn->kn_knlist, kn, 0);
+ kn->kn_ptr.p_prison = NULL;
+ } else
+ kn->kn_status |= KN_DETACHED;
+}
+
+int
+filt_jail(struct knote *kn, long hint)
+{
+ struct prison *pr;
+ u_int event;
+
+ pr = kn->kn_ptr.p_prison;
+ if (pr == NULL) /* already activated, from attach filter */
+ return (0);
+
+ /* Mask off extra data. */
+ event = (u_int)hint & NOTE_JAIL_CTRLMASK;
+
+ /* If the user is interested in this event, record it. */
+ if (kn->kn_sfflags & event)
+ kn->kn_fflags |= event;
+
+ /* Report the attached process id. */
+ if (event == NOTE_JAIL_ATTACH) {
+ if (kn->kn_data != 0)
+ kn->kn_fflags |= NOTE_JAIL_ATTACH_MULTI;
+ kn->kn_data = hint & NOTE_JAIL_DATAMASK;
+ }
+
+ /* Prison is gone, so flag the event as finished. */
+ if (event == NOTE_JAIL_REMOVE) {
+ kn->kn_flags |= EV_EOF | EV_ONESHOT;
+ kn->kn_ptr.p_prison = NULL;
+ if (kn->kn_fflags == 0)
+ kn->kn_flags |= EV_DROP;
+ return (1);
+ }
+
+ return (kn->kn_fflags != 0);
+}
+
/*
* XXX: EVFILT_TIMER should perhaps live in kern_time.c beside the
* interval timer support code.
@@ -1597,8 +1729,8 @@ findkn:
/*
* If possible, find an existing knote to use for this kevent.
*/
- if (kev->filter == EVFILT_PROC &&
- (kev->flags & (EV_FLAG1 | EV_FLAG2)) != 0) {
+ if ((kev->filter == EVFILT_PROC || kev->filter == EVFILT_JAIL)
+ && (kev->flags & (EV_FLAG1 | EV_FLAG2)) != 0) {
/* This is an internal creation of a process tracking
* note. Don't attempt to coalesce this with an
* existing note.
@@ -2800,6 +2932,7 @@ knote_init(void)
knote_zone = uma_zcreate("KNOTE", sizeof(struct knote), NULL, NULL,
NULL, NULL, UMA_ALIGN_PTR, 0);
ast_register(TDA_KQUEUE, ASTR_ASTF_REQUIRED, 0, ast_kqueue);
+ prison0.pr_klist = knlist_alloc(&prison0.pr_mtx);
}
SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL);
diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c
index 7c9a15ae18f3..5a1fbe23ddeb 100644
--- a/sys/kern/kern_jail.c
+++ b/sys/kern/kern_jail.c
@@ -39,15 +39,18 @@
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/errno.h>
+#include <sys/file.h>
#include <sys/sysproto.h>
#include <sys/malloc.h>
#include <sys/osd.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/epoch.h>
+#include <sys/event.h>
#include <sys/taskqueue.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
+#include <sys/jaildesc.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/mman.h>
@@ -154,7 +157,8 @@ static void prison_complete(void *context, int pending);
static void prison_deref(struct prison *pr, int flags);
static void prison_deref_kill(struct prison *pr, struct prisonlist *freeprison);
static int prison_lock_xlock(struct prison *pr, int flags);
-static void prison_cleanup(struct prison *pr);
+static void prison_cleanup_locked(struct prison *pr);
+static void prison_cleanup_unlocked(struct prison *pr);
static void prison_free_not_last(struct prison *pr);
static void prison_proc_free_not_last(struct prison *pr);
static void prison_proc_relink(struct prison *opr, struct prison *npr,
@@ -167,6 +171,7 @@ static void prison_racct_attach(struct prison *pr);
static void prison_racct_modify(struct prison *pr);
static void prison_racct_detach(struct prison *pr);
#endif
+static void prison_knote(struct prison *pr, long hint);
/* Flags for prison_deref */
#define PD_DEREF 0x01 /* Decrement pr_ref */
@@ -985,6 +990,8 @@ prison_ip_cnt(const struct prison *pr, const pr_family_t af)
int
kern_jail_set(struct thread *td, struct uio *optuio, int flags)
{
+ struct file *jfp_out;
+ struct jaildesc *desc_in;
struct nameidata nd;
#ifdef INET
struct prison_ip *ip4;
@@ -995,6 +1002,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
struct vfsopt *opt;
struct vfsoptlist *opts;
struct prison *pr, *deadpr, *dinspr, *inspr, *mypr, *ppr, *tpr;
+ struct ucred *jdcred;
struct vnode *root;
char *domain, *errmsg, *host, *name, *namelc, *p, *path, *uuid;
char *g_path, *osrelstr;
@@ -1008,7 +1016,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
int created, cuflags, descend, drflags, enforce;
int error, errmsg_len, errmsg_pos;
int gotchildmax, gotenforce, gothid, gotrsnum, gotslevel;
- int deadid, jid, jsys, len, level;
+ int deadid, jfd_in, jfd_out, jfd_pos, jid, jsys, len, level;
int childmax, osreldt, rsnum, slevel;
#ifdef INET
int ip4s;
@@ -1018,22 +1026,32 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
int ip6s;
bool redo_ip6;
#endif
+ bool maybe_changed;
uint64_t pr_allow, ch_allow, pr_flags, ch_flags;
uint64_t pr_allow_diff;
unsigned tallow;
char numbuf[12];
- error = priv_check(td, PRIV_JAIL_SET);
- if (!error && (flags & JAIL_ATTACH))
- error = priv_check(td, PRIV_JAIL_ATTACH);
- if (error)
- return (error);
mypr = td->td_ucred->cr_prison;
- if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0)
+ if (((flags & (JAIL_CREATE | JAIL_AT_DESC)) == JAIL_CREATE)
+ && mypr->pr_childmax == 0)
return (EPERM);
if (flags & ~JAIL_SET_MASK)
return (EINVAL);
+ if ((flags & (JAIL_USE_DESC | JAIL_AT_DESC))
+ == (JAIL_USE_DESC | JAIL_AT_DESC))
+ return (EINVAL);
+ prison_hold(mypr);
+#ifdef INET
+ ip4 = NULL;
+#endif
+#ifdef INET6
+ ip6 = NULL;
+#endif
+ g_path = NULL;
+ jfp_out = NULL;
+ jfd_out = -1;
/*
* Check all the parameters before committing to anything. Not all
* errors can be caught early, but we may as well try. Also, this
@@ -1046,14 +1064,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
*/
error = vfs_buildopts(optuio, &opts);
if (error)
- return (error);
-#ifdef INET
- ip4 = NULL;
-#endif
-#ifdef INET6
- ip6 = NULL;
-#endif
- g_path = NULL;
+ goto done_free;
cuflags = flags & (JAIL_CREATE | JAIL_UPDATE);
if (!cuflags) {
@@ -1062,6 +1073,72 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
goto done_errmsg;
}
+ error = vfs_copyopt(opts, "desc", &jfd_in, sizeof(jfd_in));
+ if (error == ENOENT) {
+ if (flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC |
+ JAIL_OWN_DESC)) {
+ vfs_opterror(opts, "missing desc");
+ goto done_errmsg;
+ }
+ jfd_in = -1;
+ } else if (error != 0)
+ goto done_free;
+ else {
+ if (!(flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC |
+ JAIL_OWN_DESC))) {
+ vfs_opterror(opts, "unexpected desc");
+ goto done_errmsg;
+ }
+ if (flags & JAIL_AT_DESC) {
+ /*
+ * Look up and create jails based on the
+ * descriptor's prison.
+ */
+ prison_free(mypr);
+ error = jaildesc_find(td, jfd_in, &desc_in, &mypr,
+ NULL);
+ if (error != 0) {
+ vfs_opterror(opts, error == ENOENT
+ ? "descriptor to dead jail"
+ : "not a jail descriptor");
+ goto done_errmsg;
+ }
+ /*
+ * Check file permissions using the current
+ * credentials, and operation permissions
+ * using the descriptor's credentials.
+ */
+ error = vaccess(VREG, desc_in->jd_mode, desc_in->jd_uid,
+ desc_in->jd_gid, VEXEC, td->td_ucred);
+ JAILDESC_UNLOCK(desc_in);
+ if (error != 0)
+ goto done_free;
+ if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0) {
+ error = EPERM;
+ goto done_free;
+ }
+ }
+ if (flags & (JAIL_GET_DESC | JAIL_OWN_DESC)) {
+ /* Allocate a jail descriptor to return later. */
+ error = jaildesc_alloc(td, &jfp_out, &jfd_out,
+ flags & JAIL_OWN_DESC);
+ if (error)
+ goto done_free;
+ }
+ }
+
+ /*
+ * Delay the permission check if using a jail descriptor,
+ * until we get the descriptor's credentials.
+ */
+ if (!(flags & JAIL_USE_DESC)) {
+ error = priv_check(td, PRIV_JAIL_SET);
+ if (error == 0 && (flags & JAIL_ATTACH))
+ error = priv_check(td, PRIV_JAIL_ATTACH);
+ if (error)
+ goto done_free;
+ }
+
error = vfs_copyopt(opts, "jid", &jid, sizeof(jid));
if (error == ENOENT)
jid = 0;
@@ -1422,6 +1499,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
pr = NULL;
inspr = NULL;
deadpr = NULL;
+ maybe_changed = false;
if (cuflags == JAIL_CREATE && jid == 0 && name != NULL) {
namelc = strrchr(name, '.');
jid = strtoul(namelc != NULL ? namelc + 1 : name, &p, 10);
@@ -1436,7 +1514,57 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
error = EAGAIN;
goto done_deref;
}
- if (jid != 0) {
+ if (flags & JAIL_USE_DESC) {
+ /* Get the jail from its descriptor. */
+ error = jaildesc_find(td, jfd_in, &desc_in, &pr, &jdcred);
+ if (error) {
+ vfs_opterror(opts, error == ENOENT
+ ? "descriptor to dead jail"
+ : "not a jail descriptor");
+ goto done_deref;
+ }
+ drflags |= PD_DEREF;
+ /*
+ * Check file permissions using the current credentials,
+ * and operation permissions using the descriptor's
+ * credentials.
+ */
+ error = vaccess(VREG, desc_in->jd_mode, desc_in->jd_uid,
+ desc_in->jd_gid, VWRITE, td->td_ucred);
+ if (error == 0 && (flags & JAIL_ATTACH))
+ error = vaccess(VREG, desc_in->jd_mode, desc_in->jd_uid,
+ desc_in->jd_gid, VEXEC, td->td_ucred);
+ JAILDESC_UNLOCK(desc_in);
+ if (error == 0)
+ error = priv_check_cred(jdcred, PRIV_JAIL_SET);
+ if (error == 0 && (flags & JAIL_ATTACH))
+ error = priv_check_cred(jdcred, PRIV_JAIL_ATTACH);
+ crfree(jdcred);
+ if (error)
+ goto done_deref;
+ mtx_lock(&pr->pr_mtx);
+ drflags |= PD_LOCKED;
+ if (cuflags == JAIL_CREATE) {
+ error = EEXIST;
+ vfs_opterror(opts, "jail %d already exists",
+ pr->pr_id);
+ goto done_deref;
+ }
+ if (!prison_isalive(pr)) {
+ /* While a jid can be resurrected, the prison
+ * itself cannot.
+ */
+ error = ENOENT;
+ vfs_opterror(opts, "jail %d is dying", pr->pr_id);
+ goto done_deref;
+ }
+ if (jid != 0 && jid != pr->pr_id) {
+ error = EINVAL;
+ vfs_opterror(opts, "cannot change jid");
+ goto done_deref;
+ }
+ jid = pr->pr_id;
+ } else if (jid != 0) {
if (jid < 0) {
error = EINVAL;
vfs_opterror(opts, "negative jid");
@@ -1570,7 +1698,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
}
}
}
- /* Update: must provide a jid or name. */
+ /* Update: must provide a desc, jid, or name. */
else if (cuflags == JAIL_UPDATE && pr == NULL) {
error = ENOENT;
vfs_opterror(opts, "update specified no jail");
@@ -1643,6 +1771,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
LIST_INSERT_HEAD(&ppr->pr_children, pr, pr_sibling);
for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent)
tpr->pr_childcount++;
+ pr->pr_klist = knlist_alloc(&pr->pr_mtx);
/* Set some default values, and inherit some from the parent. */
if (namelc == NULL)
@@ -1722,8 +1851,10 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
* Grab a reference for existing prisons, to ensure they
* continue to exist for the duration of the call.
*/
- prison_hold(pr);
- drflags |= PD_DEREF;
+ if (!(drflags & PD_DEREF)) {
+ prison_hold(pr);
+ drflags |= PD_DEREF;
+ }
#if defined(VIMAGE) && (defined(INET) || defined(INET6))
if ((pr->pr_flags & PR_VNET) &&
(ch_flags & (PR_IP4_USER | PR_IP6_USER))) {
@@ -1880,6 +2011,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
goto done_deref;
}
}
+ maybe_changed = true;
/* Set the parameters of the prison. */
#ifdef INET
@@ -2112,7 +2244,12 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
* reference via persistence, or is about to gain one via attachment.
*/
if (created) {
- drflags = prison_lock_xlock(pr, drflags);
+ sx_assert(&allprison_lock, SX_XLOCKED);
+ mtx_lock(&ppr->pr_mtx);
+ knote_fork(ppr->pr_klist, pr->pr_id);
+ mtx_unlock(&ppr->pr_mtx);
+ mtx_lock(&pr->pr_mtx);
+ drflags |= PD_LOCKED;
pr->pr_state = PRISON_STATE_ALIVE;
}
@@ -2146,10 +2283,37 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
printf("Warning jail jid=%d: mountd/nfsd requires a separate"
" file system\n", pr->pr_id);
+ /*
+ * Now that the prison is fully created without error, set the
+ * jail descriptor if one was requested. This is the only
+ * parameter that is returned to the caller (except the error
+ * message).
+ */
+ if (jfd_out >= 0) {
+ if (!(drflags & PD_LOCKED)) {
+ mtx_lock(&pr->pr_mtx);
+ drflags |= PD_LOCKED;
+ }
+ jfd_pos = 2 * vfs_getopt_pos(opts, "desc") + 1;
+ if (optuio->uio_segflg == UIO_SYSSPACE)
+ *(int*)optuio->uio_iov[jfd_pos].iov_base = jfd_out;
+ else
+ (void)copyout(&jfd_out,
+ optuio->uio_iov[jfd_pos].iov_base, sizeof(jfd_out));
+ jaildesc_set_prison(jfp_out, pr);
+ }
+
drflags &= ~PD_KILL;
td->td_retval[0] = pr->pr_id;
done_deref:
+ /*
+ * Report changes to kevent. This can happen even if the
+ * system call fails, as changes might have been made before
+ * the failure.
+ */
+ if (maybe_changed && !created)
+ prison_knote(pr, NOTE_JAIL_SET);
/* Release any temporary prison holds and/or locks. */
if (pr != NULL)
prison_deref(pr, drflags);
@@ -2176,15 +2340,21 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
}
}
done_free:
+ /* Clean up other resources. */
#ifdef INET
prison_ip_free(ip4);
#endif
#ifdef INET6
prison_ip_free(ip6);
#endif
+ if (jfp_out != NULL)
+ fdrop(jfp_out, td);
+ if (error && jfd_out >= 0)
+ (void)kern_close(td, jfd_out);
if (g_path != NULL)
free(g_path, M_TEMP);
vfs_freeopts(opts);
+ prison_free(mypr);
return (error);
}
@@ -2329,16 +2499,22 @@ int
kern_jail_get(struct thread *td, struct uio *optuio, int flags)
{
struct bool_flags *bf;
+ struct file *jfp_out;
+ struct jaildesc *desc_in;
struct jailsys_flags *jsf;
struct prison *pr, *mypr;
struct vfsopt *opt;
struct vfsoptlist *opts;
char *errmsg, *name;
int drflags, error, errmsg_len, errmsg_pos, i, jid, len, pos;
+ int jfd_in, jfd_out;
unsigned f;
if (flags & ~JAIL_GET_MASK)
return (EINVAL);
+ if ((flags & (JAIL_USE_DESC | JAIL_AT_DESC))
+ == (JAIL_USE_DESC | JAIL_AT_DESC))
+ return (EINVAL);
/* Get the parameter list. */
error = vfs_buildopts(optuio, &opts);
@@ -2346,13 +2522,81 @@ kern_jail_get(struct thread *td, struct uio *optuio, int flags)
return (error);
errmsg_pos = vfs_getopt_pos(opts, "errmsg");
mypr = td->td_ucred->cr_prison;
+ prison_hold(mypr);
pr = NULL;
+ jfp_out = NULL;
+ jfd_out = -1;
/*
- * Find the prison specified by one of: lastjid, jid, name.
+ * Find the prison specified by one of: desc, lastjid, jid, name.
*/
sx_slock(&allprison_lock);
drflags = PD_LIST_SLOCKED;
+
+ error = vfs_copyopt(opts, "desc", &jfd_in, sizeof(jfd_in));
+ if (error == ENOENT) {
+ if (flags & (JAIL_AT_DESC | JAIL_GET_DESC | JAIL_OWN_DESC)) {
+ vfs_opterror(opts, "missing desc");
+ goto done;
+ }
+ } else if (error == 0) {
+ if (!(flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC |
+ JAIL_OWN_DESC))) {
+ vfs_opterror(opts, "unexpected desc");
+ goto done;
+ }
+ if (flags & JAIL_USE_DESC) {
+ /* Get the jail from its descriptor. */
+ error = jaildesc_find(td, jfd_in, &desc_in, &pr, NULL);
+ if (error) {
+ vfs_opterror(opts, error == ENOENT
+ ? "descriptor to dead jail"
+ : "not a jail descriptor");
+ goto done;
+ }
+ drflags |= PD_DEREF;
+ error = vaccess(VREG, desc_in->jd_mode, desc_in->jd_uid,
+ desc_in->jd_gid, VREAD, td->td_ucred);
+ JAILDESC_UNLOCK(desc_in);
+ if (error != 0)
+ goto done;
+ mtx_lock(&pr->pr_mtx);
+ drflags |= PD_LOCKED;
+ if (!(prison_isalive(pr) || (flags & JAIL_DYING))) {
+ error = ENOENT;
+ vfs_opterror(opts, "jail %d is dying",
+ pr->pr_id);
+ goto done;
+ }
+ goto found_prison;
+ }
+ if (flags & JAIL_AT_DESC) {
+ /* Look up jails based on the descriptor's prison. */
+ prison_free(mypr);
+ error = jaildesc_find(td, jfd_in, &desc_in, &mypr,
+ NULL);
+ if (error != 0) {
+ vfs_opterror(opts, error == ENOENT
+ ? "descriptor to dead jail"
+ : "not a jail descriptor");
+ goto done;
+ }
+ error = vaccess(VREG, desc_in->jd_mode, desc_in->jd_uid,
+ desc_in->jd_gid, VEXEC, td->td_ucred);
+ JAILDESC_UNLOCK(desc_in);
+ if (error != 0)
+ goto done;
+ }
+ if (flags & (JAIL_GET_DESC | JAIL_OWN_DESC)) {
+ /* Allocate a jail descriptor to return later. */
+ error = jaildesc_alloc(td, &jfp_out, &jfd_out,
+ flags & JAIL_OWN_DESC);
+ if (error)
+ goto done;
+ }
+ } else
+ goto done;
+
error = vfs_copyopt(opts, "lastjid", &jid, sizeof(jid));
if (error == 0) {
TAILQ_FOREACH(pr, &allprison, pr_list) {
@@ -2421,9 +2665,17 @@ kern_jail_get(struct thread *td, struct uio *optuio, int flags)
found_prison:
/* Get the parameters of the prison. */
- prison_hold(pr);
- drflags |= PD_DEREF;
+ if (!(drflags & PD_DEREF)) {
+ prison_hold(pr);
+ drflags |= PD_DEREF;
+ }
td->td_retval[0] = pr->pr_id;
+ if (jfd_out >= 0) {
+ error = vfs_setopt(opts, "desc", &jfd_out, sizeof(jfd_out));
+ if (error != 0 && error != ENOENT)
+ goto done;
+ jaildesc_set_prison(jfp_out, pr);
+ }
error = vfs_setopt(opts, "jid", &pr->pr_id, sizeof(pr->pr_id));
if (error != 0 && error != ENOENT)
goto done;
@@ -2603,6 +2855,13 @@ kern_jail_get(struct thread *td, struct uio *optuio, int flags)
prison_deref(pr, drflags);
else if (drflags & PD_LIST_SLOCKED)
sx_sunlock(&allprison_lock);
+ else if (drflags & PD_LIST_XLOCKED)
+ sx_xunlock(&allprison_lock);
+ /* Clean up other resources. */
+ if (jfp_out != NULL)
+ (void)fdrop(jfp_out, td);
+ if (error && jfd_out >= 0)
+ (void)kern_close(td, jfd_out);
if (error && errmsg_pos >= 0) {
/* Write the error message back to userspace. */
vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len);
@@ -2619,6 +2878,7 @@ kern_jail_get(struct thread *td, struct uio *optuio, int flags)
}
}
vfs_freeopts(opts);
+ prison_free(mypr);
return (error);
}
@@ -2643,14 +2903,63 @@ sys_jail_remove(struct thread *td, struct jail_remove_args *uap)
sx_xunlock(&allprison_lock);
return (EINVAL);
}
+ prison_hold(pr);
+ prison_remove(pr);
+ return (0);
+}
+
+/*
+ * struct jail_remove_jd_args {
+ * int fd;
+ * };
+ */
+int
+sys_jail_remove_jd(struct thread *td, struct jail_remove_jd_args *uap)
+{
+ struct jaildesc *jd;
+ struct prison *pr;
+ struct ucred *jdcred;
+ int error;
+
+ error = jaildesc_find(td, uap->fd, &jd, &pr, &jdcred);
+ if (error)
+ return (error);
+ /*
+ * Check file permissions using the current credentials, and
+ * operation permissions using the descriptor's credentials.
+ */
+ error = vaccess(VREG, jd->jd_mode, jd->jd_uid, jd->jd_gid, VWRITE,
+ td->td_ucred);
+ JAILDESC_UNLOCK(jd);
+ if (error == 0)
+ error = priv_check_cred(jdcred, PRIV_JAIL_REMOVE);
+ crfree(jdcred);
+ if (error) {
+ prison_free(pr);
+ return (error);
+ }
+ sx_xlock(&allprison_lock);
+ mtx_lock(&pr->pr_mtx);
+ prison_remove(pr);
+ return (0);
+}
+
+/*
+ * Begin the removal process for a prison. The allprison lock should
+ * be held exclusively, and the prison should be both locked and held.
+ */
+void
+prison_remove(struct prison *pr)
+{
+ sx_assert(&allprison_lock, SA_XLOCKED);
+ mtx_assert(&pr->pr_mtx, MA_OWNED);
if (!prison_isalive(pr)) {
/* Silently ignore already-dying prisons. */
mtx_unlock(&pr->pr_mtx);
sx_xunlock(&allprison_lock);
- return (0);
+ return;
}
- prison_deref(pr, PD_KILL | PD_LOCKED | PD_LIST_XLOCKED);
- return (0);
+ prison_deref(pr, PD_KILL | PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED);
}
/*
@@ -2685,6 +2994,53 @@ sys_jail_attach(struct thread *td, struct jail_attach_args *uap)
return (do_jail_attach(td, pr, PD_LOCKED | PD_LIST_SLOCKED));
}
+/*
+ * struct jail_attach_jd_args {
+ * int fd;
+ * };
+ */
+int
+sys_jail_attach_jd(struct thread *td, struct jail_attach_jd_args *uap)
+{
+ struct jaildesc *jd;
+ struct prison *pr;
+ struct ucred *jdcred;
+ int drflags, error;
+
+ sx_slock(&allprison_lock);
+ drflags = PD_LIST_SLOCKED;
+ error = jaildesc_find(td, uap->fd, &jd, &pr, &jdcred);
+ if (error)
+ goto fail;
+ drflags |= PD_DEREF;
+ /*
+ * Check file permissions using the current credentials, and
+ * operation permissions using the descriptor's credentials.
+ */
+ error = vaccess(VREG, jd->jd_mode, jd->jd_uid, jd->jd_gid, VEXEC,
+ td->td_ucred);
+ JAILDESC_UNLOCK(jd);
+ if (error == 0)
+ error = priv_check_cred(jdcred, PRIV_JAIL_ATTACH);
+ crfree(jdcred);
+ if (error)
+ goto fail;
+ mtx_lock(&pr->pr_mtx);
+ drflags |= PD_LOCKED;
+
+ /* Do not allow a process to attach to a prison that is not alive. */
+ if (!prison_isalive(pr)) {
+ error = EINVAL;
+ goto fail;
+ }
+
+ return (do_jail_attach(td, pr, drflags));
+
+ fail:
+ prison_deref(pr, drflags);
+ return (error);
+}
+
static int
do_jail_attach(struct thread *td, struct prison *pr, int drflags)
{
@@ -2703,9 +3059,12 @@ do_jail_attach(struct thread *td, struct prison *pr, int drflags)
* a process root from one prison, but attached to the jail
* of another.
*/
- prison_hold(pr);
+ if (!(drflags & PD_DEREF)) {
+ prison_hold(pr);
+ drflags |= PD_DEREF;
+ }
refcount_acquire(&pr->pr_uref);
- drflags |= PD_DEREF | PD_DEUREF;
+ drflags |= PD_DEUREF;
mtx_unlock(&pr->pr_mtx);
drflags &= ~PD_LOCKED;
@@ -2755,6 +3114,7 @@ do_jail_attach(struct thread *td, struct prison *pr, int drflags)
prison_proc_relink(oldcred->cr_prison, pr, p);
prison_deref(oldcred->cr_prison, drflags);
crfree(oldcred);
+ prison_knote(pr, NOTE_JAIL_ATTACH | td->td_proc->p_pid);
/*
* If the prison was killed while changing credentials, die along
@@ -3182,9 +3542,10 @@ prison_deref(struct prison *pr, int flags)
refcount_load(&prison0.pr_uref) > 0,
("prison0 pr_uref=0"));
pr->pr_state = PRISON_STATE_DYING;
+ prison_cleanup_locked(pr);
mtx_unlock(&pr->pr_mtx);
flags &= ~PD_LOCKED;
- prison_cleanup(pr);
+ prison_cleanup_unlocked(pr);
}
}
}
@@ -3327,8 +3688,9 @@ prison_deref_kill(struct prison *pr, struct prisonlist *freeprison)
}
if (!(cpr->pr_flags & PR_REMOVE))
continue;
- prison_cleanup(cpr);
+ prison_cleanup_unlocked(cpr);
mtx_lock(&cpr->pr_mtx);
+ prison_cleanup_locked(cpr);
cpr->pr_flags &= ~PR_REMOVE;
if (cpr->pr_flags & PR_PERSIST) {
cpr->pr_flags &= ~PR_PERSIST;
@@ -3363,8 +3725,9 @@ prison_deref_kill(struct prison *pr, struct prisonlist *freeprison)
if (rpr != NULL)
LIST_REMOVE(rpr, pr_sibling);
- prison_cleanup(pr);
+ prison_cleanup_unlocked(pr);
mtx_lock(&pr->pr_mtx);
+ prison_cleanup_locked(pr);
if (pr->pr_flags & PR_PERSIST) {
pr->pr_flags &= ~PR_PERSIST;
prison_proc_free_not_last(pr);
@@ -3411,10 +3774,22 @@ prison_lock_xlock(struct prison *pr, int flags)
/*
* Release a prison's resources when it starts dying (when the last user
- * reference is dropped, or when it is killed).
+ * reference is dropped, or when it is killed). Two functions are called,
+ * for work that requires a locked prison or an unlocked one.
*/
static void
-prison_cleanup(struct prison *pr)
+prison_cleanup_locked(struct prison *pr)
+{
+ sx_assert(&allprison_lock, SA_XLOCKED);
+ mtx_assert(&pr->pr_mtx, MA_OWNED);
+ prison_knote(pr, NOTE_JAIL_REMOVE);
+ knlist_detach(pr->pr_klist);
+ jaildesc_prison_cleanup(pr);
+ pr->pr_klist = NULL;
+}
+
+static void
+prison_cleanup_unlocked(struct prison *pr)
{
sx_assert(&allprison_lock, SA_XLOCKED);
mtx_assert(&pr->pr_mtx, MA_NOTOWNED);
@@ -4616,6 +4991,7 @@ sysctl_jail_param(SYSCTL_HANDLER_ARGS)
* jail creation time but cannot be changed in an existing jail.
*/
SYSCTL_JAIL_PARAM(, jid, CTLTYPE_INT | CTLFLAG_RDTUN, "I", "Jail ID");
+SYSCTL_JAIL_PARAM(, desc, CTLTYPE_INT | CTLFLAG_RW, "I", "Jail descriptor");
SYSCTL_JAIL_PARAM(, parent, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail parent ID");
SYSCTL_JAIL_PARAM_STRING(, name, CTLFLAG_RW, MAXHOSTNAMELEN, "Jail name");
SYSCTL_JAIL_PARAM_STRING(, path, CTLFLAG_RDTUN, MAXPATHLEN, "Jail root path");
@@ -5039,6 +5415,22 @@ prison_racct_detach(struct prison *pr)
}
#endif /* RACCT */
+/*
+ * Submit a knote for a prison, locking if necessary.
+ */
+static void
+prison_knote(struct prison *pr, long hint)
+{
+ int locked;
+
+ locked = mtx_owned(&pr->pr_mtx);
+ if (!locked)
+ mtx_lock(&pr->pr_mtx);
+ KNOTE_LOCKED(pr->pr_klist, hint);
+ if (!locked)
+ mtx_unlock(&pr->pr_mtx);
+}
+
#ifdef DDB
static void
diff --git a/sys/kern/kern_jaildesc.c b/sys/kern/kern_jaildesc.c
new file mode 100644
index 000000000000..e00ec9a4bfff
--- /dev/null
+++ b/sys/kern/kern_jaildesc.c
@@ -0,0 +1,337 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 James Gritton.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/fcntl.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/kernel.h>
+#include <sys/jail.h>
+#include <sys/jaildesc.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/priv.h>
+#include <sys/stat.h>
+#include <sys/sysproto.h>
+#include <sys/systm.h>
+#include <sys/ucred.h>
+#include <sys/vnode.h>
+
+MALLOC_DEFINE(M_JAILDESC, "jaildesc", "jail descriptors");
+
+static fo_stat_t jaildesc_stat;
+static fo_close_t jaildesc_close;
+static fo_chmod_t jaildesc_chmod;
+static fo_chown_t jaildesc_chown;
+static fo_fill_kinfo_t jaildesc_fill_kinfo;
+static fo_cmp_t jaildesc_cmp;
+
+static struct fileops jaildesc_ops = {
+ .fo_read = invfo_rdwr,
+ .fo_write = invfo_rdwr,
+ .fo_truncate = invfo_truncate,
+ .fo_ioctl = invfo_ioctl,
+ .fo_poll = invfo_poll,
+ .fo_kqfilter = invfo_kqfilter,
+ .fo_stat = jaildesc_stat,
+ .fo_close = jaildesc_close,
+ .fo_chmod = jaildesc_chmod,
+ .fo_chown = jaildesc_chown,
+ .fo_sendfile = invfo_sendfile,
+ .fo_fill_kinfo = jaildesc_fill_kinfo,
+ .fo_cmp = jaildesc_cmp,
+ .fo_flags = DFLAG_PASSABLE,
+};
+
+/*
+ * Given a jail descriptor number, return the jaildesc, its prison,
+ * and its credential. The jaildesc will be returned locked, and
+ * prison and the credential will be returned held.
+ */
+int
+jaildesc_find(struct thread *td, int fd, struct jaildesc **jdp,
+ struct prison **prp, struct ucred **ucredp)
+{
+ struct file *fp;
+ struct jaildesc *jd;
+ struct prison *pr;
+ int error;
+
+ error = fget(td, fd, &cap_no_rights, &fp);
+ if (error != 0)
+ return (error);
+ if (fp->f_type != DTYPE_JAILDESC) {
+ error = EBADF;
+ goto out;
+ }
+ jd = fp->f_data;
+ JAILDESC_LOCK(jd);
+ pr = jd->jd_prison;
+ if (pr == NULL || !prison_isvalid(pr)) {
+ error = ENOENT;
+ JAILDESC_UNLOCK(jd);
+ goto out;
+ }
+ prison_hold(pr);
+ *prp = pr;
+ if (jdp != NULL)
+ *jdp = jd;
+ else
+ JAILDESC_UNLOCK(jd);
+ if (ucredp != NULL)
+ *ucredp = crhold(fp->f_cred);
+ out:
+ fdrop(fp, td);
+ return (error);
+}
+
+/*
+ * Allocate a new jail decriptor, not yet associated with a prison.
+ * Return the file pointer (with a reference held) and the descriptor
+ * number.
+ */
+int
+jaildesc_alloc(struct thread *td, struct file **fpp, int *fdp, int owning)
+{
+ struct file *fp;
+ struct jaildesc *jd;
+ int error;
+ mode_t mode;
+
+ if (owning) {
+ error = priv_check(td, PRIV_JAIL_REMOVE);
+ if (error != 0)
+ return (error);
+ mode = S_ISTXT;
+ } else
+ mode = 0;
+ jd = malloc(sizeof(*jd), M_JAILDESC, M_WAITOK | M_ZERO);
+ error = falloc_caps(td, &fp, fdp, 0, NULL);
+ finit(fp, priv_check_cred(fp->f_cred, PRIV_JAIL_SET) == 0
+ ? FREAD | FWRITE : FREAD, DTYPE_JAILDESC, jd, &jaildesc_ops);
+ if (error != 0) {
+ free(jd, M_JAILDESC);
+ return (error);
+ }
+ JAILDESC_LOCK_INIT(jd);
+ jd->jd_uid = fp->f_cred->cr_uid;
+ jd->jd_gid = fp->f_cred->cr_gid;
+ jd->jd_mode = S_IFREG | S_IRUSR | S_IRGRP | S_IROTH | mode
+ | (priv_check(td, PRIV_JAIL_SET) == 0 ? S_IWUSR | S_IXUSR : 0)
+ | (priv_check(td, PRIV_JAIL_ATTACH) == 0 ? S_IXUSR : 0);
+ *fpp = fp;
+ return (0);
+}
+
+/*
+ * Assocate a jail descriptor with its prison.
+ */
+void
+jaildesc_set_prison(struct file *fp, struct prison *pr)
+{
+ struct jaildesc *jd;
+
+ mtx_assert(&pr->pr_mtx, MA_OWNED);
+ jd = fp->f_data;
+ JAILDESC_LOCK(jd);
+ jd->jd_prison = pr;
+ LIST_INSERT_HEAD(&pr->pr_descs, jd, jd_list);
+ prison_hold(pr);
+ JAILDESC_UNLOCK(jd);
+}
+
+/*
+ * Detach the all jail descriptors from a prison.
+ */
+void
+jaildesc_prison_cleanup(struct prison *pr)
+{
+ struct jaildesc *jd;
+
+ mtx_assert(&pr->pr_mtx, MA_OWNED);
+ while ((jd = LIST_FIRST(&pr->pr_descs))) {
+ JAILDESC_LOCK(jd);
+ LIST_REMOVE(jd, jd_list);
+ jd->jd_prison = NULL;
+ JAILDESC_UNLOCK(jd);
+ prison_free(pr);
+ }
+}
+
+static int
+jaildesc_close(struct file *fp, struct thread *td)
+{
+ struct jaildesc *jd;
+ struct prison *pr;
+
+ jd = fp->f_data;
+ fp->f_data = NULL;
+ if (jd != NULL) {
+ JAILDESC_LOCK(jd);
+ pr = jd->jd_prison;
+ if (pr != NULL) {
+ /*
+ * Free or remove the associated prison.
+ * This requires a second check after re-
+ * ordering locks. This jaildesc can remain
+ * unlocked once we have a prison reference,
+ * because that prison is the only place that
+ * still points back to it.
+ */
+ prison_hold(pr);
+ JAILDESC_UNLOCK(jd);
+ if (jd->jd_mode & S_ISTXT) {
+ sx_xlock(&allprison_lock);
+ prison_lock(pr);
+ if (jd->jd_prison != NULL) {
+ /*
+ * Unlink the prison, but don't free
+ * it; that will be done as part of
+ * of prison_remove.
+ */
+ LIST_REMOVE(jd, jd_list);
+ prison_remove(pr);
+ } else {
+ prison_unlock(pr);
+ sx_xunlock(&allprison_lock);
+ }
+ } else {
+ prison_lock(pr);
+ if (jd->jd_prison != NULL) {
+ LIST_REMOVE(jd, jd_list);
+ prison_free(pr);
+ }
+ prison_unlock(pr);
+ }
+ prison_free(pr);
+ }
+ JAILDESC_LOCK_DESTROY(jd);
+ free(jd, M_JAILDESC);
+ }
+ finit(fp, 0, DTYPE_NONE, NULL, &badfileops);
+ return (0);
+}
+
+static int
+jaildesc_stat(struct file *fp, struct stat *sb, struct ucred *active_cred)
+{
+ struct jaildesc *jd;
+
+ bzero(sb, sizeof(struct stat));
+ jd = fp->f_data;
+ JAILDESC_LOCK(jd);
+ if (jd->jd_prison != NULL) {
+ sb->st_ino = jd->jd_prison ? jd->jd_prison->pr_id : 0;
+ sb->st_uid = jd->jd_uid;
+ sb->st_gid = jd->jd_gid;
+ sb->st_mode = jd->jd_mode;
+ } else
+ sb->st_mode = S_IFREG;
+ JAILDESC_UNLOCK(jd);
+ return (0);
+}
+
+static int
+jaildesc_chmod(struct file *fp, mode_t mode, struct ucred *active_cred,
+ struct thread *td)
+{
+ struct jaildesc *jd;
+ int error;
+
+ /* Reject permissions that the creator doesn't have. */
+ if (((mode & (S_IWUSR | S_IWGRP | S_IWOTH))
+ && priv_check_cred(fp->f_cred, PRIV_JAIL_SET) != 0)
+ || ((mode & (S_IXUSR | S_IXGRP | S_IXOTH))
+ && priv_check_cred(fp->f_cred, PRIV_JAIL_ATTACH) != 0
+ && priv_check_cred(fp->f_cred, PRIV_JAIL_SET) != 0)
+ || ((mode & S_ISTXT)
+ && priv_check_cred(fp->f_cred, PRIV_JAIL_REMOVE) != 0))
+ return (EPERM);
+ if (mode & (S_ISUID | S_ISGID))
+ return (EINVAL);
+ jd = fp->f_data;
+ JAILDESC_LOCK(jd);
+ error = vaccess(VREG, jd->jd_mode, jd->jd_uid, jd->jd_gid, VADMIN,
+ active_cred);
+ if (error == 0)
+ jd->jd_mode = S_IFREG | (mode & ALLPERMS);
+ JAILDESC_UNLOCK(jd);
+ return (error);
+}
+
+static int
+jaildesc_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred,
+ struct thread *td)
+{
+ struct jaildesc *jd;
+ int error;
+
+ error = 0;
+ jd = fp->f_data;
+ JAILDESC_LOCK(jd);
+ if (uid == (uid_t)-1)
+ uid = jd->jd_uid;
+ if (gid == (gid_t)-1)
+ gid = jd->jd_gid;
+ if ((uid != jd->jd_uid && uid != active_cred->cr_uid) ||
+ (gid != jd->jd_gid && !groupmember(gid, active_cred)))
+ error = priv_check_cred(active_cred, PRIV_VFS_CHOWN);
+ if (error == 0) {
+ jd->jd_uid = uid;
+ jd->jd_gid = gid;
+ }
+ JAILDESC_UNLOCK(jd);
+ return (error);
+}
+
+static int
+jaildesc_fill_kinfo(struct file *fp, struct kinfo_file *kif,
+ struct filedesc *fdp)
+{
+ return (EINVAL);
+}
+
+static int
+jaildesc_cmp(struct file *fp1, struct file *fp2, struct thread *td)
+{
+ struct jaildesc *jd1, *jd2;
+ int jid1, jid2;
+
+ if (fp2->f_type != DTYPE_JAILDESC)
+ return (3);
+ jd1 = fp1->f_data;
+ JAILDESC_LOCK(jd1);
+ jid1 = jd1->jd_prison ? (uintptr_t)jd1->jd_prison->pr_id : 0;
+ JAILDESC_UNLOCK(jd1);
+ jd2 = fp2->f_data;
+ JAILDESC_LOCK(jd2);
+ jid2 = jd2->jd_prison ? (uintptr_t)jd2->jd_prison->pr_id : 0;
+ JAILDESC_UNLOCK(jd2);
+ return (kcmp_cmp(jid1, jid2));
+}
diff --git a/sys/kern/syscalls.c b/sys/kern/syscalls.c
index 4122f9261871..4cef89cd5219 100644
--- a/sys/kern/syscalls.c
+++ b/sys/kern/syscalls.c
@@ -602,4 +602,6 @@ const char *syscallnames[] = {
"inotify_rm_watch", /* 594 = inotify_rm_watch */
"getgroups", /* 595 = getgroups */
"setgroups", /* 596 = setgroups */
+ "jail_attach_jd", /* 597 = jail_attach_jd */
+ "jail_remove_jd", /* 598 = jail_remove_jd */
};
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
index fa64597d14a5..911f9093824b 100644
--- a/sys/kern/syscalls.master
+++ b/sys/kern/syscalls.master
@@ -3383,5 +3383,15 @@
_In_reads_(gidsetsize) const gid_t *gidset
);
}
+597 AUE_JAIL_ATTACH STD {
+ int jail_attach_jd(
+ int fd
+ );
+ }
+598 AUE_JAIL_REMOVE STD {
+ int jail_remove_jd(
+ int fd
+ );
+ }
; vim: syntax=off
diff --git a/sys/kern/systrace_args.c b/sys/kern/systrace_args.c
index 2b1ea9eed8d4..e28fef931ea8 100644
--- a/sys/kern/systrace_args.c
+++ b/sys/kern/systrace_args.c
@@ -3500,6 +3500,20 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)
*n_args = 2;
break;
}
+ /* jail_attach_jd */
+ case 597: {
+ struct jail_attach_jd_args *p = params;
+ iarg[a++] = p->fd; /* int */
+ *n_args = 1;
+ break;
+ }
+ /* jail_remove_jd */
+ case 598: {
+ struct jail_remove_jd_args *p = params;
+ iarg[a++] = p->fd; /* int */
+ *n_args = 1;
+ break;
+ }
default:
*n_args = 0;
break;
@@ -9367,6 +9381,26 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
break;
};
break;
+ /* jail_attach_jd */
+ case 597:
+ switch (ndx) {
+ case 0:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* jail_remove_jd */
+ case 598:
+ switch (ndx) {
+ case 0:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
default:
break;
};
@@ -11365,6 +11399,16 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
if (ndx == 0 || ndx == 1)
p = "int";
break;
+ /* jail_attach_jd */
+ case 597:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* jail_remove_jd */
+ case 598:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
default:
break;
};
diff --git a/sys/modules/sound/driver/hda/Makefile b/sys/modules/sound/driver/hda/Makefile
index 0eec98fc53e1..1e137dc5671c 100644
--- a/sys/modules/sound/driver/hda/Makefile
+++ b/sys/modules/sound/driver/hda/Makefile
@@ -2,7 +2,7 @@
KMOD= snd_hda
SRCS= device_if.h bus_if.h pci_if.h channel_if.h mixer_if.h hdac_if.h
-SRCS+= hdaa.c hdaa.h hdaa_patches.c hdac.c hdac_if.h hdac_if.c
-SRCS+= hdacc.c hdac_private.h hdac_reg.h hda_reg.h hdac.h
+SRCS+= hdaa.c hdaa.h hdaa_patches.c hdacc.c hdac.c hdac_if.c
+SRCS+= hdac_private.h hdac_reg.h hda_reg.h hdac.h
.include <bsd.kmod.mk>
diff --git a/sys/netinet/tcp_sack.c b/sys/netinet/tcp_sack.c
index b6c55fac50b3..6e08ad2796a8 100644
--- a/sys/netinet/tcp_sack.c
+++ b/sys/netinet/tcp_sack.c
@@ -128,8 +128,25 @@ SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, enable, CTLFLAG_VNET | CTLFLAG_RW,
"Enable/Disable TCP SACK support");
VNET_DEFINE(int, tcp_do_newsack) = 1;
-SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, revised, CTLFLAG_VNET | CTLFLAG_RW,
- &VNET_NAME(tcp_do_newsack), 0,
+
+static int
+sysctl_net_inet_tcp_sack_revised(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ int new;
+
+ new = V_tcp_do_newsack;
+ error = sysctl_handle_int(oidp, &new, 0, req);
+ if (error == 0 && req->newptr) {
+ V_tcp_do_newsack = new;
+ gone_in(16, "net.inet.tcp.sack.revised will be deprecated."
+ " net.inet.tcp.sack.enable will always follow RFC6675 SACK.\n");
+ }
+ return (error);
+}
+
+SYSCTL_PROC(_net_inet_tcp_sack, OID_AUTO, revised, CTLFLAG_VNET | CTLFLAG_RW | CTLTYPE_INT,
+ &VNET_NAME(tcp_do_newsack), 0, sysctl_net_inet_tcp_sack_revised, "CU",
"Use revised SACK loss recovery per RFC 6675");
VNET_DEFINE(int, tcp_do_lrd) = 1;
diff --git a/sys/powerpc/conf/GENERIC b/sys/powerpc/conf/GENERIC
index 7c7d2809d784..1346fa8f9476 100644
--- a/sys/powerpc/conf/GENERIC
+++ b/sys/powerpc/conf/GENERIC
@@ -90,8 +90,6 @@ options RCTL # Resource limits
# Debugging support. Always need this:
options KDB # Enable kernel debugger support.
options KDB_TRACE # Print a stack trace for a panic.
-# For full debugger support use (turn off in stable branch):
-include "std.debug"
# Kernel dump features.
options EKCD # Support for encrypted kernel dumps
diff --git a/sys/powerpc/conf/GENERIC-NODEBUG b/sys/powerpc/conf/GENERIC-NODEBUG
deleted file mode 100644
index 0761376a8160..000000000000
--- a/sys/powerpc/conf/GENERIC-NODEBUG
+++ /dev/null
@@ -1,31 +0,0 @@
-#
-# GENERIC-NODEBUG -- WITNESS and INVARIANTS free kernel configuration file
-# for FreeBSD/powerpc
-#
-# This configuration file removes several debugging options, including
-# WITNESS and INVARIANTS checking, which are known to have significant
-# performance impact on running systems. When benchmarking new features
-# this kernel should be used instead of the standard GENERIC.
-# This kernel configuration should never appear outside of the HEAD
-# of the FreeBSD tree.
-#
-# For more information on this file, please read the config(5) manual page,
-# and/or the handbook section on Kernel Configuration Files:
-#
-# https://docs.freebsd.org/en/books/handbook/kernelconfig/#kernelconfig-config
-#
-# The handbook is also available locally in /usr/share/doc/handbook
-# if you've installed the doc distribution, otherwise always see the
-# FreeBSD World Wide Web server (https://www.FreeBSD.org/) for the
-# latest information.
-#
-# An exhaustive list of options and more detailed explanations of the
-# device lines is also present in the ../../conf/NOTES and NOTES files.
-# If you are in doubt as to the purpose or necessity of a line, check first
-# in NOTES.
-#
-
-include GENERIC
-include "std.nodebug"
-
-ident GENERIC-NODEBUG
diff --git a/sys/powerpc/conf/GENERIC64 b/sys/powerpc/conf/GENERIC64
index 630c88b97dd7..6675a4d299f4 100644
--- a/sys/powerpc/conf/GENERIC64
+++ b/sys/powerpc/conf/GENERIC64
@@ -100,8 +100,6 @@ options RCTL # Resource limits
# Debugging support. Always need this:
options KDB # Enable kernel debugger support.
options KDB_TRACE # Print a stack trace for a panic.
-# For full debugger support use (turn off in stable branch):
-include "std.debug"
# Kernel dump features.
options EKCD # Support for encrypted kernel dumps
diff --git a/sys/powerpc/conf/GENERIC64-NODEBUG b/sys/powerpc/conf/GENERIC64-NODEBUG
deleted file mode 100644
index a4c3dbd856e2..000000000000
--- a/sys/powerpc/conf/GENERIC64-NODEBUG
+++ /dev/null
@@ -1,31 +0,0 @@
-#
-# GENERIC64-NODEBUG -- WITNESS and INVARIANTS free kernel configuration file
-# for FreeBSD/powerpc
-#
-# This configuration file removes several debugging options, including
-# WITNESS and INVARIANTS checking, which are known to have significant
-# performance impact on running systems. When benchmarking new features
-# this kernel should be used instead of the standard GENERIC64.
-# This kernel configuration should never appear outside of the HEAD
-# of the FreeBSD tree.
-#
-# For more information on this file, please read the config(5) manual page,
-# and/or the handbook section on Kernel Configuration Files:
-#
-# https://docs.freebsd.org/en/books/handbook/kernelconfig/#kernelconfig-config
-#
-# The handbook is also available locally in /usr/share/doc/handbook
-# if you've installed the doc distribution, otherwise always see the
-# FreeBSD World Wide Web server (https://www.FreeBSD.org/) for the
-# latest information.
-#
-# An exhaustive list of options and more detailed explanations of the
-# device lines is also present in the ../../conf/NOTES and NOTES files.
-# If you are in doubt as to the purpose or necessity of a line, check first
-# in NOTES.
-#
-
-include GENERIC64
-include "std.nodebug"
-
-ident GENERIC64-NODEBUG
diff --git a/sys/powerpc/conf/GENERIC64LE b/sys/powerpc/conf/GENERIC64LE
index eb9a9441425d..f6f7d44c424b 100644
--- a/sys/powerpc/conf/GENERIC64LE
+++ b/sys/powerpc/conf/GENERIC64LE
@@ -96,8 +96,6 @@ options RCTL # Resource limits
# Debugging support. Always need this:
options KDB # Enable kernel debugger support.
options KDB_TRACE # Print a stack trace for a panic.
-# For full debugger support use (turn off in stable branch):
-include "std.debug"
# Kernel dump features.
options EKCD # Support for encrypted kernel dumps
diff --git a/sys/powerpc/conf/GENERIC64LE-NODEBUG b/sys/powerpc/conf/GENERIC64LE-NODEBUG
deleted file mode 100644
index fd2d3ca84a19..000000000000
--- a/sys/powerpc/conf/GENERIC64LE-NODEBUG
+++ /dev/null
@@ -1,31 +0,0 @@
-#
-# GENERIC64LE-NODEBUG -- WITNESS and INVARIANTS free kernel configuration file
-# for FreeBSD/powerpc
-#
-# This configuration file removes several debugging options, including
-# WITNESS and INVARIANTS checking, which are known to have significant
-# performance impact on running systems. When benchmarking new features
-# this kernel should be used instead of the standard GENERIC64LE.
-# This kernel configuration should never appear outside of the HEAD
-# of the FreeBSD tree.
-#
-# For more information on this file, please read the config(5) manual page,
-# and/or the handbook section on Kernel Configuration Files:
-#
-# https://docs.freebsd.org/en/books/handbook/kernelconfig/#kernelconfig-config
-#
-# The handbook is also available locally in /usr/share/doc/handbook
-# if you've installed the doc distribution, otherwise always see the
-# FreeBSD World Wide Web server (https://www.FreeBSD.org/) for the
-# latest information.
-#
-# An exhaustive list of options and more detailed explanations of the
-# device lines is also present in the ../../conf/NOTES and NOTES files.
-# If you are in doubt as to the purpose or necessity of a line, check first
-# in NOTES.
-#
-
-include GENERIC64LE
-include "std.nodebug"
-
-ident GENERIC64LE-NODEBUG
diff --git a/sys/riscv/conf/GENERIC b/sys/riscv/conf/GENERIC
index 2ff711e80127..187e7396e884 100644
--- a/sys/riscv/conf/GENERIC
+++ b/sys/riscv/conf/GENERIC
@@ -167,8 +167,6 @@ device xilinx_spi # Xilinx AXI SPI Controller
# Debugging support. Always need this:
options KDB # Enable kernel debugger support.
options KDB_TRACE # Print a stack trace for a panic.
-# For full debugger support use (turn off in stable branch):
-include "std.debug"
# options EARLY_PRINTF=sbi
# Kernel dump features.
diff --git a/sys/riscv/conf/GENERIC-NODEBUG b/sys/riscv/conf/GENERIC-NODEBUG
deleted file mode 100644
index e4f4b41f2c41..000000000000
--- a/sys/riscv/conf/GENERIC-NODEBUG
+++ /dev/null
@@ -1,31 +0,0 @@
-#
-# GENERIC-NODEBUG -- WITNESS and INVARIANTS free kernel configuration file
-# for FreeBSD/riscv
-#
-# This configuration file removes several debugging options, including
-# WITNESS and INVARIANTS checking, which are known to have significant
-# performance impact on running systems. When benchmarking new features
-# this kernel should be used instead of the standard GENERIC.
-# This kernel configuration should never appear outside of the HEAD
-# of the FreeBSD tree.
-#
-# For more information on this file, please read the config(5) manual page,
-# and/or the handbook section on Kernel Configuration Files:
-#
-# https://docs.freebsd.org/en/books/handbook/kernelconfig/#kernelconfig-config
-#
-# The handbook is also available locally in /usr/share/doc/handbook
-# if you've installed the doc distribution, otherwise always see the
-# FreeBSD World Wide Web server (https://www.FreeBSD.org/) for the
-# latest information.
-#
-# An exhaustive list of options and more detailed explanations of the
-# device lines is also present in the ../../conf/NOTES and NOTES files.
-# If you are in doubt as to the purpose or necessity of a line, check first
-# in NOTES.
-#
-
-include GENERIC
-include "std.nodebug"
-
-ident GENERIC-NODEBUG
diff --git a/sys/sys/event.h b/sys/sys/event.h
index 1b30e4292de8..f161d2c938c1 100644
--- a/sys/sys/event.h
+++ b/sys/sys/event.h
@@ -45,7 +45,8 @@
#define EVFILT_USER (-11) /* User events */
#define EVFILT_SENDFILE (-12) /* attached to sendfile requests */
#define EVFILT_EMPTY (-13) /* empty send socket buf */
-#define EVFILT_SYSCOUNT 13
+#define EVFILT_JAIL (-14) /* attached to struct prison */
+#define EVFILT_SYSCOUNT 14
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define EV_SET(kevp_, a, b, c, d, e, f) do { \
@@ -204,10 +205,19 @@ struct freebsd11_kevent32 {
#define NOTE_PCTRLMASK 0xf0000000 /* mask for hint bits */
#define NOTE_PDATAMASK 0x000fffff /* mask for pid */
-/* additional flags for EVFILT_PROC */
-#define NOTE_TRACK 0x00000001 /* follow across forks */
+/* data/hint flags for EVFILT_JAIL */
+#define NOTE_JAIL_SET 0x80000000 /* jail was modified */
+#define NOTE_JAIL_CHILD 0x40000000 /* child jail was created */
+#define NOTE_JAIL_ATTACH 0x20000000 /* jail was attached to */
+#define NOTE_JAIL_REMOVE 0x10000000 /* jail was removed */
+#define NOTE_JAIL_ATTACH_MULTI 0x08000000 /* multiple procs attached */
+#define NOTE_JAIL_CTRLMASK 0xf0000000 /* mask for hint bits */
+#define NOTE_JAIL_DATAMASK 0x000fffff /* mask for pid */
+
+/* additional flags for EVFILT_PROC and EVFILT_JAIL */
+#define NOTE_TRACK 0x00000001 /* follow across fork/create */
#define NOTE_TRACKERR 0x00000002 /* could not track child */
-#define NOTE_CHILD 0x00000004 /* am a child process */
+#define NOTE_CHILD 0x00000004 /* am a child process/jail */
/* additional flags for EVFILT_TIMER */
#define NOTE_SECONDS 0x00000001 /* data is seconds */
@@ -309,6 +319,7 @@ struct knote {
struct proc *p_proc; /* proc pointer */
struct kaiocb *p_aio; /* AIO job pointer */
struct aioliojob *p_lio; /* LIO job pointer */
+ struct prison *p_prison; /* prison pointer */
void *p_v; /* generic other pointer */
} kn_ptr;
const struct filterops *kn_fop;
diff --git a/sys/sys/file.h b/sys/sys/file.h
index 63313926c4f0..cc3c733580fd 100644
--- a/sys/sys/file.h
+++ b/sys/sys/file.h
@@ -72,6 +72,7 @@ struct nameidata;
#define DTYPE_EVENTFD 13 /* eventfd */
#define DTYPE_TIMERFD 14 /* timerfd */
#define DTYPE_INOTIFY 15 /* inotify descriptor */
+#define DTYPE_JAILDESC 16 /* jail descriptor */
#ifdef _KERNEL
diff --git a/sys/sys/jail.h b/sys/sys/jail.h
index d2655c52e832..e12e8c3178c9 100644
--- a/sys/sys/jail.h
+++ b/sys/sys/jail.h
@@ -99,8 +99,12 @@ enum prison_state {
#define JAIL_UPDATE 0x02 /* Update parameters of existing jail */
#define JAIL_ATTACH 0x04 /* Attach to jail upon creation */
#define JAIL_DYING 0x08 /* Allow getting a dying jail */
-#define JAIL_SET_MASK 0x0f /* JAIL_DYING is deprecated/ignored here */
-#define JAIL_GET_MASK 0x08
+#define JAIL_USE_DESC 0x10 /* Get/set jail in descriptor */
+#define JAIL_AT_DESC 0x20 /* Find/add jail under descriptor */
+#define JAIL_GET_DESC 0x40 /* Return a new jail descriptor */
+#define JAIL_OWN_DESC 0x80 /* Return a new owning jail descriptor */
+#define JAIL_SET_MASK 0xff /* JAIL_DYING is deprecated/ignored here */
+#define JAIL_GET_MASK 0xf8
#define JAIL_SYS_DISABLE 0
#define JAIL_SYS_NEW 1
@@ -115,7 +119,9 @@ int jail(struct jail *);
int jail_set(struct iovec *, unsigned int, int);
int jail_get(struct iovec *, unsigned int, int);
int jail_attach(int);
+int jail_attach_jd(int);
int jail_remove(int);
+int jail_remove_jd(int);
__END_DECLS
#else /* _KERNEL */
@@ -144,6 +150,8 @@ MALLOC_DECLARE(M_PRISON);
#define JAIL_META_PRIVATE "meta"
#define JAIL_META_SHARED "env"
+struct jaildesc;
+struct knlist;
struct racct;
struct prison_racct;
@@ -189,7 +197,9 @@ struct prison {
struct vnode *pr_root; /* (c) vnode to rdir */
struct prison_ip *pr_addrs[PR_FAMILY_MAX]; /* (p,n) IPs of jail */
struct prison_racct *pr_prison_racct; /* (c) racct jail proxy */
- void *pr_sparep[3];
+ struct knlist *pr_klist; /* (m) attached knotes */
+ LIST_HEAD(, jaildesc) pr_descs; /* (a) attached descriptors */
+ void *pr_sparep;
int pr_childcount; /* (a) number of child jails */
int pr_childmax; /* (p) maximum child jails */
unsigned pr_allow; /* (p) PR_ALLOW_* flags */
@@ -425,10 +435,11 @@ SYSCTL_DECL(_security_jail_param);
/*
* Kernel support functions for jail().
*/
-struct ucred;
+struct knote;
struct mount;
struct sockaddr;
struct statfs;
+struct ucred;
struct vfsconf;
/*
@@ -463,6 +474,7 @@ void prison_proc_free(struct prison *);
void prison_proc_link(struct prison *, struct proc *);
void prison_proc_unlink(struct prison *, struct proc *);
void prison_proc_iterate(struct prison *, void (*)(struct proc *, void *), void *);
+void prison_remove(struct prison *);
void prison_set_allow(struct ucred *cred, unsigned flag, int enable);
bool prison_ischild(struct prison *, struct prison *);
bool prison_isalive(const struct prison *);
diff --git a/sys/sys/jaildesc.h b/sys/sys/jaildesc.h
new file mode 100644
index 000000000000..4bed1ab3b88a
--- /dev/null
+++ b/sys/sys/jaildesc.h
@@ -0,0 +1,85 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 James Gritton.
+ * All rights reserved.
+ *
+ * This software was developed at the University of Cambridge Computer
+ * Laboratory with support from a grant from Google, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _SYS_JAILDESC_H_
+#define _SYS_JAILDESC_H_
+
+#ifdef _KERNEL
+
+#include <sys/queue.h>
+#include <sys/_lock.h>
+#include <sys/_mutex.h>
+#include <sys/_types.h>
+
+struct prison;
+
+/*-
+ * struct jaildesc describes a jail descriptor, which points to a struct
+ * prison. struct prison in turn has a linked list of struct jaildesc.
+ *
+ * Locking key:
+ * (c) set on creation, remains unchanged
+ * (d) jd_lock
+ * (p) jd_prison->pr_mtx
+ */
+struct jaildesc {
+ LIST_ENTRY(jaildesc) jd_list; /* (d,p) this prison's descs */
+ struct prison *jd_prison; /* (d) the prison */
+ struct mtx jd_lock;
+ uid_t jd_uid; /* (d) nominal file owner */
+ gid_t jd_gid; /* (d) nominal file group */
+ mode_t jd_mode; /* (d) descriptor permissions */
+ unsigned jd_flags; /* (d) JDF_* flags */
+};
+
+/*
+ * Locking macros for the jaildesc.
+ */
+#define JAILDESC_LOCK_DESTROY(jd) mtx_destroy(&(jd)->jd_lock)
+#define JAILDESC_LOCK_INIT(jd) mtx_init(&(jd)->jd_lock, "jaildesc", \
+ NULL, MTX_DEF)
+#define JAILDESC_LOCK(jd) mtx_lock(&(jd)->jd_lock)
+#define JAILDESC_UNLOCK(jd) mtx_unlock(&(jd)->jd_lock)
+
+/*
+ * Flags for the jd_flags field
+ */
+#define JDF_REMOVED 0x00000002 /* jail was removed */
+
+int jaildesc_find(struct thread *td, int fd, struct jaildesc **jdp,
+ struct prison **prp, struct ucred **ucredp);
+int jaildesc_alloc(struct thread *td, struct file **fpp, int *fdp, int owning);
+void jaildesc_set_prison(struct file *jd, struct prison *pr);
+void jaildesc_prison_cleanup(struct prison *pr);
+
+#endif /* _KERNEL */
+
+#endif /* !_SYS_JAILDESC_H_ */
diff --git a/sys/sys/param.h b/sys/sys/param.h
index fc2a78883f1e..c21c086e15ad 100644
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -74,7 +74,7 @@
* cannot include sys/param.h and should only be updated here.
*/
#undef __FreeBSD_version
-#define __FreeBSD_version 1500063
+#define __FreeBSD_version 1500064
/*
* __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
diff --git a/sys/sys/syscall.h b/sys/sys/syscall.h
index 2d6903967e15..cff27b8be316 100644
--- a/sys/sys/syscall.h
+++ b/sys/sys/syscall.h
@@ -535,4 +535,6 @@
#define SYS_inotify_rm_watch 594
#define SYS_getgroups 595
#define SYS_setgroups 596
-#define SYS_MAXSYSCALL 597
+#define SYS_jail_attach_jd 597
+#define SYS_jail_remove_jd 598
+#define SYS_MAXSYSCALL 599
diff --git a/sys/sys/syscall.mk b/sys/sys/syscall.mk
index d1172c2dc7bf..443dbadcfbff 100644
--- a/sys/sys/syscall.mk
+++ b/sys/sys/syscall.mk
@@ -438,4 +438,6 @@ MIASM = \
inotify_add_watch_at.o \
inotify_rm_watch.o \
getgroups.o \
- setgroups.o
+ setgroups.o \
+ jail_attach_jd.o \
+ jail_remove_jd.o
diff --git a/sys/sys/sysproto.h b/sys/sys/sysproto.h
index 98311a6dbf94..8dda4b4533ea 100644
--- a/sys/sys/sysproto.h
+++ b/sys/sys/sysproto.h
@@ -1901,6 +1901,12 @@ struct setgroups_args {
char gidsetsize_l_[PADL_(int)]; int gidsetsize; char gidsetsize_r_[PADR_(int)];
char gidset_l_[PADL_(const gid_t *)]; const gid_t * gidset; char gidset_r_[PADR_(const gid_t *)];
};
+struct jail_attach_jd_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+};
+struct jail_remove_jd_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+};
int sys__exit(struct thread *, struct _exit_args *);
int sys_fork(struct thread *, struct fork_args *);
int sys_read(struct thread *, struct read_args *);
@@ -2305,6 +2311,8 @@ int sys_inotify_add_watch_at(struct thread *, struct inotify_add_watch_at_args *
int sys_inotify_rm_watch(struct thread *, struct inotify_rm_watch_args *);
int sys_getgroups(struct thread *, struct getgroups_args *);
int sys_setgroups(struct thread *, struct setgroups_args *);
+int sys_jail_attach_jd(struct thread *, struct jail_attach_jd_args *);
+int sys_jail_remove_jd(struct thread *, struct jail_remove_jd_args *);
#ifdef COMPAT_43
@@ -3301,6 +3309,8 @@ int freebsd14_setgroups(struct thread *, struct freebsd14_setgroups_args *);
#define SYS_AUE_inotify_rm_watch AUE_INOTIFY
#define SYS_AUE_getgroups AUE_GETGROUPS
#define SYS_AUE_setgroups AUE_SETGROUPS
+#define SYS_AUE_jail_attach_jd AUE_JAIL_ATTACH
+#define SYS_AUE_jail_remove_jd AUE_JAIL_REMOVE
#undef PAD_
#undef PADL_
diff --git a/sys/sys/user.h b/sys/sys/user.h
index 103236b6ed1b..3183f0792256 100644
--- a/sys/sys/user.h
+++ b/sys/sys/user.h
@@ -266,6 +266,7 @@ struct user {
#define KF_TYPE_EVENTFD 13
#define KF_TYPE_TIMERFD 14
#define KF_TYPE_INOTIFY 15
+#define KF_TYPE_JAILDESC 16
#define KF_TYPE_UNKNOWN 255
#define KF_VTYPE_VNON 0
@@ -453,6 +454,9 @@ struct kinfo_file {
uint64_t kf_timerfd_addr;
} kf_timerfd;
struct {
+ int32_t kf_jid;
+ } kf_jail;
+ struct {
uint64_t kf_kqueue_addr;
int32_t kf_kqueue_count;
int32_t kf_kqueue_state;