split nihil into separate modules

author: Lexi Winter <lexi@le-fay.org> 2025-06-28 19:25:55 +0100
committer: Lexi Winter <lexi@le-fay.org> 2025-06-28 19:25:55 +0100
commit: a2d7181700ac64b8e7a4472ec26dfa253b38f188 (patch)
tree: 23c5a9c8ec4089ac346e2e0f9391909c3089b66b /nihil.util
parent: f226d46ee02b57dd76a4793593aa8d66e1c58353 (diff)
download: nihil-a2d7181700ac64b8e7a4472ec26dfa253b38f188.tar.gz
nihil-a2d7181700ac64b8e7a4472ec26dfa253b38f188.tar.bz2
12 files changed, 1370 insertions, 0 deletions
diff --git a/nihil.util/CMakeLists.txt b/nihil.util/CMakeLists.txt
new file mode 100644
index 0000000..b809a68
--- /dev/null
+++ b/nihil.util/CMakeLists.txt
@@ -0,0 +1,36 @@
+# This source code is released into the public domain.
+
+# nihil.util: the ctype, parse_size, next_word, skipws and tabulate module
+# partitions, built as one static library.
+add_library(nihil.util STATIC)
+target_link_libraries(nihil.util PRIVATE nihil.core nihil.error nihil.monad)
+target_sources(nihil.util
+	PUBLIC FILE_SET modules TYPE CXX_MODULES FILES
+	nihil.util.ccm
+
+	ctype.ccm
+	parse_size.ccm
+	next_word.ccm
+	skipws.ccm
+	tabulate.ccm
+)
+
+# The unit tests are only built when NIHIL_TESTS is enabled.
+if(NIHIL_TESTS)
+	enable_testing()
+
+	# Locate Catch2 before declaring the target that links against
+	# Catch2::Catch2WithMain.
+	find_package(Catch2 REQUIRED)
+
+	add_executable(nihil.util.test
+		test_ctype.cc
+		test_parse_size.cc
+		test_next_word.cc
+		test_skipws.cc
+		test_tabulate.cc
+	)
+	target_link_libraries(nihil.util.test PRIVATE
+		nihil.util
+		Catch2::Catch2WithMain
+	)
+
+	# Register each Catch2 test case with CTest.
+	include(CTest)
+	include(Catch)
+	catch_discover_tests(nihil.util.test)
+endif()
diff --git a/nihil.util/ctype.ccm b/nihil.util/ctype.ccm
new file mode 100644
index 0000000..6d30c4f
--- /dev/null
+++ b/nihil.util/ctype.ccm
@@ -0,0 +1,87 @@
+/*
+ * This source code is released into the public domain.
+ */
+
+module;
+
+#include <concepts>
+#include <locale>
+
+export module nihil.util:ctype;
+
+namespace nihil {
+
+/*
+ * ctype_is: a predicate object wrapping std::ctype<T>::is(), suitable for
+ * passing to algorithms.  ctype_is(m) yields a functor which accepts any
+ * integral character type and reports whether the character matches the
+ * mask m.
+ *
+ * The locale defaults to the global locale at the time of construction.
+ * The locale is copied into the object, so passing a temporary is fine.
+ */
+
+export struct ctype_is final {
+	ctype_is(std::ctype_base::mask mask_,
+		 std::locale const &locale_ = std::locale())
+	    : m_mask{mask_}, m_locale{locale_}
+	{
+	}
+
+	// Test a single character; the facet is selected by the argument's
+	// type, so narrow and wide characters are both supported.
+	[[nodiscard]] auto operator()(this ctype_is const &self,
+				      std::integral auto c)
+	{
+		using char_type = decltype(c);
+		return std::use_facet<std::ctype<char_type>>(self.m_locale)
+			.is(self.m_mask, c);
+	}
+
+private:
+	std::ctype_base::mask	m_mask;
+	std::locale		m_locale;
+};
+
+// Predefined tests using the global locale.  Each predicate copies the
+// global locale in effect when the variable is initialised, so a later call
+// to std::locale::global() does not change what these tests accept.
+
+export inline auto is_space = ctype_is{std::ctype_base::space};
+export inline auto is_print = ctype_is{std::ctype_base::print};
+export inline auto is_cntrl = ctype_is{std::ctype_base::cntrl};
+export inline auto is_upper = ctype_is{std::ctype_base::upper};
+export inline auto is_lower = ctype_is{std::ctype_base::lower};
+export inline auto is_alpha = ctype_is{std::ctype_base::alpha};
+export inline auto is_digit = ctype_is{std::ctype_base::digit};
+export inline auto is_punct = ctype_is{std::ctype_base::punct};
+export inline auto is_xdigit = ctype_is{std::ctype_base::xdigit};
+export inline auto is_blank = ctype_is{std::ctype_base::blank};
+export inline auto is_alnum = ctype_is{std::ctype_base::alnum};
+export inline auto is_graph = ctype_is{std::ctype_base::graph};
+
+// Predefined tests which always use the classic "C" locale, whatever the
+// global locale is.  The C locale is guaranteed to always be available, so
+// holding on to it doesn't create lifetime issues.
+
+export inline auto is_c_space = ctype_is{
+	std::ctype_base::space, std::locale::classic()};
+export inline auto is_c_print = ctype_is{
+	std::ctype_base::print, std::locale::classic()};
+export inline auto is_c_cntrl = ctype_is{
+	std::ctype_base::cntrl, std::locale::classic()};
+export inline auto is_c_upper = ctype_is{
+	std::ctype_base::upper, std::locale::classic()};
+export inline auto is_c_lower = ctype_is{
+	std::ctype_base::lower, std::locale::classic()};
+export inline auto is_c_alpha = ctype_is{
+	std::ctype_base::alpha, std::locale::classic()};
+export inline auto is_c_digit = ctype_is{
+	std::ctype_base::digit, std::locale::classic()};
+export inline auto is_c_punct = ctype_is{
+	std::ctype_base::punct, std::locale::classic()};
+export inline auto is_c_xdigit = ctype_is{
+	std::ctype_base::xdigit, std::locale::classic()};
+export inline auto is_c_blank = ctype_is{
+	std::ctype_base::blank, std::locale::classic()};
+export inline auto is_c_alnum = ctype_is{
+	std::ctype_base::alnum, std::locale::classic()};
+export inline auto is_c_graph = ctype_is{
+	std::ctype_base::graph, std::locale::classic()};
+
+} // namespace nihil
diff --git a/nihil.util/next_word.ccm b/nihil.util/next_word.ccm
new file mode 100644
index 0000000..c5d3ad7
--- /dev/null
+++ b/nihil.util/next_word.ccm
@@ -0,0 +1,49 @@
+/*
+ * This source code is released into the public domain.
+ */
+
+module;
+
+#include <algorithm>
+#include <locale>
+#include <ranges>
+#include <string>
+#include <utility>
+
+export module nihil.util:next_word;
+
+import :skipws;
+
+namespace nihil {
+
+/*
+ * Split the first word off a string_view.  Leading whitespace (as classified
+ * by the given locale) is skipped first.  The result is a pair holding the
+ * word and the rest of the text; the rest begins at the whitespace character
+ * which ended the word, or is empty if the word ran to the end of the text.
+ * Feeding the rest back in therefore yields each word in turn.
+ */
+
+export template<typename Char> [[nodiscard]]
+auto next_word(std::basic_string_view<Char> text,
+	       std::locale const &locale = std::locale())
+	-> std::pair<std::basic_string_view<Char>,
+		     std::basic_string_view<Char>>
+{
+	using view_type = std::basic_string_view<Char>;
+
+	auto const trimmed = skipws(text, locale);
+
+	// The word ends at the first whitespace character, if there is one.
+	auto const word_end = std::ranges::find_if(
+		trimmed, ctype_is(std::ctype_base::space, locale));
+
+	auto const word = view_type(trimmed.begin(), word_end);
+	auto const rest = view_type(word_end, trimmed.end());
+	return {word, rest};
+}
+
+// In-place variant: return the next word from *text and update *text to
+// refer to whatever follows that word.
+export template<typename Char>
+auto next_word(std::basic_string_view<Char> *text,
+	       std::locale const &locale = std::locale())
+	-> std::basic_string_view<Char>
+{
+	auto const split = next_word(*text, locale);
+	*text = split.second;
+	return split.first;
+}
+
+} // namespace nihil
diff --git a/nihil.util/nihil.util.ccm b/nihil.util/nihil.util.ccm
new file mode 100644
index 0000000..afd513a
--- /dev/null
+++ b/nihil.util/nihil.util.ccm
@@ -0,0 +1,13 @@
+/*
+ * This source code is released into the public domain.
+ */
+
+module;
+
+export module nihil.util;
+
+export import :ctype;
+export import :parse_size;
+export import :next_word;
+export import :skipws;
+export import :tabulate;
diff --git a/nihil.util/parse_size.ccm b/nihil.util/parse_size.ccm
new file mode 100644
index 0000000..c692578
--- /dev/null
+++ b/nihil.util/parse_size.ccm
@@ -0,0 +1,107 @@
+/*
+ * This source code is released into the public domain.
+ */
+
+module;
+
+#include <algorithm>
+#include <coroutine>
+#include <cstdint>
+#include <expected>
+#include <limits>
+#include <ranges>
+#include <string>
+#include <string_view>
+#include <system_error>
+#include <utility>
+
+export module nihil.util:parse_size;
+
+import nihil.core;
+import nihil.error;
+import nihil.monad;
+
+import :ctype;
+
+namespace nihil {
+
+// Map a unit suffix (k, m, g, t or p, in either case) to its multiplier,
+// which is a power of 1024.  Any other character yields errc::invalid_unit.
+template<typename Char>
+auto get_multiplier(Char c) -> std::expected<std::uint64_t, error>
+{
+	// How many factors of 1024 this unit stands for.
+	auto power = 0u;
+
+	switch (c) {
+	case 'k': case 'K': power = 1; break;
+	case 'm': case 'M': power = 2; break;
+	case 'g': case 'G': power = 3; break;
+	case 't': case 'T': power = 4; break;
+	case 'p': case 'P': power = 5; break;
+
+	default:
+		return std::unexpected(error(errc::invalid_unit));
+	}
+
+	// 1024 is 2^10, so 1024^power is 2^(10 * power).
+	return std::uint64_t{1} << (10u * power);
+}
+
+/*
+ * Parse a human-formatted size: a run of decimal digits optionally followed
+ * by a single unit character, such as "1024" or "4g".  Digits are classified
+ * using the "C" locale; thousands separators and negative numbers are not
+ * recognised.
+ *
+ * Errors:
+ *	errc::empty_string		the string does not start with a digit
+ *	errc::invalid_unit		the suffix is unknown, or is longer
+ *					than one character
+ *	std::errc::result_out_of_range	the value does not fit in T
+ */
+export template<typename T, typename Char> [[nodiscard]]
+auto parse_size(std::basic_string_view<Char> str)
+	-> std::expected<T, error>
+{
+	constexpr auto limit = std::numeric_limits<T>::max();
+
+	// The leading run of digits is the numeric part of the string.
+	auto const digits_end = std::ranges::find_if_not(str, is_c_digit);
+	auto const digits = std::basic_string_view<Char>(
+				str.begin(), digits_end);
+
+	if (digits.empty())
+		co_return std::unexpected(error(errc::empty_string));
+
+	// Accumulate the value one digit at a time, checking that neither
+	// the multiplication nor the addition can overflow before doing it.
+	auto value = T{0};
+
+	for (auto ch : digits) {
+		if (value > (limit / 10))
+			co_return std::unexpected(error(
+					std::errc::result_out_of_range));
+		value *= 10;
+
+		auto const digit = static_cast<T>(ch - '0');
+		if (value > (limit - digit))
+			co_return std::unexpected(error(
+					std::errc::result_out_of_range));
+		value += digit;
+	}
+
+	// Nothing after the digits means there is no multiplier.
+	if (digits_end == str.end())
+		co_return value;
+
+	// At most one character may follow the digits.
+	auto const unit = *digits_end;
+
+	if ((digits_end + 1) != str.end())
+		co_return std::unexpected(error(errc::invalid_unit));
+
+	auto const mult = co_await get_multiplier(unit);
+
+	// Refuse to multiply if the scaled value would not fit in T.
+	if (std::cmp_greater(value, limit / mult))
+		co_return std::unexpected(error(
+				std::errc::result_out_of_range));
+
+	co_return value * mult;
+}
+
+// Convenience overload for NUL-terminated narrow strings.
+export template<typename T>
+[[nodiscard]] inline auto parse_size(char const *s)
+{
+	auto const view = std::string_view{s};
+	return parse_size<T>(view);
+}
+
+// Convenience overload for NUL-terminated wide strings.
+export template<typename T>
+[[nodiscard]] inline auto parse_size(wchar_t const *s)
+{
+	auto const view = std::wstring_view{s};
+	return parse_size<T>(view);
+}
+
+}
diff --git a/nihil.util/skipws.ccm b/nihil.util/skipws.ccm
new file mode 100644
index 0000000..4813ae8
--- /dev/null
+++ b/nihil.util/skipws.ccm
@@ -0,0 +1,40 @@
+/*
+ * This source code is released into the public domain.
+ */
+
+module;
+
+#include <algorithm>
+#include <locale>
+#include <ranges>
+#include <string>
+
+export module nihil.util:skipws;
+
+import :ctype;
+
+namespace nihil {
+
+/*
+ * Return the given text with its leading whitespace removed.  Whitespace is
+ * whatever the given locale classifies as std::ctype_base::space.
+ */
+
+export template<typename Char> [[nodiscard]]
+auto skipws(std::basic_string_view<Char> text,
+	    std::locale const &locale = std::locale())
+	-> std::basic_string_view<Char>
+{
+	// Find the first character which is not whitespace; everything from
+	// there to the end of the text is the result.
+	auto const first_nonws = std::ranges::find_if_not(
+		text, ctype_is(std::ctype_base::space, locale));
+	return std::basic_string_view<Char>(first_nonws, text.end());
+}
+
+// In-place variant: strip the leading whitespace from *text.
+export template<typename Char>
+auto skipws(std::basic_string_view<Char> *text,
+	    std::locale const &locale = std::locale())
+	-> void
+{
+	auto const stripped = skipws(*text, locale);
+	*text = stripped;
+}
+
+} // namespace nihil
diff --git a/nihil.util/tabulate.ccm b/nihil.util/tabulate.ccm
new file mode 100644
index 0000000..5998b24
--- /dev/null
+++ b/nihil.util/tabulate.ccm
@@ -0,0 +1,312 @@
+/*
+ * This source code is released into the public domain.
+ */
+
+module;
+
+#include <algorithm>
+#include <cstdlib>
+#include <format>
+#include <ranges>
+#include <iterator>
+#include <vector>
+
+export module nihil.util:tabulate;
+
+import nihil.error;
+import :ctype;
+
+namespace nihil {
+
+/*
+ * tabulate: format the given range in an ASCII table and write the output
+ * to the given output iterator.  The range's values will be converted to
+ * strings as if by std::format.
+ *
+ * tabulate is implemented by copying the range; this allows it to work on
+ * input/forward ranges at the cost of slightly increased memory use.
+ *
+ * The table spec is a string consisting of zero or more field formats,
+ * formatted as {flags:fieldname}; both flags and fieldname are optional.
+ * If there are fewer field formats than fields, the remaining fields
+ * are formatted as if by {:}.
+ *
+ * The following flags are supported:
+ *
+ * 	<	left-align this column (default)
+ * 	>	right-align this column
+ */
+
+// The error thrown by the table spec parser when a spec is malformed.
+export struct table_spec_error : error {
+	table_spec_error(std::string_view what) : error(what) {}
+};
+
+/*
+ * The specification for a single field (column) of a table: its header
+ * name, its alignment and its width.
+ */
+template<typename Char>
+struct field_spec {
+	enum align_t { left, right };
+
+	// Get the name of this field.
+	auto name(this field_spec const &self)
+		-> std::basic_string_view<Char>
+	{
+		return self.m_name;
+	}
+
+	// Set the name of this field.  Only the view is stored, not a copy
+	// of the characters it refers to.
+	auto name(this field_spec &self,
+		  std::basic_string_view<Char> new_name)
+		-> void
+	{
+		self.m_name = new_name;
+	}
+
+	// Set this field's alignment.
+	auto align(this field_spec &self, align_t new_align) -> void
+	{
+		self.m_align = new_align;
+	}
+
+	// Ensure the width of this field is at least the given width.  The
+	// width never shrinks.
+	auto ensure_width(this field_spec &self, std::size_t newwidth)
+		-> void
+	{
+		self.m_width = std::max(self.m_width, newwidth);
+	}
+
+	// Format an object to a string as if by std::format("{}", obj).
+	[[nodiscard]] auto format(this field_spec const &, auto &&obj)
+		-> std::basic_string<Char>
+	{
+		auto format_string = std::basic_string<Char>{'{', '}'};
+		return std::format(std::runtime_format(format_string), obj);
+	}
+
+	// Print a column value to an output iterator, padded with spaces to
+	// this field's width according to its alignment.  'out' is advanced
+	// past everything that was written.  If is_last is true, this is the
+	// last field on the line, so we won't output any trailing padding.
+	auto print(this field_spec const &self,
+		   std::basic_string_view<Char> value,
+		   std::output_iterator<Char> auto &out,
+		   bool is_last)
+		-> void
+	{
+		// A value wider than the field gets no padding.  Checking
+		// first also stops the unsigned subtraction wrapping around.
+		auto padding = std::size_t{0};
+		if (value.size() < self.m_width)
+			padding = self.m_width - value.size();
+
+		if (self.m_align == right)
+			for (std::size_t i = 0; i < padding; ++i)
+				*out++ = ' ';
+
+		// std::ranges::copy takes its output iterator by value, so
+		// the advanced iterator has to be stored back into 'out';
+		// otherwise later writes would land on top of the value.
+		out = std::ranges::copy(value, out).out;
+
+		if (!is_last && self.m_align == left)
+			for (std::size_t i = 0; i < padding; ++i)
+				*out++ = ' ';
+	}
+
+private:
+	std::basic_string_view<Char>	m_name;
+	std::size_t			m_width = 0;
+	align_t				m_align = left;
+};
+
+/*
+ * The specification for an entire table: an ordered list of field specs,
+ * one for each column.
+ */
+template<typename Char>
+struct table_spec {
+	// Append a field spec as the new last column of this table.
+	auto add(this table_spec &self, field_spec<Char> field) -> void
+	{
+		self.m_fields.push_back(std::move(field));
+	}
+
+	// Return the field spec for column 'fieldnr'.  If the table is not
+	// that wide yet, it is first extended with default-constructed
+	// field specs up to and including the requested column.
+	[[nodiscard]] auto field(this table_spec &self, std::size_t fieldnr)
+		-> field_spec<Char> &
+	{
+		auto &all = self.m_fields;
+
+		if (all.size() <= fieldnr)
+			all.resize(fieldnr + 1);
+
+		return all.at(fieldnr);
+	}
+
+	// The number of columns in this table.
+	[[nodiscard]] auto columns(this table_spec const &self) -> std::size_t
+	{
+		auto const &all = self.m_fields;
+		return all.size();
+	}
+
+	// Read-only access to every field spec, in column order.
+	[[nodiscard]] auto fields(this table_spec const &self)
+		    -> std::vector<field_spec<Char>> const &
+	{
+		return self.m_fields;
+	}
+
+private:
+	std::vector<field_spec<Char>> m_fields;
+};
+
+// Parse the flags at the start of a field spec, e.g. the '<' in "{<:NAME}",
+// applying them to 'field'.  On return, 'pos' is just past the ':' which
+// ends the flags, or at the '}' if the field has no ':'.  Throws
+// table_spec_error for an unknown flag or if the spec ends too early.
+template<typename Char,
+	 std::input_iterator Iterator, std::sentinel_for<Iterator> Sentinel>
+auto parse_field_flags(field_spec<Char> &field, Iterator &pos, Sentinel end)
+	-> void
+{
+	while (pos < end) {
+		auto const flag = *pos;
+
+		if (flag == ':') {
+			// End of the flags; step over the separator.
+			++pos;
+			return;
+		}
+
+		if (flag == '}')
+			// End of the field; leave the brace for the caller.
+			return;
+
+		if (flag == '<')
+			field.align(field_spec<Char>::left);
+		else if (flag == '>')
+			field.align(field_spec<Char>::right);
+		else
+			throw table_spec_error("Invalid table spec: "
+					       "unknown flag character");
+
+		// A flag must be followed by something.
+		if (++pos == end)
+			throw table_spec_error("Invalid table spec: "
+					       "unterminated field");
+	}
+}
+
+// Parse one complete field spec, e.g. "{<:NAME}", starting at 'pos'.  On
+// return, 'pos' is just past the closing '}'.  Throws table_spec_error if
+// the field is malformed.
+template<typename Char,
+	std::input_iterator Iterator, std::sentinel_for<Iterator> Sentinel>
+[[nodiscard]] auto parse_field(Iterator &pos, Sentinel end)
+	-> field_spec<Char>
+{
+	if (pos == end)
+		throw table_spec_error("Invalid table spec: empty field");
+
+	// Every field spec opens with a '{'.
+	if (*pos != '{')
+		throw table_spec_error("Invalid table spec: expected '{'");
+
+	++pos;
+	if (pos == end)
+		throw table_spec_error("Invalid table spec: unterminated field");
+
+	auto result = field_spec<Char>{};
+
+	// Consume any flags, up to and including the ':' if there is one.
+	parse_field_flags(result, pos, end);
+
+	// Whatever remains before the closing brace is the field name.
+	auto const closing = std::ranges::find(pos, end, '}');
+	if (closing == end)
+		throw table_spec_error("Invalid table spec: expected '}'");
+
+	auto const name = std::basic_string_view<Char>(pos, closing);
+	result.name(name);
+
+	// The column can never be narrower than its own header.
+	result.ensure_width(name.size());
+
+	pos = std::next(closing);
+	return result;
+}
+
+// Parse a whole table spec: zero or more field specs, each optionally
+// preceded by whitespace (as classified by the "C" locale).
+template<typename Char>
+[[nodiscard]] auto parse_table_spec(std::basic_string_view<Char> spec)
+	-> table_spec<Char>
+{
+	auto result = table_spec<Char>();
+
+	auto cursor = spec.begin();
+	auto const last = spec.end();
+
+	// Advance the cursor over any whitespace in front of it.
+	auto const skip_spaces = [&] {
+		while (cursor < last && is_c_space(*cursor))
+			++cursor;
+	};
+
+	// Each pass skips whitespace, then parses one field if any text
+	// is left.
+	for (skip_spaces(); cursor != last; skip_spaces())
+		result.add(parse_field<Char>(cursor, last));
+
+	return result;
+}
+
+// Format 'range' as a table according to 'table_spec' and write the result
+// to 'out'.  Each element of 'range' is one row, and each element of a row
+// is one column value.  The first line written is the header row, built
+// from the field names; every line ends with a newline.
+export template<typename Char,
+		std::ranges::range Range,
+		std::output_iterator<Char> Iterator>
+auto basic_tabulate(std::basic_string_view<Char> table_spec,
+		    Range &&range,
+		    Iterator &&out)
+	-> void
+{
+	using string_type = std::basic_string<Char>;
+
+	auto spec = parse_table_spec(table_spec);
+
+	// Our formatted copy of the input.  The first row is reserved for
+	// the header, which can only be filled in once we have seen how
+	// many columns the data has.
+	auto rows = std::vector<std::vector<string_type>>(1);
+
+	// Format every value, widening each column to fit as we go.
+	for (auto &&input_row : range) {
+		auto &cells = rows.emplace_back();
+
+		for (auto &&value : input_row) {
+			// The number of cells so far is this value's column.
+			auto &column = spec.field(cells.size());
+			auto &text = cells.emplace_back(column.format(value));
+			column.ensure_width(text.size());
+		}
+	}
+
+	// Fill in the header row, one name for each column.
+	auto &header = rows.front();
+	for (auto const &column : spec.fields())
+		header.emplace_back(std::from_range, column.name());
+
+	// Write out each row, separating the columns with a single space.
+	for (auto const &cells : rows) {
+		auto const ncells = cells.size();
+
+		for (std::size_t i = 0; i != ncells; ++i) {
+			auto const is_last = (i + 1 == ncells);
+
+			spec.field(i).print(cells[i], out, is_last);
+
+			if (!is_last)
+				*out++ = ' ';
+		}
+
+		*out++ = '\n';
+	}
+}
+
+// basic_tabulate for narrow (char) table specs and output.
+export auto tabulate(std::string_view table_spec,
+		     std::ranges::range auto &&range,
+		     std::output_iterator<char> auto &&out)
+{
+	using range_type = decltype(range);
+	using out_type = decltype(out);
+
+	return basic_tabulate<char>(table_spec,
+				    std::forward<range_type>(range),
+				    std::forward<out_type>(out));
+}
+
+// basic_tabulate for wide (wchar_t) table specs and output.
+export auto wtabulate(std::wstring_view table_spec,
+		      std::ranges::range auto &&range,
+		      std::output_iterator<wchar_t> auto &&out)
+{
+	using range_type = decltype(range);
+	using out_type = decltype(out);
+
+	return basic_tabulate<wchar_t>(table_spec,
+				       std::forward<range_type>(range),
+				       std::forward<out_type>(out));
+}
+
+} // namespace nihil
diff --git a/nihil.util/test_ctype.cc b/nihil.util/test_ctype.cc
new file mode 100644
index 0000000..62721d1
--- /dev/null
+++ b/nihil.util/test_ctype.cc
@@ -0,0 +1,373 @@
+/*
+ * This source code is released into the public domain.
+ */
+
+#include <catch2/catch_test_macros.hpp>
+
+import nihil.util;
+
+TEST_CASE("ctype: space", "[ctype]") {
+	auto const is_utf8_space = nihil::ctype_is(
+		std::ctype_base::space, std::locale("C.UTF-8"));
+
+	// '\v' (vertical tab) is a space in every locale tested.
+	REQUIRE(nihil::is_space('\v'));
+	REQUIRE(nihil::is_space(L'\v'));
+
+	REQUIRE(nihil::is_c_space('\v'));
+	REQUIRE(nihil::is_c_space(L'\v'));
+
+	REQUIRE(is_utf8_space('\v'));
+	REQUIRE(is_utf8_space(L'\v'));
+
+	// 'x' is not a space in any of them.
+	REQUIRE_FALSE(nihil::is_space('x'));
+	REQUIRE_FALSE(nihil::is_space(L'x'));
+
+	REQUIRE_FALSE(nihil::is_c_space('x'));
+	REQUIRE_FALSE(nihil::is_c_space(L'x'));
+
+	REQUIRE_FALSE(is_utf8_space('x'));
+	REQUIRE_FALSE(is_utf8_space(L'x'));
+
+	// U+2003 EM SPACE is a space only in the UTF-8 locale.
+	REQUIRE_FALSE(nihil::is_space(L'\u2003'));
+	REQUIRE_FALSE(nihil::is_c_space(L'\u2003'));
+	REQUIRE(is_utf8_space(L'\u2003'));
+}
+
+TEST_CASE("ctype: print", "[ctype]") {
+	auto const is_utf8_print = nihil::ctype_is(
+		std::ctype_base::print, std::locale("C.UTF-8"));
+
+	// 'x' is printable in every locale tested.
+	REQUIRE(nihil::is_print('x'));
+	REQUIRE(nihil::is_print(L'x'));
+
+	REQUIRE(nihil::is_c_print('x'));
+	REQUIRE(nihil::is_c_print(L'x'));
+
+	REQUIRE(is_utf8_print('x'));
+	REQUIRE(is_utf8_print(L'x'));
+
+	// '\003' is not printable in any of them.
+	REQUIRE_FALSE(nihil::is_print('\003'));
+	REQUIRE_FALSE(nihil::is_print(L'\003'));
+
+	REQUIRE_FALSE(nihil::is_c_print('\003'));
+	REQUIRE_FALSE(nihil::is_c_print(L'\003'));
+
+	REQUIRE_FALSE(is_utf8_print('\003'));
+	REQUIRE_FALSE(is_utf8_print(L'\003'));
+
+	// U+0410 CYRILLIC CAPITAL LETTER A is printable only in the UTF-8
+	// locale.
+	REQUIRE_FALSE(nihil::is_print(L'\u0410'));
+	REQUIRE_FALSE(nihil::is_c_print(L'\u0410'));
+	REQUIRE(is_utf8_print(L'\u0410'));
+}
+
+TEST_CASE("ctype: cntrl", "[ctype]") {
+	auto const is_utf8_cntrl = nihil::ctype_is(
+		std::ctype_base::cntrl, std::locale("C.UTF-8"));
+
+	// '\003' is a control character in every locale tested.
+	REQUIRE(nihil::is_cntrl('\003'));
+	REQUIRE(nihil::is_cntrl(L'\003'));
+
+	REQUIRE(nihil::is_c_cntrl('\003'));
+	REQUIRE(nihil::is_c_cntrl(L'\003'));
+
+	REQUIRE(is_utf8_cntrl('\003'));
+	REQUIRE(is_utf8_cntrl(L'\003'));
+
+	// 'x' is not a control character in any of them.
+	REQUIRE_FALSE(nihil::is_cntrl('x'));
+	REQUIRE_FALSE(nihil::is_cntrl(L'x'));
+
+	REQUIRE_FALSE(nihil::is_c_cntrl('x'));
+	REQUIRE_FALSE(nihil::is_c_cntrl(L'x'));
+
+	REQUIRE_FALSE(is_utf8_cntrl('x'));
+	REQUIRE_FALSE(is_utf8_cntrl(L'x'));
+
+	// U+00AD SOFT HYPHEN is a control character only in the UTF-8
+	// locale.
+	REQUIRE_FALSE(nihil::is_cntrl(L'\u00ad'));
+	REQUIRE_FALSE(nihil::is_c_cntrl(L'\u00ad'));
+	REQUIRE(is_utf8_cntrl(L'\u00ad'));
+}
+
+TEST_CASE("ctype: upper", "[ctype]") {
+	auto const is_utf8_upper = nihil::ctype_is(
+		std::ctype_base::upper, std::locale("C.UTF-8"));
+
+	// 'A' is upper case in every locale tested.
+	REQUIRE(nihil::is_upper('A'));
+	REQUIRE(nihil::is_upper(L'A'));
+
+	REQUIRE(nihil::is_c_upper('A'));
+	REQUIRE(nihil::is_c_upper(L'A'));
+
+	REQUIRE(is_utf8_upper('A'));
+	REQUIRE(is_utf8_upper(L'A'));
+
+	// 'a' is not upper case in any of them.
+	REQUIRE_FALSE(nihil::is_upper('a'));
+	REQUIRE_FALSE(nihil::is_upper(L'a'));
+
+	REQUIRE_FALSE(nihil::is_c_upper('a'));
+	REQUIRE_FALSE(nihil::is_c_upper(L'a'));
+
+	REQUIRE_FALSE(is_utf8_upper('a'));
+	REQUIRE_FALSE(is_utf8_upper(L'a'));
+
+	// U+0410 CYRILLIC CAPITAL LETTER A is upper case only in the UTF-8
+	// locale.
+	REQUIRE_FALSE(nihil::is_upper(L'\u0410'));
+	REQUIRE_FALSE(nihil::is_c_upper(L'\u0410'));
+	REQUIRE(is_utf8_upper(L'\u0410'));
+}
+
+TEST_CASE("ctype: lower", "[ctype]") {
+	auto const is_utf8_lower = nihil::ctype_is(
+		std::ctype_base::lower, std::locale("C.UTF-8"));
+
+	// 'a' is lower case in every locale tested.
+	REQUIRE(nihil::is_lower('a'));
+	REQUIRE(nihil::is_lower(L'a'));
+
+	REQUIRE(nihil::is_c_lower('a'));
+	REQUIRE(nihil::is_c_lower(L'a'));
+
+	REQUIRE(is_utf8_lower('a'));
+	REQUIRE(is_utf8_lower(L'a'));
+
+	// 'A' is not lower case in any of them.
+	REQUIRE_FALSE(nihil::is_lower('A'));
+	REQUIRE_FALSE(nihil::is_lower(L'A'));
+
+	REQUIRE_FALSE(nihil::is_c_lower('A'));
+	REQUIRE_FALSE(nihil::is_c_lower(L'A'));
+
+	REQUIRE_FALSE(is_utf8_lower('A'));
+	REQUIRE_FALSE(is_utf8_lower(L'A'));
+
+	// U+0430 CYRILLIC SMALL LETTER A is lower case only in the UTF-8
+	// locale.
+	REQUIRE_FALSE(nihil::is_lower(L'\u0430'));
+	REQUIRE_FALSE(nihil::is_c_lower(L'\u0430'));
+	REQUIRE(is_utf8_lower(L'\u0430'));
+}
+
+TEST_CASE("ctype: alpha", "[ctype]") {
+	auto const is_utf8_alpha = nihil::ctype_is(
+		std::ctype_base::alpha, std::locale("C.UTF-8"));
+
+	// 'a' is alphabetical in every locale tested.
+	REQUIRE(nihil::is_alpha('a'));
+	REQUIRE(nihil::is_alpha(L'a'));
+
+	REQUIRE(nihil::is_c_alpha('a'));
+	REQUIRE(nihil::is_c_alpha(L'a'));
+
+	REQUIRE(is_utf8_alpha('a'));
+	REQUIRE(is_utf8_alpha(L'a'));
+
+	// '1' is not alphabetical in any of them.
+	REQUIRE_FALSE(nihil::is_alpha('1'));
+	REQUIRE_FALSE(nihil::is_alpha(L'1'));
+
+	REQUIRE_FALSE(nihil::is_c_alpha('1'));
+	REQUIRE_FALSE(nihil::is_c_alpha(L'1'));
+
+	REQUIRE_FALSE(is_utf8_alpha('1'));
+	REQUIRE_FALSE(is_utf8_alpha(L'1'));
+
+	// U+0430 CYRILLIC SMALL LETTER A is alphabetical only in the UTF-8
+	// locale.
+	REQUIRE_FALSE(nihil::is_alpha(L'\u0430'));
+	REQUIRE_FALSE(nihil::is_c_alpha(L'\u0430'));
+	REQUIRE(is_utf8_alpha(L'\u0430'));
+}
+
+TEST_CASE("ctype: digit", "[ctype]") {
+	auto const is_utf8_digit = nihil::ctype_is(
+		std::ctype_base::digit, std::locale("C.UTF-8"));
+
+	// '1' is a digit in every locale tested.
+	REQUIRE(nihil::is_digit('1'));
+	REQUIRE(nihil::is_digit(L'1'));
+
+	REQUIRE(nihil::is_c_digit('1'));
+	REQUIRE(nihil::is_c_digit(L'1'));
+
+	REQUIRE(is_utf8_digit('1'));
+	REQUIRE(is_utf8_digit(L'1'));
+
+	// 'a' is not a digit in any of them.
+	REQUIRE_FALSE(nihil::is_digit('a'));
+	REQUIRE_FALSE(nihil::is_digit(L'a'));
+
+	REQUIRE_FALSE(nihil::is_c_digit('a'));
+	REQUIRE_FALSE(nihil::is_c_digit(L'a'));
+
+	REQUIRE_FALSE(is_utf8_digit('a'));
+	REQUIRE_FALSE(is_utf8_digit(L'a'));
+
+	// U+0660 ARABIC-INDIC DIGIT ZERO is a digit only in the UTF-8
+	// locale.
+	REQUIRE_FALSE(nihil::is_digit(L'\u0660'));
+	REQUIRE_FALSE(nihil::is_c_digit(L'\u0660'));
+	REQUIRE(is_utf8_digit(L'\u0660'));
+}
+
+TEST_CASE("ctype: punct", "[ctype]") {
+	auto const is_utf8_punct = nihil::ctype_is(
+		std::ctype_base::punct, std::locale("C.UTF-8"));
+
+	// ';' is punctuation in every locale tested.
+	REQUIRE(nihil::is_punct(';'));
+	REQUIRE(nihil::is_punct(L';'));
+
+	REQUIRE(nihil::is_c_punct(';'));
+	REQUIRE(nihil::is_c_punct(L';'));
+
+	REQUIRE(is_utf8_punct(';'));
+	REQUIRE(is_utf8_punct(L';'));
+
+	// 'a' is not punctuation in any of them.
+	REQUIRE_FALSE(nihil::is_punct('a'));
+	REQUIRE_FALSE(nihil::is_punct(L'a'));
+
+	REQUIRE_FALSE(nihil::is_c_punct('a'));
+	REQUIRE_FALSE(nihil::is_c_punct(L'a'));
+
+	REQUIRE_FALSE(is_utf8_punct('a'));
+	REQUIRE_FALSE(is_utf8_punct(L'a'));
+
+	// U+00A1 INVERTED EXCLAMATION MARK is punctuation only in the UTF-8
+	// locale.
+	REQUIRE_FALSE(nihil::is_punct(L'\u00A1'));
+	REQUIRE_FALSE(nihil::is_c_punct(L'\u00A1'));
+	REQUIRE(is_utf8_punct(L'\u00A1'));
+}
+
+TEST_CASE("ctype: xdigit", "[ctype]") {
+	auto const is_utf8_xdigit = nihil::ctype_is(
+		std::ctype_base::xdigit, std::locale("C.UTF-8"));
+
+	// 'f' is a hexadecimal digit in every locale tested.
+	REQUIRE(nihil::is_xdigit('f'));
+	REQUIRE(nihil::is_xdigit(L'f'));
+
+	REQUIRE(nihil::is_c_xdigit('f'));
+	REQUIRE(nihil::is_c_xdigit(L'f'));
+
+	REQUIRE(is_utf8_xdigit('f'));
+	REQUIRE(is_utf8_xdigit(L'f'));
+
+	// 'g' is not a hexadecimal digit in any of them.
+	REQUIRE_FALSE(nihil::is_xdigit('g'));
+	REQUIRE_FALSE(nihil::is_xdigit(L'g'));
+
+	REQUIRE_FALSE(nihil::is_c_xdigit('g'));
+	REQUIRE_FALSE(nihil::is_c_xdigit(L'g'));
+
+	REQUIRE_FALSE(is_utf8_xdigit('g'));
+	REQUIRE_FALSE(is_utf8_xdigit(L'g'));
+}
+
+TEST_CASE("ctype: blank", "[ctype]") {
+	auto const is_utf8_blank = nihil::ctype_is(
+		std::ctype_base::blank, std::locale("C.UTF-8"));
+
+	// '\t' is a blank in every locale tested.
+	REQUIRE(nihil::is_blank('\t'));
+	REQUIRE(nihil::is_blank(L'\t'));
+
+	REQUIRE(nihil::is_c_blank('\t'));
+	REQUIRE(nihil::is_c_blank(L'\t'));
+
+	REQUIRE(is_utf8_blank('\t'));
+	REQUIRE(is_utf8_blank(L'\t'));
+
+	// '\v' is not a blank in any of them.
+	REQUIRE_FALSE(nihil::is_blank('\v'));
+	REQUIRE_FALSE(nihil::is_blank(L'\v'));
+
+	REQUIRE_FALSE(nihil::is_c_blank('\v'));
+	REQUIRE_FALSE(nihil::is_c_blank(L'\v'));
+
+	REQUIRE_FALSE(is_utf8_blank('\v'));
+	REQUIRE_FALSE(is_utf8_blank(L'\v'));
+
+	// There is no non-ASCII check here: there don't seem to be any
+	// UTF-8 blank characters, at least in FreeBSD libc.
+}
+
+TEST_CASE("ctype: alnum", "[ctype]") {
+	auto const is_utf8_alnum = nihil::ctype_is(
+		std::ctype_base::alnum, std::locale("C.UTF-8"));
+
+	// 'a' is alphanumeric in every locale tested.
+	REQUIRE(nihil::is_alnum('a'));
+	REQUIRE(nihil::is_alnum(L'a'));
+
+	REQUIRE(nihil::is_c_alnum('a'));
+	REQUIRE(nihil::is_c_alnum(L'a'));
+
+	REQUIRE(is_utf8_alnum('a'));
+	REQUIRE(is_utf8_alnum(L'a'));
+
+	// '\t' is not alphanumeric in any of them.
+	REQUIRE_FALSE(nihil::is_alnum('\t'));
+	REQUIRE_FALSE(nihil::is_alnum(L'\t'));
+
+	REQUIRE_FALSE(nihil::is_c_alnum('\t'));
+	REQUIRE_FALSE(nihil::is_c_alnum(L'\t'));
+
+	REQUIRE_FALSE(is_utf8_alnum('\t'));
+	REQUIRE_FALSE(is_utf8_alnum(L'\t'));
+
+	// U+0430 CYRILLIC SMALL LETTER A is alphanumeric only in the UTF-8
+	// locale.
+	REQUIRE_FALSE(nihil::is_alnum(L'\u0430'));
+	REQUIRE_FALSE(nihil::is_c_alnum(L'\u0430'));
+	REQUIRE(is_utf8_alnum(L'\u0430'));
+}
+
+TEST_CASE("ctype: graph", "[ctype]") {
+	auto const is_utf8_graph = nihil::ctype_is(
+		std::ctype_base::graph, std::locale("C.UTF-8"));
+
+	// 'a' is graphical in every locale tested.
+	REQUIRE(nihil::is_graph('a'));
+	REQUIRE(nihil::is_graph(L'a'));
+
+	REQUIRE(nihil::is_c_graph('a'));
+	REQUIRE(nihil::is_c_graph(L'a'));
+
+	REQUIRE(is_utf8_graph('a'));
+	REQUIRE(is_utf8_graph(L'a'));
+
+	// '\t' is not graphical in any of them.
+	REQUIRE_FALSE(nihil::is_graph('\t'));
+	REQUIRE_FALSE(nihil::is_graph(L'\t'));
+
+	REQUIRE_FALSE(nihil::is_c_graph('\t'));
+	REQUIRE_FALSE(nihil::is_c_graph(L'\t'));
+
+	REQUIRE_FALSE(is_utf8_graph('\t'));
+	REQUIRE_FALSE(is_utf8_graph(L'\t'));
+
+	// U+0430 CYRILLIC SMALL LETTER A is graphical only in the UTF-8
+	// locale.
+	REQUIRE_FALSE(nihil::is_graph(L'\u0430'));
+	REQUIRE_FALSE(nihil::is_c_graph(L'\u0430'));
+	REQUIRE(is_utf8_graph(L'\u0430'));
+}
diff --git a/nihil.util/test_next_word.cc b/nihil.util/test_next_word.cc
new file mode 100644
index 0000000..7e61237
--- /dev/null
+++ b/nihil.util/test_next_word.cc
@@ -0,0 +1,65 @@
+/*
+ * This source code is released into the public domain.
+ */
+
+#include <locale>
+#include <string>
+
+#include <catch2/catch_test_macros.hpp>
+
+import nihil.util;
+
+TEST_CASE("next_word: basic", "[next_word]")
+{
+	using namespace std::literals;
+	auto text = "foo bar baz"sv;
+
+	// The value overload returns the word and the rest as a pair.
+	auto const split = nihil::next_word(text);
+	REQUIRE(split.first == "foo");
+	REQUIRE(split.second == " bar baz");
+
+	// The pointer overload returns the word and updates the view.
+	auto const word = nihil::next_word(&text);
+	REQUIRE(word == "foo");
+	REQUIRE(text == " bar baz");
+}
+
+TEST_CASE("next_word: multiple spaces", "[next_word]")
+{
+	using namespace std::literals;
+	auto text = "foo  bar  baz"sv;
+
+	// Whitespace after the word is left in the remainder untouched.
+	auto const split = nihil::next_word(text);
+	REQUIRE(split.first == "foo");
+	REQUIRE(split.second == "  bar  baz");
+
+	auto const word = nihil::next_word(&text);
+	REQUIRE(word == "foo");
+	REQUIRE(text == "  bar  baz");
+}
+
+TEST_CASE("next_word: leading spaces", "[next_word]")
+{
+	using namespace std::literals;
+	auto text = " \tfoo  bar  baz"sv;
+
+	// Whitespace in front of the first word is skipped.
+	auto const split = nihil::next_word(text);
+	REQUIRE(split.first == "foo");
+	REQUIRE(split.second == "  bar  baz");
+
+	auto const word = nihil::next_word(&text);
+	REQUIRE(word == "foo");
+	REQUIRE(text == "  bar  baz");
+}
+
+TEST_CASE("next_word: locale", "[next_word]")
+{
+	using namespace std::literals;
+	auto const text = L"\u2003foo\u2003bar\u2003baz"sv;
+
+	// With the default locale, U+2003 EM SPACE is not treated as
+	// whitespace, so the whole string is one word.
+	auto const plain = nihil::next_word(text);
+	REQUIRE(plain.first == text);
+
+	// With the UTF-8 locale, it separates words.
+	auto const utf8 = nihil::next_word(text, std::locale("C.UTF-8"));
+	REQUIRE(utf8.first == L"foo");
+	REQUIRE(utf8.second == L"\u2003bar\u2003baz");
+}
diff --git a/nihil.util/test_parse_size.cc b/nihil.util/test_parse_size.cc
new file mode 100644
index 0000000..4f4d018
--- /dev/null
+++ b/nihil.util/test_parse_size.cc
@@ -0,0 +1,168 @@
+/*
+ * This source code is released into the public domain.
+ */
+
+#include <cstdint>
+#include <system_error>
+
+#include <catch2/catch_test_macros.hpp>
+
+import nihil.core;
+import nihil.util;
+
+TEST_CASE("parse_size: empty value", "[nihil]")
+{
+	using namespace nihil;
+	// An empty string is rejected with a dedicated error, not parsed as zero.
+	auto result = parse_size<std::uint64_t>("");
+	REQUIRE(!result);
+	REQUIRE(result.error() == nihil::errc::empty_string);
+}
+
+TEST_CASE("parse_size: basic", "[nihil]")
+{
+	using namespace nihil;
+	// Plain decimal input with no unit suffix, checked at each type's limits.
+	SECTION("bare number") {
+		auto n = parse_size<std::uint64_t>("1024").value();
+		REQUIRE(n == 1024);
+	}
+
+	SECTION("max value, unsigned") {
+		auto n = parse_size<std::uint16_t>("65535").value();
+		REQUIRE(n == 65535);
+	}
+	// Must use a signed type: 32767 is INT16_MAX, not a uint16_t boundary.
+	SECTION("max value, signed") {
+		auto n = parse_size<std::int16_t>("32767").value();
+		REQUIRE(n == 32767);
+	}
+
+	SECTION("overflow by 1, unsigned") {
+		auto n = parse_size<std::uint16_t>("65536");
+		REQUIRE(!n);
+		REQUIRE(n.error() == std::errc::result_out_of_range);
+	}
+
+	SECTION("overflow by 1, signed") {
+		auto n = parse_size<std::int16_t>("32768");
+		REQUIRE(!n);
+		REQUIRE(n.error() == std::errc::result_out_of_range);
+	}
+
+	SECTION("overflow by many, unsigned") {
+		auto n = parse_size<std::uint16_t>("100000");
+		REQUIRE(!n);
+		REQUIRE(n.error() == std::errc::result_out_of_range);
+	}
+
+	SECTION("overflow by many, signed") {
+		auto n = parse_size<std::int16_t>("100000");
+		REQUIRE(!n);
+		REQUIRE(n.error() == std::errc::result_out_of_range);
+	}
+}
+
+TEST_CASE("parse_size: invalid multiplier", "[nihil]")
+{
+	using namespace nihil;
+	// A suffix that is not a known unit letter is rejected.
+	auto unknown_unit = parse_size<std::uint64_t>("4z");
+	REQUIRE(!unknown_unit);
+	REQUIRE(unknown_unit.error() == nihil::errc::invalid_unit);
+	// A valid unit followed by extra characters is rejected as well.
+	auto trailing_junk = parse_size<std::uint64_t>("4kz");
+	REQUIRE(!trailing_junk);
+	REQUIRE(trailing_junk.error() == nihil::errc::invalid_unit);
+}
+
+TEST_CASE("parse_size: multipliers", "[nihil]")
+{
+	using namespace nihil;
+	// Each successive suffix scales the leading "4" by another factor of 1024.
+	constexpr std::uint64_t kib = 1024;
+
+	SECTION("k") {
+		auto size = parse_size<std::uint64_t>("4k").value();
+		REQUIRE(size == 4 * kib);
+	}
+
+	SECTION("m") {
+		auto size = parse_size<std::uint64_t>("4m").value();
+		REQUIRE(size == 4 * kib * kib);
+	}
+
+	SECTION("g") {
+		auto size = parse_size<std::uint64_t>("4g").value();
+		REQUIRE(size == 4 * kib * kib * kib);
+	}
+
+	SECTION("t") {
+		auto size = parse_size<std::uint64_t>("4t").value();
+		REQUIRE(size == 4 * kib * kib * kib * kib);
+	}
+
+	SECTION("p") {
+		auto size = parse_size<std::uint64_t>("4p").value();
+		REQUIRE(size == 4 * kib * kib * kib * kib * kib);
+	}
+}
+
+TEST_CASE("parse_size: multiplier overflow", "[nihil]")
+{
+	using namespace nihil;
+	// 64k == 65536 and 32k == 32768: each is one past the 16-bit maximum.
+	SECTION("unsigned") {
+		auto n = parse_size<std::uint16_t>("64k");
+		REQUIRE(!n);
+		REQUIRE(n.error() == std::errc::result_out_of_range);
+	}
+
+	SECTION("signed") {
+		auto n = parse_size<std::int16_t>("32k");
+		REQUIRE(!n);
+		REQUIRE(n.error() == std::errc::result_out_of_range);
+	}
+}
+
+TEST_CASE("parse_size: wide", "[nihil]")
+{
+	using namespace nihil;
+	// Wide-character input is accepted just like narrow input.
+	SECTION("bare number") {
+		auto size = parse_size<std::uint64_t>(L"1024").value();
+		REQUIRE(size == 1024);
+	}
+}
+
+TEST_CASE("parse_size: wide multipliers", "[nihil]")
+{
+	using namespace nihil;
+	// Same unit ladder as the narrow test, fed through wide string literals.
+	constexpr std::uint64_t kib = 1024;
+
+	SECTION("k") {
+		auto size = parse_size<std::uint64_t>(L"4k").value();
+		REQUIRE(size == 4 * kib);
+	}
+
+	SECTION("m") {
+		auto size = parse_size<std::uint64_t>(L"4m").value();
+		REQUIRE(size == 4 * kib * kib);
+	}
+
+	SECTION("g") {
+		auto size = parse_size<std::uint64_t>(L"4g").value();
+		REQUIRE(size == 4 * kib * kib * kib);
+	}
+
+	SECTION("t") {
+		auto size = parse_size<std::uint64_t>(L"4t").value();
+		REQUIRE(size == 4 * kib * kib * kib * kib);
+	}
+
+	SECTION("p") {
+		auto size = parse_size<std::uint64_t>(L"4p").value();
+		REQUIRE(size == 4 * kib * kib * kib * kib * kib);
+	}
+}
diff --git a/nihil.util/test_skipws.cc b/nihil.util/test_skipws.cc
new file mode 100644
index 0000000..837c1f3
--- /dev/null
+++ b/nihil.util/test_skipws.cc
@@ -0,0 +1,45 @@
+/*
+ * This source code is released into the public domain.
+ */
+
+#include <locale>
+#include <string>
+using namespace std::literals;
+
+#include <catch2/catch_test_macros.hpp>
+
+import nihil.util;
+
+TEST_CASE("skipws: basic", "[skipws]")
+{
+	REQUIRE(nihil::skipws("foo"sv) == "foo"); // nothing to strip
+	REQUIRE(nihil::skipws("   foo"sv) == "foo"); // leading spaces removed
+	REQUIRE(nihil::skipws("foo "sv) == "foo "); // trailing space is kept
+	REQUIRE(nihil::skipws("foo bar"sv) == "foo bar"); // interior space is kept
+}
+
+TEST_CASE("skipws: pointer", "[skipws]")
+{
+	auto view = "foo"sv;
+	nihil::skipws(&view);
+	REQUIRE(view == "foo");
+	// Leading whitespace is stripped from the pointed-to view in place.
+	view = "   foo"sv;
+	nihil::skipws(&view);
+	REQUIRE(view == "foo");
+	// Trailing whitespace is left alone.
+	view = "foo "sv;
+	nihil::skipws(&view);
+	REQUIRE(view == "foo ");
+	// Whitespace between words is left alone.
+	view = "foo bar"sv;
+	nihil::skipws(&view);
+	REQUIRE(view == "foo bar");
+}
+
+TEST_CASE("skipws: locale", "[skipws]")
+{
+	// Assume the default locale is C, where U+2003 EM SPACE is not whitespace.
+	REQUIRE(nihil::skipws(L"\u2003foo"sv) == L"\u2003foo");
+	REQUIRE(nihil::skipws(L"\u2003foo"sv, std::locale("C.UTF-8")) == L"foo");
+}
diff --git a/nihil.util/test_tabulate.cc b/nihil.util/test_tabulate.cc
new file mode 100644
index 0000000..8dee796
--- /dev/null
+++ b/nihil.util/test_tabulate.cc
@@ -0,0 +1,75 @@
+/*
+ * This source code is released into the public domain.
+ */
+
+#include <iterator>
+#include <string>
+#include <vector>
+
+#include <catch2/catch_test_macros.hpp>
+
+import nihil.util;
+
+using namespace std::literals;
+using namespace nihil;
+
+TEST_CASE("tabulate: basic", "[tabulate]")
+{
+	auto rows = std::vector{
+		std::vector{"a",   "foo", "b"},
+		std::vector{"bar", "c",   "baz"},
+	};
+	// Header cells are the field names; columns pad to their widest cell.
+	std::string rendered;
+	nihil::tabulate("{:1} {:2} {:3}", rows, std::back_inserter(rendered));
+	REQUIRE(rendered ==
+		"1   2   3\n"
+		"a   foo b\n"
+		"bar c   baz\n");
+}
+
+TEST_CASE("tabulate: basic wide", "[tabulate]")
+{
+	auto input = std::vector{
+		std::vector{L"a",   L"foo", L"b"},
+		std::vector{L"bar", L"c",   L"baz"},
+	};
+	// Wide-character variant of the basic test; the layout must be identical.
+	auto result = std::wstring();
+	wtabulate(L"{:1} {:2} {:3}", input, std::back_inserter(result));
+
+	REQUIRE(result ==
+L"1   2   3\n"
+L"a   foo b\n"
+L"bar c   baz\n");
+}
+
+TEST_CASE("tabulate: jagged", "[tabulate]")
+{
+	auto rows = std::vector{
+		std::vector{"a",   "foo", "b"},
+		std::vector{"bar", "baz"},
+	};
+	// A row with fewer cells than fields is printed without the missing cells.
+	std::string rendered;
+	nihil::tabulate("{:1} {:2} {:3}", rows, std::back_inserter(rendered));
+	REQUIRE(rendered ==
+		"1   2   3\n"
+		"a   foo b\n"
+		"bar baz\n");
+}
+
+TEST_CASE("tabulate: align", "[tabulate]")
+{
+	auto rows = std::vector{
+		std::vector{"a", "longvalue", "s"},
+		std::vector{"a", "s",         "longvalue"},
+	};
+	// '<' left-aligns column 2 and '>' right-aligns column 3 (see output).
+	std::string rendered;
+	nihil::tabulate("{:1} {<:2} {>:3}", rows, std::back_inserter(rendered));
+	REQUIRE(rendered ==
+		"1 2                 3\n"
+		"a longvalue         s\n"
+		"a s         longvalue\n");
+}
author	Lexi Winter <lexi@le-fay.org>	2025-06-28 19:25:55 +0100
committer	Lexi Winter <lexi@le-fay.org>	2025-06-28 19:25:55 +0100
commit	a2d7181700ac64b8e7a4472ec26dfa253b38f188 (patch)
tree	23c5a9c8ec4089ac346e2e0f9391909c3089b66b /nihil.util
parent	f226d46ee02b57dd76a4793593aa8d66e1c58353 (diff)
download	nihil-a2d7181700ac64b8e7a4472ec26dfa253b38f188.tar.gz nihil-a2d7181700ac64b8e7a4472ec26dfa253b38f188.tar.bz2