aboutsummaryrefslogtreecommitdiffstats
path: root/nihil.util
diff options
context:
space:
mode:
author: Lexi Winter <lexi@le-fay.org> 2025-06-28 19:25:55 +0100
committer: Lexi Winter <lexi@le-fay.org> 2025-06-28 19:25:55 +0100
commit: a2d7181700ac64b8e7a4472ec26dfa253b38f188 (patch)
tree: 23c5a9c8ec4089ac346e2e0f9391909c3089b66b /nihil.util
parent: f226d46ee02b57dd76a4793593aa8d66e1c58353 (diff)
download: nihil-a2d7181700ac64b8e7a4472ec26dfa253b38f188.tar.gz
nihil-a2d7181700ac64b8e7a4472ec26dfa253b38f188.tar.bz2
split nihil into separate modules
Diffstat (limited to 'nihil.util')
-rw-r--r-- nihil.util/CMakeLists.txt 36
-rw-r--r-- nihil.util/ctype.ccm 87
-rw-r--r-- nihil.util/next_word.ccm 49
-rw-r--r-- nihil.util/nihil.util.ccm 13
-rw-r--r-- nihil.util/parse_size.ccm 107
-rw-r--r-- nihil.util/skipws.ccm 40
-rw-r--r-- nihil.util/tabulate.ccm 312
-rw-r--r-- nihil.util/test_ctype.cc 373
-rw-r--r-- nihil.util/test_next_word.cc 65
-rw-r--r-- nihil.util/test_parse_size.cc 168
-rw-r--r-- nihil.util/test_skipws.cc 45
-rw-r--r-- nihil.util/test_tabulate.cc 75
12 files changed, 1370 insertions, 0 deletions
diff --git a/nihil.util/CMakeLists.txt b/nihil.util/CMakeLists.txt
new file mode 100644
index 0000000..b809a68
--- /dev/null
+++ b/nihil.util/CMakeLists.txt
@@ -0,0 +1,36 @@
+# This source code is released into the public domain.
+
+# nihil.util is a static library made of C++ module interface units.
+# nihil.util.ccm is the primary interface; the other files are its partitions.
+add_library(nihil.util STATIC)
+target_link_libraries(nihil.util PRIVATE nihil.core nihil.error nihil.monad)
+target_sources(nihil.util
+    PUBLIC FILE_SET modules TYPE CXX_MODULES FILES
+    nihil.util.ccm
+
+    ctype.ccm
+    parse_size.ccm
+    next_word.ccm
+    skipws.ccm
+    tabulate.ccm
+)
+
+if(NIHIL_TESTS)
+    # Locate Catch2 before declaring the target that links against its
+    # imported target, so a missing package is reported at the point of use.
+    find_package(Catch2 REQUIRED)
+
+    enable_testing()
+
+    add_executable(nihil.util.test
+        test_ctype.cc
+        test_parse_size.cc
+        test_next_word.cc
+        test_skipws.cc
+        test_tabulate.cc
+    )
+    target_link_libraries(nihil.util.test PRIVATE
+        nihil.util
+        Catch2::Catch2WithMain
+    )
+
+    # Register every Catch2 test case with CTest.
+    include(CTest)
+    include(Catch)
+    catch_discover_tests(nihil.util.test)
+endif()
diff --git a/nihil.util/ctype.ccm b/nihil.util/ctype.ccm
new file mode 100644
index 0000000..6d30c4f
--- /dev/null
+++ b/nihil.util/ctype.ccm
@@ -0,0 +1,87 @@
+/*
+ * This source code is released into the public domain.
+ */
+
+module;
+
+#include <concepts>
+#include <locale>
+
+export module nihil.util:ctype;
+
+namespace nihil {
+
+/*
+ * ctype_is: an algorithm-friendly wrapper around std::ctype<T>::is().
+ *
+ * ctype_is(m) yields a function object which accepts a value of any integral
+ * (character) type and returns whether that character has classification m
+ * in the stored locale.
+ *
+ * When no locale is given, a copy of the global locale is taken at
+ * construction time.  The locale is always stored by value, so constructing
+ * a ctype_is from a temporary locale is safe.
+ */
+
+export struct ctype_is final {
+    ctype_is(std::ctype_base::mask mask_,
+             std::locale const &locale_ = std::locale())
+        : m_mask(mask_), m_locale(locale_)
+    {
+    }
+
+    // Classify c using the std::ctype facet matching c's own type.
+    [[nodiscard]] auto operator()(this ctype_is const &self,
+                                  std::integral auto c)
+    {
+        using char_type = decltype(c);
+        return std::use_facet<std::ctype<char_type>>(self.m_locale)
+            .is(self.m_mask, c);
+    }
+
+private:
+    std::ctype_base::mask m_mask;
+    std::locale m_locale;
+};
+
+// Predefined tests for the global locale. Each object stores its own copy of
+// the locale returned by std::locale() at the moment the variable is
+// initialised, so a later call to std::locale::global() does not change the
+// locale these tests use.
+
+export inline auto is_space = ctype_is(std::ctype_base::space);
+export inline auto is_print = ctype_is(std::ctype_base::print);
+export inline auto is_cntrl = ctype_is(std::ctype_base::cntrl);
+export inline auto is_upper = ctype_is(std::ctype_base::upper);
+export inline auto is_lower = ctype_is(std::ctype_base::lower);
+export inline auto is_alpha = ctype_is(std::ctype_base::alpha);
+export inline auto is_digit = ctype_is(std::ctype_base::digit);
+export inline auto is_punct = ctype_is(std::ctype_base::punct);
+export inline auto is_xdigit = ctype_is(std::ctype_base::xdigit);
+export inline auto is_blank = ctype_is(std::ctype_base::blank);
+export inline auto is_alnum = ctype_is(std::ctype_base::alnum);
+export inline auto is_graph = ctype_is(std::ctype_base::graph);
+
+// Predefined tests for the classic "C" locale (std::locale::classic()); these
+// do not depend on the global locale. The C locale is guaranteed to always be
+// available, so this doesn't create lifetime issues.
+
+export inline auto is_c_space =
+ ctype_is(std::ctype_base::space, std::locale::classic());
+export inline auto is_c_print =
+ ctype_is(std::ctype_base::print, std::locale::classic());
+export inline auto is_c_cntrl =
+ ctype_is(std::ctype_base::cntrl, std::locale::classic());
+export inline auto is_c_upper =
+ ctype_is(std::ctype_base::upper, std::locale::classic());
+export inline auto is_c_lower =
+ ctype_is(std::ctype_base::lower, std::locale::classic());
+export inline auto is_c_alpha =
+ ctype_is(std::ctype_base::alpha, std::locale::classic());
+export inline auto is_c_digit =
+ ctype_is(std::ctype_base::digit, std::locale::classic());
+export inline auto is_c_punct =
+ ctype_is(std::ctype_base::punct, std::locale::classic());
+export inline auto is_c_xdigit =
+ ctype_is(std::ctype_base::xdigit, std::locale::classic());
+export inline auto is_c_blank =
+ ctype_is(std::ctype_base::blank, std::locale::classic());
+export inline auto is_c_alnum =
+ ctype_is(std::ctype_base::alnum, std::locale::classic());
+export inline auto is_c_graph =
+ ctype_is(std::ctype_base::graph, std::locale::classic());
+
+} // namespace nihil
diff --git a/nihil.util/next_word.ccm b/nihil.util/next_word.ccm
new file mode 100644
index 0000000..c5d3ad7
--- /dev/null
+++ b/nihil.util/next_word.ccm
@@ -0,0 +1,49 @@
+/*
+ * This source code is released into the public domain.
+ */
+
+module;
+
+#include <algorithm>
+#include <locale>
+#include <ranges>
+#include <string>
+#include <utility>
+
+export module nihil.util:next_word;
+
+import :skipws;
+
+namespace nihil {
+
+/*
+ * Split the first whitespace-delimited word off the front of a string_view.
+ *
+ * Leading whitespace (as classified by the given locale) is skipped first.
+ * The result is a pair {word, rest}: 'word' is the run of non-space
+ * characters and 'rest' is everything after it, beginning at the whitespace
+ * that ended the word.  Feeding 'rest' back in therefore yields each word of
+ * the string in turn.
+ */
+
+export template<typename Char> [[nodiscard]]
+auto next_word(std::basic_string_view<Char> text,
+               std::locale const &locale = std::locale())
+    -> std::pair<std::basic_string_view<Char>,
+                 std::basic_string_view<Char>>
+{
+    using view_type = std::basic_string_view<Char>;
+
+    auto const trimmed = skipws(text, locale);
+    auto const first = std::ranges::begin(trimmed);
+    auto const last = std::ranges::end(trimmed);
+
+    // The word ends at the first whitespace character, or at the end.
+    auto const word_end = std::ranges::find_if(
+        first, last, ctype_is(std::ctype_base::space, locale));
+
+    return {view_type(first, word_end), view_type(word_end, last)};
+}
+
+/*
+ * In-place variant of next_word: returns the next word from *text and
+ * updates *text to refer to the remainder of the string.  'text' must not
+ * be null.
+ */
+export template<typename Char>
+auto next_word(std::basic_string_view<Char> *text,
+               std::locale const &locale = std::locale())
+    -> std::basic_string_view<Char>
+{
+    auto const parts = next_word(*text, locale);
+    *text = parts.second;
+    return parts.first;
+}
+
+} // namespace nihil
diff --git a/nihil.util/nihil.util.ccm b/nihil.util/nihil.util.ccm
new file mode 100644
index 0000000..afd513a
--- /dev/null
+++ b/nihil.util/nihil.util.ccm
@@ -0,0 +1,13 @@
+/*
+ * This source code is released into the public domain.
+ */
+
+module;
+
+export module nihil.util;
+
+export import :ctype;
+export import :parse_size;
+export import :next_word;
+export import :skipws;
+export import :tabulate;
diff --git a/nihil.util/parse_size.ccm b/nihil.util/parse_size.ccm
new file mode 100644
index 0000000..c692578
--- /dev/null
+++ b/nihil.util/parse_size.ccm
@@ -0,0 +1,107 @@
+/*
+ * This source code is released into the public domain.
+ */
+
+module;
+
+#include <algorithm>
+#include <coroutine>
+#include <cstdint>
+#include <expected>
+#include <limits>
+#include <ranges>
+#include <string>
+#include <string_view>
+#include <system_error>
+#include <utility>
+
+export module nihil.util:parse_size;
+
+import nihil.core;
+import nihil.error;
+import nihil.monad;
+
+import :ctype;
+
+namespace nihil {
+
+/*
+ * Map a single unit suffix character to its multiplier.  The suffixes are
+ * case-insensitive powers of 1024: k = 2^10, m = 2^20, g = 2^30, t = 2^40
+ * and p = 2^50.  Any other character yields errc::invalid_unit.
+ */
+template<typename Char>
+auto get_multiplier(Char c) -> std::expected<std::uint64_t, error>
+{
+    constexpr auto one = std::uint64_t{1};
+
+    switch (c) {
+    case 'k': case 'K':
+        return one << 10;
+    case 'm': case 'M':
+        return one << 20;
+    case 'g': case 'G':
+        return one << 30;
+    case 't': case 'T':
+        return one << 40;
+    case 'p': case 'P':
+        return one << 50;
+    default:
+        return std::unexpected(error(errc::invalid_unit));
+    }
+}
+
+/*
+ * Parse a string containing a human-formatted size, such as "1024"
+ * or "4g". Parsing is always done in the "C" locale and does not
+ * recognise thousands separators or negative numbers.
+ *
+ * The string must be one or more decimal digits, optionally followed by
+ * exactly one unit suffix accepted by get_multiplier() (k, m, g, t or p in
+ * either case, each a power of 1024).
+ *
+ * T is the type of the parsed value. On failure the result holds:
+ *  - errc::empty_string if the string does not start with a digit
+ *    (this includes the empty string);
+ *  - errc::invalid_unit if the suffix is unknown or longer than one
+ *    character;
+ *  - std::errc::result_out_of_range if the value does not fit in T.
+ *
+ * NOTE(review): this function is a coroutine returning std::expected;
+ * presumably nihil.monad supplies the coroutine support so that co_await on
+ * an std::expected yields its value or propagates its error - confirm.
+ */
+export template<typename T, typename Char> [[nodiscard]]
+auto parse_size(std::basic_string_view<Char> str)
+ -> std::expected<T, error>
+{
+ // Extract the numeric part of the string.
+ auto it = std::ranges::find_if_not(str, is_c_digit);
+ auto num_str = std::basic_string_view<Char>(
+ std::ranges::begin(str), it);
+
+ if (num_str.empty())
+ co_return std::unexpected(error(errc::empty_string));
+
+ auto ret = T{0};
+
+ // Accumulate the digits, checking before each step that neither the
+ // multiplication by 10 nor the addition of the digit can exceed T's
+ // maximum value.
+ for (auto c : num_str) {
+ if (ret > (std::numeric_limits<T>::max() / 10))
+ co_return std::unexpected(error(
+ std::errc::result_out_of_range));
+ ret *= 10;
+
+ auto digit = static_cast<T>(c - '0');
+ if ((std::numeric_limits<T>::max() - digit) < ret)
+ co_return std::unexpected(error(
+ std::errc::result_out_of_range));
+ ret += digit;
+ }
+
+ if (it == str.end())
+ // No multiplier.
+ co_return ret;
+
+ // Take the character after the digits as the unit suffix.
+ auto mchar = *it++;
+
+ if (it != str.end())
+ // Multiplier is more than one character.
+ co_return std::unexpected(error(errc::invalid_unit));
+
+ auto mult = co_await get_multiplier(mchar);
+
+ // Reject the value if scaling it by the multiplier would exceed T's
+ // maximum; cmp_greater compares the two differently-typed integers
+ // by value.
+ if (std::cmp_greater(ret, std::numeric_limits<T>::max() / mult))
+ co_return std::unexpected(error(
+ std::errc::result_out_of_range));
+
+ co_return ret * mult;
+}
+
+// Convenience overload: parse a size from a NUL-terminated narrow string.
+// 's' must not be null.
+export template<typename T>
+[[nodiscard]] inline auto parse_size(char const *s)
+{
+    auto const text = std::string_view(s);
+    return parse_size<T>(text);
+}
+
+// Convenience overload: parse a size from a NUL-terminated wide string.
+// 's' must not be null.
+export template<typename T>
+[[nodiscard]] inline auto parse_size(wchar_t const *s)
+{
+    auto const text = std::wstring_view(s);
+    return parse_size<T>(text);
+}
+
+}
diff --git a/nihil.util/skipws.ccm b/nihil.util/skipws.ccm
new file mode 100644
index 0000000..4813ae8
--- /dev/null
+++ b/nihil.util/skipws.ccm
@@ -0,0 +1,40 @@
+/*
+ * This source code is released into the public domain.
+ */
+
+module;
+
+#include <algorithm>
+#include <locale>
+#include <ranges>
+#include <string>
+
+export module nihil.util:skipws;
+
+import :ctype;
+
+namespace nihil {
+
+/*
+ * Return the given string with leading whitespace removed.  Whitespace is
+ * whatever the given locale classifies as std::ctype_base::space; if the
+ * whole string is whitespace, the result is empty.
+ */
+
+export template<typename Char> [[nodiscard]]
+auto skipws(std::basic_string_view<Char> text,
+            std::locale const &locale = std::locale())
+    -> std::basic_string_view<Char>
+{
+    auto const last = std::ranges::end(text);
+    auto const first_nonws = std::ranges::find_if_not(
+        std::ranges::begin(text), last,
+        ctype_is(std::ctype_base::space, locale));
+
+    return std::basic_string_view<Char>(first_nonws, last);
+}
+
+// In-place variant: strip leading whitespace from *text.  'text' must not
+// be null.
+export template<typename Char>
+auto skipws(std::basic_string_view<Char> *text,
+            std::locale const &locale = std::locale())
+    -> void
+{
+    auto const trimmed = skipws(*text, locale);
+    *text = trimmed;
+}
+
+} // namespace nihil
diff --git a/nihil.util/tabulate.ccm b/nihil.util/tabulate.ccm
new file mode 100644
index 0000000..5998b24
--- /dev/null
+++ b/nihil.util/tabulate.ccm
@@ -0,0 +1,312 @@
+/*
+ * This source code is released into the public domain.
+ */
+
+module;
+
+#include <algorithm>
+#include <cstdlib>
+#include <format>
+#include <ranges>
+#include <iterator>
+#include <vector>
+
+export module nihil.util:tabulate;
+
+import nihil.error;
+import :ctype;
+
+namespace nihil {
+
+/*
+ * tabulate: format the given range in an ASCII table and write the output
+ * to the given output iterator. The range's values will be converted to
+ * strings as if by std::format.
+ *
+ * tabulate is implemented by copying the range; this allows it to work on
+ * input/forward ranges at the cost of slightly increased memory use.
+ *
+ * The table spec is a string consisting of zero or more field formats,
+ * formatted as {flags:fieldname}; both flags and fieldname are optional.
+ * If there are fewer field formats than fields, the remaining fields
+ * are formatted as if by {:}.
+ *
+ * The following flags are supported:
+ *
+ * < left-align this column (default)
+ * > right-align this column
+ */
+
+// Exception thrown by the table spec parser when the spec string is
+// malformed; 'what' describes the problem.
+export struct table_spec_error : error {
+    table_spec_error(std::string_view what) : error(what) {}
+};
+
+/*
+ * The specification for a single field (column): its header name, its
+ * alignment and the width every value in the column is padded to.
+ */
+template<typename Char>
+struct field_spec {
+    enum align_t { left, right };
+
+    // Get the name of this field.
+    auto name(this field_spec const &self)
+        -> std::basic_string_view<Char>
+    {
+        return self.m_name;
+    }
+
+    // Set the name of this field.  Only a view is stored, so the
+    // characters it refers to must outlive this field_spec.
+    auto name(this field_spec &self,
+              std::basic_string_view<Char> new_name)
+        -> void
+    {
+        self.m_name = new_name;
+    }
+
+    // Set this field's alignment.
+    auto align(this field_spec &self, align_t new_align) -> void
+    {
+        self.m_align = new_align;
+    }
+
+    // Ensure the length of this field is at least the given width.
+    // The width never shrinks.
+    auto ensure_width(this field_spec &self, std::size_t newwidth)
+        -> void
+    {
+        self.m_width = std::max(self.m_width, newwidth);
+    }
+
+    // Format an object to a string based on our field spec.  Currently
+    // every object is formatted as if by "{}".
+    [[nodiscard]] auto format(this field_spec const &, auto &&obj)
+        -> std::basic_string<Char>
+    {
+        auto format_string = std::basic_string<Char>{'{', '}'};
+        return std::format(std::runtime_format(format_string), obj);
+    }
+
+    // Print a column value to an output iterator according to our field
+    // spec.  If is_last is true, this is the last field on the line, so
+    // we won't output any trailing padding.  On return, 'out' has been
+    // advanced past everything that was written.
+    auto print(this field_spec const &self,
+               std::basic_string_view<Char> value,
+               std::output_iterator<Char> auto &out,
+               bool is_last)
+        -> void
+    {
+        // A value wider than the column gets no padding; subtracting
+        // unconditionally would wrap around to a huge unsigned count.
+        auto const padding = (value.size() < self.m_width)
+            ? self.m_width - value.size()
+            : std::size_t{0};
+
+        if (self.m_align == right)
+            for (std::size_t i = 0; i < padding; ++i)
+                *out++ = ' ';
+
+        // std::ranges::copy takes the iterator by value, so store the
+        // advanced iterator back into 'out'.  Without this, iterators
+        // that track a position (pointers, container iterators) would
+        // still point at the start of the value and the next write
+        // would overwrite it.
+        out = std::ranges::copy(value, out).out;
+
+        if (!is_last && self.m_align == left)
+            for (std::size_t i = 0; i < padding; ++i)
+                *out++ = ' ';
+    }
+
+private:
+    std::basic_string_view<Char> m_name;
+    std::size_t m_width = 0;
+    align_t m_align = left;
+};
+
+/*
+ * The specification for an entire table: an ordered list of field specs,
+ * one per column.
+ */
+template<typename Char>
+struct table_spec {
+    // Append a new field spec to this table.
+    auto add(this table_spec &self, field_spec<Char> field) -> void
+    {
+        self.m_fields.push_back(std::move(field));
+    }
+
+    // Return the field spec for column 'fieldnr'.  If the column doesn't
+    // exist yet, it and any missing columns before it are created with
+    // default settings.
+    [[nodiscard]] auto field(this table_spec &self, std::size_t fieldnr)
+        -> field_spec<Char> &
+    {
+        auto &fields = self.m_fields;
+
+        if (fieldnr >= fields.size())
+            fields.resize(fieldnr + 1);
+
+        return fields.at(fieldnr);
+    }
+
+    // The number of columns in this table.
+    [[nodiscard]] auto columns(this table_spec const &self) -> std::size_t
+    {
+        return self.m_fields.size();
+    }
+
+    // Read-only access to all the fields in this table, in column order.
+    [[nodiscard]] auto fields(this table_spec const &self)
+        -> std::vector<field_spec<Char>> const &
+    {
+        return self.m_fields;
+    }
+
+private:
+    std::vector<field_spec<Char>> m_fields;
+};
+
+// Parse the field flags, e.g. '<'.
+//
+// 'pos' must refer to the first character after the opening '{'.  Flags
+// are consumed until either ':' (which is consumed too) or '}' (which is
+// left for the caller) is reached; alignment flags are applied to 'field'.
+//
+// Throws table_spec_error on an unknown flag character, or if the input
+// ends before ':' or '}' is seen.
+template<typename Char,
+         std::input_iterator Iterator, std::sentinel_for<Iterator> Sentinel>
+auto parse_field_flags(field_spec<Char> &field, Iterator &pos, Sentinel end)
+    -> void
+{
+    // Compare with != rather than <: the template only requires an input
+    // iterator and a sentinel, which are equality-comparable but not
+    // necessarily ordered.
+    while (pos != end) {
+        switch (*pos) {
+        case '<':
+            field.align(field_spec<Char>::left);
+            break;
+        case '>':
+            field.align(field_spec<Char>::right);
+            break;
+        case ':':
+            // End of flags; skip the separator.
+            ++pos;
+            [[fallthrough]];
+        case '}':
+            return;
+        default:
+            throw table_spec_error("Invalid table spec: "
+                                   "unknown flag character");
+        }
+
+        if (++pos == end)
+            throw table_spec_error("Invalid table spec: "
+                                   "unterminated field");
+    }
+}
+
+// Parse a complete field spec, e.g. "{<:NAME}".
+//
+// 'pos' must refer to the opening '{'; on return it refers to the character
+// after the closing '}'.  The returned field's name is a view into the
+// parsed text, and its width is at least the length of that name.
+//
+// Throws table_spec_error if the spec is malformed.
+template<typename Char,
+         std::input_iterator Iterator, std::sentinel_for<Iterator> Sentinel>
+[[nodiscard]] auto parse_field(Iterator &pos, Sentinel end)
+    -> field_spec<Char>
+{
+    auto result = field_spec<Char>{};
+
+    if (pos == end)
+        throw table_spec_error("Invalid table spec: empty field");
+
+    // Every field spec begins with '{'.
+    if (*pos != '{')
+        throw table_spec_error("Invalid table spec: expected '{'");
+
+    ++pos;
+    if (pos == end)
+        throw table_spec_error("Invalid table spec: unterminated field");
+
+    // Consume the flags, including the ':' separator if there is one.
+    parse_field_flags(result, pos, end);
+
+    // Whatever lies between here and the closing brace is the name.
+    auto const closing = std::ranges::find(pos, end, '}');
+    if (closing == end)
+        throw table_spec_error("Invalid table spec: expected '}'");
+
+    auto const field_name = std::basic_string_view<Char>(pos, closing);
+    result.name(field_name);
+
+    // Continue after the closing brace.
+    pos = std::next(closing);
+
+    // The column must be wide enough to hold its own header.
+    result.ensure_width(result.name().size());
+
+    return result;
+}
+
+// Parse a whole table spec: zero or more field specs, optionally separated
+// and surrounded by whitespace (as classified by the "C" locale).
+// Throws table_spec_error if any field spec is malformed.
+template<typename Char>
+[[nodiscard]] auto parse_table_spec(std::basic_string_view<Char> spec)
+    -> table_spec<Char>
+{
+    auto result = table_spec<Char>();
+
+    auto const last = std::ranges::end(spec);
+
+    // Advance past any whitespace starting at 'it'.
+    auto const skip_space = [last](auto it) {
+        return std::ranges::find_if_not(it, last, is_c_space);
+    };
+
+    // parse_field() advances 'cursor' past the field it consumed.
+    for (auto cursor = skip_space(std::ranges::begin(spec));
+         cursor != last;
+         cursor = skip_space(cursor))
+        result.add(parse_field<Char>(cursor, last));
+
+    return result;
+}
+
+/*
+ * Format 'range' (a range of rows, each itself a range of column values) as
+ * a table described by 'table_spec' and write it to 'out'.
+ *
+ * The first output line is the header, made of the field names from the
+ * spec.  Each subsequent line is one row.  Columns are separated by a single
+ * space, padded to the widest value in the column, and every line ends with
+ * a newline.
+ *
+ * Throws table_spec_error if the table spec is invalid.
+ */
+export template<typename Char,
+                std::ranges::range Range,
+                std::output_iterator<Char> Iterator>
+auto basic_tabulate(std::basic_string_view<Char> table_spec,
+                    Range &&range,
+                    Iterator &&out)
+    -> void
+{
+    using row_type = std::vector<std::basic_string<Char>>;
+
+    auto table = parse_table_spec(table_spec);
+
+    // Our formatted copy of the input.  Row 0 is a placeholder for the
+    // header, which can only be filled in once all columns are known.
+    auto rows = std::vector<row_type>(1);
+
+    // Format every value and widen each column to fit its longest value.
+    for (auto &&input_row : range) {
+        auto &cells = rows.emplace_back();
+
+        // LLVM doesn't have std::enumerate_view yet
+        auto column_nr = std::size_t{0};
+        for (auto &&value : input_row) {
+            auto &field = table.field(column_nr);
+            auto &cell = cells.emplace_back(field.format(value));
+            field.ensure_width(cell.size());
+            ++column_nr;
+        }
+    }
+
+    // Fill in the header row from the field names.
+    auto &header = rows.front();
+    for (auto const &field : table.fields())
+        header.emplace_back(std::from_range, field.name());
+
+    // Write out the header followed by the data rows.
+    for (auto const &cells : rows) {
+        auto const ncells = cells.size();
+
+        for (auto i = std::size_t{0}; i != ncells; ++i) {
+            auto const is_last = (i + 1 == ncells);
+
+            table.field(i).print(cells[i], out, is_last);
+
+            if (!is_last)
+                *out++ = ' ';
+        }
+
+        *out++ = '\n';
+    }
+}
+
+// Narrow-character front end to basic_tabulate().
+export auto tabulate(std::string_view table_spec,
+                     std::ranges::range auto &&range,
+                     std::output_iterator<char> auto &&out)
+{
+    using range_type = decltype(range);
+    using iterator_type = decltype(out);
+
+    return basic_tabulate<char>(table_spec,
+                                std::forward<range_type>(range),
+                                std::forward<iterator_type>(out));
+}
+
+// Wide-character front end to basic_tabulate().
+export auto wtabulate(std::wstring_view table_spec,
+                      std::ranges::range auto &&range,
+                      std::output_iterator<wchar_t> auto &&out)
+{
+    using range_type = decltype(range);
+    using iterator_type = decltype(out);
+
+    return basic_tabulate<wchar_t>(table_spec,
+                                   std::forward<range_type>(range),
+                                   std::forward<iterator_type>(out));
+}
+
+} // namespace nihil
diff --git a/nihil.util/test_ctype.cc b/nihil.util/test_ctype.cc
new file mode 100644
index 0000000..62721d1
--- /dev/null
+++ b/nihil.util/test_ctype.cc
@@ -0,0 +1,373 @@
+/*
+ * This source code is released into the public domain.
+ */
+
+#include <catch2/catch_test_macros.hpp>
+
+import nihil.util;
+
+// Checks the space predicates for char and wchar_t using the predefined
+// global-locale test, the predefined C-locale test and a "C.UTF-8" test.
+// NOTE(review): the expectations for the global-locale tests presumably
+// assume the global locale is "C" when the predicates are initialised, and
+// std::locale("C.UTF-8") requires that locale to exist on the host - confirm.
+TEST_CASE("ctype: space", "[ctype]") {
+ auto is_utf8_space =
+ nihil::ctype_is(std::ctype_base::space,
+ std::locale("C.UTF-8"));
+
+ // '\v' (vertical tab) is a space
+ REQUIRE(nihil::is_space('\v') == true);
+ REQUIRE(nihil::is_space(L'\v') == true);
+
+ REQUIRE(nihil::is_c_space('\v') == true);
+ REQUIRE(nihil::is_c_space(L'\v') == true);
+
+ REQUIRE(is_utf8_space('\v') == true);
+ REQUIRE(is_utf8_space(L'\v') == true);
+
+ // 'x' is not a space
+ REQUIRE(nihil::is_space('x') == false);
+ REQUIRE(nihil::is_space(L'x') == false);
+
+ REQUIRE(nihil::is_c_space('x') == false);
+ REQUIRE(nihil::is_c_space(L'x') == false);
+
+ REQUIRE(is_utf8_space('x') == false);
+ REQUIRE(is_utf8_space(L'x') == false);
+
+ // U+2003 EM SPACE is expected to be a space only in the UTF-8 locale
+ REQUIRE(nihil::is_space(L'\u2003') == false);
+ REQUIRE(nihil::is_c_space(L'\u2003') == false);
+ REQUIRE(is_utf8_space(L'\u2003') == true);
+}
+
+// Checks the print predicates for char and wchar_t in the global, C and
+// "C.UTF-8" locales.
+TEST_CASE("ctype: print", "[ctype]") {
+ auto is_utf8_print =
+ nihil::ctype_is(std::ctype_base::print,
+ std::locale("C.UTF-8"));
+
+ // 'x' is printable
+ REQUIRE(nihil::is_print('x') == true);
+ REQUIRE(nihil::is_print(L'x') == true);
+
+ REQUIRE(nihil::is_c_print('x') == true);
+ REQUIRE(nihil::is_c_print(L'x') == true);
+
+ REQUIRE(is_utf8_print('x') == true);
+ REQUIRE(is_utf8_print(L'x') == true);
+
+ // '\003' is not printable
+ REQUIRE(nihil::is_print('\003') == false);
+ REQUIRE(nihil::is_print(L'\003') == false);
+
+ REQUIRE(nihil::is_c_print('\003') == false);
+ REQUIRE(nihil::is_c_print(L'\003') == false);
+
+ REQUIRE(is_utf8_print('\003') == false);
+ REQUIRE(is_utf8_print(L'\003') == false);
+
+ // U+0410 CYRILLIC CAPITAL LETTER A is expected to be printable only in
+ // the UTF-8 locale
+ REQUIRE(nihil::is_print(L'\u0410') == false);
+ REQUIRE(nihil::is_c_print(L'\u0410') == false);
+ REQUIRE(is_utf8_print(L'\u0410') == true);
+}
+
+// Checks the cntrl predicates for char and wchar_t in the global, C and
+// "C.UTF-8" locales.
+TEST_CASE("ctype: cntrl", "[ctype]") {
+ auto is_utf8_cntrl =
+ nihil::ctype_is(std::ctype_base::cntrl,
+ std::locale("C.UTF-8"));
+
+ // '\003' is a control character
+ REQUIRE(nihil::is_cntrl('\003') == true);
+ REQUIRE(nihil::is_cntrl(L'\003') == true);
+
+ REQUIRE(nihil::is_c_cntrl('\003') == true);
+ REQUIRE(nihil::is_c_cntrl(L'\003') == true);
+
+ REQUIRE(is_utf8_cntrl('\003') == true);
+ REQUIRE(is_utf8_cntrl(L'\003') == true);
+
+
+ // 'x' is not a control character
+ REQUIRE(nihil::is_cntrl('x') == false);
+ REQUIRE(nihil::is_cntrl(L'x') == false);
+
+ REQUIRE(nihil::is_c_cntrl('x') == false);
+ REQUIRE(nihil::is_c_cntrl(L'x') == false);
+
+ REQUIRE(is_utf8_cntrl('x') == false);
+ REQUIRE(is_utf8_cntrl(L'x') == false);
+
+ // U+00AD SOFT HYPHEN is expected to be a control character only in the
+ // UTF-8 locale.
+ REQUIRE(nihil::is_cntrl(L'\u00ad') == false);
+ REQUIRE(nihil::is_c_cntrl(L'\u00ad') == false);
+ REQUIRE(is_utf8_cntrl(L'\u00ad') == true);
+}
+
+// Checks the upper predicates for char and wchar_t in the global, C and
+// "C.UTF-8" locales.
+TEST_CASE("ctype: upper", "[ctype]") {
+ auto is_utf8_upper =
+ nihil::ctype_is(std::ctype_base::upper,
+ std::locale("C.UTF-8"));
+
+ // 'A' is upper case
+ REQUIRE(nihil::is_upper('A') == true);
+ REQUIRE(nihil::is_upper(L'A') == true);
+
+ REQUIRE(nihil::is_c_upper('A') == true);
+ REQUIRE(nihil::is_c_upper(L'A') == true);
+
+ REQUIRE(is_utf8_upper('A') == true);
+ REQUIRE(is_utf8_upper(L'A') == true);
+
+ // 'a' is not upper case
+ REQUIRE(nihil::is_upper('a') == false);
+ REQUIRE(nihil::is_upper(L'a') == false);
+
+ REQUIRE(nihil::is_c_upper('a') == false);
+ REQUIRE(nihil::is_c_upper(L'a') == false);
+
+ REQUIRE(is_utf8_upper('a') == false);
+ REQUIRE(is_utf8_upper(L'a') == false);
+
+ // U+0410 CYRILLIC CAPITAL LETTER A is expected to be upper case only in
+ // the UTF-8 locale
+ REQUIRE(nihil::is_upper(L'\u0410') == false);
+ REQUIRE(nihil::is_c_upper(L'\u0410') == false);
+ REQUIRE(is_utf8_upper(L'\u0410') == true);
+}
+
+// Checks the lower predicates for char and wchar_t in the global, C and
+// "C.UTF-8" locales.
+TEST_CASE("ctype: lower", "[ctype]") {
+ auto is_utf8_lower =
+ nihil::ctype_is(std::ctype_base::lower,
+ std::locale("C.UTF-8"));
+
+ // 'a' is lower case
+ REQUIRE(nihil::is_lower('a') == true);
+ REQUIRE(nihil::is_lower(L'a') == true);
+
+ REQUIRE(nihil::is_c_lower('a') == true);
+ REQUIRE(nihil::is_c_lower(L'a') == true);
+
+ REQUIRE(is_utf8_lower('a') == true);
+ REQUIRE(is_utf8_lower(L'a') == true);
+
+ // 'A' is not lower case
+ REQUIRE(nihil::is_lower('A') == false);
+ REQUIRE(nihil::is_lower(L'A') == false);
+
+ REQUIRE(nihil::is_c_lower('A') == false);
+ REQUIRE(nihil::is_c_lower(L'A') == false);
+
+ REQUIRE(is_utf8_lower('A') == false);
+ REQUIRE(is_utf8_lower(L'A') == false);
+
+ // U+0430 CYRILLIC SMALL LETTER A is expected to be lower case only in
+ // the UTF-8 locale
+ REQUIRE(nihil::is_lower(L'\u0430') == false);
+ REQUIRE(nihil::is_c_lower(L'\u0430') == false);
+ REQUIRE(is_utf8_lower(L'\u0430') == true);
+}
+
+// Checks the alpha predicates for char and wchar_t in the global, C and
+// "C.UTF-8" locales.
+TEST_CASE("ctype: alpha", "[ctype]") {
+ auto is_utf8_alpha =
+ nihil::ctype_is(std::ctype_base::alpha,
+ std::locale("C.UTF-8"));
+
+ // 'a' is alphabetical
+ REQUIRE(nihil::is_alpha('a') == true);
+ REQUIRE(nihil::is_alpha(L'a') == true);
+
+ REQUIRE(nihil::is_c_alpha('a') == true);
+ REQUIRE(nihil::is_c_alpha(L'a') == true);
+
+ REQUIRE(is_utf8_alpha('a') == true);
+ REQUIRE(is_utf8_alpha(L'a') == true);
+
+ // '1' is not alphabetical
+ REQUIRE(nihil::is_alpha('1') == false);
+ REQUIRE(nihil::is_alpha(L'1') == false);
+
+ REQUIRE(nihil::is_c_alpha('1') == false);
+ REQUIRE(nihil::is_c_alpha(L'1') == false);
+
+ REQUIRE(is_utf8_alpha('1') == false);
+ REQUIRE(is_utf8_alpha(L'1') == false);
+
+ // U+0430 CYRILLIC SMALL LETTER A is expected to be alphabetical only in
+ // the UTF-8 locale
+ REQUIRE(nihil::is_alpha(L'\u0430') == false);
+ REQUIRE(nihil::is_c_alpha(L'\u0430') == false);
+ REQUIRE(is_utf8_alpha(L'\u0430') == true);
+}
+
+// Checks the digit predicates for char and wchar_t in the global, C and
+// "C.UTF-8" locales.
+TEST_CASE("ctype: digit", "[ctype]") {
+ auto is_utf8_digit =
+ nihil::ctype_is(std::ctype_base::digit,
+ std::locale("C.UTF-8"));
+
+ // '1' is a digit
+ REQUIRE(nihil::is_digit('1') == true);
+ REQUIRE(nihil::is_digit(L'1') == true);
+
+ REQUIRE(nihil::is_c_digit('1') == true);
+ REQUIRE(nihil::is_c_digit(L'1') == true);
+
+ REQUIRE(is_utf8_digit('1') == true);
+ REQUIRE(is_utf8_digit(L'1') == true);
+
+ // 'a' is not a digit
+ REQUIRE(nihil::is_digit('a') == false);
+ REQUIRE(nihil::is_digit(L'a') == false);
+
+ REQUIRE(nihil::is_c_digit('a') == false);
+ REQUIRE(nihil::is_c_digit(L'a') == false);
+
+ REQUIRE(is_utf8_digit('a') == false);
+ REQUIRE(is_utf8_digit(L'a') == false);
+
+ // U+0660 ARABIC-INDIC DIGIT ZERO is expected to be a digit only in the
+ // UTF-8 locale
+ REQUIRE(nihil::is_digit(L'\u0660') == false);
+ REQUIRE(nihil::is_c_digit(L'\u0660') == false);
+ REQUIRE(is_utf8_digit(L'\u0660') == true);
+}
+
+// Checks the punct predicates for char and wchar_t in the global, C and
+// "C.UTF-8" locales.
+TEST_CASE("ctype: punct", "[ctype]") {
+ auto is_utf8_punct =
+ nihil::ctype_is(std::ctype_base::punct,
+ std::locale("C.UTF-8"));
+
+ // ';' is punctuation
+ REQUIRE(nihil::is_punct(';') == true);
+ REQUIRE(nihil::is_punct(L';') == true);
+
+ REQUIRE(nihil::is_c_punct(';') == true);
+ REQUIRE(nihil::is_c_punct(L';') == true);
+
+ REQUIRE(is_utf8_punct(';') == true);
+ REQUIRE(is_utf8_punct(L';') == true);
+
+ // 'a' is not punctuation
+ REQUIRE(nihil::is_punct('a') == false);
+ REQUIRE(nihil::is_punct(L'a') == false);
+
+ REQUIRE(nihil::is_c_punct('a') == false);
+ REQUIRE(nihil::is_c_punct(L'a') == false);
+
+ REQUIRE(is_utf8_punct('a') == false);
+ REQUIRE(is_utf8_punct(L'a') == false);
+
+ // U+00A1 INVERTED EXCLAMATION MARK is expected to be punctuation only
+ // in the UTF-8 locale
+ REQUIRE(nihil::is_punct(L'\u00A1') == false);
+ REQUIRE(nihil::is_c_punct(L'\u00A1') == false);
+ REQUIRE(is_utf8_punct(L'\u00A1') == true);
+}
+
+// Checks the xdigit predicates for char and wchar_t in the global, C and
+// "C.UTF-8" locales. Only ASCII characters are tested here.
+TEST_CASE("ctype: xdigit", "[ctype]") {
+ auto is_utf8_xdigit =
+ nihil::ctype_is(std::ctype_base::xdigit,
+ std::locale("C.UTF-8"));
+
+ // 'f' is an xdigit
+ REQUIRE(nihil::is_xdigit('f') == true);
+ REQUIRE(nihil::is_xdigit(L'f') == true);
+
+ REQUIRE(nihil::is_c_xdigit('f') == true);
+ REQUIRE(nihil::is_c_xdigit(L'f') == true);
+
+ REQUIRE(is_utf8_xdigit('f') == true);
+ REQUIRE(is_utf8_xdigit(L'f') == true);
+
+ // 'g' is not an xdigit
+ REQUIRE(nihil::is_xdigit('g') == false);
+ REQUIRE(nihil::is_xdigit(L'g') == false);
+
+ REQUIRE(nihil::is_c_xdigit('g') == false);
+ REQUIRE(nihil::is_c_xdigit(L'g') == false);
+
+ REQUIRE(is_utf8_xdigit('g') == false);
+ REQUIRE(is_utf8_xdigit(L'g') == false);
+}
+
+// Checks the blank predicates for char and wchar_t in the global, C and
+// "C.UTF-8" locales. Only ASCII characters are tested here.
+TEST_CASE("ctype: blank", "[ctype]") {
+ auto is_utf8_blank =
+ nihil::ctype_is(std::ctype_base::blank,
+ std::locale("C.UTF-8"));
+
+ // '\t' is a blank
+ REQUIRE(nihil::is_blank('\t') == true);
+ REQUIRE(nihil::is_blank(L'\t') == true);
+
+ REQUIRE(nihil::is_c_blank('\t') == true);
+ REQUIRE(nihil::is_c_blank(L'\t') == true);
+
+ REQUIRE(is_utf8_blank('\t') == true);
+ REQUIRE(is_utf8_blank(L'\t') == true);
+
+ // '\v' is not a blank
+ REQUIRE(nihil::is_blank('\v') == false);
+ REQUIRE(nihil::is_blank(L'\v') == false);
+
+ REQUIRE(nihil::is_c_blank('\v') == false);
+ REQUIRE(nihil::is_c_blank(L'\v') == false);
+
+ REQUIRE(is_utf8_blank('\v') == false);
+ REQUIRE(is_utf8_blank(L'\v') == false);
+
+ // No non-ASCII blank character is tested: there don't seem to be any
+ // UTF-8 blank characters, at least in FreeBSD libc.
+}
+
+// Checks the alnum predicates for char and wchar_t in the global, C and
+// "C.UTF-8" locales.
+TEST_CASE("ctype: alnum", "[ctype]") {
+ auto is_utf8_alnum =
+ nihil::ctype_is(std::ctype_base::alnum,
+ std::locale("C.UTF-8"));
+
+ // 'a' is alphanumeric
+ REQUIRE(nihil::is_alnum('a') == true);
+ REQUIRE(nihil::is_alnum(L'a') == true);
+
+ REQUIRE(nihil::is_c_alnum('a') == true);
+ REQUIRE(nihil::is_c_alnum(L'a') == true);
+
+ REQUIRE(is_utf8_alnum('a') == true);
+ REQUIRE(is_utf8_alnum(L'a') == true);
+
+ // '\t' is not alphanumeric
+ REQUIRE(nihil::is_alnum('\t') == false);
+ REQUIRE(nihil::is_alnum(L'\t') == false);
+
+ REQUIRE(nihil::is_c_alnum('\t') == false);
+ REQUIRE(nihil::is_c_alnum(L'\t') == false);
+
+ REQUIRE(is_utf8_alnum('\t') == false);
+ REQUIRE(is_utf8_alnum(L'\t') == false);
+
+ // U+0430 CYRILLIC SMALL LETTER A is expected to be alphanumeric only in
+ // the UTF-8 locale
+ REQUIRE(nihil::is_alnum(L'\u0430') == false);
+ REQUIRE(nihil::is_c_alnum(L'\u0430') == false);
+ REQUIRE(is_utf8_alnum(L'\u0430') == true);
+}
+
+// Checks the graph predicates for char and wchar_t in the global, C and
+// "C.UTF-8" locales.
+TEST_CASE("ctype: graph", "[ctype]") {
+ auto is_utf8_graph =
+ nihil::ctype_is(std::ctype_base::graph,
+ std::locale("C.UTF-8"));
+
+ // 'a' is graphical
+ REQUIRE(nihil::is_graph('a') == true);
+ REQUIRE(nihil::is_graph(L'a') == true);
+
+ REQUIRE(nihil::is_c_graph('a') == true);
+ REQUIRE(nihil::is_c_graph(L'a') == true);
+
+ REQUIRE(is_utf8_graph('a') == true);
+ REQUIRE(is_utf8_graph(L'a') == true);
+
+ // '\t' is not graphical
+ REQUIRE(nihil::is_graph('\t') == false);
+ REQUIRE(nihil::is_graph(L'\t') == false);
+
+ REQUIRE(nihil::is_c_graph('\t') == false);
+ REQUIRE(nihil::is_c_graph(L'\t') == false);
+
+ REQUIRE(is_utf8_graph('\t') == false);
+ REQUIRE(is_utf8_graph(L'\t') == false);
+
+ // U+0430 CYRILLIC SMALL LETTER A is expected to be graphical only in the
+ // UTF-8 locale
+ REQUIRE(nihil::is_graph(L'\u0430') == false);
+ REQUIRE(nihil::is_c_graph(L'\u0430') == false);
+ REQUIRE(is_utf8_graph(L'\u0430') == true);
+}
diff --git a/nihil.util/test_next_word.cc b/nihil.util/test_next_word.cc
new file mode 100644
index 0000000..7e61237
--- /dev/null
+++ b/nihil.util/test_next_word.cc
@@ -0,0 +1,65 @@
+/*
+ * This source code is released into the public domain.
+ */
+
+#include <locale>
+#include <string>
+
+#include <catch2/catch_test_macros.hpp>
+
+import nihil.util;
+
+// Both forms of next_word return the first word; the remainder starts at the
+// whitespace that ended the word.
+TEST_CASE("next_word: basic", "[next_word]")
+{
+ using namespace std::literals;
+ auto s = "foo bar baz"sv;
+
+ // Value form: returns {word, rest} and leaves s untouched.
+ auto words = nihil::next_word(s);
+ REQUIRE(words.first == "foo");
+ REQUIRE(words.second == " bar baz");
+
+ // Pointer form: returns the word and updates s to the remainder.
+ auto word = nihil::next_word(&s);
+ REQUIRE(word == "foo");
+ REQUIRE(s == " bar baz");
+}
+
+// NOTE(review): as shown here the input contains only single spaces and is
+// identical to the "basic" case; verify that the literals really contain the
+// consecutive spaces the test name implies.
+TEST_CASE("next_word: multiple spaces", "[next_word]")
+{
+ using namespace std::literals;
+ auto s = "foo bar baz"sv;
+
+ // Value form: returns {word, rest} and leaves s untouched.
+ auto words = nihil::next_word(s);
+ REQUIRE(words.first == "foo");
+ REQUIRE(words.second == " bar baz");
+
+ // Pointer form: returns the word and updates s to the remainder.
+ auto word = nihil::next_word(&s);
+ REQUIRE(word == "foo");
+ REQUIRE(s == " bar baz");
+}
+
+// Leading whitespace (here a space and a tab) is skipped before the word and
+// is not part of either returned view.
+TEST_CASE("next_word: leading spaces", "[next_word]")
+{
+ using namespace std::literals;
+ auto s = " \tfoo bar baz"sv;
+
+ // Value form: returns {word, rest} and leaves s untouched.
+ auto words = nihil::next_word(s);
+ REQUIRE(words.first == "foo");
+ REQUIRE(words.second == " bar baz");
+
+ // Pointer form: returns the word and updates s to the remainder.
+ auto word = nihil::next_word(&s);
+ REQUIRE(word == "foo");
+ REQUIRE(s == " bar baz");
+}
+
+// Word splitting follows the locale's notion of whitespace. The words in the
+// input are separated by U+2003 EM SPACE.
+TEST_CASE("next_word: locale", "[next_word]")
+{
+ using namespace std::literals;
+ auto s = L"\u2003foo\u2003bar\u2003baz"sv;
+
+ // With the default locale U+2003 is not expected to count as
+ // whitespace, so the whole string is a single word.
+ auto words = nihil::next_word(s);
+ REQUIRE(words.first == s);
+
+ // With the UTF-8 locale U+2003 is whitespace and the string splits.
+ words = nihil::next_word(s, std::locale("C.UTF-8"));
+ REQUIRE(words.first == L"foo");
+ REQUIRE(words.second == L"\u2003bar\u2003baz");
+}
diff --git a/nihil.util/test_parse_size.cc b/nihil.util/test_parse_size.cc
new file mode 100644
index 0000000..4f4d018
--- /dev/null
+++ b/nihil.util/test_parse_size.cc
@@ -0,0 +1,168 @@
+/*
+ * This source code is released into the public domain.
+ */
+
+#include <cstdint>
+#include <system_error>
+
+#include <catch2/catch_test_macros.hpp>
+
+import nihil.core;
+import nihil.util;
+
+TEST_CASE("parse_size: empty value", "[nihil]")
+{
+	using nihil::parse_size;
+
+	// An empty string must be rejected with errc::empty_string.
+	auto const parsed = parse_size<std::uint64_t>("");
+	REQUIRE(!parsed);
+	REQUIRE(parsed.error() == nihil::errc::empty_string);
+}
+
+TEST_CASE("parse_size: basic", "[nihil]")
+{
+	using namespace nihil;
+
+	// Numbers without a unit suffix, including the limits of 16-bit
+	// signed and unsigned result types.
+
+	SECTION("bare number") {
+		auto n = parse_size<std::uint64_t>("1024").value();
+		REQUIRE(n == 1024);
+	}
+
+	SECTION("max value, unsigned") {
+		auto n = parse_size<std::uint16_t>("65535").value();
+		REQUIRE(n == 65535);
+	}
+
+	SECTION("max value, signed") {
+		// Parse into the signed type so that the signed upper limit
+		// (32767) is what is actually being exercised.
+		auto n = parse_size<std::int16_t>("32767").value();
+		REQUIRE(n == 32767);
+	}
+
+	SECTION("overflow by 1, unsigned") {
+		auto n = parse_size<std::uint16_t>("65536");
+		REQUIRE(!n);
+		REQUIRE(n.error() == std::errc::result_out_of_range);
+	}
+
+	SECTION("overflow by 1, signed") {
+		auto n = parse_size<std::int16_t>("32768");
+		REQUIRE(!n);
+		REQUIRE(n.error() == std::errc::result_out_of_range);
+	}
+
+	SECTION("overflow by many, unsigned") {
+		auto n = parse_size<std::uint16_t>("100000");
+		REQUIRE(!n);
+		REQUIRE(n.error() == std::errc::result_out_of_range);
+	}
+
+	SECTION("overflow by many, signed") {
+		auto n = parse_size<std::int16_t>("100000");
+		REQUIRE(!n);
+		REQUIRE(n.error() == std::errc::result_out_of_range);
+	}
+}
+
+TEST_CASE("parse_size: invalid multiplier", "[nihil]")
+{
+	using nihil::parse_size;
+
+	// An unknown unit letter is rejected.
+	auto const unknown_unit = parse_size<std::uint64_t>("4z");
+	REQUIRE(!unknown_unit);
+	REQUIRE(unknown_unit.error() == nihil::errc::invalid_unit);
+
+	// Extra characters after an otherwise valid unit are rejected too.
+	auto const trailing_junk = parse_size<std::uint64_t>("4kz");
+	REQUIRE(!trailing_junk);
+	REQUIRE(trailing_junk.error() == nihil::errc::invalid_unit);
+}
+
+TEST_CASE("parse_size: multipliers", "[nihil]")
+{
+	using nihil::parse_size;
+
+	// Every section parses "4<unit>"; each successive unit is expected to
+	// scale the value by another factor of 1024.  Both operands are 64-bit
+	// so the expected products are computed without overflow.
+	auto const count = std::uint64_t{4};
+	auto const step = std::uint64_t{1024};
+
+	SECTION("k") {
+		auto const parsed = parse_size<std::uint64_t>("4k").value();
+		REQUIRE(parsed == count * step);
+	}
+
+	SECTION("m") {
+		auto const parsed = parse_size<std::uint64_t>("4m").value();
+		REQUIRE(parsed == count * step * step);
+	}
+
+	SECTION("g") {
+		auto const parsed = parse_size<std::uint64_t>("4g").value();
+		REQUIRE(parsed == count * step * step * step);
+	}
+
+	SECTION("t") {
+		auto const parsed = parse_size<std::uint64_t>("4t").value();
+		REQUIRE(parsed == count * step * step * step * step);
+	}
+
+	SECTION("p") {
+		auto const parsed = parse_size<std::uint64_t>("4p").value();
+		REQUIRE(parsed == count * step * step * step * step * step);
+	}
+}
+
+TEST_CASE("parse_size: multiplier overflow", "[nihil]")
+{
+	using namespace nihil;
+
+	// The digits alone fit in the result type, but applying the unit
+	// multiplier pushes the value one past the type's maximum.
+
+	SECTION("unsigned") {
+		// 64 * 1024 == 65536, one more than the uint16_t maximum.
+		auto n = parse_size<std::uint16_t>("64k");
+		REQUIRE(!n);
+		REQUIRE(n.error() == std::errc::result_out_of_range);
+	}
+
+	SECTION("signed") {
+		// 32 * 1024 == 32768, one more than the int16_t maximum.
+		auto n = parse_size<std::int16_t>("32k");
+		REQUIRE(!n);
+		REQUIRE(n.error() == std::errc::result_out_of_range);
+	}
+}
+
+TEST_CASE("parse_size: wide", "[nihil]")
+{
+	using nihil::parse_size;
+
+	// Same check as the narrow "bare number" case, but with wide input.
+	SECTION("bare number") {
+		auto const parsed = parse_size<std::uint64_t>(L"1024").value();
+		REQUIRE(parsed == 1024);
+	}
+}
+
+TEST_CASE("parse_size: wide multipliers", "[nihil]")
+{
+	using nihil::parse_size;
+
+	// Wide-string variant of the multiplier checks: "4<unit>" is expected
+	// to equal 4 scaled by one more factor of 1024 per unit step.
+	auto const count = std::uint64_t{4};
+	auto const step = std::uint64_t{1024};
+
+	SECTION("k") {
+		auto const parsed = parse_size<std::uint64_t>(L"4k").value();
+		REQUIRE(parsed == count * step);
+	}
+
+	SECTION("m") {
+		auto const parsed = parse_size<std::uint64_t>(L"4m").value();
+		REQUIRE(parsed == count * step * step);
+	}
+
+	SECTION("g") {
+		auto const parsed = parse_size<std::uint64_t>(L"4g").value();
+		REQUIRE(parsed == count * step * step * step);
+	}
+
+	SECTION("t") {
+		auto const parsed = parse_size<std::uint64_t>(L"4t").value();
+		REQUIRE(parsed == count * step * step * step * step);
+	}
+
+	SECTION("p") {
+		auto const parsed = parse_size<std::uint64_t>(L"4p").value();
+		REQUIRE(parsed == count * step * step * step * step * step);
+	}
+}
diff --git a/nihil.util/test_skipws.cc b/nihil.util/test_skipws.cc
new file mode 100644
index 0000000..837c1f3
--- /dev/null
+++ b/nihil.util/test_skipws.cc
@@ -0,0 +1,45 @@
+/*
+ * This source code is released into the public domain.
+ */
+
+#include <locale>
+#include <string>
+using namespace std::literals;
+
+#include <catch2/catch_test_macros.hpp>
+
+import nihil.util;
+
+TEST_CASE("skipws: basic", "[skipws]")
+{
+	// Nothing to strip.
+	auto const plain = nihil::skipws("foo"sv);
+	REQUIRE(plain == "foo");
+
+	// Whitespace in front of the text is removed.
+	auto const leading = nihil::skipws(" foo"sv);
+	REQUIRE(leading == "foo");
+
+	// Whitespace after the text is kept.
+	auto const trailing = nihil::skipws("foo "sv);
+	REQUIRE(trailing == "foo ");
+
+	// Whitespace between words is kept.
+	auto const inner = nihil::skipws("foo bar"sv);
+	REQUIRE(inner == "foo bar");
+}
+
+TEST_CASE("skipws: pointer", "[skipws]")
+{
+	// Runs the in-place (pointer) overload on a local copy of the view
+	// and hands back the modified view.
+	auto const stripped = [](auto text) {
+		nihil::skipws(&text);
+		return text;
+	};
+
+	// Nothing to strip.
+	REQUIRE(stripped("foo"sv) == "foo");
+
+	// Whitespace in front of the text is removed.
+	REQUIRE(stripped(" foo"sv) == "foo");
+
+	// Whitespace after the text is kept.
+	REQUIRE(stripped("foo "sv) == "foo ");
+
+	// Whitespace between words is kept.
+	REQUIRE(stripped("foo bar"sv) == "foo bar");
+}
+
+TEST_CASE("skipws: locale", "[skipws]")
+{
+	// The input begins with U+2003 (EM SPACE).
+	auto const padded = L"\u2003foo"sv;
+
+	// Assume the default locale is C: the em space is left in place.
+	auto const in_default = nihil::skipws(padded);
+	REQUIRE(in_default == L"\u2003foo");
+
+	// With the C.UTF-8 locale the em space is skipped.
+	auto const in_utf8 = nihil::skipws(padded, std::locale("C.UTF-8"));
+	REQUIRE(in_utf8 == L"foo");
+}
diff --git a/nihil.util/test_tabulate.cc b/nihil.util/test_tabulate.cc
new file mode 100644
index 0000000..8dee796
--- /dev/null
+++ b/nihil.util/test_tabulate.cc
@@ -0,0 +1,75 @@
+/*
+ * This source code is released into the public domain.
+ */
+
+#include <iterator>
+#include <string>
+#include <vector>
+
+#include <catch2/catch_test_macros.hpp>
+
+import nihil.util;
+
+using namespace std::literals;
+using namespace nihil;
+
+TEST_CASE("tabulate: basic", "[tabulate]")
+{
+ // Two rows of three cells each.
+ auto input = std::vector{
+ std::vector{"a", "foo", "b"},
+ std::vector{"bar", "c", "baz"},
+ };
+
+ // The text after ':' in each field ("1", "2", "3") shows up as the first
+ // line of the expected output, followed by one line per input row.
+ auto result = std::string();
+ tabulate("{:1} {:2} {:3}", input, std::back_inserter(result));
+ // NOTE(review): the comparison is whitespace-sensitive; confirm the
+ // spacing of the expected text against tabulate's column padding.
+ REQUIRE(result ==
+"1 2 3\n"
+"a foo b\n"
+"bar c baz\n");
+}
+
+TEST_CASE("tabulate: basic wide", "[tabulate]")
+{
+	// Wide-character variant of the basic test: two rows of three cells.
+	auto input = std::vector{
+		std::vector{L"a", L"foo", L"b"},
+		std::vector{L"bar", L"c", L"baz"},
+	};
+
+	auto result = std::wstring();
+	wtabulate(L"{:1} {:2} {:3}", input, std::back_inserter(result));
+
+	// Every fragment carries the L prefix explicitly.  Adjacent literals
+	// are concatenated into a single wide literal either way, but spelling
+	// the prefix on each line makes the intended type obvious.
+	// NOTE(review): the comparison is whitespace-sensitive; confirm the
+	// spacing of the expected text against wtabulate's column padding.
+	REQUIRE(result ==
+L"1 2 3\n"
+L"a foo b\n"
+L"bar c baz\n");
+}
+
+TEST_CASE("tabulate: jagged", "[tabulate]")
+{
+ // The second row has only two cells although the format names three
+ // columns.
+ auto input = std::vector{
+ std::vector{"a", "foo", "b"},
+ std::vector{"bar", "baz"},
+ };
+
+ // The short row is expected to produce a line with just its two cells.
+ auto result = std::string();
+ tabulate("{:1} {:2} {:3}", input, std::back_inserter(result));
+ // NOTE(review): the comparison is whitespace-sensitive; confirm the
+ // spacing of the expected text against tabulate's column padding.
+ REQUIRE(result ==
+"1 2 3\n"
+"a foo b\n"
+"bar baz\n");
+}
+
+TEST_CASE("tabulate: align", "[tabulate]")
+{
+ // Cells of very different lengths share columns two and three.
+ auto input = std::vector{
+ std::vector{"a", "longvalue", "s"},
+ std::vector{"a", "s", "longvalue"},
+ };
+
+ // Fields two and three carry '<' and '>' before the ':' -- presumably
+ // left and right alignment flags; verify against tabulate's format
+ // specification.
+ auto result = std::string();
+ tabulate("{:1} {<:2} {>:3}", input, std::back_inserter(result));
+ // NOTE(review): alignment can only be observed through padding, so the
+ // exact spacing of the expected text matters; confirm it against
+ // tabulate's actual output.
+ REQUIRE(result ==
+"1 2 3\n"
+"a longvalue s\n"
+"a s longvalue\n");
+}