diff options
Diffstat (limited to 'nihil.util')
| -rw-r--r-- | nihil.util/CMakeLists.txt | 36 | ||||
| -rw-r--r-- | nihil.util/ctype.ccm | 87 | ||||
| -rw-r--r-- | nihil.util/next_word.ccm | 49 | ||||
| -rw-r--r-- | nihil.util/nihil.util.ccm | 13 | ||||
| -rw-r--r-- | nihil.util/parse_size.ccm | 107 | ||||
| -rw-r--r-- | nihil.util/skipws.ccm | 40 | ||||
| -rw-r--r-- | nihil.util/tabulate.ccm | 312 | ||||
| -rw-r--r-- | nihil.util/test_ctype.cc | 373 | ||||
| -rw-r--r-- | nihil.util/test_next_word.cc | 65 | ||||
| -rw-r--r-- | nihil.util/test_parse_size.cc | 168 | ||||
| -rw-r--r-- | nihil.util/test_skipws.cc | 45 | ||||
| -rw-r--r-- | nihil.util/test_tabulate.cc | 75 |
12 files changed, 1370 insertions, 0 deletions
diff --git a/nihil.util/CMakeLists.txt b/nihil.util/CMakeLists.txt new file mode 100644 index 0000000..b809a68 --- /dev/null +++ b/nihil.util/CMakeLists.txt @@ -0,0 +1,36 @@ +# This source code is released into the public domain. + +add_library(nihil.util STATIC) +target_link_libraries(nihil.util PRIVATE nihil.core nihil.error nihil.monad) +target_sources(nihil.util + PUBLIC FILE_SET modules TYPE CXX_MODULES FILES + nihil.util.ccm + + ctype.ccm + parse_size.ccm + next_word.ccm + skipws.ccm + tabulate.ccm +) + +if(NIHIL_TESTS) + enable_testing() + + add_executable(nihil.util.test + test_ctype.cc + test_parse_size.cc + test_next_word.cc + test_skipws.cc + test_tabulate.cc + ) + target_link_libraries(nihil.util.test PRIVATE + nihil.util + Catch2::Catch2WithMain + ) + + find_package(Catch2 REQUIRED) + + include(CTest) + include(Catch) + catch_discover_tests(nihil.util.test) +endif() diff --git a/nihil.util/ctype.ccm b/nihil.util/ctype.ccm new file mode 100644 index 0000000..6d30c4f --- /dev/null +++ b/nihil.util/ctype.ccm @@ -0,0 +1,87 @@ +/* + * This source code is released into the public domain. + */ + +module; + +#include <concepts> +#include <locale> + +export module nihil.util:ctype; + +namespace nihil { + +/* + * ctype_is: wrap std::ctype<T>::is() in a form suitable for use as an algorithm + * predicate, i.e., ctype_is(m) will return a functor object that takes any char + * type as an argument and returns bool. + * + * If the locale is not specified, the current global locale is used by default. + * + * ctype_is copies the locale, so passing a temporary is fine. 
+ */ + +export struct ctype_is final { + ctype_is(std::ctype_base::mask mask_, + std::locale const &locale_ = std::locale()) + : m_mask(mask_) + , m_locale(locale_) + {} + + [[nodiscard]] auto operator()(this ctype_is const &self, + std::integral auto c) + { + using ctype = std::ctype<decltype(c)>; + auto &facet = std::use_facet<ctype>(self.m_locale); + return facet.is(self.m_mask, c); + } + +private: + std::ctype_base::mask m_mask; + std::locale m_locale; +}; + +// Predefined tests for the current global locale. + +export inline auto is_space = ctype_is(std::ctype_base::space); +export inline auto is_print = ctype_is(std::ctype_base::print); +export inline auto is_cntrl = ctype_is(std::ctype_base::cntrl); +export inline auto is_upper = ctype_is(std::ctype_base::upper); +export inline auto is_lower = ctype_is(std::ctype_base::lower); +export inline auto is_alpha = ctype_is(std::ctype_base::alpha); +export inline auto is_digit = ctype_is(std::ctype_base::digit); +export inline auto is_punct = ctype_is(std::ctype_base::punct); +export inline auto is_xdigit = ctype_is(std::ctype_base::xdigit); +export inline auto is_blank = ctype_is(std::ctype_base::blank); +export inline auto is_alnum = ctype_is(std::ctype_base::alnum); +export inline auto is_graph = ctype_is(std::ctype_base::graph); + +// Predefined tests for the C locale. The C locale is guaranteed to always be +// available, so this doesn't create lifetime issues. 
+ +export inline auto is_c_space = + ctype_is(std::ctype_base::space, std::locale::classic()); +export inline auto is_c_print = + ctype_is(std::ctype_base::print, std::locale::classic()); +export inline auto is_c_cntrl = + ctype_is(std::ctype_base::cntrl, std::locale::classic()); +export inline auto is_c_upper = + ctype_is(std::ctype_base::upper, std::locale::classic()); +export inline auto is_c_lower = + ctype_is(std::ctype_base::lower, std::locale::classic()); +export inline auto is_c_alpha = + ctype_is(std::ctype_base::alpha, std::locale::classic()); +export inline auto is_c_digit = + ctype_is(std::ctype_base::digit, std::locale::classic()); +export inline auto is_c_punct = + ctype_is(std::ctype_base::punct, std::locale::classic()); +export inline auto is_c_xdigit = + ctype_is(std::ctype_base::xdigit, std::locale::classic()); +export inline auto is_c_blank = + ctype_is(std::ctype_base::blank, std::locale::classic()); +export inline auto is_c_alnum = + ctype_is(std::ctype_base::alnum, std::locale::classic()); +export inline auto is_c_graph = + ctype_is(std::ctype_base::graph, std::locale::classic()); + +} // namespace nihil diff --git a/nihil.util/next_word.ccm b/nihil.util/next_word.ccm new file mode 100644 index 0000000..c5d3ad7 --- /dev/null +++ b/nihil.util/next_word.ccm @@ -0,0 +1,49 @@ +/* + * This source code is released into the public domain. + */ + +module; + +#include <algorithm> +#include <locale> +#include <ranges> +#include <string> +#include <utility> + +export module nihil.util:next_word; + +import :skipws; + +namespace nihil { + +/* + * Return the next word from a string_view. Skips leading whitespace, so + * calling this repeatedly will return each word from the string. 
+ */ + +export template<typename Char> [[nodiscard]] +auto next_word(std::basic_string_view<Char> text, + std::locale const &locale = std::locale()) + -> std::pair<std::basic_string_view<Char>, + std::basic_string_view<Char>> +{ + text = skipws(text, locale); + + auto is_space = ctype_is(std::ctype_base::space, locale); + auto split_pos = std::ranges::find_if(text, is_space); + + return {{std::ranges::begin(text), split_pos}, + {split_pos, std::ranges::end(text)}}; +} + +export template<typename Char> +auto next_word(std::basic_string_view<Char> *text, + std::locale const &locale = std::locale()) + -> std::basic_string_view<Char> +{ + auto [word, rest] = next_word(*text, locale); + *text = rest; + return word; +} + +} // namespace nihil diff --git a/nihil.util/nihil.util.ccm b/nihil.util/nihil.util.ccm new file mode 100644 index 0000000..afd513a --- /dev/null +++ b/nihil.util/nihil.util.ccm @@ -0,0 +1,13 @@ +/* + * This source code is released into the public domain. + */ + +module; + +export module nihil.util; + +export import :ctype; +export import :parse_size; +export import :next_word; +export import :skipws; +export import :tabulate; diff --git a/nihil.util/parse_size.ccm b/nihil.util/parse_size.ccm new file mode 100644 index 0000000..c692578 --- /dev/null +++ b/nihil.util/parse_size.ccm @@ -0,0 +1,107 @@ +/* + * This source code is released into the public domain. 
*/

module;

#include <algorithm>
#include <coroutine>
#include <cstdint>
#include <expected>
#include <limits>
#include <ranges>
#include <string>
#include <string_view>
#include <system_error>
#include <utility>

export module nihil.util:parse_size;

import nihil.core;
import nihil.error;
import nihil.monad;

import :ctype;

namespace nihil {

/*
 * Map a unit suffix (k, m, g, t or p, in either case) to its multiplier.
 * Units are powers of 1024.  Any other character yields errc::invalid_unit.
 */
template<typename Char>
auto get_multiplier(Char c) -> std::expected<std::uint64_t, error>
{
	auto ret = std::uint64_t{1};

	// Each larger unit deliberately falls through the smaller ones, picking
	// up one factor of 1024 per step.
	switch (c) {
	case 'p': case 'P': ret *= 1024; [[fallthrough]];
	case 't': case 'T': ret *= 1024; [[fallthrough]];
	case 'g': case 'G': ret *= 1024; [[fallthrough]];
	case 'm': case 'M': ret *= 1024; [[fallthrough]];
	case 'k': case 'K': ret *= 1024;
		return ret;

	default:
		return std::unexpected(error(errc::invalid_unit));
	}
}

/*
 * Parse a string containing a human-formatted size, such as "1024"
 * or "4g".  Parsing is always done in the "C" locale and does not
 * recognise thousands separators or negative numbers.
 *
 * Errors:
 *   errc::empty_string              the string has no leading digits
 *   errc::invalid_unit              unknown suffix, or more than one
 *                                   character after the digits
 *   std::errc::result_out_of_range  the value does not fit in T
 */
export template<typename T, typename Char> [[nodiscard]]
auto parse_size(std::basic_string_view<Char> str)
	-> std::expected<T, error>
{
	constexpr auto max = std::numeric_limits<T>::max();

	// Extract the numeric part of the string.
	auto it = std::ranges::find_if_not(str, is_c_digit);
	auto num_str = std::basic_string_view<Char>(
				std::ranges::begin(str), it);

	if (num_str.empty())
		co_return std::unexpected(error(errc::empty_string));

	auto ret = T{0};

	for (auto c : num_str) {
		// Check before multiplying and before adding so that ret
		// itself never overflows.
		if (ret > (max / 10))
			co_return std::unexpected(error(
				std::errc::result_out_of_range));
		ret *= 10;

		auto digit = static_cast<T>(c - '0');
		if ((max - digit) < ret)
			co_return std::unexpected(error(
				std::errc::result_out_of_range));
		ret += digit;
	}

	if (it == str.end())
		// No multiplier.
		co_return ret;

	auto mchar = *it++;

	if (it != str.end())
		// Multiplier is more than one character.
		co_return std::unexpected(error(errc::invalid_unit));

	auto mult = co_await get_multiplier(mchar);

	if (std::cmp_greater(ret, max / mult))
		co_return std::unexpected(error(
			std::errc::result_out_of_range));

	// The check above guarantees the product fits in T.
	co_return static_cast<T>(ret * mult);
}

// Convenience overload for narrow C strings; `s` is handed straight to
// std::string_view, so it must point to a NUL-terminated string.
export template<typename T>
[[nodiscard]] inline auto parse_size(char const *s)
{
	return parse_size<T>(std::string_view(s));
}

// Convenience overload for wide C strings; same requirement on `s`.
export template<typename T>
[[nodiscard]] inline auto parse_size(wchar_t const *s)
{
	return parse_size<T>(std::wstring_view(s));
}

} // namespace nihil

// diff --git a/nihil.util/skipws.ccm b/nihil.util/skipws.ccm
// new file mode 100644
// index 0000000..4813ae8
// --- /dev/null
// +++ b/nihil.util/skipws.ccm
// @@ -0,0 +1,40 @@

/*
 * This source code is released into the public domain.
 */

module;

#include <algorithm>
#include <locale>
#include <ranges>
#include <string>
#include <string_view>

export module nihil.util:skipws;

import :ctype;

namespace nihil {

/*
 * Remove leading whitespace from a string.  Whitespace is whatever
 * std::ctype_base::space matches in the given locale.
 */

export template<typename Char> [[nodiscard]]
auto skipws(std::basic_string_view<Char> text,
	    std::locale const &locale = std::locale())
	-> std::basic_string_view<Char>
{
	auto is_space = ctype_is(std::ctype_base::space, locale);
	auto nonws = std::ranges::find_if_not(text, is_space);
	return {nonws, std::ranges::end(text)};
}

// In-place variant: replaces *text with its whitespace-trimmed view.
export template<typename Char>
auto skipws(std::basic_string_view<Char> *text,
	    std::locale const &locale = std::locale())
	-> void
{
	*text = skipws(*text, locale);
}

} // namespace nihil

// diff --git a/nihil.util/tabulate.ccm b/nihil.util/tabulate.ccm
// new file mode 100644
// index 0000000..5998b24
// --- /dev/null
// +++ b/nihil.util/tabulate.ccm
// @@ -0,0 +1,312 @@

/*
 * This source code is released into the public domain.
*/

module;

#include <algorithm>
#include <cstddef>
#include <cstdlib>
#include <format>
#include <iterator>
#include <ranges>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

export module nihil.util:tabulate;

import nihil.error;
import :ctype;

namespace nihil {

/*
 * tabulate: format the given range in an ASCII table and write the output
 * to the given output iterator.  The range's values will be converted to
 * strings as if by std::format.
 *
 * tabulate is implemented by copying the range; this allows it to work on
 * input/forward ranges at the cost of slightly increased memory use.
 *
 * The table spec is a string consisting of zero or more field formats,
 * formatted as {flags:fieldname}; both flags and fieldname are optional.
 * If there are fewer field formats than fields, the remaining fields
 * are formatted as if by {:}.
 *
 * The following flags are supported:
 *
 *	<	left-align this column (default)
 *	>	right-align this column
 */

// Exception thrown when a table spec is invalid.
export struct table_spec_error : error {
	table_spec_error(std::string_view what)
		: error(what)
	{
	}
};

/*
 * The specification for a single field: its header name, its alignment and
 * the width of the widest value seen so far.
 *
 * The name is held as a string_view, so the text it refers to must outlive
 * the field_spec.
 */
template<typename Char>
struct field_spec {
	enum align_t { left, right };

	// Get the name of this field.
	auto name(this field_spec const &self)
		-> std::basic_string_view<Char>
	{
		return self.m_name;
	}

	// Set the name of this field.
	auto name(this field_spec &self,
		  std::basic_string_view<Char> new_name)
		-> void
	{
		self.m_name = new_name;
	}

	// Set this field's alignment.
	auto align(this field_spec &self, align_t new_align) -> void
	{
		self.m_align = new_align;
	}

	// Ensure the length of this field is at least the given width.
	// The width only ever grows.
	auto ensure_width(this field_spec &self, std::size_t newwidth)
		-> void
	{
		self.m_width = std::max(self.m_width, newwidth);
	}

	// Format an object to a string based on our field spec.  Every value
	// is currently formatted with the plain "{}" format string.
	[[nodiscard]] auto format(this field_spec const &, auto &&obj)
		-> std::basic_string<Char>
	{
		auto format_string = std::basic_string<Char>{'{', '}'};
		return std::format(std::runtime_format(format_string), obj);
	}

	// Print a column value to an output iterator according to our field
	// spec.  If is_last is true, this is the last field on the line, so
	// we won't output any trailing padding.
	//
	// `out` is taken by reference and is left pointing just past the last
	// character written.
	auto print(this field_spec const &self,
		   std::basic_string_view<Char> value,
		   std::output_iterator<Char> auto &out,
		   bool is_last)
		-> void
	{
		// A value wider than the column gets no padding; without this
		// guard the unsigned subtraction would wrap around.
		auto const padding = (self.m_width > value.size())
			? self.m_width - value.size()
			: std::size_t{0};

		auto const pad = [&] {
			for (std::size_t i = 0; i < padding; ++i)
				*out++ = ' ';
		};

		if (self.m_align == right)
			pad();

		// ranges::copy works on a copy of the iterator, so take the
		// advanced position back; otherwise a positional iterator
		// (e.g. a pointer) would have the value overwritten by
		// whatever is written next.
		out = std::ranges::copy(value, std::move(out)).out;

		if (!is_last && self.m_align == left)
			pad();
	}

private:
	std::basic_string_view<Char>	m_name;
	std::size_t			m_width = 0;
	align_t				m_align = left;
};

/*
 * The specification for an entire table: an ordered list of field specs,
 * one per column.
 */
template<typename Char>
struct table_spec {
	// Add a new field spec to this table.
	auto add(this table_spec &self, field_spec<Char> field) -> void
	{
		self.m_fields.emplace_back(std::move(field));
	}

	// Return the field spec for a given field.  If the field doesn't
	// exist, this field and any intermediate fields will be created.
	[[nodiscard]] auto field(this table_spec &self, std::size_t fieldnr)
		-> field_spec<Char> &
	{
		if (fieldnr >= self.m_fields.size())
			self.m_fields.resize(fieldnr + 1);
		return self.m_fields.at(fieldnr);
	}

	// The number of columns in this table.
	[[nodiscard]] auto columns(this table_spec const &self) -> std::size_t
	{
		return self.m_fields.size();
	}

	// Return all the fields in this table.
	[[nodiscard]] auto fields(this table_spec const &self)
		-> std::vector<field_spec<Char>> const &
	{
		return self.m_fields;
	}

private:
	std::vector<field_spec<Char>> m_fields;
};

// Parse the field flags, e.g. '<'.
// Reads flag characters starting at `pos`.  Returns with `pos` just after a
// ':' or on a '}'; throws table_spec_error on an unknown flag character or if
// the input ends first.
template<typename Char,
	 std::input_iterator Iterator, std::sentinel_for<Iterator> Sentinel>
auto parse_field_flags(field_spec<Char> &field, Iterator &pos, Sentinel end)
	-> void
{
	// Compare with != rather than <: an input iterator and its sentinel
	// are only required to be equality-comparable.
	while (pos != end) {
		switch (*pos) {
		case '<':
			field.align(field_spec<Char>::left);
			break;
		case '>':
			field.align(field_spec<Char>::right);
			break;
		case ':':
			++pos;
			[[fallthrough]];
		case '}':
			return;
		default:
			throw table_spec_error("Invalid table spec: "
					       "unknown flag character");
		}

		if (++pos == end)
			throw table_spec_error("Invalid table spec: "
					       "unterminated field");
	}
}

// Parse a complete field spec, e.g. "{<:NAME}".  On return `pos` is just
// past the closing '}'.
//
// The field name is returned as a view into [pos, end), which is why the
// iterators must be contiguous and the underlying text must outlive the
// returned field_spec.
template<typename Char,
	 std::contiguous_iterator Iterator,
	 std::sized_sentinel_for<Iterator> Sentinel>
[[nodiscard]] auto parse_field(Iterator &pos, Sentinel end)
	-> field_spec<Char>
{
	auto field = field_spec<Char>{};

	if (pos == end)
		throw table_spec_error("Invalid table spec: empty field");

	// The field spec should start with a '{'.
	if (*pos != '{')
		throw table_spec_error("Invalid table spec: expected '{'");

	if (++pos == end)
		throw table_spec_error("Invalid table spec: unterminated field");

	// This consumes 'pos' up to and including the ':'.
	parse_field_flags(field, pos, end);

	auto brace = std::ranges::find(pos, end, '}');
	if (brace == end)
		throw table_spec_error("Invalid table spec: expected '}'");

	field.name(std::basic_string_view<Char>(pos, brace));
	pos = std::next(brace);

	// The field must be at least as wide as its header.
	field.ensure_width(field.name().size());

	return field;
}

// Parse a whole table spec: zero or more field specs, optionally separated
// by C-locale whitespace.  Throws table_spec_error if any field is malformed.
template<typename Char>
[[nodiscard]] auto parse_table_spec(std::basic_string_view<Char> spec)
	-> table_spec<Char>
{
	auto table = table_spec<Char>();

	auto pos = std::ranges::begin(spec);
	auto end = std::ranges::end(spec);

	for (;;) {
		// Skip leading whitespace
		while (pos != end && is_c_space(*pos))
			++pos;

		if (pos == end)
			break;

		table.add(parse_field<Char>(pos, end));
	}

	return table;
}

/*
 * Format `range` (a range of rows, each itself a range of column values) as
 * a table described by `table_spec` and write it to `out`.
 *
 * The output is one header line built from the field names, then one line
 * per row; columns are separated by a single space and each line ends with
 * '\n'.  Throws table_spec_error if the spec is invalid.
 */
export template<typename Char,
		std::ranges::range Range,
		std::output_iterator<Char> Iterator>
auto basic_tabulate(std::basic_string_view<Char> table_spec,
		    Range &&range,
		    Iterator &&out)
	-> void
{
	// Parse the table spec.
	auto table = parse_table_spec(table_spec);

	// Create our copy of the input data.
	auto data = std::vector<std::vector<std::basic_string<Char>>>();
	// Reserve the first row for the header.
	data.resize(1);

	// Find the required length of each field.
	for (auto &&row : range) {
		// LLVM doesn't have std::enumerate_view yet
		auto i = std::size_t{0};
		auto &this_row = data.emplace_back();

		for (auto &&column : row) {
			// field() creates the column on demand, so rows may
			// have more columns than the spec names.
			auto &field = table.field(i);
			auto &str = this_row.emplace_back(field.format(column));
			field.ensure_width(str.size());
			++i;
		}
	}

	// Add the header row.  This is done after reading the data so that
	// columns created on demand above get a (blank) header cell too.
	for (auto &&field : table.fields())
		data.at(0).emplace_back(std::from_range, field.name());

	// Print the values.
	for (auto &&row : data) {
		for (std::size_t i = 0; i < row.size(); ++i) {
			auto &field = table.field(i);
			bool is_last = (i == row.size() - 1);

			field.print(row[i], out, is_last);

			if (!is_last)
				*out++ = ' ';
		}

		*out++ = '\n';
	}
}

// Narrow-character front end for basic_tabulate.
export auto tabulate(std::string_view table_spec,
		     std::ranges::range auto &&range,
		     std::output_iterator<char> auto &&out)
{
	return basic_tabulate<char>(table_spec,
				    std::forward<decltype(range)>(range),
				    std::forward<decltype(out)>(out));
}

// Wide-character front end for basic_tabulate.
export auto wtabulate(std::wstring_view table_spec,
		      std::ranges::range auto &&range,
		      std::output_iterator<wchar_t> auto &&out)
{
	return basic_tabulate<wchar_t>(table_spec,
				       std::forward<decltype(range)>(range),
				       std::forward<decltype(out)>(out));
}

} // namespace nihil

// diff --git a/nihil.util/test_ctype.cc b/nihil.util/test_ctype.cc
// new file mode 100644
// index 0000000..62721d1
// --- /dev/null
// +++ b/nihil.util/test_ctype.cc
// @@ -0,0 +1,373 @@

/*
 * This source code is released into the public domain.
+ */ + +#include <catch2/catch_test_macros.hpp> + +import nihil.util; + +TEST_CASE("ctype: space", "[ctype]") { + auto is_utf8_space = + nihil::ctype_is(std::ctype_base::space, + std::locale("C.UTF-8")); + + // '\v' (vertical tab) is a space + REQUIRE(nihil::is_space('\v') == true); + REQUIRE(nihil::is_space(L'\v') == true); + + REQUIRE(nihil::is_c_space('\v') == true); + REQUIRE(nihil::is_c_space(L'\v') == true); + + REQUIRE(is_utf8_space('\v') == true); + REQUIRE(is_utf8_space(L'\v') == true); + + // 'x' is not a space + REQUIRE(nihil::is_space('x') == false); + REQUIRE(nihil::is_space(L'x') == false); + + REQUIRE(nihil::is_c_space('x') == false); + REQUIRE(nihil::is_c_space(L'x') == false); + + REQUIRE(is_utf8_space('x') == false); + REQUIRE(is_utf8_space(L'x') == false); + + // U+2003 EM SPACE is a space + REQUIRE(nihil::is_space(L'\u2003') == false); + REQUIRE(nihil::is_c_space(L'\u2003') == false); + REQUIRE(is_utf8_space(L'\u2003') == true); +} + +TEST_CASE("ctype: print", "[ctype]") { + auto is_utf8_print = + nihil::ctype_is(std::ctype_base::print, + std::locale("C.UTF-8")); + + // 'x' is printable + REQUIRE(nihil::is_print('x') == true); + REQUIRE(nihil::is_print(L'x') == true); + + REQUIRE(nihil::is_c_print('x') == true); + REQUIRE(nihil::is_c_print(L'x') == true); + + REQUIRE(is_utf8_print('x') == true); + REQUIRE(is_utf8_print(L'x') == true); + + // '\003' is not printable + REQUIRE(nihil::is_print('\003') == false); + REQUIRE(nihil::is_print(L'\003') == false); + + REQUIRE(nihil::is_c_print('\003') == false); + REQUIRE(nihil::is_c_print(L'\003') == false); + + REQUIRE(is_utf8_print('\003') == false); + REQUIRE(is_utf8_print(L'\003') == false); + + // U+0410 CYRILLIC CAPITAL LETTER A is printable + REQUIRE(nihil::is_print(L'\u0410') == false); + REQUIRE(nihil::is_c_print(L'\u0410') == false); + REQUIRE(is_utf8_print(L'\u0410') == true); +} + +TEST_CASE("ctype: cntrl", "[ctype]") { + auto is_utf8_cntrl = + nihil::ctype_is(std::ctype_base::cntrl, + 
std::locale("C.UTF-8")); + + // '\003' is a control character + REQUIRE(nihil::is_cntrl('\003') == true); + REQUIRE(nihil::is_cntrl(L'\003') == true); + + REQUIRE(nihil::is_c_cntrl('\003') == true); + REQUIRE(nihil::is_c_cntrl(L'\003') == true); + + REQUIRE(is_utf8_cntrl('\003') == true); + REQUIRE(is_utf8_cntrl(L'\003') == true); + + + // 'x' is not a control character + REQUIRE(nihil::is_cntrl('x') == false); + REQUIRE(nihil::is_cntrl(L'x') == false); + + REQUIRE(nihil::is_c_cntrl('x') == false); + REQUIRE(nihil::is_c_cntrl(L'x') == false); + + REQUIRE(is_utf8_cntrl('x') == false); + REQUIRE(is_utf8_cntrl(L'x') == false); + + // U+00AD SOFT HYPHEN is a control character. + REQUIRE(nihil::is_cntrl(L'\u00ad') == false); + REQUIRE(nihil::is_c_cntrl(L'\u00ad') == false); + REQUIRE(is_utf8_cntrl(L'\u00ad') == true); +} + +TEST_CASE("ctype: upper", "[ctype]") { + auto is_utf8_upper = + nihil::ctype_is(std::ctype_base::upper, + std::locale("C.UTF-8")); + + // 'A' is upper case + REQUIRE(nihil::is_upper('A') == true); + REQUIRE(nihil::is_upper(L'A') == true); + + REQUIRE(nihil::is_c_upper('A') == true); + REQUIRE(nihil::is_c_upper(L'A') == true); + + REQUIRE(is_utf8_upper('A') == true); + REQUIRE(is_utf8_upper(L'A') == true); + + // 'a' is not upper case + REQUIRE(nihil::is_upper('a') == false); + REQUIRE(nihil::is_upper(L'a') == false); + + REQUIRE(nihil::is_c_upper('a') == false); + REQUIRE(nihil::is_c_upper(L'a') == false); + + REQUIRE(is_utf8_upper('a') == false); + REQUIRE(is_utf8_upper(L'a') == false); + + // U+0410 CYRILLIC CAPITAL LETTER A is upper case + REQUIRE(nihil::is_upper(L'\u0410') == false); + REQUIRE(nihil::is_c_upper(L'\u0410') == false); + REQUIRE(is_utf8_upper(L'\u0410') == true); +} + +TEST_CASE("ctype: lower", "[ctype]") { + auto is_utf8_lower = + nihil::ctype_is(std::ctype_base::lower, + std::locale("C.UTF-8")); + + // 'a' is lower case + REQUIRE(nihil::is_lower('a') == true); + REQUIRE(nihil::is_lower(L'a') == true); + + 
REQUIRE(nihil::is_c_lower('a') == true); + REQUIRE(nihil::is_c_lower(L'a') == true); + + REQUIRE(is_utf8_lower('a') == true); + REQUIRE(is_utf8_lower(L'a') == true); + + // 'A' is not lower case + REQUIRE(nihil::is_lower('A') == false); + REQUIRE(nihil::is_lower(L'A') == false); + + REQUIRE(nihil::is_c_lower('A') == false); + REQUIRE(nihil::is_c_lower(L'A') == false); + + REQUIRE(is_utf8_lower('A') == false); + REQUIRE(is_utf8_lower(L'A') == false); + + // U+0430 CYRILLIC SMALL LETTER A + REQUIRE(nihil::is_lower(L'\u0430') == false); + REQUIRE(nihil::is_c_lower(L'\u0430') == false); + REQUIRE(is_utf8_lower(L'\u0430') == true); +} + +TEST_CASE("ctype: alpha", "[ctype]") { + auto is_utf8_alpha = + nihil::ctype_is(std::ctype_base::alpha, + std::locale("C.UTF-8")); + + // 'a' is alphabetical + REQUIRE(nihil::is_alpha('a') == true); + REQUIRE(nihil::is_alpha(L'a') == true); + + REQUIRE(nihil::is_c_alpha('a') == true); + REQUIRE(nihil::is_c_alpha(L'a') == true); + + REQUIRE(is_utf8_alpha('a') == true); + REQUIRE(is_utf8_alpha(L'a') == true); + + // '1' is not alphabetical + REQUIRE(nihil::is_alpha('1') == false); + REQUIRE(nihil::is_alpha(L'1') == false); + + REQUIRE(nihil::is_c_alpha('1') == false); + REQUIRE(nihil::is_c_alpha(L'1') == false); + + REQUIRE(is_utf8_alpha('1') == false); + REQUIRE(is_utf8_alpha(L'1') == false); + + // U+0430 CYRILLIC SMALL LETTER A + REQUIRE(nihil::is_alpha(L'\u0430') == false); + REQUIRE(nihil::is_c_alpha(L'\u0430') == false); + REQUIRE(is_utf8_alpha(L'\u0430') == true); +} + +TEST_CASE("ctype: digit", "[ctype]") { + auto is_utf8_digit = + nihil::ctype_is(std::ctype_base::digit, + std::locale("C.UTF-8")); + + // '1' is a digit + REQUIRE(nihil::is_digit('1') == true); + REQUIRE(nihil::is_digit(L'1') == true); + + REQUIRE(nihil::is_c_digit('1') == true); + REQUIRE(nihil::is_c_digit(L'1') == true); + + REQUIRE(is_utf8_digit('1') == true); + REQUIRE(is_utf8_digit(L'1') == true); + + // 'a' is not a digit + REQUIRE(nihil::is_digit('a') == 
false); + REQUIRE(nihil::is_digit(L'a') == false); + + REQUIRE(nihil::is_c_digit('a') == false); + REQUIRE(nihil::is_c_digit(L'a') == false); + + REQUIRE(is_utf8_digit('a') == false); + REQUIRE(is_utf8_digit(L'a') == false); + + // U+0660 ARABIC-INDIC DIGIT ZERO + REQUIRE(nihil::is_digit(L'\u0660') == false); + REQUIRE(nihil::is_c_digit(L'\u0660') == false); + REQUIRE(is_utf8_digit(L'\u0660') == true); +} + +TEST_CASE("ctype: punct", "[ctype]") { + auto is_utf8_punct = + nihil::ctype_is(std::ctype_base::punct, + std::locale("C.UTF-8")); + + // ';' is punctuation + REQUIRE(nihil::is_punct(';') == true); + REQUIRE(nihil::is_punct(L';') == true); + + REQUIRE(nihil::is_c_punct(';') == true); + REQUIRE(nihil::is_c_punct(L';') == true); + + REQUIRE(is_utf8_punct(';') == true); + REQUIRE(is_utf8_punct(L';') == true); + + // 'a' is not punctuation + REQUIRE(nihil::is_punct('a') == false); + REQUIRE(nihil::is_punct(L'a') == false); + + REQUIRE(nihil::is_c_punct('a') == false); + REQUIRE(nihil::is_c_punct(L'a') == false); + + REQUIRE(is_utf8_punct('a') == false); + REQUIRE(is_utf8_punct(L'a') == false); + + // U+00A1 INVERTED EXCLAMATION MARK + REQUIRE(nihil::is_punct(L'\u00A1') == false); + REQUIRE(nihil::is_c_punct(L'\u00A1') == false); + REQUIRE(is_utf8_punct(L'\u00A1') == true); +} + +TEST_CASE("ctype: xdigit", "[ctype]") { + auto is_utf8_xdigit = + nihil::ctype_is(std::ctype_base::xdigit, + std::locale("C.UTF-8")); + + // 'f' is an xdigit + REQUIRE(nihil::is_xdigit('f') == true); + REQUIRE(nihil::is_xdigit(L'f') == true); + + REQUIRE(nihil::is_c_xdigit('f') == true); + REQUIRE(nihil::is_c_xdigit(L'f') == true); + + REQUIRE(is_utf8_xdigit('f') == true); + REQUIRE(is_utf8_xdigit(L'f') == true); + + // 'g' is not an xdigit + REQUIRE(nihil::is_xdigit('g') == false); + REQUIRE(nihil::is_xdigit(L'g') == false); + + REQUIRE(nihil::is_c_xdigit('g') == false); + REQUIRE(nihil::is_c_xdigit(L'g') == false); + + REQUIRE(is_utf8_xdigit('g') == false); + REQUIRE(is_utf8_xdigit(L'g') 
== false); +} + +TEST_CASE("ctype: blank", "[ctype]") { + auto is_utf8_blank = + nihil::ctype_is(std::ctype_base::blank, + std::locale("C.UTF-8")); + + // '\t' is a blank + REQUIRE(nihil::is_blank('\t') == true); + REQUIRE(nihil::is_blank(L'\t') == true); + + REQUIRE(nihil::is_c_blank('\t') == true); + REQUIRE(nihil::is_c_blank(L'\t') == true); + + REQUIRE(is_utf8_blank('\t') == true); + REQUIRE(is_utf8_blank(L'\t') == true); + + // '\v' is not a blank + REQUIRE(nihil::is_blank('\v') == false); + REQUIRE(nihil::is_blank(L'\v') == false); + + REQUIRE(nihil::is_c_blank('\v') == false); + REQUIRE(nihil::is_c_blank(L'\v') == false); + + REQUIRE(is_utf8_blank('\v') == false); + REQUIRE(is_utf8_blank(L'\v') == false); + + // There don't seem to be any UTF-8 blank characters, at least + // in FreeBSD libc. +} + +TEST_CASE("ctype: alnum", "[ctype]") { + auto is_utf8_alnum = + nihil::ctype_is(std::ctype_base::alnum, + std::locale("C.UTF-8")); + + // 'a' is alphanumeric + REQUIRE(nihil::is_alnum('a') == true); + REQUIRE(nihil::is_alnum(L'a') == true); + + REQUIRE(nihil::is_c_alnum('a') == true); + REQUIRE(nihil::is_c_alnum(L'a') == true); + + REQUIRE(is_utf8_alnum('a') == true); + REQUIRE(is_utf8_alnum(L'a') == true); + + // '\t' is not a alnum + REQUIRE(nihil::is_alnum('\t') == false); + REQUIRE(nihil::is_alnum(L'\t') == false); + + REQUIRE(nihil::is_c_alnum('\t') == false); + REQUIRE(nihil::is_c_alnum(L'\t') == false); + + REQUIRE(is_utf8_alnum('\t') == false); + REQUIRE(is_utf8_alnum(L'\t') == false); + + // U+0430 CYRILLIC SMALL LETTER A + REQUIRE(nihil::is_alnum(L'\u0430') == false); + REQUIRE(nihil::is_c_alnum(L'\u0430') == false); + REQUIRE(is_utf8_alnum(L'\u0430') == true); +} + +TEST_CASE("ctype: graph", "[ctype]") { + auto is_utf8_graph = + nihil::ctype_is(std::ctype_base::graph, + std::locale("C.UTF-8")); + + // 'a' is graphical + REQUIRE(nihil::is_graph('a') == true); + REQUIRE(nihil::is_graph(L'a') == true); + + REQUIRE(nihil::is_c_graph('a') == true); + 
REQUIRE(nihil::is_c_graph(L'a') == true); + + REQUIRE(is_utf8_graph('a') == true); + REQUIRE(is_utf8_graph(L'a') == true); + + // '\t' is not graphical + REQUIRE(nihil::is_graph('\t') == false); + REQUIRE(nihil::is_graph(L'\t') == false); + + REQUIRE(nihil::is_c_graph('\t') == false); + REQUIRE(nihil::is_c_graph(L'\t') == false); + + REQUIRE(is_utf8_graph('\t') == false); + REQUIRE(is_utf8_graph(L'\t') == false); + + // U+0430 CYRILLIC SMALL LETTER A + REQUIRE(nihil::is_graph(L'\u0430') == false); + REQUIRE(nihil::is_c_graph(L'\u0430') == false); + REQUIRE(is_utf8_graph(L'\u0430') == true); +} diff --git a/nihil.util/test_next_word.cc b/nihil.util/test_next_word.cc new file mode 100644 index 0000000..7e61237 --- /dev/null +++ b/nihil.util/test_next_word.cc @@ -0,0 +1,65 @@ +/* + * This source code is released into the public domain. + */ + +#include <locale> +#include <string> + +#include <catch2/catch_test_macros.hpp> + +import nihil.util; + +TEST_CASE("next_word: basic", "[next_word]") +{ + using namespace std::literals; + auto s = "foo bar baz"sv; + + auto words = nihil::next_word(s); + REQUIRE(words.first == "foo"); + REQUIRE(words.second == " bar baz"); + + auto word = nihil::next_word(&s); + REQUIRE(word == "foo"); + REQUIRE(s == " bar baz"); +} + +TEST_CASE("next_word: multiple spaces", "[next_word]") +{ + using namespace std::literals; + auto s = "foo bar baz"sv; + + auto words = nihil::next_word(s); + REQUIRE(words.first == "foo"); + REQUIRE(words.second == " bar baz"); + + auto word = nihil::next_word(&s); + REQUIRE(word == "foo"); + REQUIRE(s == " bar baz"); +} + +TEST_CASE("next_word: leading spaces", "[next_word]") +{ + using namespace std::literals; + auto s = " \tfoo bar baz"sv; + + auto words = nihil::next_word(s); + REQUIRE(words.first == "foo"); + REQUIRE(words.second == " bar baz"); + + auto word = nihil::next_word(&s); + REQUIRE(word == "foo"); + REQUIRE(s == " bar baz"); +} + +TEST_CASE("next_word: locale", "[next_word]") +{ + using namespace 
std::literals; + auto s = L"\u2003foo\u2003bar\u2003baz"sv; + + auto words = nihil::next_word(s); + REQUIRE(words.first == s); + + words = nihil::next_word(s, std::locale("C.UTF-8")); + REQUIRE(words.first == L"foo"); + REQUIRE(words.second == L"\u2003bar\u2003baz"); +} diff --git a/nihil.util/test_parse_size.cc b/nihil.util/test_parse_size.cc new file mode 100644 index 0000000..4f4d018 --- /dev/null +++ b/nihil.util/test_parse_size.cc @@ -0,0 +1,168 @@ +/* + * This source code is released into the public domain. + */ + +#include <cstdint> +#include <system_error> + +#include <catch2/catch_test_macros.hpp> + +import nihil.core; +import nihil.util; + +TEST_CASE("parse_size: empty value", "[nihil]") +{ + using namespace nihil; + + auto n = parse_size<std::uint64_t>(""); + REQUIRE(!n); + REQUIRE(n.error() == nihil::errc::empty_string); +} + +TEST_CASE("parse_size: basic", "[nihil]") +{ + using namespace nihil; + + SECTION("bare number") { + auto n = parse_size<std::uint64_t>("1024").value(); + REQUIRE(n == 1024); + } + + SECTION("max value, unsigned") { + auto n = parse_size<std::uint16_t>("65535").value(); + REQUIRE(n == 65535); + } + + SECTION("max value, signed") { + auto n = parse_size<std::uint16_t>("32767").value(); + REQUIRE(n == 32767); + } + + SECTION("overflow by 1, unsigned") { + auto n = parse_size<std::uint16_t>("65536"); + REQUIRE(!n); + REQUIRE(n.error() == std::errc::result_out_of_range); + } + + SECTION("overflow by 1, signed") { + auto n = parse_size<std::int16_t>("32768"); + REQUIRE(!n); + REQUIRE(n.error() == std::errc::result_out_of_range); + } + + SECTION("overflow by many, unsigned") { + auto n = parse_size<std::uint16_t>("100000"); + REQUIRE(!n); + REQUIRE(n.error() == std::errc::result_out_of_range); + } + + SECTION("overflow by many, signed") { + auto n = parse_size<std::int16_t>("100000"); + REQUIRE(!n); + REQUIRE(n.error() == std::errc::result_out_of_range); + } +} + +TEST_CASE("parse_size: invalid multiplier", "[nihil]") +{ + using 
namespace nihil; + + auto n = parse_size<std::uint64_t>("4z"); + REQUIRE(!n); + REQUIRE(n.error() == nihil::errc::invalid_unit); + + n = parse_size<std::uint64_t>("4kz"); + REQUIRE(!n); + REQUIRE(n.error() == nihil::errc::invalid_unit); +} + +TEST_CASE("parse_size: multipliers", "[nihil]") +{ + using namespace nihil; + + auto sf = static_cast<std::uint64_t>(4); + + SECTION("k") { + auto n = parse_size<std::uint64_t>("4k").value(); + REQUIRE(n == sf * 1024); + } + + SECTION("m") { + auto n = parse_size<std::uint64_t>("4m").value(); + REQUIRE(n == sf * 1024 * 1024); + } + + SECTION("g") { + auto n = parse_size<std::uint64_t>("4g").value(); + REQUIRE(n == sf * 1024 * 1024 * 1024); + } + + SECTION("t") { + auto n = parse_size<std::uint64_t>("4t").value(); + REQUIRE(n == sf * 1024 * 1024 * 1024 * 1024); + } + + SECTION("p") { + auto n = parse_size<std::uint64_t>("4p").value(); + REQUIRE(n == sf * 1024 * 1024 * 1024 * 1024 * 1024); + } +} + +TEST_CASE("parse_size: multiplier overflow", "[nihil]") +{ + using namespace nihil; + + SECTION("signed") { + auto n = parse_size<std::uint16_t>("64k"); + REQUIRE(!n); + REQUIRE(n.error() == std::errc::result_out_of_range); + } + + SECTION("unsigned") { + auto n = parse_size<std::int16_t>("32k"); + REQUIRE(!n); + REQUIRE(n.error() == std::errc::result_out_of_range); + } +} + +TEST_CASE("parse_size: wide", "[nihil]") +{ + using namespace nihil; + + SECTION("bare number") { + auto n = parse_size<std::uint64_t>(L"1024").value(); + REQUIRE(n == 1024); + } +} + +TEST_CASE("parse_size: wide multipliers", "[nihil]") +{ + using namespace nihil; + + auto sf = static_cast<std::uint64_t>(4); + + SECTION("k") { + auto n = parse_size<std::uint64_t>(L"4k").value(); + REQUIRE(n == sf * 1024); + } + + SECTION("m") { + auto n = parse_size<std::uint64_t>(L"4m").value(); + REQUIRE(n == sf * 1024 * 1024); + } + + SECTION("g") { + auto n = parse_size<std::uint64_t>(L"4g").value(); + REQUIRE(n == sf * 1024 * 1024 * 1024); + } + + SECTION("t") { + auto n = 
parse_size<std::uint64_t>(L"4t").value(); + REQUIRE(n == sf * 1024 * 1024 * 1024 * 1024); + } + + SECTION("p") { + auto n = parse_size<std::uint64_t>(L"4p").value(); + REQUIRE(n == sf * 1024 * 1024 * 1024 * 1024 * 1024); + } +} diff --git a/nihil.util/test_skipws.cc b/nihil.util/test_skipws.cc new file mode 100644 index 0000000..837c1f3 --- /dev/null +++ b/nihil.util/test_skipws.cc @@ -0,0 +1,45 @@ +/* + * This source code is released into the public domain. + */ + +#include <locale> +#include <string> +using namespace std::literals; + +#include <catch2/catch_test_macros.hpp> + +import nihil.util; + +TEST_CASE("skipws: basic", "[skipws]") /* By-value overload: a leading space is removed, while trailing and interior spaces are returned unchanged. */ +{ + REQUIRE(nihil::skipws("foo"sv) == "foo"); + REQUIRE(nihil::skipws(" foo"sv) == "foo"); + REQUIRE(nihil::skipws("foo "sv) == "foo "); + REQUIRE(nihil::skipws("foo bar"sv) == "foo bar"); +} + +TEST_CASE("skipws: pointer", "[skipws]") /* Same four inputs through the pointer overload, which is expected to update s in place. */ +{ + auto s = "foo"sv; + nihil::skipws(&s); + REQUIRE(s == "foo"); + + s = " foo"sv; + nihil::skipws(&s); + REQUIRE(s == "foo"); + + s = "foo "sv; + nihil::skipws(&s); + REQUIRE(s == "foo "); + + s = "foo bar"sv; + nihil::skipws(&s); + REQUIRE(s == "foo bar"); +} + +TEST_CASE("skipws: locale", "[skipws]") /* U+2003 (EM SPACE) is expected to be kept without an explicit locale and skipped when C.UTF-8 is passed. */ +{ + // Assume the default locale is C. + REQUIRE(nihil::skipws(L"\u2003foo"sv) == L"\u2003foo"); + REQUIRE(nihil::skipws(L"\u2003foo"sv, std::locale("C.UTF-8")) == L"foo"); +} diff --git a/nihil.util/test_tabulate.cc b/nihil.util/test_tabulate.cc new file mode 100644 index 0000000..8dee796 --- /dev/null +++ b/nihil.util/test_tabulate.cc @@ -0,0 +1,75 @@ +/* + * This source code is released into the public domain. 
+ */ + +#include <iterator> +#include <string> +#include <vector> + +#include <catch2/catch_test_macros.hpp> + +import nihil.util; + +using namespace std::literals; +using namespace nihil; + +TEST_CASE("tabulate: basic", "[tabulate]") /* Two three-cell rows are written through a back_inserter; the first expected line is "1 2 3", matching the text after each colon in the format string. NOTE(review): any column padding in the expected strings is not visible here; confirm against the original file. */ +{ + auto input = std::vector{ + std::vector{"a", "foo", "b"}, + std::vector{"bar", "c", "baz"}, + }; + + auto result = std::string(); + tabulate("{:1} {:2} {:3}", input, std::back_inserter(result)); + REQUIRE(result == +"1 2 3\n" +"a foo b\n" +"bar c baz\n"); +} + +TEST_CASE("tabulate: basic wide", "[tabulate]") /* Wide-string variant using wtabulate. Only the first expected literal carries the L prefix; adjacent unprefixed literals take the prefix of their neighbour when concatenated. */ +{ + auto input = std::vector{ + std::vector{L"a", L"foo", L"b"}, + std::vector{L"bar", L"c", L"baz"}, + }; + + auto result = std::wstring(); + wtabulate(L"{:1} {:2} {:3}", input, std::back_inserter(result)); + + REQUIRE(result == +L"1 2 3\n" +"a foo b\n" +"bar c baz\n"); +} + +TEST_CASE("tabulate: jagged", "[tabulate]") /* The second row has two cells while the format string names three fields. */ +{ + auto input = std::vector{ + std::vector{"a", "foo", "b"}, + std::vector{"bar", "baz"}, + }; + + auto result = std::string(); + tabulate("{:1} {:2} {:3}", input, std::back_inserter(result)); + REQUIRE(result == +"1 2 3\n" +"a foo b\n" +"bar baz\n"); +} + +TEST_CASE("tabulate: align", "[tabulate]") /* Fields 2 and 3 carry < and > in the format string, presumably alignment flags; verify against the tabulate documentation. */ +{ + auto input = std::vector{ + std::vector{"a", "longvalue", "s"}, + std::vector{"a", "s", "longvalue"}, + }; + + auto result = std::string(); + tabulate("{:1} {<:2} {>:3}", input, std::back_inserter(result)); + REQUIRE(result == +"1 2 3\n" +"a longvalue s\n" +"a s longvalue\n"); +} |
