diff options
| author | Lexi Winter <lexi@le-fay.org> | 2025-06-29 19:25:29 +0100 |
|---|---|---|
| committer | Lexi Winter <lexi@le-fay.org> | 2025-06-29 19:25:29 +0100 |
| commit | bc524d70253a4ab2fe40c3ca3e5666e267c0a4d1 (patch) | |
| tree | 1e629e7b46b1d9972a973bc93fd100bcebd395be /src/catch2/internal/catch_xmlwriter.cpp | |
| download | nihil-bc524d70253a4ab2fe40c3ca3e5666e267c0a4d1.tar.gz nihil-bc524d70253a4ab2fe40c3ca3e5666e267c0a4d1.tar.bz2 | |
import catch2 3.8.1vendor/catch2/3.8.1vendor/catch2
Diffstat (limited to 'src/catch2/internal/catch_xmlwriter.cpp')
| -rw-r--r-- | src/catch2/internal/catch_xmlwriter.cpp | 328 |
1 files changed, 328 insertions, 0 deletions
diff --git a/src/catch2/internal/catch_xmlwriter.cpp b/src/catch2/internal/catch_xmlwriter.cpp new file mode 100644 index 0000000..ccf63a5 --- /dev/null +++ b/src/catch2/internal/catch_xmlwriter.cpp @@ -0,0 +1,328 @@ + +// Copyright Catch2 Authors +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE.txt or copy at +// https://www.boost.org/LICENSE_1_0.txt) + +// SPDX-License-Identifier: BSL-1.0 +// Note: swapping these two includes around causes MSVC to error out +// while in /permissive- mode. No, I don't know why. +// Tested on VS 2019, 18.{3, 4}.x +#include <catch2/internal/catch_enforce.hpp> +#include <catch2/internal/catch_xmlwriter.hpp> + +#include <cstdint> +#include <iomanip> +#include <type_traits> + +namespace Catch { + +namespace { + + size_t trailingBytes(unsigned char c) { + if ((c & 0xE0) == 0xC0) { + return 2; + } + if ((c & 0xF0) == 0xE0) { + return 3; + } + if ((c & 0xF8) == 0xF0) { + return 4; + } + CATCH_INTERNAL_ERROR("Invalid multibyte utf-8 start byte encountered"); + } + + uint32_t headerValue(unsigned char c) { + if ((c & 0xE0) == 0xC0) { + return c & 0x1F; + } + if ((c & 0xF0) == 0xE0) { + return c & 0x0F; + } + if ((c & 0xF8) == 0xF0) { + return c & 0x07; + } + CATCH_INTERNAL_ERROR("Invalid multibyte utf-8 start byte encountered"); + } + + void hexEscapeChar(std::ostream& os, unsigned char c) { + std::ios_base::fmtflags f(os.flags()); + os << "\\x" + << std::uppercase << std::hex << std::setfill('0') << std::setw(2) + << static_cast<int>(c); + os.flags(f); + } + + constexpr bool shouldNewline(XmlFormatting fmt) { + return !!(static_cast<std::underlying_type_t<XmlFormatting>>(fmt & XmlFormatting::Newline)); + } + + constexpr bool shouldIndent(XmlFormatting fmt) { + return !!(static_cast<std::underlying_type_t<XmlFormatting>>(fmt & XmlFormatting::Indent)); + } + +} // anonymous namespace + + void XmlEncode::encodeTo( std::ostream& os ) const { + // Apostrophe escaping not necessary if we always use " to write attributes + // (see: http://www.w3.org/TR/xml/#syntax) + + for( std::size_t idx = 0; idx < m_str.size(); ++ idx ) { + unsigned char c = static_cast<unsigned char>(m_str[idx]); + switch (c) { + case '<': os << "<"; break; + case '&': os << "&"; break; + + case '>': + // See: http://www.w3.org/TR/xml/#syntax + if (idx > 2 && m_str[idx - 1] == ']' && m_str[idx - 2] == ']') + os << ">"; + else + os << c; + break; + + case '\"': + if (m_forWhat == ForAttributes) + os << """; + else + os << c; + break; + + default: + // Check for control characters and invalid utf-8 + + // Escape control characters in standard ascii + // see http://stackoverflow.com/questions/404107/why-are-control-characters-illegal-in-xml-1-0 + if (c < 0x09 || (c > 0x0D && c < 0x20) || c == 0x7F) { + hexEscapeChar(os, c); + break; + } + + // Plain ASCII: Write it to stream + if (c < 0x7F) { + os << c; + break; + } + + // UTF-8 territory + // Check if the encoding is valid and if it is not, hex escape bytes. + // Important: We do not check the exact decoded values for validity, only the encoding format + // First check that this bytes is a valid lead byte: + // This means that it is not encoded as 1111 1XXX + // Or as 10XX XXXX + if (c < 0xC0 || + c >= 0xF8) { + hexEscapeChar(os, c); + break; + } + + auto encBytes = trailingBytes(c); + // Are there enough bytes left to avoid accessing out-of-bounds memory? + if (idx + encBytes - 1 >= m_str.size()) { + hexEscapeChar(os, c); + break; + } + // The header is valid, check data + // The next encBytes bytes must together be a valid utf-8 + // This means: bitpattern 10XX XXXX and the extracted value is sane (ish) + bool valid = true; + uint32_t value = headerValue(c); + for (std::size_t n = 1; n < encBytes; ++n) { + unsigned char nc = static_cast<unsigned char>(m_str[idx + n]); + valid &= ((nc & 0xC0) == 0x80); + value = (value << 6) | (nc & 0x3F); + } + + if ( + // Wrong bit pattern of following bytes + (!valid) || + // Overlong encodings + (value < 0x80) || + (0x80 <= value && value < 0x800 && encBytes > 2) || + (0x800 < value && value < 0x10000 && encBytes > 3) || + // Encoded value out of range + (value >= 0x110000) + ) { + hexEscapeChar(os, c); + break; + } + + // If we got here, this is in fact a valid(ish) utf-8 sequence + for (std::size_t n = 0; n < encBytes; ++n) { + os << m_str[idx + n]; + } + idx += encBytes - 1; + break; + } + } + } + + std::ostream& operator << ( std::ostream& os, XmlEncode const& xmlEncode ) { + xmlEncode.encodeTo( os ); + return os; + } + + XmlWriter::ScopedElement::ScopedElement( XmlWriter* writer, XmlFormatting fmt ) + : m_writer( writer ), + m_fmt(fmt) + {} + + XmlWriter::ScopedElement::ScopedElement( ScopedElement&& other ) noexcept + : m_writer( other.m_writer ), + m_fmt(other.m_fmt) + { + other.m_writer = nullptr; + other.m_fmt = XmlFormatting::None; + } + XmlWriter::ScopedElement& XmlWriter::ScopedElement::operator=( ScopedElement&& other ) noexcept { + if ( m_writer ) { + m_writer->endElement(); + } + m_writer = other.m_writer; + other.m_writer = nullptr; + m_fmt = other.m_fmt; + other.m_fmt = XmlFormatting::None; + return *this; + } + + + XmlWriter::ScopedElement::~ScopedElement() { + if (m_writer) { + m_writer->endElement(m_fmt); + } + } + + XmlWriter::ScopedElement& + XmlWriter::ScopedElement::writeText( StringRef text, XmlFormatting fmt ) { + m_writer->writeText( text, fmt ); + return *this; + } + + XmlWriter::ScopedElement& + XmlWriter::ScopedElement::writeAttribute( StringRef name, + StringRef attribute ) { + m_writer->writeAttribute( name, attribute ); + return *this; + } + + + XmlWriter::XmlWriter( std::ostream& os ) : m_os( os ) + { + writeDeclaration(); + } + + XmlWriter::~XmlWriter() { + while (!m_tags.empty()) { + endElement(); + } + newlineIfNecessary(); + } + + XmlWriter& XmlWriter::startElement( std::string const& name, XmlFormatting fmt ) { + ensureTagClosed(); + newlineIfNecessary(); + if (shouldIndent(fmt)) { + m_os << m_indent; + m_indent += " "; + } + m_os << '<' << name; + m_tags.push_back( name ); + m_tagIsOpen = true; + applyFormatting(fmt); + return *this; + } + + XmlWriter::ScopedElement XmlWriter::scopedElement( std::string const& name, XmlFormatting fmt ) { + ScopedElement scoped( this, fmt ); + startElement( name, fmt ); + return scoped; + } + + XmlWriter& XmlWriter::endElement(XmlFormatting fmt) { + m_indent = m_indent.substr(0, m_indent.size() - 2); + + if( m_tagIsOpen ) { + m_os << "/>"; + m_tagIsOpen = false; + } else { + newlineIfNecessary(); + if (shouldIndent(fmt)) { + m_os << m_indent; + } + m_os << "</" << m_tags.back() << '>'; + } + m_os << std::flush; + applyFormatting(fmt); + m_tags.pop_back(); + return *this; + } + + XmlWriter& XmlWriter::writeAttribute( StringRef name, + StringRef attribute ) { + if( !name.empty() && !attribute.empty() ) + m_os << ' ' << name << "=\"" << XmlEncode( attribute, XmlEncode::ForAttributes ) << '"'; + return *this; + } + + XmlWriter& XmlWriter::writeAttribute( StringRef name, bool attribute ) { + writeAttribute(name, (attribute ? "true"_sr : "false"_sr)); + return *this; + } + + XmlWriter& XmlWriter::writeAttribute( StringRef name, + char const* attribute ) { + writeAttribute( name, StringRef( attribute ) ); + return *this; + } + + XmlWriter& XmlWriter::writeText( StringRef text, XmlFormatting fmt ) { + CATCH_ENFORCE(!m_tags.empty(), "Cannot write text as top level element"); + if( !text.empty() ){ + bool tagWasOpen = m_tagIsOpen; + ensureTagClosed(); + if (tagWasOpen && shouldIndent(fmt)) { + m_os << m_indent; + } + m_os << XmlEncode( text, XmlEncode::ForTextNodes ); + applyFormatting(fmt); + } + return *this; + } + + XmlWriter& XmlWriter::writeComment( StringRef text, XmlFormatting fmt ) { + ensureTagClosed(); + if (shouldIndent(fmt)) { + m_os << m_indent; + } + m_os << "<!-- " << text << " -->"; + applyFormatting(fmt); + return *this; + } + + void XmlWriter::writeStylesheetRef( StringRef url ) { + m_os << R"(<?xml-stylesheet type="text/xsl" href=")" << url << R"("?>)" << '\n'; + } + + void XmlWriter::ensureTagClosed() { + if( m_tagIsOpen ) { + m_os << '>' << std::flush; + newlineIfNecessary(); + m_tagIsOpen = false; + } + } + + void XmlWriter::applyFormatting(XmlFormatting fmt) { + m_needsNewline = shouldNewline(fmt); + } + + void XmlWriter::writeDeclaration() { + m_os << R"(<?xml version="1.0" encoding="UTF-8"?>)" << '\n'; + } + + void XmlWriter::newlineIfNecessary() { + if( m_needsNewline ) { + m_os << '\n' << std::flush; + m_needsNewline = false; + } + } +} |
